Git fork
at reftables-rust 510 lines 16 kB view raw
1/* 2 * diff-delta.c: generate a delta between two buffers 3 * 4 * This code was greatly inspired by parts of LibXDiff from Davide Libenzi 5 * http://www.xmailserver.org/xdiff-lib.html 6 * 7 * Rewritten for GIT by Nicolas Pitre <nico@fluxnic.net>, (C) 2005-2007 8 * 9 * This code is free software; you can redistribute it and/or modify 10 * it under the terms of the GNU General Public License version 2 as 11 * published by the Free Software Foundation. 12 */ 13 14#define DISABLE_SIGN_COMPARE_WARNINGS 15 16#include "git-compat-util.h" 17#include "delta.h" 18 19/* maximum hash entry list for the same hash bucket */ 20#define HASH_LIMIT 64 21 22#define RABIN_SHIFT 23 23#define RABIN_WINDOW 16 24 25static const unsigned int T[256] = { 26 0x00000000, 0xab59b4d1, 0x56b369a2, 0xfdeadd73, 0x063f6795, 0xad66d344, 27 0x508c0e37, 0xfbd5bae6, 0x0c7ecf2a, 0xa7277bfb, 0x5acda688, 0xf1941259, 28 0x0a41a8bf, 0xa1181c6e, 0x5cf2c11d, 0xf7ab75cc, 0x18fd9e54, 0xb3a42a85, 29 0x4e4ef7f6, 0xe5174327, 0x1ec2f9c1, 0xb59b4d10, 0x48719063, 0xe32824b2, 30 0x1483517e, 0xbfdae5af, 0x423038dc, 0xe9698c0d, 0x12bc36eb, 0xb9e5823a, 31 0x440f5f49, 0xef56eb98, 0x31fb3ca8, 0x9aa28879, 0x6748550a, 0xcc11e1db, 32 0x37c45b3d, 0x9c9defec, 0x6177329f, 0xca2e864e, 0x3d85f382, 0x96dc4753, 33 0x6b369a20, 0xc06f2ef1, 0x3bba9417, 0x90e320c6, 0x6d09fdb5, 0xc6504964, 34 0x2906a2fc, 0x825f162d, 0x7fb5cb5e, 0xd4ec7f8f, 0x2f39c569, 0x846071b8, 35 0x798aaccb, 0xd2d3181a, 0x25786dd6, 0x8e21d907, 0x73cb0474, 0xd892b0a5, 36 0x23470a43, 0x881ebe92, 0x75f463e1, 0xdeadd730, 0x63f67950, 0xc8afcd81, 37 0x354510f2, 0x9e1ca423, 0x65c91ec5, 0xce90aa14, 0x337a7767, 0x9823c3b6, 38 0x6f88b67a, 0xc4d102ab, 0x393bdfd8, 0x92626b09, 0x69b7d1ef, 0xc2ee653e, 39 0x3f04b84d, 0x945d0c9c, 0x7b0be704, 0xd05253d5, 0x2db88ea6, 0x86e13a77, 40 0x7d348091, 0xd66d3440, 0x2b87e933, 0x80de5de2, 0x7775282e, 0xdc2c9cff, 41 0x21c6418c, 0x8a9ff55d, 0x714a4fbb, 0xda13fb6a, 0x27f92619, 0x8ca092c8, 42 0x520d45f8, 0xf954f129, 0x04be2c5a, 0xafe7988b, 0x5432226d, 0xff6b96bc, 43 0x02814bcf, 0xa9d8ff1e, 0x5e738ad2, 0xf52a3e03, 0x08c0e370, 0xa39957a1, 44 0x584ced47, 0xf3155996, 0x0eff84e5, 0xa5a63034, 0x4af0dbac, 0xe1a96f7d, 45 0x1c43b20e, 0xb71a06df, 0x4ccfbc39, 0xe79608e8, 0x1a7cd59b, 0xb125614a, 46 0x468e1486, 0xedd7a057, 0x103d7d24, 0xbb64c9f5, 0x40b17313, 0xebe8c7c2, 47 0x16021ab1, 0xbd5bae60, 0x6cb54671, 0xc7ecf2a0, 0x3a062fd3, 0x915f9b02, 48 0x6a8a21e4, 0xc1d39535, 0x3c394846, 0x9760fc97, 0x60cb895b, 0xcb923d8a, 49 0x3678e0f9, 0x9d215428, 0x66f4eece, 0xcdad5a1f, 0x3047876c, 0x9b1e33bd, 50 0x7448d825, 0xdf116cf4, 0x22fbb187, 0x89a20556, 0x7277bfb0, 0xd92e0b61, 51 0x24c4d612, 0x8f9d62c3, 0x7836170f, 0xd36fa3de, 0x2e857ead, 0x85dcca7c, 52 0x7e09709a, 0xd550c44b, 0x28ba1938, 0x83e3ade9, 0x5d4e7ad9, 0xf617ce08, 53 0x0bfd137b, 0xa0a4a7aa, 0x5b711d4c, 0xf028a99d, 0x0dc274ee, 0xa69bc03f, 54 0x5130b5f3, 0xfa690122, 0x0783dc51, 0xacda6880, 0x570fd266, 0xfc5666b7, 55 0x01bcbbc4, 0xaae50f15, 0x45b3e48d, 0xeeea505c, 0x13008d2f, 0xb85939fe, 56 0x438c8318, 0xe8d537c9, 0x153feaba, 0xbe665e6b, 0x49cd2ba7, 0xe2949f76, 57 0x1f7e4205, 0xb427f6d4, 0x4ff24c32, 0xe4abf8e3, 0x19412590, 0xb2189141, 58 0x0f433f21, 0xa41a8bf0, 0x59f05683, 0xf2a9e252, 0x097c58b4, 0xa225ec65, 59 0x5fcf3116, 0xf49685c7, 0x033df00b, 0xa86444da, 0x558e99a9, 0xfed72d78, 60 0x0502979e, 0xae5b234f, 0x53b1fe3c, 0xf8e84aed, 0x17bea175, 0xbce715a4, 61 0x410dc8d7, 0xea547c06, 0x1181c6e0, 0xbad87231, 0x4732af42, 0xec6b1b93, 62 0x1bc06e5f, 0xb099da8e, 0x4d7307fd, 0xe62ab32c, 0x1dff09ca, 0xb6a6bd1b, 63 0x4b4c6068, 0xe015d4b9, 0x3eb80389, 0x95e1b758, 0x680b6a2b, 0xc352defa, 64 0x3887641c, 0x93ded0cd, 0x6e340dbe, 0xc56db96f, 0x32c6cca3, 0x999f7872, 65 0x6475a501, 0xcf2c11d0, 0x34f9ab36, 0x9fa01fe7, 0x624ac294, 0xc9137645, 66 0x26459ddd, 0x8d1c290c, 0x70f6f47f, 0xdbaf40ae, 0x207afa48, 0x8b234e99, 67 0x76c993ea, 0xdd90273b, 0x2a3b52f7, 0x8162e626, 0x7c883b55, 0xd7d18f84, 68 0x2c043562, 0x875d81b3, 0x7ab75cc0, 0xd1eee811 69}; 70 71static const unsigned int U[256] = { 72 0x00000000, 0x7eb5200d, 0x5633f4cb, 0x2886d4c6, 0x073e5d47, 0x798b7d4a, 73 0x510da98c, 0x2fb88981, 0x0e7cba8e, 0x70c99a83, 0x584f4e45, 0x26fa6e48, 74 0x0942e7c9, 0x77f7c7c4, 0x5f711302, 0x21c4330f, 0x1cf9751c, 0x624c5511, 75 0x4aca81d7, 0x347fa1da, 0x1bc7285b, 0x65720856, 0x4df4dc90, 0x3341fc9d, 76 0x1285cf92, 0x6c30ef9f, 0x44b63b59, 0x3a031b54, 0x15bb92d5, 0x6b0eb2d8, 77 0x4388661e, 0x3d3d4613, 0x39f2ea38, 0x4747ca35, 0x6fc11ef3, 0x11743efe, 78 0x3eccb77f, 0x40799772, 0x68ff43b4, 0x164a63b9, 0x378e50b6, 0x493b70bb, 79 0x61bda47d, 0x1f088470, 0x30b00df1, 0x4e052dfc, 0x6683f93a, 0x1836d937, 80 0x250b9f24, 0x5bbebf29, 0x73386bef, 0x0d8d4be2, 0x2235c263, 0x5c80e26e, 81 0x740636a8, 0x0ab316a5, 0x2b7725aa, 0x55c205a7, 0x7d44d161, 0x03f1f16c, 82 0x2c4978ed, 0x52fc58e0, 0x7a7a8c26, 0x04cfac2b, 0x73e5d470, 0x0d50f47d, 83 0x25d620bb, 0x5b6300b6, 0x74db8937, 0x0a6ea93a, 0x22e87dfc, 0x5c5d5df1, 84 0x7d996efe, 0x032c4ef3, 0x2baa9a35, 0x551fba38, 0x7aa733b9, 0x041213b4, 85 0x2c94c772, 0x5221e77f, 0x6f1ca16c, 0x11a98161, 0x392f55a7, 0x479a75aa, 86 0x6822fc2b, 0x1697dc26, 0x3e1108e0, 0x40a428ed, 0x61601be2, 0x1fd53bef, 87 0x3753ef29, 0x49e6cf24, 0x665e46a5, 0x18eb66a8, 0x306db26e, 0x4ed89263, 88 0x4a173e48, 0x34a21e45, 0x1c24ca83, 0x6291ea8e, 0x4d29630f, 0x339c4302, 89 0x1b1a97c4, 0x65afb7c9, 0x446b84c6, 0x3adea4cb, 0x1258700d, 0x6ced5000, 90 0x4355d981, 0x3de0f98c, 0x15662d4a, 0x6bd30d47, 0x56ee4b54, 0x285b6b59, 91 0x00ddbf9f, 0x7e689f92, 0x51d01613, 0x2f65361e, 0x07e3e2d8, 0x7956c2d5, 92 0x5892f1da, 0x2627d1d7, 0x0ea10511, 0x7014251c, 0x5facac9d, 0x21198c90, 93 0x099f5856, 0x772a785b, 0x4c921c31, 0x32273c3c, 0x1aa1e8fa, 0x6414c8f7, 94 0x4bac4176, 0x3519617b, 0x1d9fb5bd, 0x632a95b0, 0x42eea6bf, 0x3c5b86b2, 95 0x14dd5274, 0x6a687279, 0x45d0fbf8, 0x3b65dbf5, 0x13e30f33, 0x6d562f3e, 96 0x506b692d, 0x2ede4920, 0x06589de6, 0x78edbdeb, 0x5755346a, 0x29e01467, 97 0x0166c0a1, 0x7fd3e0ac, 0x5e17d3a3, 0x20a2f3ae, 0x08242768, 0x76910765, 98 0x59298ee4, 0x279caee9, 0x0f1a7a2f, 0x71af5a22, 0x7560f609, 0x0bd5d604, 99 0x235302c2, 0x5de622cf, 0x725eab4e, 0x0ceb8b43, 0x246d5f85, 0x5ad87f88, 100 0x7b1c4c87, 0x05a96c8a, 0x2d2fb84c, 0x539a9841, 0x7c2211c0, 0x029731cd, 101 0x2a11e50b, 0x54a4c506, 0x69998315, 0x172ca318, 0x3faa77de, 0x411f57d3, 102 0x6ea7de52, 0x1012fe5f, 0x38942a99, 0x46210a94, 0x67e5399b, 0x19501996, 103 0x31d6cd50, 0x4f63ed5d, 0x60db64dc, 0x1e6e44d1, 0x36e89017, 0x485db01a, 104 0x3f77c841, 0x41c2e84c, 0x69443c8a, 0x17f11c87, 0x38499506, 0x46fcb50b, 105 0x6e7a61cd, 0x10cf41c0, 0x310b72cf, 0x4fbe52c2, 0x67388604, 0x198da609, 106 0x36352f88, 0x48800f85, 0x6006db43, 0x1eb3fb4e, 0x238ebd5d, 0x5d3b9d50, 107 0x75bd4996, 0x0b08699b, 0x24b0e01a, 0x5a05c017, 0x728314d1, 0x0c3634dc, 108 0x2df207d3, 0x534727de, 0x7bc1f318, 0x0574d315, 0x2acc5a94, 0x54797a99, 109 0x7cffae5f, 0x024a8e52, 0x06852279, 0x78300274, 0x50b6d6b2, 0x2e03f6bf, 110 0x01bb7f3e, 0x7f0e5f33, 0x57888bf5, 0x293dabf8, 0x08f998f7, 0x764cb8fa, 111 0x5eca6c3c, 0x207f4c31, 0x0fc7c5b0, 0x7172e5bd, 0x59f4317b, 0x27411176, 112 0x1a7c5765, 0x64c97768, 0x4c4fa3ae, 0x32fa83a3, 0x1d420a22, 0x63f72a2f, 113 0x4b71fee9, 0x35c4dee4, 0x1400edeb, 0x6ab5cde6, 0x42331920, 0x3c86392d, 114 0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a 115}; 116 117struct index_entry { 118 const unsigned char *ptr; 119 unsigned int val; 120}; 121 122struct unpacked_index_entry { 123 struct index_entry entry; 124 struct unpacked_index_entry *next; 125}; 126 127struct delta_index { 128 unsigned long memsize; 129 const void *src_buf; 130 unsigned long src_size; 131 unsigned int hash_mask; 132 struct index_entry *hash[FLEX_ARRAY]; 133}; 134 135struct delta_index * create_delta_index(const void *buf, unsigned long bufsize) 136{ 137 unsigned int i, hsize, hmask, entries, prev_val, *hash_count; 138 const unsigned char *data, *buffer = buf; 139 struct delta_index *index; 140 struct unpacked_index_entry *entry, **hash; 141 struct index_entry *packed_entry, **packed_hash; 142 void *mem; 143 unsigned long memsize; 144 145 if (!buf || !bufsize) 146 return NULL; 147 148 /* Determine index hash size. Note that indexing skips the 149 first byte to allow for optimizing the Rabin's polynomial 150 initialization in create_delta(). */ 151 entries = (bufsize - 1) / RABIN_WINDOW; 152 if (bufsize >= 0xffffffffUL) { 153 /* 154 * Current delta format can't encode offsets into 155 * reference buffer with more than 32 bits. 156 */ 157 entries = 0xfffffffeU / RABIN_WINDOW; 158 } 159 hsize = entries / 4; 160 for (i = 4; (1u << i) < hsize; i++); 161 hsize = 1 << i; 162 hmask = hsize - 1; 163 164 /* allocate lookup index */ 165 memsize = sizeof(*hash) * hsize + 166 sizeof(*entry) * entries; 167 mem = malloc(memsize); 168 if (!mem) 169 return NULL; 170 hash = mem; 171 mem = hash + hsize; 172 entry = mem; 173 174 memset(hash, 0, hsize * sizeof(*hash)); 175 176 /* allocate an array to count hash entries */ 177 hash_count = calloc(hsize, sizeof(*hash_count)); 178 if (!hash_count) { 179 free(hash); 180 return NULL; 181 } 182 183 /* then populate the index */ 184 prev_val = ~0; 185 for (data = buffer + entries * RABIN_WINDOW - RABIN_WINDOW; 186 data >= buffer; 187 data -= RABIN_WINDOW) { 188 unsigned int val = 0; 189 for (i = 1; i <= RABIN_WINDOW; i++) 190 val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT]; 191 if (val == prev_val) { 192 /* keep the lowest of consecutive identical blocks */ 193 entry[-1].entry.ptr = data + RABIN_WINDOW; 194 --entries; 195 } else { 196 prev_val = val; 197 i = val & hmask; 198 entry->entry.ptr = data + RABIN_WINDOW; 199 entry->entry.val = val; 200 entry->next = hash[i]; 201 hash[i] = entry++; 202 hash_count[i]++; 203 } 204 } 205 206 /* 207 * Determine a limit on the number of entries in the same hash 208 * bucket. This guards us against pathological data sets causing 209 * really bad hash distribution with most entries in the same hash 210 * bucket that would bring us to O(m*n) computing costs (m and n 211 * corresponding to reference and target buffer sizes). 212 * 213 * Make sure none of the hash buckets has more entries than 214 * we're willing to test. Otherwise we cull the entry list 215 * uniformly to still preserve a good repartition across 216 * the reference buffer. 217 */ 218 for (i = 0; i < hsize; i++) { 219 int acc; 220 221 if (hash_count[i] <= HASH_LIMIT) 222 continue; 223 224 /* We leave exactly HASH_LIMIT entries in the bucket */ 225 entries -= hash_count[i] - HASH_LIMIT; 226 227 entry = hash[i]; 228 acc = 0; 229 230 /* 231 * Assume that this loop is gone through exactly 232 * HASH_LIMIT times and is entered and left with 233 * acc==0. So the first statement in the loop 234 * contributes (hash_count[i]-HASH_LIMIT)*HASH_LIMIT 235 * to the accumulator, and the inner loop consequently 236 * is run (hash_count[i]-HASH_LIMIT) times, removing 237 * one element from the list each time. Since acc 238 * balances out to 0 at the final run, the inner loop 239 * body can't be left with entry==NULL. So we indeed 240 * encounter entry==NULL in the outer loop only. 241 */ 242 do { 243 acc += hash_count[i] - HASH_LIMIT; 244 if (acc > 0) { 245 struct unpacked_index_entry *keep = entry; 246 do { 247 entry = entry->next; 248 acc -= HASH_LIMIT; 249 } while (acc > 0); 250 keep->next = entry->next; 251 } 252 entry = entry->next; 253 } while (entry); 254 } 255 free(hash_count); 256 257 /* 258 * Now create the packed index in array form 259 * rather than linked lists. 260 */ 261 memsize = sizeof(*index) 262 + sizeof(*packed_hash) * (hsize+1) 263 + sizeof(*packed_entry) * entries; 264 mem = malloc(memsize); 265 if (!mem) { 266 free(hash); 267 return NULL; 268 } 269 270 index = mem; 271 index->memsize = memsize; 272 index->src_buf = buf; 273 index->src_size = bufsize; 274 index->hash_mask = hmask; 275 276 mem = index->hash; 277 packed_hash = mem; 278 mem = packed_hash + (hsize+1); 279 packed_entry = mem; 280 281 for (i = 0; i < hsize; i++) { 282 /* 283 * Coalesce all entries belonging to one linked list 284 * into consecutive array entries. 285 */ 286 packed_hash[i] = packed_entry; 287 for (entry = hash[i]; entry; entry = entry->next) 288 *packed_entry++ = entry->entry; 289 } 290 291 /* Sentinel value to indicate the length of the last hash bucket */ 292 packed_hash[hsize] = packed_entry; 293 294 assert(packed_entry - (struct index_entry *)mem == entries); 295 free(hash); 296 297 return index; 298} 299 300void free_delta_index(struct delta_index *index) 301{ 302 free(index); 303} 304 305unsigned long sizeof_delta_index(struct delta_index *index) 306{ 307 if (index) 308 return index->memsize; 309 else 310 return 0; 311} 312 313/* 314 * The maximum size for any opcode sequence, including the initial header 315 * plus Rabin window plus biggest copy. 316 */ 317#define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7) 318 319void * 320create_delta(const struct delta_index *index, 321 const void *trg_buf, unsigned long trg_size, 322 unsigned long *delta_size, unsigned long max_size) 323{ 324 unsigned int i, val; 325 off_t outpos, moff; 326 size_t l, outsize, msize; 327 int inscnt; 328 const unsigned char *ref_data, *ref_top, *data, *top; 329 unsigned char *out; 330 331 *delta_size = 0; 332 333 if (!trg_buf || !trg_size) 334 return NULL; 335 336 outpos = 0; 337 outsize = 8192; 338 if (max_size && outsize >= max_size) 339 outsize = max_size + MAX_OP_SIZE + 1; 340 out = malloc(outsize); 341 if (!out) 342 return NULL; 343 344 /* store reference buffer size */ 345 l = index->src_size; 346 while (l >= 0x80) { 347 out[outpos++] = l | 0x80; 348 l >>= 7; 349 } 350 out[outpos++] = l; 351 352 /* store target buffer size */ 353 l = trg_size; 354 while (l >= 0x80) { 355 out[outpos++] = l | 0x80; 356 l >>= 7; 357 } 358 out[outpos++] = l; 359 360 ref_data = index->src_buf; 361 ref_top = ref_data + index->src_size; 362 data = trg_buf; 363 top = (const unsigned char *) trg_buf + trg_size; 364 365 outpos++; 366 val = 0; 367 for (i = 0; i < RABIN_WINDOW && data < top; i++, data++) { 368 out[outpos++] = *data; 369 val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT]; 370 } 371 inscnt = i; 372 373 moff = 0; 374 msize = 0; 375 while (data < top) { 376 if (msize < 4096) { 377 struct index_entry *entry; 378 val ^= U[data[-RABIN_WINDOW]]; 379 val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT]; 380 i = val & index->hash_mask; 381 for (entry = index->hash[i]; entry < index->hash[i+1]; entry++) { 382 const unsigned char *ref = entry->ptr; 383 const unsigned char *src = data; 384 unsigned int ref_size = ref_top - ref; 385 if (entry->val != val) 386 continue; 387 if (ref_size > top - src) 388 ref_size = top - src; 389 if (ref_size <= msize) 390 break; 391 while (ref_size-- && *src++ == *ref) 392 ref++; 393 if (msize < ref - entry->ptr) { 394 /* this is our best match so far */ 395 msize = ref - entry->ptr; 396 moff = entry->ptr - ref_data; 397 if (msize >= 4096) /* good enough */ 398 break; 399 } 400 } 401 } 402 403 if (msize < 4) { 404 if (!inscnt) 405 outpos++; 406 out[outpos++] = *data++; 407 inscnt++; 408 if (inscnt == 0x7f) { 409 out[outpos - inscnt - 1] = inscnt; 410 inscnt = 0; 411 } 412 msize = 0; 413 } else { 414 unsigned int left; 415 unsigned char *op; 416 417 if (inscnt) { 418 while (moff && ref_data[moff-1] == data[-1]) { 419 /* we can match one byte back */ 420 msize++; 421 moff--; 422 data--; 423 outpos--; 424 if (--inscnt) 425 continue; 426 outpos--; /* remove count slot */ 427 inscnt--; /* make it -1 */ 428 break; 429 } 430 out[outpos - inscnt - 1] = inscnt; 431 inscnt = 0; 432 } 433 434 /* A copy op is currently limited to 64KB (pack v2) */ 435 left = (msize < 0x10000) ? 0 : (msize - 0x10000); 436 msize -= left; 437 438 op = out + outpos++; 439 i = 0x80; 440 441 if (moff & 0x000000ff) { 442 out[outpos++] = moff >> 0; 443 i |= 0x01; 444 } 445 if (moff & 0x0000ff00) { 446 out[outpos++] = moff >> 8; 447 i |= 0x02; 448 } 449 if (moff & 0x00ff0000) { 450 out[outpos++] = moff >> 16; 451 i |= 0x04; 452 } 453 if (moff & 0xff000000) { 454 out[outpos++] = moff >> 24; 455 i |= 0x08; 456 } 457 458 if (msize & 0x00ff) { 459 out[outpos++] = msize >> 0; 460 i |= 0x10; 461 } 462 if (msize & 0xff00) { 463 out[outpos++] = msize >> 8; 464 i |= 0x20; 465 } 466 467 *op = i; 468 469 data += msize; 470 moff += msize; 471 msize = left; 472 473 if (moff > 0xffffffff) 474 msize = 0; 475 476 if (msize < 4096) { 477 int j; 478 val = 0; 479 for (j = -RABIN_WINDOW; j < 0; j++) 480 val = ((val << 8) | data[j]) 481 ^ T[val >> RABIN_SHIFT]; 482 } 483 } 484 485 if (outpos >= outsize - MAX_OP_SIZE) { 486 void *tmp = out; 487 outsize = outsize * 3 / 2; 488 if (max_size && outsize >= max_size) 489 outsize = max_size + MAX_OP_SIZE + 1; 490 if (max_size && outpos > max_size) 491 break; 492 out = realloc(out, outsize); 493 if (!out) { 494 free(tmp); 495 return NULL; 496 } 497 } 498 } 499 500 if (inscnt) 501 out[outpos - inscnt - 1] = inscnt; 502 503 if (max_size && outpos > max_size) { 504 free(out); 505 return NULL; 506 } 507 508 *delta_size = outpos; 509 return out; 510}