Git fork
at reftables-rust 374 lines 10 kB view raw
1/* 2 * LibXDiff by Davide Libenzi ( File Differential Library ) 3 * Copyright (C) 2003-2016 Davide Libenzi, Johannes E. Schindelin 4 * 5 * This library is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU Lesser General Public 7 * License as published by the Free Software Foundation; either 8 * version 2.1 of the License, or (at your option) any later version. 9 * 10 * This library is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Lesser General Public License for more details. 14 * 15 * You should have received a copy of the GNU Lesser General Public 16 * License along with this library; if not, see 17 * <http://www.gnu.org/licenses/>. 18 * 19 * Davide Libenzi <davidel@xmailserver.org> 20 * 21 */ 22 23#include "xinclude.h" 24 25/* 26 * The basic idea of patience diff is to find lines that are unique in 27 * both files. These are intuitively the ones that we want to see as 28 * common lines. 29 * 30 * The maximal ordered sequence of such line pairs (where ordered means 31 * that the order in the sequence agrees with the order of the lines in 32 * both files) naturally defines an initial set of common lines. 33 * 34 * Now, the algorithm tries to extend the set of common lines by growing 35 * the line ranges where the files have identical lines. 36 * 37 * Between those common lines, the patience diff algorithm is applied 38 * recursively, until no unique line pairs can be found; these line ranges 39 * are handled by the well-known Myers algorithm. 40 */ 41 42#define NON_UNIQUE ULONG_MAX 43 44/* 45 * This is a hash mapping from line hash to line numbers in the first and 46 * second file. 47 */ 48struct hashmap { 49 int nr, alloc; 50 struct entry { 51 unsigned long hash; 52 /* 53 * 0 = unused entry, 1 = first line, 2 = second, etc. 54 * line2 is NON_UNIQUE if the line is not unique 55 * in either the first or the second file. 56 */ 57 unsigned long line1, line2; 58 /* 59 * "next" & "previous" are used for the longest common 60 * sequence; 61 * initially, "next" reflects only the order in file1. 62 */ 63 struct entry *next, *previous; 64 65 /* 66 * If 1, this entry can serve as an anchor. See 67 * Documentation/diff-options.adoc for more information. 68 */ 69 unsigned anchor : 1; 70 } *entries, *first, *last; 71 /* were common records found? */ 72 unsigned long has_matches; 73 xdfenv_t *env; 74 xpparam_t const *xpp; 75}; 76 77static int is_anchor(xpparam_t const *xpp, const char *line) 78{ 79 size_t i; 80 for (i = 0; i < xpp->anchors_nr; i++) { 81 if (!strncmp(line, xpp->anchors[i], strlen(xpp->anchors[i]))) 82 return 1; 83 } 84 return 0; 85} 86 87/* The argument "pass" is 1 for the first file, 2 for the second. */ 88static void insert_record(xpparam_t const *xpp, int line, struct hashmap *map, 89 int pass) 90{ 91 xrecord_t *records = pass == 1 ? 92 map->env->xdf1.recs : map->env->xdf2.recs; 93 xrecord_t *record = &records[line - 1]; 94 /* 95 * After xdl_prepare_env() (or more precisely, due to 96 * xdl_classify_record()), the "ha" member of the records (AKA lines) 97 * is _not_ the hash anymore, but a linearized version of it. In 98 * other words, the "ha" member is guaranteed to start with 0 and 99 * the second record's ha can only be 0 or 1, etc. 100 * 101 * So we multiply ha by 2 in the hope that the hashing was 102 * "unique enough". 103 */ 104 int index = (int)((record->ha << 1) % map->alloc); 105 106 while (map->entries[index].line1) { 107 if (map->entries[index].hash != record->ha) { 108 if (++index >= map->alloc) 109 index = 0; 110 continue; 111 } 112 if (pass == 2) 113 map->has_matches = 1; 114 if (pass == 1 || map->entries[index].line2) 115 map->entries[index].line2 = NON_UNIQUE; 116 else 117 map->entries[index].line2 = line; 118 return; 119 } 120 if (pass == 2) 121 return; 122 map->entries[index].line1 = line; 123 map->entries[index].hash = record->ha; 124 map->entries[index].anchor = is_anchor(xpp, map->env->xdf1.recs[line - 1].ptr); 125 if (!map->first) 126 map->first = map->entries + index; 127 if (map->last) { 128 map->last->next = map->entries + index; 129 map->entries[index].previous = map->last; 130 } 131 map->last = map->entries + index; 132 map->nr++; 133} 134 135/* 136 * This function has to be called for each recursion into the inter-hunk 137 * parts, as previously non-unique lines can become unique when being 138 * restricted to a smaller part of the files. 139 * 140 * It is assumed that env has been prepared using xdl_prepare(). 141 */ 142static int fill_hashmap(xpparam_t const *xpp, xdfenv_t *env, 143 struct hashmap *result, 144 int line1, int count1, int line2, int count2) 145{ 146 result->xpp = xpp; 147 result->env = env; 148 149 /* We know exactly how large we want the hash map */ 150 result->alloc = count1 * 2; 151 if (!XDL_CALLOC_ARRAY(result->entries, result->alloc)) 152 return -1; 153 154 /* First, fill with entries from the first file */ 155 while (count1--) 156 insert_record(xpp, line1++, result, 1); 157 158 /* Then search for matches in the second file */ 159 while (count2--) 160 insert_record(xpp, line2++, result, 2); 161 162 return 0; 163} 164 165/* 166 * Find the longest sequence with a smaller last element (meaning a smaller 167 * line2, as we construct the sequence with entries ordered by line1). 168 */ 169static int binary_search(struct entry **sequence, int longest, 170 struct entry *entry) 171{ 172 int left = -1, right = longest; 173 174 while (left + 1 < right) { 175 int middle = left + (right - left) / 2; 176 /* by construction, no two entries can be equal */ 177 if (sequence[middle]->line2 > entry->line2) 178 right = middle; 179 else 180 left = middle; 181 } 182 /* return the index in "sequence", _not_ the sequence length */ 183 return left; 184} 185 186/* 187 * The idea is to start with the list of common unique lines sorted by 188 * the order in file1. For each of these pairs, the longest (partial) 189 * sequence whose last element's line2 is smaller is determined. 190 * 191 * For efficiency, the sequences are kept in a list containing exactly one 192 * item per sequence length: the sequence with the smallest last 193 * element (in terms of line2). 194 */ 195static int find_longest_common_sequence(struct hashmap *map, struct entry **res) 196{ 197 struct entry **sequence; 198 int longest = 0, i; 199 struct entry *entry; 200 201 /* 202 * If not -1, this entry in sequence must never be overridden. 203 * Therefore, overriding entries before this has no effect, so 204 * do not do that either. 205 */ 206 int anchor_i = -1; 207 208 if (!XDL_ALLOC_ARRAY(sequence, map->nr)) 209 return -1; 210 211 for (entry = map->first; entry; entry = entry->next) { 212 if (!entry->line2 || entry->line2 == NON_UNIQUE) 213 continue; 214 i = binary_search(sequence, longest, entry); 215 entry->previous = i < 0 ? NULL : sequence[i]; 216 ++i; 217 if (i <= anchor_i) 218 continue; 219 sequence[i] = entry; 220 if (entry->anchor) { 221 anchor_i = i; 222 longest = anchor_i + 1; 223 } else if (i == longest) { 224 longest++; 225 } 226 } 227 228 /* No common unique lines were found */ 229 if (!longest) { 230 *res = NULL; 231 xdl_free(sequence); 232 return 0; 233 } 234 235 /* Iterate starting at the last element, adjusting the "next" members */ 236 entry = sequence[longest - 1]; 237 entry->next = NULL; 238 while (entry->previous) { 239 entry->previous->next = entry; 240 entry = entry->previous; 241 } 242 *res = entry; 243 xdl_free(sequence); 244 return 0; 245} 246 247static int match(struct hashmap *map, int line1, int line2) 248{ 249 xrecord_t *record1 = &map->env->xdf1.recs[line1 - 1]; 250 xrecord_t *record2 = &map->env->xdf2.recs[line2 - 1]; 251 return record1->ha == record2->ha; 252} 253 254static int patience_diff(xpparam_t const *xpp, xdfenv_t *env, 255 int line1, int count1, int line2, int count2); 256 257static int walk_common_sequence(struct hashmap *map, struct entry *first, 258 int line1, int count1, int line2, int count2) 259{ 260 int end1 = line1 + count1, end2 = line2 + count2; 261 int next1, next2; 262 263 for (;;) { 264 /* Try to grow the line ranges of common lines */ 265 if (first) { 266 next1 = first->line1; 267 next2 = first->line2; 268 while (next1 > line1 && next2 > line2 && 269 match(map, next1 - 1, next2 - 1)) { 270 next1--; 271 next2--; 272 } 273 } else { 274 next1 = end1; 275 next2 = end2; 276 } 277 while (line1 < next1 && line2 < next2 && 278 match(map, line1, line2)) { 279 line1++; 280 line2++; 281 } 282 283 /* Recurse */ 284 if (next1 > line1 || next2 > line2) { 285 if (patience_diff(map->xpp, map->env, 286 line1, next1 - line1, 287 line2, next2 - line2)) 288 return -1; 289 } 290 291 if (!first) 292 return 0; 293 294 while (first->next && 295 first->next->line1 == first->line1 + 1 && 296 first->next->line2 == first->line2 + 1) 297 first = first->next; 298 299 line1 = first->line1 + 1; 300 line2 = first->line2 + 1; 301 302 first = first->next; 303 } 304} 305 306static int fall_back_to_classic_diff(struct hashmap *map, 307 int line1, int count1, int line2, int count2) 308{ 309 xpparam_t xpp; 310 311 memset(&xpp, 0, sizeof(xpp)); 312 xpp.flags = map->xpp->flags & ~XDF_DIFF_ALGORITHM_MASK; 313 314 return xdl_fall_back_diff(map->env, &xpp, 315 line1, count1, line2, count2); 316} 317 318/* 319 * Recursively find the longest common sequence of unique lines, 320 * and if none was found, ask xdl_do_diff() to do the job. 321 * 322 * This function assumes that env was prepared with xdl_prepare_env(). 323 */ 324static int patience_diff(xpparam_t const *xpp, xdfenv_t *env, 325 int line1, int count1, int line2, int count2) 326{ 327 struct hashmap map; 328 struct entry *first; 329 int result = 0; 330 331 /* trivial case: one side is empty */ 332 if (!count1) { 333 while(count2--) 334 env->xdf2.changed[line2++ - 1] = true; 335 return 0; 336 } else if (!count2) { 337 while(count1--) 338 env->xdf1.changed[line1++ - 1] = true; 339 return 0; 340 } 341 342 memset(&map, 0, sizeof(map)); 343 if (fill_hashmap(xpp, env, &map, 344 line1, count1, line2, count2)) 345 return -1; 346 347 /* are there any matching lines at all? */ 348 if (!map.has_matches) { 349 while(count1--) 350 env->xdf1.changed[line1++ - 1] = true; 351 while(count2--) 352 env->xdf2.changed[line2++ - 1] = true; 353 xdl_free(map.entries); 354 return 0; 355 } 356 357 result = find_longest_common_sequence(&map, &first); 358 if (result) 359 goto out; 360 if (first) 361 result = walk_common_sequence(&map, first, 362 line1, count1, line2, count2); 363 else 364 result = fall_back_to_classic_diff(&map, 365 line1, count1, line2, count2); 366 out: 367 xdl_free(map.entries); 368 return result; 369} 370 371int xdl_do_patience_diff(xpparam_t const *xpp, xdfenv_t *env) 372{ 373 return patience_diff(xpp, env, 1, env->xdf1.nrec, 1, env->xdf2.nrec); 374}