Git fork

xdiff: delete chastore from xdfile_t

xdfile_t currently uses chastore_t, which is an arena allocator. I
think that xrecord_t used to be a linked list and recs didn't exist
originally. When recs was added, I think xdfile_t.next should have
been removed but was overlooked. This dual data structure setup
makes the code somewhat confusing.

Additionally, the C type chastore_t isn't FFI-friendly and provides
little to no performance benefit over using realloc to grow an array.

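Performance impact of deleting fields from xdfile_t (benchmarks below):
Deleting only ha makes git about 5% slower.
Deleting only cha (the chastore) makes git about 5% faster.
Deleting both ha and cha makes git about 1% slower.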

Delete ha, but keep cha
time hyperfine --warmup 3 -L exe build_v2.51.0/git,build_delete_ha/git '{exe} log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null'
Benchmark 1: build_v2.51.0/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null
Time (mean ± σ): 1.269 s ± 0.017 s [User: 1.135 s, System: 0.128 s]
Range (min … max): 1.249 s … 1.286 s 10 runs

Benchmark 2: build_delete_ha/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null
Time (mean ± σ): 1.339 s ± 0.017 s [User: 1.234 s, System: 0.099 s]
Range (min … max): 1.320 s … 1.358 s 10 runs

Summary
build_v2.51.0/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null ran
1.06 ± 0.02 times faster than build_delete_ha/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null

Delete cha, but keep ha
time hyperfine --warmup 3 -L exe build_v2.51.0/git,build_delete_chastore/git '{exe} log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null'
Benchmark 1: build_v2.51.0/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null
Time (mean ± σ): 1.290 s ± 0.001 s [User: 1.154 s, System: 0.130 s]
Range (min … max): 1.288 s … 1.292 s 10 runs

Benchmark 2: build_delete_chastore/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null
Time (mean ± σ): 1.232 s ± 0.017 s [User: 1.105 s, System: 0.121 s]
Range (min … max): 1.205 s … 1.249 s 10 runs

Summary
build_delete_chastore/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null ran
1.05 ± 0.01 times faster than build_v2.51.0/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null

Delete ha AND chastore
time hyperfine --warmup 3 -L exe build_v2.51.0/git,build_delete_ha_and_chastore/git '{exe} log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null'
Benchmark 1: build_v2.51.0/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null
Time (mean ± σ): 1.291 s ± 0.002 s [User: 1.156 s, System: 0.129 s]
Range (min … max): 1.287 s … 1.295 s 10 runs

Benchmark 2: build_delete_ha_and_chastore/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null
Time (mean ± σ): 1.306 s ± 0.001 s [User: 1.195 s, System: 0.105 s]
Range (min … max): 1.305 s … 1.308 s 10 runs

Summary
build_v2.51.0/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null ran
1.01 ± 0.00 times faster than build_delete_ha_and_chastore/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null

Best-viewed-with: --color-words
Signed-off-by: Ezekiel Newren <ezekielnewren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

Authored by Ezekiel Newren and committed by Junio C Hamano
d43d5912 6d507bd4

+63 -69
+12 -12
xdiff/xdiffi.c
··· 24 24 25 25 static unsigned long get_hash(xdfile_t *xdf, long index) 26 26 { 27 - return xdf->recs[xdf->rindex[index]]->ha; 27 + return xdf->recs[xdf->rindex[index]].ha; 28 28 } 29 29 30 30 #define XDL_MAX_COST_MIN 256 ··· 489 489 m->indent = -1; 490 490 } else { 491 491 m->end_of_file = 0; 492 - m->indent = get_indent(xdf->recs[split]); 492 + m->indent = get_indent(&xdf->recs[split]); 493 493 } 494 494 495 495 m->pre_blank = 0; 496 496 m->pre_indent = -1; 497 497 for (i = split - 1; i >= 0; i--) { 498 - m->pre_indent = get_indent(xdf->recs[i]); 498 + m->pre_indent = get_indent(&xdf->recs[i]); 499 499 if (m->pre_indent != -1) 500 500 break; 501 501 m->pre_blank += 1; ··· 508 508 m->post_blank = 0; 509 509 m->post_indent = -1; 510 510 for (i = split + 1; i < xdf->nrec; i++) { 511 - m->post_indent = get_indent(xdf->recs[i]); 511 + m->post_indent = get_indent(&xdf->recs[i]); 512 512 if (m->post_indent != -1) 513 513 break; 514 514 m->post_blank += 1; ··· 752 752 static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g) 753 753 { 754 754 if (g->end < xdf->nrec && 755 - recs_match(xdf->recs[g->start], xdf->recs[g->end])) { 755 + recs_match(&xdf->recs[g->start], &xdf->recs[g->end])) { 756 756 xdf->rchg[g->start++] = 0; 757 757 xdf->rchg[g->end++] = 1; 758 758 ··· 773 773 static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g) 774 774 { 775 775 if (g->start > 0 && 776 - recs_match(xdf->recs[g->start - 1], xdf->recs[g->end - 1])) { 776 + recs_match(&xdf->recs[g->start - 1], &xdf->recs[g->end - 1])) { 777 777 xdf->rchg[--g->start] = 1; 778 778 xdf->rchg[--g->end] = 0; 779 779 ··· 988 988 989 989 for (xch = xscr; xch; xch = xch->next) { 990 990 int ignore = 1; 991 - xrecord_t **rec; 991 + xrecord_t *rec; 992 992 long i; 993 993 994 994 rec = &xe->xdf1.recs[xch->i1]; 995 995 for (i = 0; i < xch->chg1 && ignore; i++) 996 - ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags); 996 + ignore = xdl_blankline(rec[i].ptr, rec[i].size, flags); 997 997 998 998 rec = 
&xe->xdf2.recs[xch->i2]; 999 999 for (i = 0; i < xch->chg2 && ignore; i++) 1000 - ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags); 1000 + ignore = xdl_blankline(rec[i].ptr, rec[i].size, flags); 1001 1001 1002 1002 xch->ignore = ignore; 1003 1003 } ··· 1021 1021 xdchange_t *xch; 1022 1022 1023 1023 for (xch = xscr; xch; xch = xch->next) { 1024 - xrecord_t **rec; 1024 + xrecord_t *rec; 1025 1025 int ignore = 1; 1026 1026 long i; 1027 1027 ··· 1033 1033 1034 1034 rec = &xe->xdf1.recs[xch->i1]; 1035 1035 for (i = 0; i < xch->chg1 && ignore; i++) 1036 - ignore = record_matches_regex(rec[i], xpp); 1036 + ignore = record_matches_regex(&rec[i], xpp); 1037 1037 1038 1038 rec = &xe->xdf2.recs[xch->i2]; 1039 1039 for (i = 0; i < xch->chg2 && ignore; i++) 1040 - ignore = record_matches_regex(rec[i], xpp); 1040 + ignore = record_matches_regex(&rec[i], xpp); 1041 1041 1042 1042 xch->ignore = ignore; 1043 1043 }
+3 -3
xdiff/xemit.c
··· 25 25 26 26 static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *ecb) 27 27 { 28 - xrecord_t *rec = xdf->recs[ri]; 28 + xrecord_t *rec = &xdf->recs[ri]; 29 29 30 30 if (xdl_emit_diffrec(rec->ptr, rec->size, pre, strlen(pre), ecb) < 0) 31 31 return -1; ··· 110 110 static long match_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri, 111 111 char *buf, long sz) 112 112 { 113 - xrecord_t *rec = xdf->recs[ri]; 113 + xrecord_t *rec = &xdf->recs[ri]; 114 114 115 115 if (!xecfg->find_func) 116 116 return def_ff(rec->ptr, rec->size, buf, sz); ··· 150 150 151 151 static int is_empty_rec(xdfile_t *xdf, long ri) 152 152 { 153 - xrecord_t *rec = xdf->recs[ri]; 153 + xrecord_t *rec = &xdf->recs[ri]; 154 154 long i = 0; 155 155 156 156 for (; i < rec->size && XDL_ISSPACE(rec->ptr[i]); i++);
+1 -1
xdiff/xhistogram.c
··· 86 86 ((LINE_MAP(index, ptr))->cnt) 87 87 88 88 #define REC(env, s, l) \ 89 - (env->xdf##s.recs[l - 1]) 89 + (&env->xdf##s.recs[l - 1]) 90 90 91 91 static int cmp_recs(xrecord_t *r1, xrecord_t *r2) 92 92 {
+28 -28
xdiff/xmerge.c
··· 97 97 int line_count, long flags) 98 98 { 99 99 int i; 100 - xrecord_t **rec1 = xe1->xdf2.recs + i1; 101 - xrecord_t **rec2 = xe2->xdf2.recs + i2; 100 + xrecord_t *rec1 = xe1->xdf2.recs + i1; 101 + xrecord_t *rec2 = xe2->xdf2.recs + i2; 102 102 103 103 for (i = 0; i < line_count; i++) { 104 - int result = xdl_recmatch(rec1[i]->ptr, rec1[i]->size, 105 - rec2[i]->ptr, rec2[i]->size, flags); 104 + int result = xdl_recmatch(rec1[i].ptr, rec1[i].size, 105 + rec2[i].ptr, rec2[i].size, flags); 106 106 if (!result) 107 107 return -1; 108 108 } ··· 111 111 112 112 static int xdl_recs_copy_0(int use_orig, xdfenv_t *xe, int i, int count, int needs_cr, int add_nl, char *dest) 113 113 { 114 - xrecord_t **recs; 114 + xrecord_t *recs; 115 115 int size = 0; 116 116 117 117 recs = (use_orig ? xe->xdf1.recs : xe->xdf2.recs) + i; ··· 119 119 if (count < 1) 120 120 return 0; 121 121 122 - for (i = 0; i < count; size += recs[i++]->size) 122 + for (i = 0; i < count; size += recs[i++].size) 123 123 if (dest) 124 - memcpy(dest + size, recs[i]->ptr, recs[i]->size); 124 + memcpy(dest + size, recs[i].ptr, recs[i].size); 125 125 if (add_nl) { 126 - i = recs[count - 1]->size; 127 - if (i == 0 || recs[count - 1]->ptr[i - 1] != '\n') { 126 + i = recs[count - 1].size; 127 + if (i == 0 || recs[count - 1].ptr[i - 1] != '\n') { 128 128 if (needs_cr) { 129 129 if (dest) 130 130 dest[size] = '\r'; ··· 160 160 161 161 if (i < file->nrec - 1) 162 162 /* All lines before the last *must* end in LF */ 163 - return (size = file->recs[i]->size) > 1 && 164 - file->recs[i]->ptr[size - 2] == '\r'; 163 + return (size = file->recs[i].size) > 1 && 164 + file->recs[i].ptr[size - 2] == '\r'; 165 165 if (!file->nrec) 166 166 /* Cannot determine eol style from empty file */ 167 167 return -1; 168 - if ((size = file->recs[i]->size) && 169 - file->recs[i]->ptr[size - 1] == '\n') 168 + if ((size = file->recs[i].size) && 169 + file->recs[i].ptr[size - 1] == '\n') 170 170 /* Last line; ends in LF; Is it CR/LF? 
*/ 171 171 return size > 1 && 172 - file->recs[i]->ptr[size - 2] == '\r'; 172 + file->recs[i].ptr[size - 2] == '\r'; 173 173 if (!i) 174 174 /* The only line has no eol */ 175 175 return -1; 176 176 /* Determine eol from second-to-last line */ 177 - return (size = file->recs[i - 1]->size) > 1 && 178 - file->recs[i - 1]->ptr[size - 2] == '\r'; 177 + return (size = file->recs[i - 1].size) > 1 && 178 + file->recs[i - 1].ptr[size - 2] == '\r'; 179 179 } 180 180 181 181 static int is_cr_needed(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m) ··· 334 334 static void xdl_refine_zdiff3_conflicts(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m, 335 335 xpparam_t const *xpp) 336 336 { 337 - xrecord_t **rec1 = xe1->xdf2.recs, **rec2 = xe2->xdf2.recs; 337 + xrecord_t *rec1 = xe1->xdf2.recs, *rec2 = xe2->xdf2.recs; 338 338 for (; m; m = m->next) { 339 339 /* let's handle just the conflicts */ 340 340 if (m->mode) 341 341 continue; 342 342 343 343 while(m->chg1 && m->chg2 && 344 - recmatch(rec1[m->i1], rec2[m->i2], xpp->flags)) { 344 + recmatch(&rec1[m->i1], &rec2[m->i2], xpp->flags)) { 345 345 m->chg1--; 346 346 m->chg2--; 347 347 m->i1++; 348 348 m->i2++; 349 349 } 350 350 while (m->chg1 && m->chg2 && 351 - recmatch(rec1[m->i1 + m->chg1 - 1], 352 - rec2[m->i2 + m->chg2 - 1], xpp->flags)) { 351 + recmatch(&rec1[m->i1 + m->chg1 - 1], 352 + &rec2[m->i2 + m->chg2 - 1], xpp->flags)) { 353 353 m->chg1--; 354 354 m->chg2--; 355 355 } ··· 381 381 * This probably does not work outside git, since 382 382 * we have a very simple mmfile structure. 
383 383 */ 384 - t1.ptr = (char *)xe1->xdf2.recs[m->i1]->ptr; 385 - t1.size = xe1->xdf2.recs[m->i1 + m->chg1 - 1]->ptr 386 - + xe1->xdf2.recs[m->i1 + m->chg1 - 1]->size - t1.ptr; 387 - t2.ptr = (char *)xe2->xdf2.recs[m->i2]->ptr; 388 - t2.size = xe2->xdf2.recs[m->i2 + m->chg2 - 1]->ptr 389 - + xe2->xdf2.recs[m->i2 + m->chg2 - 1]->size - t2.ptr; 384 + t1.ptr = (char *)xe1->xdf2.recs[m->i1].ptr; 385 + t1.size = xe1->xdf2.recs[m->i1 + m->chg1 - 1].ptr 386 + + xe1->xdf2.recs[m->i1 + m->chg1 - 1].size - t1.ptr; 387 + t2.ptr = (char *)xe2->xdf2.recs[m->i2].ptr; 388 + t2.size = xe2->xdf2.recs[m->i2 + m->chg2 - 1].ptr 389 + + xe2->xdf2.recs[m->i2 + m->chg2 - 1].size - t2.ptr; 390 390 if (xdl_do_diff(&t1, &t2, xpp, &xe) < 0) 391 391 return -1; 392 392 if (xdl_change_compact(&xe.xdf1, &xe.xdf2, xpp->flags) < 0 || ··· 440 440 static int lines_contain_alnum(xdfenv_t *xe, int i, int chg) 441 441 { 442 442 for (; chg; chg--, i++) 443 - if (line_contains_alnum(xe->xdf2.recs[i]->ptr, 444 - xe->xdf2.recs[i]->size)) 443 + if (line_contains_alnum(xe->xdf2.recs[i].ptr, 444 + xe->xdf2.recs[i].size)) 445 445 return 1; 446 446 return 0; 447 447 }
+5 -5
xdiff/xpatience.c
··· 88 88 static void insert_record(xpparam_t const *xpp, int line, struct hashmap *map, 89 89 int pass) 90 90 { 91 - xrecord_t **records = pass == 1 ? 91 + xrecord_t *records = pass == 1 ? 92 92 map->env->xdf1.recs : map->env->xdf2.recs; 93 - xrecord_t *record = records[line - 1]; 93 + xrecord_t *record = &records[line - 1]; 94 94 /* 95 95 * After xdl_prepare_env() (or more precisely, due to 96 96 * xdl_classify_record()), the "ha" member of the records (AKA lines) ··· 121 121 return; 122 122 map->entries[index].line1 = line; 123 123 map->entries[index].hash = record->ha; 124 - map->entries[index].anchor = is_anchor(xpp, map->env->xdf1.recs[line - 1]->ptr); 124 + map->entries[index].anchor = is_anchor(xpp, map->env->xdf1.recs[line - 1].ptr); 125 125 if (!map->first) 126 126 map->first = map->entries + index; 127 127 if (map->last) { ··· 246 246 247 247 static int match(struct hashmap *map, int line1, int line2) 248 248 { 249 - xrecord_t *record1 = map->env->xdf1.recs[line1 - 1]; 250 - xrecord_t *record2 = map->env->xdf2.recs[line2 - 1]; 249 + xrecord_t *record1 = &map->env->xdf1.recs[line1 - 1]; 250 + xrecord_t *record2 = &map->env->xdf2.recs[line2 - 1]; 251 251 return record1->ha == record2->ha; 252 252 } 253 253
+7 -12
xdiff/xprepare.c
··· 128 128 xdl_free(xdf->rindex); 129 129 xdl_free(xdf->rchg - 1); 130 130 xdl_free(xdf->recs); 131 - xdl_cha_free(&xdf->rcha); 132 131 } 133 132 134 133 ··· 143 142 xdf->rchg = NULL; 144 143 xdf->recs = NULL; 145 144 146 - if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0) 147 - goto abort; 148 145 if (!XDL_ALLOC_ARRAY(xdf->recs, narec)) 149 146 goto abort; 150 147 ··· 155 152 hav = xdl_hash_record(&cur, top, xpp->flags); 156 153 if (XDL_ALLOC_GROW(xdf->recs, xdf->nrec + 1, narec)) 157 154 goto abort; 158 - if (!(crec = xdl_cha_alloc(&xdf->rcha))) 159 - goto abort; 155 + crec = &xdf->recs[xdf->nrec++]; 160 156 crec->ptr = prev; 161 157 crec->size = (long) (cur - prev); 162 158 crec->ha = hav; 163 - xdf->recs[xdf->nrec++] = crec; 164 159 if (xdl_classify_record(pass, cf, crec) < 0) 165 160 goto abort; 166 161 } ··· 260 255 */ 261 256 static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) { 262 257 long i, nm, nreff, mlim; 263 - xrecord_t **recs; 258 + xrecord_t *recs; 264 259 xdlclass_t *rcrec; 265 260 char *dis, *dis1, *dis2; 266 261 int need_min = !!(cf->flags & XDF_NEED_MINIMAL); ··· 273 268 if ((mlim = xdl_bogosqrt(xdf1->nrec)) > XDL_MAX_EQLIMIT) 274 269 mlim = XDL_MAX_EQLIMIT; 275 270 for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) { 276 - rcrec = cf->rcrecs[(*recs)->ha]; 271 + rcrec = cf->rcrecs[recs->ha]; 277 272 nm = rcrec ? rcrec->len2 : 0; 278 273 dis1[i] = (nm == 0) ? 0: (nm >= mlim && !need_min) ? 2: 1; 279 274 } ··· 281 276 if ((mlim = xdl_bogosqrt(xdf2->nrec)) > XDL_MAX_EQLIMIT) 282 277 mlim = XDL_MAX_EQLIMIT; 283 278 for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) { 284 - rcrec = cf->rcrecs[(*recs)->ha]; 279 + rcrec = cf->rcrecs[recs->ha]; 285 280 nm = rcrec ? rcrec->len1 : 0; 286 281 dis2[i] = (nm == 0) ? 0: (nm >= mlim && !need_min) ? 
2: 1; 287 282 } ··· 317 312 */ 318 313 static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) { 319 314 long i, lim; 320 - xrecord_t **recs1, **recs2; 315 + xrecord_t *recs1, *recs2; 321 316 322 317 recs1 = xdf1->recs; 323 318 recs2 = xdf2->recs; 324 319 for (i = 0, lim = XDL_MIN(xdf1->nrec, xdf2->nrec); i < lim; 325 320 i++, recs1++, recs2++) 326 - if ((*recs1)->ha != (*recs2)->ha) 321 + if (recs1->ha != recs2->ha) 327 322 break; 328 323 329 324 xdf1->dstart = xdf2->dstart = i; ··· 331 326 recs1 = xdf1->recs + xdf1->nrec - 1; 332 327 recs2 = xdf2->recs + xdf2->nrec - 1; 333 328 for (lim -= i, i = 0; i < lim; i++, recs1--, recs2--) 334 - if ((*recs1)->ha != (*recs2)->ha) 329 + if (recs1->ha != recs2->ha) 335 330 break; 336 331 337 332 xdf1->dend = xdf1->nrec - i - 1;
+1 -2
xdiff/xtypes.h
··· 45 45 } xrecord_t; 46 46 47 47 typedef struct s_xdfile { 48 - chastore_t rcha; 48 + xrecord_t *recs; 49 49 long nrec; 50 50 long dstart, dend; 51 - xrecord_t **recs; 52 51 char *rchg; 53 52 long *rindex; 54 53 long nreff;
+6 -6
xdiff/xutils.c
··· 416 416 mmfile_t subfile1, subfile2; 417 417 xdfenv_t env; 418 418 419 - subfile1.ptr = (char *)diff_env->xdf1.recs[line1 - 1]->ptr; 420 - subfile1.size = diff_env->xdf1.recs[line1 + count1 - 2]->ptr + 421 - diff_env->xdf1.recs[line1 + count1 - 2]->size - subfile1.ptr; 422 - subfile2.ptr = (char *)diff_env->xdf2.recs[line2 - 1]->ptr; 423 - subfile2.size = diff_env->xdf2.recs[line2 + count2 - 2]->ptr + 424 - diff_env->xdf2.recs[line2 + count2 - 2]->size - subfile2.ptr; 419 + subfile1.ptr = (char *)diff_env->xdf1.recs[line1 - 1].ptr; 420 + subfile1.size = diff_env->xdf1.recs[line1 + count1 - 2].ptr + 421 + diff_env->xdf1.recs[line1 + count1 - 2].size - subfile1.ptr; 422 + subfile2.ptr = (char *)diff_env->xdf2.recs[line2 - 1].ptr; 423 + subfile2.size = diff_env->xdf2.recs[line2 + count2 - 2].ptr + 424 + diff_env->xdf2.recs[line2 + count2 - 2].size - subfile2.ptr; 425 425 if (xdl_do_diff(&subfile1, &subfile2, xpp, &env) < 0) 426 426 return -1; 427 427