Git fork

Merge branch 'jk/fast-import-use-hashmap'

The custom hash function used by "git fast-import" has been
replaced with the one from hashmap.c, which gave us a nice
performance boost.

* jk/fast-import-use-hashmap:
fast-import: replace custom hash with hashmap.c

+59 -25
+36 -25
fast-import.c
··· 39 39 40 40 struct object_entry { 41 41 struct pack_idx_entry idx; 42 - struct object_entry *next; 42 + struct hashmap_entry ent; 43 43 uint32_t type : TYPE_BITS, 44 44 pack_id : PACK_ID_BITS, 45 45 depth : DEPTH_BITS; 46 46 }; 47 + 48 + static int object_entry_hashcmp(const void *map_data, 49 + const struct hashmap_entry *eptr, 50 + const struct hashmap_entry *entry_or_key, 51 + const void *keydata) 52 + { 53 + const struct object_id *oid = keydata; 54 + const struct object_entry *e1, *e2; 55 + 56 + e1 = container_of(eptr, const struct object_entry, ent); 57 + if (oid) 58 + return oidcmp(&e1->idx.oid, oid); 59 + 60 + e2 = container_of(entry_or_key, const struct object_entry, ent); 61 + return oidcmp(&e1->idx.oid, &e2->idx.oid); 62 + } 47 63 48 64 struct object_entry_pool { 49 65 struct object_entry_pool *next_pool; ··· 178 194 /* Table of objects we've written. */ 179 195 static unsigned int object_entry_alloc = 5000; 180 196 static struct object_entry_pool *blocks; 181 - static struct object_entry *object_table[1 << 16]; 197 + static struct hashmap object_table; 182 198 static struct mark_set *marks; 183 199 static const char *export_marks_file; 184 200 static const char *import_marks_file; ··· 455 471 456 472 static struct object_entry *find_object(struct object_id *oid) 457 473 { 458 - unsigned int h = oid->hash[0] << 8 | oid->hash[1]; 459 - struct object_entry *e; 460 - for (e = object_table[h]; e; e = e->next) 461 - if (oideq(oid, &e->idx.oid)) 462 - return e; 463 - return NULL; 474 + return hashmap_get_entry_from_hash(&object_table, oidhash(oid), oid, 475 + struct object_entry, ent); 464 476 } 465 477 466 478 static struct object_entry *insert_object(struct object_id *oid) 467 479 { 468 - unsigned int h = oid->hash[0] << 8 | oid->hash[1]; 469 - struct object_entry *e = object_table[h]; 480 + struct object_entry *e; 481 + unsigned int hash = oidhash(oid); 470 482 471 - while (e) { 472 - if (oideq(oid, &e->idx.oid)) 473 - return e; 474 - e = e->next; 483 + 
e = hashmap_get_entry_from_hash(&object_table, hash, oid, 484 + struct object_entry, ent); 485 + if (!e) { 486 + e = new_object(oid); 487 + e->idx.offset = 0; 488 + hashmap_entry_init(&e->ent, hash); 489 + hashmap_add(&object_table, &e->ent); 475 490 } 476 491 477 - e = new_object(oid); 478 - e->next = object_table[h]; 479 - e->idx.offset = 0; 480 - object_table[h] = e; 481 492 return e; 482 493 } 483 494 484 495 static void invalidate_pack_id(unsigned int id) 485 496 { 486 - unsigned int h; 487 497 unsigned long lu; 488 498 struct tag *t; 499 + struct hashmap_iter iter; 500 + struct object_entry *e; 489 501 490 - for (h = 0; h < ARRAY_SIZE(object_table); h++) { 491 - struct object_entry *e; 492 - 493 - for (e = object_table[h]; e; e = e->next) 494 - if (e->pack_id == id) 495 - e->pack_id = MAX_PACK_ID; 502 + hashmap_for_each_entry(&object_table, &iter, e, ent) { 503 + if (e->pack_id == id) 504 + e->pack_id = MAX_PACK_ID; 496 505 } 497 506 498 507 for (lu = 0; lu < branch_table_sz; lu++) { ··· 3510 3519 branch_table = xcalloc(branch_table_sz, sizeof(struct branch*)); 3511 3520 avail_tree_table = xcalloc(avail_tree_table_sz, sizeof(struct avail_tree_content*)); 3512 3521 marks = mem_pool_calloc(&fi_mem_pool, 1, sizeof(struct mark_set)); 3522 + 3523 + hashmap_init(&object_table, object_entry_hashcmp, NULL, 0); 3513 3524 3514 3525 /* 3515 3526 * We don't parse most options until after we've seen the set of
+23
t/perf/p9300-fast-import-export.sh
#!/bin/sh

test_description='test fast-import and fast-export performance'
. ./perf-lib.sh

test_perf_default_repo

# Exporting with --no-data keeps the stream far smaller and cheaper
# to work with, yet still gives fast-import a solid workout: commits
# and trees are still fully processed, and they account for 60% or
# more of the objects in most repositories.
#
# Passing --reencode avoids the default behavior of aborting on
# non-utf8 commits, letting the test run against a wider variety of
# sample repositories.
test_perf 'export (no-blobs)' '
	git fast-export --reencode=yes --no-data HEAD >export
'

test_perf 'import (no-blobs)' '
	git fast-import --force <export
'

test_done