/*
 * midx-write.c — writing the multi-pack-index (MIDX) and its associated
 * reverse-index (.rev) and bitmap files. (Extracted from a Git fork;
 * web-viewer chrome removed.)
 */
1#include "git-compat-util.h" 2#include "abspath.h" 3#include "config.h" 4#include "hex.h" 5#include "lockfile.h" 6#include "packfile.h" 7#include "object-file.h" 8#include "hash-lookup.h" 9#include "midx.h" 10#include "progress.h" 11#include "trace2.h" 12#include "run-command.h" 13#include "chunk-format.h" 14#include "pack-bitmap.h" 15#include "refs.h" 16#include "revision.h" 17#include "list-objects.h" 18#include "path.h" 19#include "pack-revindex.h" 20 21#define PACK_EXPIRED UINT_MAX 22#define BITMAP_POS_UNKNOWN (~((uint32_t)0)) 23#define MIDX_CHUNK_FANOUT_SIZE (sizeof(uint32_t) * 256) 24#define MIDX_CHUNK_LARGE_OFFSET_WIDTH (sizeof(uint64_t)) 25#define NO_PREFERRED_PACK (~((uint32_t)0)) 26 27extern int midx_checksum_valid(struct multi_pack_index *m); 28extern void clear_midx_files_ext(struct odb_source *source, const char *ext, 29 const char *keep_hash); 30extern void clear_incremental_midx_files_ext(struct odb_source *source, 31 const char *ext, 32 const char **keep_hashes, 33 uint32_t hashes_nr); 34extern int cmp_idx_or_pack_name(const char *idx_or_pack_name, 35 const char *idx_name); 36 37static size_t write_midx_header(const struct git_hash_algo *hash_algo, 38 struct hashfile *f, unsigned char num_chunks, 39 uint32_t num_packs) 40{ 41 hashwrite_be32(f, MIDX_SIGNATURE); 42 hashwrite_u8(f, MIDX_VERSION); 43 hashwrite_u8(f, oid_version(hash_algo)); 44 hashwrite_u8(f, num_chunks); 45 hashwrite_u8(f, 0); /* unused */ 46 hashwrite_be32(f, num_packs); 47 48 return MIDX_HEADER_SIZE; 49} 50 51struct pack_info { 52 uint32_t orig_pack_int_id; 53 char *pack_name; 54 struct packed_git *p; 55 56 uint32_t bitmap_pos; 57 uint32_t bitmap_nr; 58 59 unsigned expired : 1; 60}; 61 62static void fill_pack_info(struct pack_info *info, 63 struct packed_git *p, const char *pack_name, 64 uint32_t orig_pack_int_id) 65{ 66 memset(info, 0, sizeof(struct pack_info)); 67 68 info->orig_pack_int_id = orig_pack_int_id; 69 info->pack_name = xstrdup(pack_name); 70 info->p = p; 71 
info->bitmap_pos = BITMAP_POS_UNKNOWN; 72} 73 74static int pack_info_compare(const void *_a, const void *_b) 75{ 76 struct pack_info *a = (struct pack_info *)_a; 77 struct pack_info *b = (struct pack_info *)_b; 78 return strcmp(a->pack_name, b->pack_name); 79} 80 81static int idx_or_pack_name_cmp(const void *_va, const void *_vb) 82{ 83 const char *pack_name = _va; 84 const struct pack_info *compar = _vb; 85 86 return cmp_idx_or_pack_name(pack_name, compar->pack_name); 87} 88 89struct write_midx_context { 90 struct pack_info *info; 91 size_t nr; 92 size_t alloc; 93 struct multi_pack_index *m; 94 struct multi_pack_index *base_midx; 95 struct progress *progress; 96 unsigned pack_paths_checked; 97 98 struct pack_midx_entry *entries; 99 size_t entries_nr; 100 101 uint32_t *pack_perm; 102 uint32_t *pack_order; 103 unsigned large_offsets_needed:1; 104 uint32_t num_large_offsets; 105 106 uint32_t preferred_pack_idx; 107 108 int incremental; 109 uint32_t num_multi_pack_indexes_before; 110 111 struct string_list *to_include; 112 113 struct repository *repo; 114 struct odb_source *source; 115}; 116 117static int should_include_pack(const struct write_midx_context *ctx, 118 const char *file_name) 119{ 120 /* 121 * Note that at most one of ctx->m and ctx->to_include are set, 122 * so we are testing midx_contains_pack() and 123 * string_list_has_string() independently (guarded by the 124 * appropriate NULL checks). 125 * 126 * We could support passing to_include while reusing an existing 127 * MIDX, but don't currently since the reuse process drags 128 * forward all packs from an existing MIDX (without checking 129 * whether or not they appear in the to_include list). 130 * 131 * If we added support for that, these next two conditional 132 * should be performed independently (likely checking 133 * to_include before the existing MIDX). 
134 */ 135 if (ctx->m && midx_contains_pack(ctx->m, file_name)) 136 return 0; 137 else if (ctx->base_midx && midx_contains_pack(ctx->base_midx, 138 file_name)) 139 return 0; 140 else if (ctx->to_include && 141 !string_list_has_string(ctx->to_include, file_name)) 142 return 0; 143 return 1; 144} 145 146static void add_pack_to_midx(const char *full_path, size_t full_path_len, 147 const char *file_name, void *data) 148{ 149 struct write_midx_context *ctx = data; 150 struct packed_git *p; 151 152 if (ends_with(file_name, ".idx")) { 153 display_progress(ctx->progress, ++ctx->pack_paths_checked); 154 155 if (!should_include_pack(ctx, file_name)) 156 return; 157 158 ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc); 159 p = add_packed_git(ctx->repo, full_path, full_path_len, 0); 160 if (!p) { 161 warning(_("failed to add packfile '%s'"), 162 full_path); 163 return; 164 } 165 166 if (open_pack_index(p)) { 167 warning(_("failed to open pack-index '%s'"), 168 full_path); 169 close_pack(p); 170 free(p); 171 return; 172 } 173 174 fill_pack_info(&ctx->info[ctx->nr], p, file_name, ctx->nr); 175 ctx->nr++; 176 } 177} 178 179struct pack_midx_entry { 180 struct object_id oid; 181 uint32_t pack_int_id; 182 time_t pack_mtime; 183 uint64_t offset; 184 unsigned preferred : 1; 185}; 186 187static int midx_oid_compare(const void *_a, const void *_b) 188{ 189 const struct pack_midx_entry *a = (const struct pack_midx_entry *)_a; 190 const struct pack_midx_entry *b = (const struct pack_midx_entry *)_b; 191 int cmp = oidcmp(&a->oid, &b->oid); 192 193 if (cmp) 194 return cmp; 195 196 /* Sort objects in a preferred pack first when multiple copies exist. 
/*
 * Fill *e with the object at logical position `pos` of the MIDX `m`
 * (position counts objects in base layers first). Returns non-zero if
 * `pos` is out of range, zero on success.
 */
static int nth_midxed_pack_midx_entry(struct multi_pack_index *m,
				      struct pack_midx_entry *e,
				      uint32_t pos)
{
	if (pos >= m->num_objects + m->num_objects_in_base)
		return 1;

	nth_midxed_object_oid(&e->oid, m, pos);
	e->pack_int_id = nth_midxed_pack_int_id(m, pos);
	e->offset = nth_midxed_offset(m, pos);

	/* consider objects in midx to be from "old" packs */
	e->pack_mtime = 0;
	return 0;
}

/* Fill *entry from object `cur_object` of pack `p`; dies if the OID
 * cannot be read from the pack index. */
static void fill_pack_entry(uint32_t pack_int_id,
			    struct packed_git *p,
			    uint32_t cur_object,
			    struct pack_midx_entry *entry,
			    int preferred)
{
	if (nth_packed_object_id(&entry->oid, p, cur_object) < 0)
		die(_("failed to locate object %d in packfile"), cur_object);

	entry->pack_int_id = pack_int_id;
	entry->pack_mtime = p->mtime;

	entry->offset = nth_packed_object_offset(p, cur_object);
	entry->preferred = !!preferred;
}

/* Growable scratch list holding all candidate entries for one fanout
 * (first OID byte) slice. */
struct midx_fanout {
	struct pack_midx_entry *entries;
	size_t nr, alloc;
};

static void midx_fanout_grow(struct midx_fanout *fanout, size_t nr)
{
	if (nr < fanout->nr)
		BUG("negative growth in midx_fanout_grow() (%"PRIuMAX" < %"PRIuMAX")",
		    (uintmax_t)nr, (uintmax_t)fanout->nr);
	ALLOC_GROW(fanout->entries, nr, fanout->alloc);
}

static void midx_fanout_sort(struct midx_fanout *fanout)
{
	QSORT(fanout->entries, fanout->nr, midx_oid_compare);
}

/*
 * Append to `fanout` all objects of fanout slice `cur_fanout` from MIDX
 * `m`, recursing into base layers first. Objects belonging to the
 * preferred pack are skipped — the caller adds those separately with
 * the preferred bit set.
 */
static void midx_fanout_add_midx_fanout(struct midx_fanout *fanout,
					struct multi_pack_index *m,
					uint32_t cur_fanout,
					uint32_t preferred_pack)
{
	uint32_t start = m->num_objects_in_base, end;
	uint32_t cur_object;

	if (m->base_midx)
		midx_fanout_add_midx_fanout(fanout, m->base_midx, cur_fanout,
					    preferred_pack);

	if (cur_fanout)
		start += ntohl(m->chunk_oid_fanout[cur_fanout - 1]);
	end = m->num_objects_in_base + ntohl(m->chunk_oid_fanout[cur_fanout]);

	for (cur_object = start; cur_object < end; cur_object++) {
		if ((preferred_pack != NO_PREFERRED_PACK) &&
		    (preferred_pack == nth_midxed_pack_int_id(m, cur_object))) {
			/*
			 * Objects from preferred packs are added
			 * separately.
			 */
			continue;
		}

		midx_fanout_grow(fanout, fanout->nr + 1);
		nth_midxed_pack_midx_entry(m,
					   &fanout->entries[fanout->nr],
					   cur_object);
		fanout->entries[fanout->nr].preferred = 0;
		fanout->nr++;
	}
}

/*
 * Append to `fanout` all objects of fanout slice `cur_fanout` from pack
 * `cur_pack`, marking each entry preferred (or not) as requested.
 */
static void midx_fanout_add_pack_fanout(struct midx_fanout *fanout,
					struct pack_info *info,
					uint32_t cur_pack,
					int preferred,
					uint32_t cur_fanout)
{
	struct packed_git *pack = info[cur_pack].p;
	uint32_t start = 0, end;
	uint32_t cur_object;

	if (cur_fanout)
		start = get_pack_fanout(pack, cur_fanout - 1);
	end = get_pack_fanout(pack, cur_fanout);

	for (cur_object = start; cur_object < end; cur_object++) {
		midx_fanout_grow(fanout, fanout->nr + 1);
		fill_pack_entry(cur_pack,
				info[cur_pack].p,
				cur_object,
				&fanout->entries[fanout->nr],
				preferred);
		fanout->nr++;
	}
}

/*
 * It is possible to artificially get into a state where there are many
 * duplicate copies of objects. That can create high memory pressure if
 * we are to create a list of all objects before de-duplication. To reduce
 * this memory pressure without a significant performance drop, automatically
 * group objects by the first byte of their object id. Use the IDX fanout
 * tables to group the data, copy to a local array, then sort.
 *
 * Copy only the de-duplicated entries (selected by most-recent modified time
 * of a packfile containing the object).
 */
static void compute_sorted_entries(struct write_midx_context *ctx,
				   uint32_t start_pack)
{
	uint32_t cur_fanout, cur_pack, cur_object;
	size_t alloc_objects, total_objects = 0;
	struct midx_fanout fanout = { 0 };

	for (cur_pack = start_pack; cur_pack < ctx->nr; cur_pack++)
		total_objects = st_add(total_objects,
				       ctx->info[cur_pack].p->num_objects);

	/*
	 * As we de-duplicate by fanout value, we expect the fanout
	 * slices to be evenly distributed, with some noise. Hence,
	 * allocate slightly more than one 256th.
	 */
	alloc_objects = fanout.alloc = total_objects > 3200 ? total_objects / 200 : 16;

	ALLOC_ARRAY(fanout.entries, fanout.alloc);
	ALLOC_ARRAY(ctx->entries, alloc_objects);
	ctx->entries_nr = 0;

	for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) {
		fanout.nr = 0;

		if (ctx->m && !ctx->incremental)
			midx_fanout_add_midx_fanout(&fanout, ctx->m, cur_fanout,
						    ctx->preferred_pack_idx);

		for (cur_pack = start_pack; cur_pack < ctx->nr; cur_pack++) {
			int preferred = cur_pack == ctx->preferred_pack_idx;
			midx_fanout_add_pack_fanout(&fanout,
						    ctx->info, cur_pack,
						    preferred, cur_fanout);
		}

		/* a preferred pack below start_pack was not visited above */
		if (ctx->preferred_pack_idx != NO_PREFERRED_PACK &&
		    ctx->preferred_pack_idx < start_pack)
			midx_fanout_add_pack_fanout(&fanout, ctx->info,
						    ctx->preferred_pack_idx, 1,
						    cur_fanout);

		midx_fanout_sort(&fanout);

		/*
		 * The batch is now sorted by OID and then mtime (descending).
		 * Take only the first duplicate.
		 */
		for (cur_object = 0; cur_object < fanout.nr; cur_object++) {
			if (cur_object && oideq(&fanout.entries[cur_object - 1].oid,
						&fanout.entries[cur_object].oid))
				continue;
			if (ctx->incremental && ctx->base_midx &&
			    midx_has_oid(ctx->base_midx,
					 &fanout.entries[cur_object].oid))
				continue;

			ALLOC_GROW(ctx->entries, st_add(ctx->entries_nr, 1),
				   alloc_objects);
			memcpy(&ctx->entries[ctx->entries_nr],
			       &fanout.entries[cur_object],
			       sizeof(struct pack_midx_entry));
			ctx->entries_nr++;
		}
	}

	free(fanout.entries);
}
/*
 * PNAM chunk: write the NUL-terminated names of all non-expired packs,
 * then pad with zero bytes up to the chunk alignment.
 */
static int write_midx_pack_names(struct hashfile *f, void *data)
{
	struct write_midx_context *ctx = data;
	uint32_t i;
	unsigned char padding[MIDX_CHUNK_ALIGNMENT];
	size_t written = 0;

	for (i = 0; i < ctx->nr; i++) {
		size_t writelen;

		if (ctx->info[i].expired)
			continue;

		if (i && strcmp(ctx->info[i].pack_name, ctx->info[i - 1].pack_name) <= 0)
			BUG("incorrect pack-file order: %s before %s",
			    ctx->info[i - 1].pack_name,
			    ctx->info[i].pack_name);

		writelen = strlen(ctx->info[i].pack_name) + 1;
		hashwrite(f, ctx->info[i].pack_name, writelen);
		written += writelen;
	}

	/* add padding to be aligned */
	i = MIDX_CHUNK_ALIGNMENT - (written % MIDX_CHUNK_ALIGNMENT);
	if (i < MIDX_CHUNK_ALIGNMENT) {
		memset(padding, 0, sizeof(padding));
		hashwrite(f, padding, i);
	}

	return 0;
}

/* BTMP chunk: per-pack bitmap position and bitmapped-object count. */
static int write_midx_bitmapped_packs(struct hashfile *f, void *data)
{
	struct write_midx_context *ctx = data;
	size_t i;

	for (i = 0; i < ctx->nr; i++) {
		struct pack_info *pack = &ctx->info[i];
		if (pack->expired)
			continue;

		if (pack->bitmap_pos == BITMAP_POS_UNKNOWN && pack->bitmap_nr)
			BUG("pack '%s' has no bitmap position, but has %d bitmapped object(s)",
			    pack->pack_name, pack->bitmap_nr);

		hashwrite_be32(f, pack->bitmap_pos);
		hashwrite_be32(f, pack->bitmap_nr);
	}
	return 0;
}

/* OIDF chunk: 256-entry cumulative fanout over the first OID byte. */
static int write_midx_oid_fanout(struct hashfile *f,
				 void *data)
{
	struct write_midx_context *ctx = data;
	struct pack_midx_entry *list = ctx->entries;
	struct pack_midx_entry *last = ctx->entries + ctx->entries_nr;
	uint32_t count = 0;
	uint32_t i;

	/*
	 * Write the first-level table (the list is sorted,
	 * but we use a 256-entry lookup to be able to avoid
	 * having to do eight extra binary search iterations).
	 */
	for (i = 0; i < 256; i++) {
		struct pack_midx_entry *next = list;

		while (next < last && next->oid.hash[0] == i) {
			count++;
			next++;
		}

		hashwrite_be32(f, count);
		list = next;
	}

	return 0;
}

/* OIDL chunk: the sorted object IDs, with an ordering sanity check. */
static int write_midx_oid_lookup(struct hashfile *f,
				 void *data)
{
	struct write_midx_context *ctx = data;
	unsigned char hash_len = ctx->repo->hash_algo->rawsz;
	struct pack_midx_entry *list = ctx->entries;
	uint32_t i;

	for (i = 0; i < ctx->entries_nr; i++) {
		struct pack_midx_entry *obj = list++;

		if (i < ctx->entries_nr - 1) {
			struct pack_midx_entry *next = list;
			if (oidcmp(&obj->oid, &next->oid) >= 0)
				BUG("OIDs not in order: %s >= %s",
				    oid_to_hex(&obj->oid),
				    oid_to_hex(&next->oid));
		}

		hashwrite(f, obj->oid.hash, (int)hash_len);
	}

	return 0;
}

/*
 * OOFF chunk: per-object permuted pack id and offset. Offsets that do
 * not fit in 31 bits are redirected to the large-offset (LOFF) chunk.
 */
static int write_midx_object_offsets(struct hashfile *f,
				     void *data)
{
	struct write_midx_context *ctx = data;
	struct pack_midx_entry *list = ctx->entries;
	uint32_t i, nr_large_offset = 0;

	for (i = 0; i < ctx->entries_nr; i++) {
		struct pack_midx_entry *obj = list++;

		if (ctx->pack_perm[obj->pack_int_id] == PACK_EXPIRED)
			BUG("object %s is in an expired pack with int-id %d",
			    oid_to_hex(&obj->oid),
			    obj->pack_int_id);

		hashwrite_be32(f, ctx->pack_perm[obj->pack_int_id]);

		if (ctx->large_offsets_needed && obj->offset >> 31)
			hashwrite_be32(f, MIDX_LARGE_OFFSET_NEEDED | nr_large_offset++);
		else if (!ctx->large_offsets_needed && obj->offset >> 32)
			BUG("object %s requires a large offset (%"PRIx64") but the MIDX is not writing large offsets!",
			    oid_to_hex(&obj->oid),
			    obj->offset);
		else
			hashwrite_be32(f, (uint32_t)obj->offset);
	}

	return 0;
}

/* LOFF chunk: 64-bit offsets for objects flagged in the OOFF chunk. */
static int write_midx_large_offsets(struct hashfile *f,
				    void *data)
{
	struct write_midx_context *ctx = data;
	struct pack_midx_entry *list = ctx->entries;
	struct pack_midx_entry *end = ctx->entries + ctx->entries_nr;
	uint32_t nr_large_offset = ctx->num_large_offsets;

	while (nr_large_offset) {
		struct pack_midx_entry *obj;
		uint64_t offset;

		if (list >= end)
			BUG("too many large-offset objects");

		obj = list++;
		offset = obj->offset;

		/* small offsets live inline in the OOFF chunk */
		if (!(offset >> 31))
			continue;

		hashwrite_be64(f, offset);

		nr_large_offset--;
	}

	return 0;
}

/* RIDX chunk: pack order, shifted past objects held by base MIDX layers. */
static int write_midx_revindex(struct hashfile *f,
			       void *data)
{
	struct write_midx_context *ctx = data;
	uint32_t i, nr_base;

	if (ctx->incremental && ctx->base_midx)
		nr_base = ctx->base_midx->num_objects +
			ctx->base_midx->num_objects_in_base;
	else
		nr_base = 0;

	for (i = 0; i < ctx->entries_nr; i++)
		hashwrite_be32(f, ctx->pack_order[i] + nr_base);

	return 0;
}

struct midx_pack_order_data {
	uint32_t nr;
	uint32_t pack;
	off_t offset;
};

/* qsort() callback: order by (pack, offset). */
static int midx_pack_order_cmp(const void *va, const void *vb)
{
	const struct midx_pack_order_data *a = va, *b = vb;
	if (a->pack < b->pack)
		return -1;
	else if (a->pack > b->pack)
		return 1;
	else if (a->offset < b->offset)
		return -1;
	else if (a->offset > b->offset)
		return 1;
	else
		return 0;
}

/*
 * Compute the pack-order array (pseudo-pack position -> lexical
 * position), sorting by permuted pack id — with non-preferred packs
 * pushed after the preferred one via the high bit — then by offset.
 * As a side effect, assigns each pack's bitmap_pos/bitmap_nr.
 */
static uint32_t *midx_pack_order(struct write_midx_context *ctx)
{
	struct midx_pack_order_data *data;
	uint32_t *pack_order, base_objects = 0;
	uint32_t i;

	trace2_region_enter("midx", "midx_pack_order", ctx->repo);

	if (ctx->incremental && ctx->base_midx)
		base_objects = ctx->base_midx->num_objects +
			ctx->base_midx->num_objects_in_base;

	ALLOC_ARRAY(pack_order, ctx->entries_nr);
	ALLOC_ARRAY(data, ctx->entries_nr);

	for (i = 0; i < ctx->entries_nr; i++) {
		struct pack_midx_entry *e = &ctx->entries[i];
		data[i].nr = i;
		data[i].pack = ctx->pack_perm[e->pack_int_id];
		if (!e->preferred)
			data[i].pack |= (1U << 31);
		data[i].offset = e->offset;
	}

	QSORT(data, ctx->entries_nr, midx_pack_order_cmp);

	for (i = 0; i < ctx->entries_nr; i++) {
		struct pack_midx_entry *e = &ctx->entries[data[i].nr];
		struct pack_info *pack = &ctx->info[ctx->pack_perm[e->pack_int_id]];
		if (pack->bitmap_pos == BITMAP_POS_UNKNOWN)
			pack->bitmap_pos = i + base_objects;
		pack->bitmap_nr++;
		pack_order[i] = data[i].nr;
	}
	/* packs with no objects get a well-defined (zero) position */
	for (i = 0; i < ctx->nr; i++) {
		struct pack_info *pack = &ctx->info[ctx->pack_perm[i]];
		if (pack->bitmap_pos == BITMAP_POS_UNKNOWN)
			pack->bitmap_pos = 0;
	}
	free(data);

	trace2_region_leave("midx", "midx_pack_order", ctx->repo);

	return pack_order;
}

/* Write the .rev file for this MIDX and rename it into place. */
static void write_midx_reverse_index(struct write_midx_context *ctx,
				     unsigned char *midx_hash)
{
	struct strbuf buf = STRBUF_INIT;
	char *tmp_file;

	trace2_region_enter("midx", "write_midx_reverse_index", ctx->repo);

	if (ctx->incremental)
		get_split_midx_filename_ext(ctx->source, &buf,
					    midx_hash, MIDX_EXT_REV);
	else
		get_midx_filename_ext(ctx->source, &buf,
				      midx_hash, MIDX_EXT_REV);

	tmp_file = write_rev_file_order(ctx->repo, NULL, ctx->pack_order,
					ctx->entries_nr, midx_hash, WRITE_REV);

	if (finalize_object_file(ctx->repo, tmp_file, buf.buf))
		die(_("cannot store reverse index file"));

	strbuf_release(&buf);
	free(tmp_file);

	trace2_region_leave("midx", "write_midx_reverse_index", ctx->repo);
}
/* Build a packing_data whose objects are laid out in MIDX pack order. */
static void prepare_midx_packing_data(struct packing_data *pdata,
				      struct write_midx_context *ctx)
{
	uint32_t i;

	trace2_region_enter("midx", "prepare_midx_packing_data", ctx->repo);

	memset(pdata, 0, sizeof(struct packing_data));
	prepare_packing_data(ctx->repo, pdata);

	for (i = 0; i < ctx->entries_nr; i++) {
		uint32_t pos = ctx->pack_order[i];
		struct pack_midx_entry *from = &ctx->entries[pos];
		struct object_entry *to = packlist_alloc(pdata, &from->oid);

		oe_set_in_pack(pdata, to,
			       ctx->info[ctx->pack_perm[from->pack_int_id]].p);
	}

	trace2_region_leave("midx", "prepare_midx_packing_data", ctx->repo);
}

/*
 * for_each_ref() callback: queue each (peeled) commit tip as a pending
 * object for the bitmap selection walk; preferred refnames get the
 * NEEDS_BITMAP flag.
 */
static int add_ref_to_pending(const char *refname, const char *referent UNUSED,
			      const struct object_id *oid,
			      int flag, void *cb_data)
{
	struct rev_info *revs = (struct rev_info*)cb_data;
	struct object_id peeled;
	struct object *object;

	if ((flag & REF_ISSYMREF) && (flag & REF_ISBROKEN)) {
		warning("symbolic ref is dangling: %s", refname);
		return 0;
	}

	if (!peel_iterated_oid(revs->repo, oid, &peeled))
		oid = &peeled;

	object = parse_object_or_die(revs->repo, oid, refname);
	if (object->type != OBJ_COMMIT)
		return 0;

	add_pending_object(revs, object, "");
	if (bitmap_is_preferred_refname(revs->repo, refname))
		object->flags |= NEEDS_BITMAP;
	return 0;
}

/* Accumulator for commits selected during the bitmap commit walk. */
struct bitmap_commit_cb {
	struct commit **commits;
	size_t commits_nr, commits_alloc;

	struct write_midx_context *ctx;
};

/* oid_pos() accessor over an array of pack_midx_entry. */
static const struct object_id *bitmap_oid_access(size_t index,
						 const void *_entries)
{
	const struct pack_midx_entry *entries = _entries;
	return &entries[index].oid;
}

/* Commit-walk callback: keep only commits present in the MIDX's entries. */
static void bitmap_show_commit(struct commit *commit, void *_data)
{
	struct bitmap_commit_cb *data = _data;
	int pos = oid_pos(&commit->object.oid, data->ctx->entries,
			  data->ctx->entries_nr,
			  bitmap_oid_access);
	if (pos < 0)
		return;

	ALLOC_GROW(data->commits, data->commits_nr + 1, data->commits_alloc);
	data->commits[data->commits_nr++] = commit;
}

/*
 * Read a refs snapshot file (one hex OID per line; a leading '+' marks
 * a preferred tip) and add each object as pending for the walk.
 * Dies on malformed input.
 */
static int read_refs_snapshot(const char *refs_snapshot,
			      struct rev_info *revs)
{
	struct strbuf buf = STRBUF_INIT;
	struct object_id oid;
	FILE *f = xfopen(refs_snapshot, "r");

	while (strbuf_getline(&buf, f) != EOF) {
		struct object *object;
		int preferred = 0;
		char *hex = buf.buf;
		const char *end = NULL;

		if (buf.len && *buf.buf == '+') {
			preferred = 1;
			hex = &buf.buf[1];
		}

		if (parse_oid_hex_algop(hex, &oid, &end, revs->repo->hash_algo) < 0)
			die(_("could not parse line: %s"), buf.buf);
		if (*end)
			die(_("malformed line: %s"), buf.buf);

		object = parse_object_or_die(revs->repo, &oid, NULL);
		if (preferred)
			object->flags |= NEEDS_BITMAP;

		add_pending_object(revs, object, "");
	}

	fclose(f);
	strbuf_release(&buf);
	return 0;
}

/*
 * Walk from all refs (or a refs snapshot) and return the array of
 * commits eligible to receive bitmaps; the caller owns the array.
 * Stores the count through indexed_commits_nr_p if non-NULL.
 */
static struct commit **find_commits_for_midx_bitmap(uint32_t *indexed_commits_nr_p,
						    const char *refs_snapshot,
						    struct write_midx_context *ctx)
{
	struct rev_info revs;
	struct bitmap_commit_cb cb = {0};

	trace2_region_enter("midx", "find_commits_for_midx_bitmap", ctx->repo);

	cb.ctx = ctx;

	repo_init_revisions(ctx->repo, &revs, NULL);
	if (refs_snapshot) {
		read_refs_snapshot(refs_snapshot, &revs);
	} else {
		setup_revisions(0, NULL, &revs, NULL);
		refs_for_each_ref(get_main_ref_store(ctx->repo),
				  add_ref_to_pending, &revs);
	}

	/*
	 * Skipping promisor objects here is intentional, since it only excludes
	 * them from the list of reachable commits that we want to select from
	 * when computing the selection of MIDX'd commits to receive bitmaps.
	 *
	 * Reachability bitmaps do require that their objects be closed under
	 * reachability, but fetching any objects missing from promisors at this
	 * point is too late. But, if one of those objects can be reached from
	 * an another object that is included in the bitmap, then we will
	 * complain later that we don't have reachability closure (and fail
	 * appropriately).
	 */
	fetch_if_missing = 0;
	revs.exclude_promisor_objects = 1;

	if (prepare_revision_walk(&revs))
		die(_("revision walk setup failed"));

	traverse_commit_list(&revs, bitmap_show_commit, NULL, &cb);
	if (indexed_commits_nr_p)
		*indexed_commits_nr_p = cb.commits_nr;

	release_revisions(&revs);

	trace2_region_leave("midx", "find_commits_for_midx_bitmap", ctx->repo);

	return cb.commits;
}

/*
 * Write the .bitmap file for this MIDX. Returns the (negative on
 * failure) result of building the bitmaps.
 */
static int write_midx_bitmap(struct write_midx_context *ctx,
			     const unsigned char *midx_hash,
			     struct packing_data *pdata,
			     struct commit **commits,
			     uint32_t commits_nr,
			     unsigned flags)
{
	int ret;
	uint16_t options = 0;
	struct bitmap_writer writer;
	struct pack_idx_entry **index;
	struct strbuf bitmap_name = STRBUF_INIT;

	trace2_region_enter("midx", "write_midx_bitmap", ctx->repo);

	if (ctx->incremental)
		get_split_midx_filename_ext(ctx->source, &bitmap_name,
					    midx_hash, MIDX_EXT_BITMAP);
	else
		get_midx_filename_ext(ctx->source, &bitmap_name,
				      midx_hash, MIDX_EXT_BITMAP);

	if (flags & MIDX_WRITE_BITMAP_HASH_CACHE)
		options |= BITMAP_OPT_HASH_CACHE;

	if (flags & MIDX_WRITE_BITMAP_LOOKUP_TABLE)
		options |= BITMAP_OPT_LOOKUP_TABLE;

	/*
	 * Build the MIDX-order index based on pdata.objects (which is already
	 * in MIDX order; c.f., 'midx_pack_order_cmp()' for the definition of
	 * this order).
	 */
	ALLOC_ARRAY(index, pdata->nr_objects);
	for (uint32_t i = 0; i < pdata->nr_objects; i++)
		index[i] = &pdata->objects[i].idx;

	bitmap_writer_init(&writer, ctx->repo, pdata,
			   ctx->incremental ? ctx->base_midx : NULL);
	bitmap_writer_show_progress(&writer, flags & MIDX_PROGRESS);
	bitmap_writer_build_type_index(&writer, index);

	/*
	 * bitmap_writer_finish expects objects in lex order, but pack_order
	 * gives us exactly that. use it directly instead of re-sorting the
	 * array.
	 *
	 * This changes the order of objects in 'index' between
	 * bitmap_writer_build_type_index and bitmap_writer_finish.
	 *
	 * The same re-ordering takes place in the single-pack bitmap code via
	 * write_idx_file(), which is called by finish_tmp_packfile(), which
	 * happens between bitmap_writer_build_type_index() and
	 * bitmap_writer_finish().
	 */
	for (uint32_t i = 0; i < pdata->nr_objects; i++)
		index[ctx->pack_order[i]] = &pdata->objects[i].idx;

	bitmap_writer_select_commits(&writer, commits, commits_nr);
	ret = bitmap_writer_build(&writer);
	if (ret < 0)
		goto cleanup;

	bitmap_writer_set_checksum(&writer, midx_hash);
	bitmap_writer_finish(&writer, index, bitmap_name.buf, options);

cleanup:
	free(index);
	strbuf_release(&bitmap_name);
	bitmap_writer_free(&writer);

	trace2_region_leave("midx", "write_midx_bitmap", ctx->repo);

	return ret;
}
869 */ 870 ALLOC_ARRAY(index, pdata->nr_objects); 871 for (uint32_t i = 0; i < pdata->nr_objects; i++) 872 index[i] = &pdata->objects[i].idx; 873 874 bitmap_writer_init(&writer, ctx->repo, pdata, 875 ctx->incremental ? ctx->base_midx : NULL); 876 bitmap_writer_show_progress(&writer, flags & MIDX_PROGRESS); 877 bitmap_writer_build_type_index(&writer, index); 878 879 /* 880 * bitmap_writer_finish expects objects in lex order, but pack_order 881 * gives us exactly that. use it directly instead of re-sorting the 882 * array. 883 * 884 * This changes the order of objects in 'index' between 885 * bitmap_writer_build_type_index and bitmap_writer_finish. 886 * 887 * The same re-ordering takes place in the single-pack bitmap code via 888 * write_idx_file(), which is called by finish_tmp_packfile(), which 889 * happens between bitmap_writer_build_type_index() and 890 * bitmap_writer_finish(). 891 */ 892 for (uint32_t i = 0; i < pdata->nr_objects; i++) 893 index[ctx->pack_order[i]] = &pdata->objects[i].idx; 894 895 bitmap_writer_select_commits(&writer, commits, commits_nr); 896 ret = bitmap_writer_build(&writer); 897 if (ret < 0) 898 goto cleanup; 899 900 bitmap_writer_set_checksum(&writer, midx_hash); 901 bitmap_writer_finish(&writer, index, bitmap_name.buf, options); 902 903cleanup: 904 free(index); 905 strbuf_release(&bitmap_name); 906 bitmap_writer_free(&writer); 907 908 trace2_region_leave("midx", "write_midx_bitmap", ctx->repo); 909 910 return ret; 911} 912 913static int fill_packs_from_midx(struct write_midx_context *ctx) 914{ 915 struct multi_pack_index *m; 916 917 for (m = ctx->m; m; m = m->base_midx) { 918 uint32_t i; 919 920 for (i = 0; i < m->num_packs; i++) { 921 if (prepare_midx_pack(m, m->num_packs_in_base + i)) 922 return error(_("could not load pack")); 923 924 ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc); 925 fill_pack_info(&ctx->info[ctx->nr++], m->packs[i], 926 m->pack_names[i], 927 m->num_packs_in_base + i); 928 } 929 } 930 return 0; 931} 932 933static 
/*
 * File extensions used by a MIDX and its auxiliary files, paired as
 * (non-split name, split/chain-layer name). A NULL non_split entry means
 * the base MIDX file itself (named without an extension suffix) — see
 * link_midx_to_chain() below, which is this table's only user here.
 */
struct {
	const char *non_split;
	const char *split;
} midx_exts[] = {
	{NULL, MIDX_EXT_MIDX},
	{MIDX_EXT_BITMAP, MIDX_EXT_BITMAP},
	{MIDX_EXT_REV, MIDX_EXT_REV},
};

/*
 * Hard-link an existing non-chain MIDX (plus its .bitmap/.rev files, when
 * present) into the split-MIDX naming scheme so it can serve as the base
 * layer of a new chain. Missing source files (ENOENT) are silently
 * skipped; any other link(2) failure aborts with an error.
 *
 * Returns 0 on success or if there was nothing to link, negative on error.
 */
static int link_midx_to_chain(struct multi_pack_index *m)
{
	struct strbuf from = STRBUF_INIT;
	struct strbuf to = STRBUF_INIT;
	int ret = 0;
	size_t i;

	if (!m || m->has_chain) {
		/*
		 * Either no MIDX previously existed, or it was already
		 * part of a MIDX chain. In both cases, we have nothing
		 * to link, so return early.
		 */
		goto done;
	}

	for (i = 0; i < ARRAY_SIZE(midx_exts); i++) {
		const unsigned char *hash = get_midx_checksum(m);

		get_midx_filename_ext(m->source, &from,
				      hash, midx_exts[i].non_split);
		get_split_midx_filename_ext(m->source, &to, hash,
					    midx_exts[i].split);

		if (link(from.buf, to.buf) < 0 && errno != ENOENT) {
			ret = error_errno(_("unable to link '%s' to '%s'"),
					  from.buf, to.buf);
			goto done;
		}

		strbuf_reset(&from);
		strbuf_reset(&to);
	}

done:
	strbuf_release(&from);
	strbuf_release(&to);
	return ret;
}

/*
 * Remove MIDX files that are no longer referenced after a successful
 * write. 'hashes' lists the checksums (hex) of the layers to keep.
 */
static void clear_midx_files(struct odb_source *source,
			     const char **hashes, uint32_t hashes_nr,
			     unsigned incremental)
{
	/*
	 * if incremental:
	 *   - remove all non-incremental MIDX files
	 *   - remove any incremental MIDX files not in the current one
	 *
	 * if non-incremental:
	 *   - remove all incremental MIDX files
	 *   - remove any non-incremental MIDX files not matching the current
	 *     hash
	 */
	struct strbuf buf = STRBUF_INIT;
	const char *exts[] = { MIDX_EXT_BITMAP, MIDX_EXT_REV, MIDX_EXT_MIDX };
	uint32_t i, j;

	for (i = 0; i < ARRAY_SIZE(exts); i++) {
		clear_incremental_midx_files_ext(source, exts[i],
						 hashes, hashes_nr);
		for (j = 0; j < hashes_nr; j++)
			clear_midx_files_ext(source, exts[i], hashes[j]);
	}

	/*
	 * Finally drop the file belonging to the *other* mode: the plain
	 * multi-pack-index when writing incrementally, or the chain file
	 * when writing a full (non-incremental) MIDX.
	 */
	if (incremental)
		get_midx_filename(source, &buf);
	else
		get_midx_chain_filename(source, &buf);

	if (unlink(buf.buf) && errno != ENOENT)
		die_errno(_("failed to clear multi-pack-index at %s"), buf.buf);

	strbuf_release(&buf);
}

/*
 * Core MIDX writer. Collects packs (optionally restricted to
 * 'packs_to_include' or excluding 'packs_to_drop'), sorts their objects,
 * and writes a new multi-pack-index — either a full one, or a new
 * incremental layer when MIDX_WRITE_INCREMENTAL is set in 'flags'.
 * Optionally also writes a reverse index and/or a multi-pack bitmap.
 *
 * 'preferred_pack_name' (may be NULL) names the pack whose copies of
 * duplicated objects win; 'refs_snapshot' (may be NULL) is passed through
 * to commit selection for the bitmap.
 *
 * Returns 0 on success, -1 on failure.
 */
static int write_midx_internal(struct odb_source *source,
			       struct string_list *packs_to_include,
			       struct string_list *packs_to_drop,
			       const char *preferred_pack_name,
			       const char *refs_snapshot,
			       unsigned flags)
{
	struct repository *r = source->odb->repo;
	struct strbuf midx_name = STRBUF_INIT;
	unsigned char midx_hash[GIT_MAX_RAWSZ];
	uint32_t start_pack;
	struct hashfile *f = NULL;
	struct lock_file lk;
	struct tempfile *incr;
	struct write_midx_context ctx = {
		.preferred_pack_idx = NO_PREFERRED_PACK,
	};
	int bitmapped_packs_concat_len = 0;
	int pack_name_concat_len = 0;
	int dropped_packs = 0;
	int result = -1;
	const char **keep_hashes = NULL;
	struct chunkfile *cf;

	trace2_region_enter("midx", "write_midx_internal", r);

	ctx.repo = r;
	ctx.source = source;

	ctx.incremental = !!(flags & MIDX_WRITE_INCREMENTAL);

	/*
	 * Incremental layers are written to a mkstemp-style template under
	 * multi-pack-index.d; a full MIDX goes straight to its final name
	 * (protected below by a lockfile).
	 */
	if (ctx.incremental)
		strbuf_addf(&midx_name,
			    "%s/pack/multi-pack-index.d/tmp_midx_XXXXXX",
			    source->path);
	else
		get_midx_filename(source, &midx_name);
	if (safe_create_leading_directories(r, midx_name.buf))
		die_errno(_("unable to create leading directories of %s"),
			  midx_name.buf);

	if (!packs_to_include || ctx.incremental) {
		struct multi_pack_index *m = get_multi_pack_index(source);
		if (m && !midx_checksum_valid(m)) {
			warning(_("ignoring existing multi-pack-index; checksum mismatch"));
			m = NULL;
		}

		if (m) {
			/*
			 * Only reference an existing MIDX when not filtering
			 * which packs to include, since all packs and objects
			 * are copied blindly from an existing MIDX if one is
			 *
 present.
			 */
			if (ctx.incremental)
				ctx.base_midx = m;
			else if (!packs_to_include)
				ctx.m = m;
		}
	}

	ctx.nr = 0;
	/* Start with room for the existing MIDX's packs, or a small default. */
	ctx.alloc = ctx.m ? ctx.m->num_packs + ctx.m->num_packs_in_base : 16;
	ctx.info = NULL;
	ALLOC_ARRAY(ctx.info, ctx.alloc);

	if (ctx.incremental) {
		/*
		 * Count existing layers; each needs a loaded reverse index
		 * if we are going to write a bitmap on top of them.
		 */
		struct multi_pack_index *m = ctx.base_midx;
		while (m) {
			if (flags & MIDX_WRITE_BITMAP && load_midx_revindex(m)) {
				error(_("could not load reverse index for MIDX %s"),
				      hash_to_hex_algop(get_midx_checksum(m),
							m->source->odb->repo->hash_algo));
				goto cleanup;
			}
			ctx.num_multi_pack_indexes_before++;
			m = m->base_midx;
		}
	} else if (ctx.m && fill_packs_from_midx(&ctx)) {
		goto cleanup;
	}

	/* Packs at index >= start_pack are new (not carried over from ctx.m). */
	start_pack = ctx.nr;

	ctx.pack_paths_checked = 0;
	if (flags & MIDX_PROGRESS)
		ctx.progress = start_delayed_progress(r,
						      _("Adding packfiles to multi-pack-index"), 0);
	else
		ctx.progress = NULL;

	ctx.to_include = packs_to_include;

	for_each_file_in_pack_dir(source->path, add_pack_to_midx, &ctx);
	stop_progress(&ctx.progress);

	/*
	 * Fast path: nothing changed since the existing (full) MIDX was
	 * written, and we are neither filtering nor dropping packs.
	 */
	if ((ctx.m && ctx.nr == ctx.m->num_packs + ctx.m->num_packs_in_base) &&
	    !ctx.incremental &&
	    !(packs_to_include || packs_to_drop)) {
		struct bitmap_index *bitmap_git;
		int bitmap_exists;
		int want_bitmap = flags & MIDX_WRITE_BITMAP;

		bitmap_git = prepare_midx_bitmap_git(ctx.m);
		bitmap_exists = bitmap_git && bitmap_is_midx(bitmap_git);
		free_bitmap_index(bitmap_git);

		if (bitmap_exists || !want_bitmap) {
			/*
			 * The correct MIDX already exists, and so does a
			 * corresponding bitmap (or one wasn't requested).
			 */
			if (!want_bitmap)
				clear_midx_files_ext(source, "bitmap", NULL);
			result = 0;
			goto cleanup;
		}
	}

	if (ctx.incremental && !ctx.nr) {
		result = 0;
		goto cleanup; /* nothing to do */
	}

	/*
	 * Select the preferred pack: either the one the caller named, or —
	 * when writing a rev-index/bitmap — the oldest non-empty pack.
	 */
	if (preferred_pack_name) {
		ctx.preferred_pack_idx = NO_PREFERRED_PACK;

		for (size_t i = 0; i < ctx.nr; i++) {
			if (!cmp_idx_or_pack_name(preferred_pack_name,
						  ctx.info[i].pack_name)) {
				ctx.preferred_pack_idx = i;
				break;
			}
		}

		if (ctx.preferred_pack_idx == NO_PREFERRED_PACK)
			warning(_("unknown preferred pack: '%s'"),
				preferred_pack_name);
	} else if (ctx.nr &&
		   (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP))) {
		struct packed_git *oldest = ctx.info[0].p;
		ctx.preferred_pack_idx = 0;

		/*
		 * Attempt opening the pack index to populate num_objects.
		 * Ignore failures as they can be expected and are not
		 * fatal during this selection time.
		 */
		open_pack_index(oldest);

		if (packs_to_drop && packs_to_drop->nr)
			BUG("cannot write a MIDX bitmap during expiration");

		/*
		 * set a preferred pack when writing a bitmap to ensure that
		 * the pack from which the first object is selected in pseudo
		 * pack-order has all of its objects selected from that pack
		 * (and not another pack containing a duplicate)
		 */
		for (size_t i = 1; i < ctx.nr; i++) {
			struct packed_git *p = ctx.info[i].p;

			/*
			 * Also advance past 'oldest' if it turned out to be
			 * empty (num_objects == 0 after open_pack_index).
			 */
			if (!oldest->num_objects || p->mtime < oldest->mtime) {
				oldest = p;
				open_pack_index(oldest);
				ctx.preferred_pack_idx = i;
			}
		}

		if (!oldest->num_objects) {
			/*
			 * If all packs are empty, unset the preferred index.
			 * This is acceptable since there will be no duplicate
			 * objects to resolve, so the preferred value doesn't
			 * matter.
			 */
			ctx.preferred_pack_idx = NO_PREFERRED_PACK;
		}
	} else {
		/*
		 * otherwise don't mark any pack as preferred to avoid
		 * interfering with expiration logic below
		 */
		ctx.preferred_pack_idx = NO_PREFERRED_PACK;
	}

	/* A chosen preferred pack must be openable and non-empty. */
	if (ctx.preferred_pack_idx != NO_PREFERRED_PACK) {
		struct packed_git *preferred = ctx.info[ctx.preferred_pack_idx].p;

		if (open_pack_index(preferred))
			die(_("failed to open preferred pack %s"),
			    ctx.info[ctx.preferred_pack_idx].pack_name);

		if (!preferred->num_objects) {
			error(_("cannot select preferred pack %s with no objects"),
			      preferred->pack_name);
			goto cleanup;
		}
	}

	compute_sorted_entries(&ctx, start_pack);

	/*
	 * Count objects needing the large-offset chunk. Offsets above
	 * 2^31-1 are stored there; any offset above 2^32-1 makes the
	 * chunk mandatory.
	 */
	ctx.large_offsets_needed = 0;
	for (size_t i = 0; i < ctx.entries_nr; i++) {
		if (ctx.entries[i].offset > 0x7fffffff)
			ctx.num_large_offsets++;
		if (ctx.entries[i].offset > 0xffffffff)
			ctx.large_offsets_needed = 1;
	}

	QSORT(ctx.info, ctx.nr, pack_info_compare);

	/*
	 * Mark packs scheduled for dropping. Both ctx.info and
	 * packs_to_drop are sorted by name, so this is a linear merge.
	 */
	if (packs_to_drop && packs_to_drop->nr) {
		size_t drop_index = 0;
		int missing_drops = 0;

		for (size_t i = 0; i < ctx.nr && drop_index < packs_to_drop->nr; i++) {
			int cmp = strcmp(ctx.info[i].pack_name,
					 packs_to_drop->items[drop_index].string);

			if (!cmp) {
				drop_index++;
				ctx.info[i].expired = 1;
			} else if (cmp > 0) {
				error(_("did not see pack-file %s to drop"),
				      packs_to_drop->items[drop_index].string);
				drop_index++;
				missing_drops++;
				i--; /* re-test the same pack against the next drop */
			} else {
				ctx.info[i].expired = 0;
			}
		}

		if (missing_drops)
			goto cleanup;
	}

	/*
	 * pack_perm stores a permutation between pack-int-ids from the
	 * previous multi-pack-index to the new one we are writing:
	 *
	 * pack_perm[old_id] = new_id
	 */
	ALLOC_ARRAY(ctx.pack_perm, ctx.nr);
	for (size_t i = 0; i < ctx.nr; i++) {
		if
 (ctx.info[i].expired) {
			dropped_packs++;
			ctx.pack_perm[ctx.info[i].orig_pack_int_id] = PACK_EXPIRED;
		} else {
			/* Shift new ids down past any packs dropped so far. */
			ctx.pack_perm[ctx.info[i].orig_pack_int_id] = i - dropped_packs;
		}
	}

	/* Compute chunk sizes for surviving (non-expired) packs only. */
	for (size_t i = 0; i < ctx.nr; i++) {
		if (ctx.info[i].expired)
			continue;
		pack_name_concat_len += strlen(ctx.info[i].pack_name) + 1;
		bitmapped_packs_concat_len += 2 * sizeof(uint32_t);
	}

	/* Check that the preferred pack wasn't expired (if given). */
	if (preferred_pack_name) {
		struct pack_info *preferred = bsearch(preferred_pack_name,
						      ctx.info, ctx.nr,
						      sizeof(*ctx.info),
						      idx_or_pack_name_cmp);
		if (preferred) {
			uint32_t perm = ctx.pack_perm[preferred->orig_pack_int_id];
			if (perm == PACK_EXPIRED)
				warning(_("preferred pack '%s' is expired"),
					preferred_pack_name);
		}
	}

	/* Pad the pack-name chunk to the required alignment. */
	if (pack_name_concat_len % MIDX_CHUNK_ALIGNMENT)
		pack_name_concat_len += MIDX_CHUNK_ALIGNMENT -
					(pack_name_concat_len % MIDX_CHUNK_ALIGNMENT);

	if (ctx.nr - dropped_packs == 0) {
		error(_("no pack files to index."));
		goto cleanup;
	}

	if (!ctx.entries_nr) {
		if (flags & MIDX_WRITE_BITMAP)
			warning(_("refusing to write multi-pack .bitmap without any objects"));
		flags &= ~(MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP);
	}

	if (ctx.incremental) {
		/*
		 * For an incremental write, lock the chain file and write
		 * the new layer to a read-only tempfile that is renamed
		 * into place later.
		 */
		struct strbuf lock_name = STRBUF_INIT;

		get_midx_chain_filename(source, &lock_name);
		hold_lock_file_for_update(&lk, lock_name.buf, LOCK_DIE_ON_ERROR);
		strbuf_release(&lock_name);

		incr = mks_tempfile_m(midx_name.buf, 0444);
		if (!incr) {
			error(_("unable to create temporary MIDX layer"));
			goto cleanup;
		}

		if (adjust_shared_perm(r, get_tempfile_path(incr))) {
			error(_("unable to adjust shared permissions for '%s'"),
			      get_tempfile_path(incr));
			goto cleanup;
		}

		f = hashfd(r->hash_algo,
			   get_tempfile_fd(incr),
			   get_tempfile_path(incr));
	} else {
		/* Full MIDX: write through the lockfile itself. */
		hold_lock_file_for_update(&lk, midx_name.buf, LOCK_DIE_ON_ERROR);
		f = hashfd(r->hash_algo, get_lock_file_fd(&lk),
			   get_lock_file_path(&lk));
	}

	/*
	 * Register all chunks up front (the chunk-format API needs the
	 * table of contents before any chunk data is written).
	 */
	cf = init_chunkfile(f);

	add_chunk(cf, MIDX_CHUNKID_PACKNAMES, pack_name_concat_len,
		  write_midx_pack_names);
	add_chunk(cf, MIDX_CHUNKID_OIDFANOUT, MIDX_CHUNK_FANOUT_SIZE,
		  write_midx_oid_fanout);
	add_chunk(cf, MIDX_CHUNKID_OIDLOOKUP,
		  st_mult(ctx.entries_nr, r->hash_algo->rawsz),
		  write_midx_oid_lookup);
	add_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS,
		  st_mult(ctx.entries_nr, MIDX_CHUNK_OFFSET_WIDTH),
		  write_midx_object_offsets);

	if (ctx.large_offsets_needed)
		add_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS,
			  st_mult(ctx.num_large_offsets,
				  MIDX_CHUNK_LARGE_OFFSET_WIDTH),
			  write_midx_large_offsets);

	if (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP)) {
		ctx.pack_order = midx_pack_order(&ctx);
		add_chunk(cf, MIDX_CHUNKID_REVINDEX,
			  st_mult(ctx.entries_nr, sizeof(uint32_t)),
			  write_midx_revindex);
		add_chunk(cf, MIDX_CHUNKID_BITMAPPEDPACKS,
			  bitmapped_packs_concat_len,
			  write_midx_bitmapped_packs);
	}

	write_midx_header(r->hash_algo, f, get_num_chunks(cf),
			  ctx.nr - dropped_packs);
	write_chunkfile(cf, &ctx);

	/* midx_hash receives the trailing checksum of the new file. */
	finalize_hashfile(f, midx_hash, FSYNC_COMPONENT_PACK_METADATA,
			  CSUM_FSYNC | CSUM_HASH_IN_STREAM);
	free_chunkfile(cf);

	/* Separate .rev file is test-only; normally the chunk suffices. */
	if (flags & MIDX_WRITE_REV_INDEX &&
	    git_env_bool("GIT_TEST_MIDX_WRITE_REV", 0))
		write_midx_reverse_index(&ctx, midx_hash);

	if (flags & MIDX_WRITE_BITMAP) {
		struct packing_data pdata;
		struct commit **commits;
		uint32_t commits_nr;

		if (!ctx.entries_nr)
			BUG("cannot write a bitmap without any objects");

		prepare_midx_packing_data(&pdata, &ctx);

		commits = find_commits_for_midx_bitmap(&commits_nr,
						       refs_snapshot, &ctx);

		/*
		 * The previous steps translated the information from
		 * 'entries' into information suitable for constructing
		 * bitmaps. We no longer need that array, so clear it to
		 * reduce memory pressure.
		 */
		FREE_AND_NULL(ctx.entries);
		ctx.entries_nr = 0;

		if (write_midx_bitmap(&ctx,
				      midx_hash, &pdata, commits, commits_nr,
				      flags) < 0) {
			error(_("could not write multi-pack bitmap"));
			clear_packing_data(&pdata);
			free(commits);
			goto cleanup;
		}

		clear_packing_data(&pdata);
		free(commits);
	}
	/*
	 * NOTE: Do not use ctx.entries beyond this point, since it might
	 * have been freed in the previous if block.
	 */

	if (ctx.num_multi_pack_indexes_before == UINT32_MAX)
		die(_("too many multi-pack-indexes"));

	/* One hash per existing layer, plus one for the layer just written. */
	CALLOC_ARRAY(keep_hashes, ctx.num_multi_pack_indexes_before + 1);

	if (ctx.incremental) {
		FILE *chainf = fdopen_lock_file(&lk, "w");
		struct strbuf final_midx_name = STRBUF_INIT;
		struct multi_pack_index *m = ctx.base_midx;

		if (!chainf) {
			error_errno(_("unable to open multi-pack-index chain file"));
			goto cleanup;
		}

		if (link_midx_to_chain(ctx.base_midx) < 0)
			goto cleanup;

		/* Move the tempfile to its checksum-derived final name. */
		get_split_midx_filename_ext(source, &final_midx_name,
					    midx_hash, MIDX_EXT_MIDX);

		if (rename_tempfile(&incr, final_midx_name.buf) < 0) {
			error_errno(_("unable to rename new multi-pack-index layer"));
			goto cleanup;
		}

		strbuf_release(&final_midx_name);

		/* The new layer is last; base layers fill in from the tip down. */
		keep_hashes[ctx.num_multi_pack_indexes_before] =
			xstrdup(hash_to_hex_algop(midx_hash, r->hash_algo));

		for (uint32_t i = 0; i < ctx.num_multi_pack_indexes_before; i++) {
			uint32_t j = ctx.num_multi_pack_indexes_before - i - 1;

			keep_hashes[j] = xstrdup(hash_to_hex_algop(get_midx_checksum(m),
								   r->hash_algo));
			m = m->base_midx;
		}

		for
 (uint32_t i = 0; i <= ctx.num_multi_pack_indexes_before; i++)
			fprintf(get_lock_file_fp(&lk), "%s\n", keep_hashes[i]);
	} else {
		keep_hashes[ctx.num_multi_pack_indexes_before] =
			xstrdup(hash_to_hex_algop(midx_hash, r->hash_algo));
	}

	if (ctx.m || ctx.base_midx)
		close_object_store(ctx.repo->objects);

	/* Commits either the chain file (incremental) or the MIDX itself. */
	if (commit_lock_file(&lk) < 0)
		die_errno(_("could not write multi-pack-index"));

	clear_midx_files(source, keep_hashes,
			 ctx.num_multi_pack_indexes_before + 1,
			 ctx.incremental);
	result = 0;

cleanup:
	for (size_t i = 0; i < ctx.nr; i++) {
		if (ctx.info[i].p) {
			close_pack(ctx.info[i].p);
			free(ctx.info[i].p);
		}
		free(ctx.info[i].pack_name);
	}

	free(ctx.info);
	free(ctx.entries);
	free(ctx.pack_perm);
	free(ctx.pack_order);
	if (keep_hashes) {
		for (uint32_t i = 0; i <= ctx.num_multi_pack_indexes_before; i++)
			free((char *)keep_hashes[i]);
		free(keep_hashes);
	}
	strbuf_release(&midx_name);

	trace2_region_leave("midx", "write_midx_internal", r);

	return result;
}

/*
 * Public entry point: write a MIDX covering all packs in 'source'.
 * Thin wrapper around write_midx_internal() with no include/drop lists.
 */
int write_midx_file(struct odb_source *source,
		    const char *preferred_pack_name,
		    const char *refs_snapshot, unsigned flags)
{
	return write_midx_internal(source, NULL, NULL,
				   preferred_pack_name, refs_snapshot,
				   flags);
}

/*
 * Public entry point: write a MIDX covering only 'packs_to_include'.
 */
int write_midx_file_only(struct odb_source *source,
			 struct string_list *packs_to_include,
			 const char *preferred_pack_name,
			 const char *refs_snapshot, unsigned flags)
{
	return write_midx_internal(source, packs_to_include, NULL,
				   preferred_pack_name, refs_snapshot, flags);
}

/*
 * Delete packfiles that the MIDX references but whose objects are all
 * found elsewhere (reference count of zero), then rewrite the MIDX
 * without them. Returns 0 on success or when there is nothing to do.
 */
int expire_midx_packs(struct odb_source *source, unsigned flags)
{
	uint32_t i, *count, result = 0;
	struct string_list packs_to_drop = STRING_LIST_INIT_DUP;
	struct multi_pack_index *m = get_multi_pack_index(source);
	struct progress
 *progress = NULL;

	if (!m)
		return 0;

	if (m->base_midx)
		die(_("cannot expire packs from an incremental multi-pack-index"));

	/* count[i] = number of MIDX objects resolved to pack i. */
	CALLOC_ARRAY(count, m->num_packs);

	if (flags & MIDX_PROGRESS)
		progress = start_delayed_progress(
			source->odb->repo,
			_("Counting referenced objects"),
			m->num_objects);
	for (i = 0; i < m->num_objects; i++) {
		uint32_t pack_int_id = nth_midxed_pack_int_id(m, i);
		count[pack_int_id]++;
		display_progress(progress, i + 1);
	}
	stop_progress(&progress);

	if (flags & MIDX_PROGRESS)
		progress = start_delayed_progress(
			source->odb->repo,
			_("Finding and deleting unreferenced packfiles"),
			m->num_packs);
	for (i = 0; i < m->num_packs; i++) {
		char *pack_name;
		display_progress(progress, i + 1);

		if (count[i])
			continue;

		if (prepare_midx_pack(m, i))
			continue;

		/* Never expire .keep or cruft packs. */
		if (m->packs[i]->pack_keep || m->packs[i]->is_cruft)
			continue;

		/* Copy the name first: close_pack() invalidates m->packs[i]. */
		pack_name = xstrdup(m->packs[i]->pack_name);
		close_pack(m->packs[i]);

		string_list_insert(&packs_to_drop, m->pack_names[i]);
		unlink_pack_path(pack_name, 0);
		free(pack_name);
	}
	stop_progress(&progress);

	free(count);

	if (packs_to_drop.nr)
		result = write_midx_internal(source, NULL,
					     &packs_to_drop, NULL, NULL, flags);

	string_list_clear(&packs_to_drop, 0);

	return result;
}

/* Per-pack bookkeeping used when choosing packs to repack in batches. */
struct repack_info {
	timestamp_t mtime;
	uint32_t referenced_objects;
	uint32_t pack_int_id;
};

/* qsort comparator: order repack_info entries oldest-mtime first. */
static int compare_by_mtime(const void *a_, const void *b_)
{
	const struct repack_info *a, *b;

	a = (const struct repack_info *)a_;
	b = (const struct repack_info *)b_;

	if (a->mtime < b->mtime)
		return -1;
	if (a->mtime > b->mtime)
		return 1;
	return 0;
}

/*
 * Decide whether pack 'pack_int_id' is eligible for repacking: it must
 * load, must not be cruft, must be non-empty, and .keep packs are only
 * eligible when 'pack_kept_objects' is set.
 */
static int want_included_pack(struct multi_pack_index *m,
			      int pack_kept_objects,
			      uint32_t pack_int_id)
{
	struct packed_git *p;
	if (prepare_midx_pack(m, pack_int_id))
		return 0;
	p = m->packs[pack_int_id];
	if (!pack_kept_objects && p->pack_keep)
		return 0;
	if (p->is_cruft)
		return 0;
	if (open_pack_index(p) || !p->num_objects)
		return 0;
	return 1;
}

/* Mark every eligible pack in 'm' for repacking (no batch-size limit). */
static void fill_included_packs_all(struct repository *r,
				    struct multi_pack_index *m,
				    unsigned char *include_pack)
{
	uint32_t i;
	int pack_kept_objects = 0;

	repo_config_get_bool(r, "repack.packkeptobjects", &pack_kept_objects);

	for (i = 0; i < m->num_packs; i++) {
		if (!want_included_pack(m, pack_kept_objects, i))
			continue;

		include_pack[i] = 1;
	}
}

/*
 * Mark eligible packs for repacking, oldest first, until the estimated
 * total size of the objects to be rewritten reaches 'batch_size' bytes.
 * Packs whose estimate alone meets the batch size are skipped.
 */
static void fill_included_packs_batch(struct repository *r,
				      struct multi_pack_index *m,
				      unsigned char *include_pack,
				      size_t batch_size)
{
	uint32_t i;
	size_t total_size;
	struct repack_info *pack_info;
	int pack_kept_objects = 0;

	CALLOC_ARRAY(pack_info, m->num_packs);

	repo_config_get_bool(r, "repack.packkeptobjects", &pack_kept_objects);

	for (i = 0; i < m->num_packs; i++) {
		pack_info[i].pack_int_id = i;

		if (prepare_midx_pack(m, i))
			continue;

		pack_info[i].mtime = m->packs[i]->mtime;
	}

	/* Count how many MIDX objects resolve to each pack. */
	for (i = 0; i < m->num_objects; i++) {
		uint32_t pack_int_id = nth_midxed_pack_int_id(m, i);
		pack_info[pack_int_id].referenced_objects++;
	}

	QSORT(pack_info, m->num_packs, compare_by_mtime);

	total_size = 0;
	for (i = 0; total_size < batch_size && i < m->num_packs; i++) {
		uint32_t pack_int_id = pack_info[i].pack_int_id;
		struct packed_git *p = m->packs[pack_int_id];
		uint64_t expected_size;

		if (!want_included_pack(m, pack_kept_objects, pack_int_id))
			continue;

		/*
		 * Use shifted integer arithmetic to calculate the
		 *
 expected pack size to ~4 significant digits without
		 * overflow for pack sizes less than 1PB.
		 */
		/* referenced/total fraction of pack_size, in 2^-14 units. */
		expected_size = (uint64_t)pack_info[i].referenced_objects << 14;
		expected_size /= p->num_objects;
		expected_size = u64_mult(expected_size, p->pack_size);
		/* Round to nearest by adding half the divisor before shifting. */
		expected_size = u64_add(expected_size, 1u << 13) >> 14;

		if (expected_size >= batch_size)
			continue;

		if (unsigned_add_overflows(total_size, (size_t)expected_size))
			total_size = SIZE_MAX;
		else
			total_size += expected_size;

		include_pack[pack_int_id] = 1;
	}

	free(pack_info);
}

/*
 * Repack the objects of selected MIDX packs into a single new pack by
 * feeding their OIDs to `git pack-objects`, then rewrite the MIDX.
 * With a non-zero 'batch_size', only a size-limited batch of the oldest
 * packs is repacked. Returns 0 on success or when fewer than two packs
 * qualify; non-zero on failure.
 */
int midx_repack(struct odb_source *source, size_t batch_size, unsigned flags)
{
	struct repository *r = source->odb->repo;
	int result = 0;
	uint32_t i, packs_to_repack = 0;
	unsigned char *include_pack;
	struct child_process cmd = CHILD_PROCESS_INIT;
	FILE *cmd_in;
	struct multi_pack_index *m = get_multi_pack_index(source);

	/*
	 * When updating the default for these configuration
	 * variables in builtin/repack.c, these must be adjusted
	 * to match.
	 */
	int delta_base_offset = 1;
	int use_delta_islands = 0;

	if (!m)
		return 0;
	if (m->base_midx)
		die(_("cannot repack an incremental multi-pack-index"));

	CALLOC_ARRAY(include_pack, m->num_packs);

	if (batch_size)
		fill_included_packs_batch(r, m, include_pack, batch_size);
	else
		fill_included_packs_all(r, m, include_pack);

	for (i = 0; i < m->num_packs; i++) {
		if (include_pack[i])
			packs_to_repack++;
	}
	/* Repacking a single pack (or none) would accomplish nothing. */
	if (packs_to_repack <= 1)
		goto cleanup;

	repo_config_get_bool(r, "repack.usedeltabaseoffset", &delta_base_offset);
	repo_config_get_bool(r, "repack.usedeltaislands", &use_delta_islands);

	strvec_push(&cmd.args, "pack-objects");

	/* pack-objects appends the hash + .pack/.idx to this base path. */
	strvec_pushf(&cmd.args, "%s/pack/pack", source->path);

	if (delta_base_offset)
		strvec_push(&cmd.args, "--delta-base-offset");
	if (use_delta_islands)
		strvec_push(&cmd.args, "--delta-islands");

	if (flags & MIDX_PROGRESS)
		strvec_push(&cmd.args, "--progress");
	else
		strvec_push(&cmd.args, "-q");

	cmd.git_cmd = 1;
	cmd.in = cmd.out = -1;

	if (start_command(&cmd)) {
		error(_("could not start pack-objects"));
		result = 1;
		goto cleanup;
	}

	cmd_in = xfdopen(cmd.in, "w");

	/* Feed pack-objects one OID per line for every included object. */
	for (i = 0; i < m->num_objects; i++) {
		struct object_id oid;
		uint32_t pack_int_id = nth_midxed_pack_int_id(m, i);

		if (!include_pack[pack_int_id])
			continue;

		nth_midxed_object_oid(&oid, m, i);
		fprintf(cmd_in, "%s\n", oid_to_hex(&oid));
	}
	fclose(cmd_in);

	if (finish_command(&cmd)) {
		error(_("could not finish pack-objects"));
		result = 1;
		goto cleanup;
	}

	/* Finally, regenerate the MIDX over the new set of packs. */
	result = write_midx_internal(source, NULL, NULL, NULL, NULL,
				     flags);

cleanup:
	free(include_pack);
	return result;
}