Git fork
at reftables-rust 3373 lines 89 kB view raw
1#define DISABLE_SIGN_COMPARE_WARNINGS 2 3#include "git-compat-util.h" 4#include "commit.h" 5#include "gettext.h" 6#include "hex.h" 7#include "strbuf.h" 8#include "tag.h" 9#include "diff.h" 10#include "revision.h" 11#include "progress.h" 12#include "list-objects.h" 13#include "pack.h" 14#include "pack-bitmap.h" 15#include "pack-revindex.h" 16#include "pack-objects.h" 17#include "packfile.h" 18#include "repository.h" 19#include "trace2.h" 20#include "odb.h" 21#include "list-objects-filter-options.h" 22#include "midx.h" 23#include "config.h" 24#include "pseudo-merge.h" 25 26/* 27 * An entry on the bitmap index, representing the bitmap for a given 28 * commit. 29 */ 30struct stored_bitmap { 31 struct object_id oid; 32 struct ewah_bitmap *root; 33 struct stored_bitmap *xor; 34 size_t map_pos; 35 int flags; 36}; 37 38/* 39 * The active bitmap index for a repository. By design, repositories only have 40 * a single bitmap index available (the index for the biggest packfile in 41 * the repository), since bitmap indexes need full closure. 42 * 43 * If there is more than one bitmap index available (e.g. because of alternates), 44 * the active bitmap index is the largest one. 45 */ 46struct bitmap_index { 47 /* 48 * The pack or multi-pack index (MIDX) that this bitmap index belongs 49 * to. 50 * 51 * Exactly one of these must be non-NULL; this specifies the object 52 * order used to interpret this bitmap. 53 */ 54 struct packed_git *pack; 55 struct multi_pack_index *midx; 56 57 /* 58 * If using a multi-pack index chain, 'base' points to the 59 * bitmap index corresponding to this bitmap's midx->base_midx. 60 * 61 * base_nr indicates how many layers precede this one, and is 62 * zero when base is NULL. 63 */ 64 struct bitmap_index *base; 65 uint32_t base_nr; 66 67 /* mmapped buffer of the whole bitmap index */ 68 unsigned char *map; 69 size_t map_size; /* size of the mmaped buffer */ 70 size_t map_pos; /* current position when loading the index */ 71 72 /* 73 * Type indexes. 74 * 75 * Each bitmap marks which objects in the packfile are of the given 76 * type. This provides type information when yielding the objects from 77 * the packfile during a walk, which allows for better delta bases. 78 */ 79 struct ewah_bitmap *commits; 80 struct ewah_bitmap *trees; 81 struct ewah_bitmap *blobs; 82 struct ewah_bitmap *tags; 83 84 /* 85 * Type index arrays when this bitmap is associated with an 86 * incremental multi-pack index chain. 87 * 88 * If n is the number of unique layers in the MIDX chain, then 89 * commits_all[n-1] is this structs 'commits' field, 90 * commits_all[n-2] is the commits field of this bitmap's 91 * 'base', and so on. 92 * 93 * When associated either with a non-incremental MIDX or a 94 * single packfile, these arrays each contain a single element. 95 */ 96 struct ewah_bitmap **commits_all; 97 struct ewah_bitmap **trees_all; 98 struct ewah_bitmap **blobs_all; 99 struct ewah_bitmap **tags_all; 100 101 /* Map from object ID -> `stored_bitmap` for all the bitmapped commits */ 102 kh_oid_map_t *bitmaps; 103 104 /* Number of bitmapped commits */ 105 uint32_t entry_count; 106 107 /* If not NULL, this is a name-hash cache pointing into map. */ 108 uint32_t *hashes; 109 110 /* The checksum of the packfile or MIDX; points into map. */ 111 const unsigned char *checksum; 112 113 /* 114 * If not NULL, this point into the commit table extension 115 * (within the memory mapped region `map`). 116 */ 117 unsigned char *table_lookup; 118 119 /* This contains the pseudo-merge cache within 'map' (if found). */ 120 struct pseudo_merge_map pseudo_merges; 121 122 /* 123 * Extended index. 124 * 125 * When trying to perform bitmap operations with objects that are not 126 * packed in `pack`, these objects are added to this "fake index" and 127 * are assumed to appear at the end of the packfile for all operations 128 */ 129 struct eindex { 130 struct object **objects; 131 uint32_t *hashes; 132 uint32_t count, alloc; 133 kh_oid_pos_t *positions; 134 } ext_index; 135 136 /* Bitmap result of the last performed walk */ 137 struct bitmap *result; 138 139 /* "have" bitmap from the last performed walk */ 140 struct bitmap *haves; 141 142 /* Version of the bitmap index */ 143 unsigned int version; 144}; 145 146static int pseudo_merges_satisfied_nr; 147static int pseudo_merges_cascades_nr; 148static int existing_bitmaps_hits_nr; 149static int existing_bitmaps_misses_nr; 150static int roots_with_bitmaps_nr; 151static int roots_without_bitmaps_nr; 152 153static struct ewah_bitmap *lookup_stored_bitmap(struct stored_bitmap *st) 154{ 155 struct ewah_bitmap *parent; 156 struct ewah_bitmap *composed; 157 158 if (!st->xor) 159 return st->root; 160 161 composed = ewah_pool_new(); 162 parent = lookup_stored_bitmap(st->xor); 163 ewah_xor(st->root, parent, composed); 164 165 ewah_pool_free(st->root); 166 st->root = composed; 167 st->xor = NULL; 168 169 return composed; 170} 171 172struct ewah_bitmap *read_bitmap(const unsigned char *map, 173 size_t map_size, size_t *map_pos) 174{ 175 struct ewah_bitmap *b = ewah_pool_new(); 176 177 ssize_t bitmap_size = ewah_read_mmap(b, map + *map_pos, 178 map_size - *map_pos); 179 180 if (bitmap_size < 0) { 181 error(_("failed to load bitmap index (corrupted?)")); 182 ewah_pool_free(b); 183 return NULL; 184 } 185 186 *map_pos += bitmap_size; 187 188 return b; 189} 190 191/* 192 * Read a bitmap from the current read position on the mmaped 193 * index, and increase the read position accordingly 194 */ 195static struct ewah_bitmap *read_bitmap_1(struct bitmap_index *index) 196{ 197 return read_bitmap(index->map, index->map_size, &index->map_pos); 198} 199 200static uint32_t bitmap_num_objects_total(struct bitmap_index *index) 201{ 202 if (index->midx) { 203 struct multi_pack_index *m = index->midx; 204 return m->num_objects + m->num_objects_in_base; 205 } 206 return index->pack->num_objects; 207} 208 209static uint32_t bitmap_num_objects(struct bitmap_index *index) 210{ 211 if (index->midx) 212 return index->midx->num_objects; 213 return index->pack->num_objects; 214} 215 216static struct repository *bitmap_repo(struct bitmap_index *bitmap_git) 217{ 218 if (bitmap_is_midx(bitmap_git)) 219 return bitmap_git->midx->source->odb->repo; 220 return bitmap_git->pack->repo; 221} 222 223static int load_bitmap_header(struct bitmap_index *index) 224{ 225 struct bitmap_disk_header *header = (void *)index->map; 226 const struct git_hash_algo *hash_algo = bitmap_repo(index)->hash_algo; 227 228 size_t header_size = sizeof(*header) - GIT_MAX_RAWSZ + hash_algo->rawsz; 229 230 if (index->map_size < header_size + hash_algo->rawsz) 231 return error(_("corrupted bitmap index (too small)")); 232 233 if (memcmp(header->magic, BITMAP_IDX_SIGNATURE, sizeof(BITMAP_IDX_SIGNATURE)) != 0) 234 return error(_("corrupted bitmap index file (wrong header)")); 235 236 index->version = ntohs(header->version); 237 if (index->version != 1) 238 return error(_("unsupported version '%d' for bitmap index file"), index->version); 239 240 /* Parse known bitmap format options */ 241 { 242 uint32_t flags = ntohs(header->options); 243 size_t cache_size = st_mult(bitmap_num_objects(index), sizeof(uint32_t)); 244 unsigned char *index_end = index->map + index->map_size - hash_algo->rawsz; 245 246 if ((flags & BITMAP_OPT_FULL_DAG) == 0) 247 BUG("unsupported options for bitmap index file " 248 "(Git requires BITMAP_OPT_FULL_DAG)"); 249 250 if (flags & BITMAP_OPT_HASH_CACHE) { 251 if (cache_size > index_end - index->map - header_size) 252 return error(_("corrupted bitmap index file (too short to fit hash cache)")); 253 index->hashes = (void *)(index_end - cache_size); 254 index_end -= cache_size; 255 } 256 257 if (flags & BITMAP_OPT_LOOKUP_TABLE) { 258 size_t table_size = st_mult(ntohl(header->entry_count), 259 BITMAP_LOOKUP_TABLE_TRIPLET_WIDTH); 260 if (table_size > index_end - index->map - header_size) 261 return error(_("corrupted bitmap index file (too short to fit lookup table)")); 262 if (git_env_bool("GIT_TEST_READ_COMMIT_TABLE", 1)) 263 index->table_lookup = (void *)(index_end - table_size); 264 index_end -= table_size; 265 } 266 267 if (flags & BITMAP_OPT_PSEUDO_MERGES) { 268 unsigned char *pseudo_merge_ofs; 269 size_t table_size; 270 uint32_t i; 271 272 if (sizeof(table_size) > index_end - index->map - header_size) 273 return error(_("corrupted bitmap index file (too short to fit pseudo-merge table header)")); 274 275 table_size = get_be64(index_end - 8); 276 if (table_size > index_end - index->map - header_size) 277 return error(_("corrupted bitmap index file (too short to fit pseudo-merge table)")); 278 279 if (git_env_bool("GIT_TEST_USE_PSEUDO_MERGES", 1)) { 280 const unsigned char *ext = (index_end - table_size); 281 282 index->pseudo_merges.map = index->map; 283 index->pseudo_merges.map_size = index->map_size; 284 index->pseudo_merges.commits = ext + get_be64(index_end - 16); 285 index->pseudo_merges.commits_nr = get_be32(index_end - 20); 286 index->pseudo_merges.nr = get_be32(index_end - 24); 287 288 if (st_add(st_mult(index->pseudo_merges.nr, 289 sizeof(uint64_t)), 290 24) > table_size) 291 return error(_("corrupted bitmap index file, pseudo-merge table too short")); 292 293 CALLOC_ARRAY(index->pseudo_merges.v, 294 index->pseudo_merges.nr); 295 296 pseudo_merge_ofs = index_end - 24 - 297 (index->pseudo_merges.nr * sizeof(uint64_t)); 298 for (i = 0; i < index->pseudo_merges.nr; i++) { 299 index->pseudo_merges.v[i].at = get_be64(pseudo_merge_ofs); 300 pseudo_merge_ofs += sizeof(uint64_t); 301 } 302 } 303 304 index_end -= table_size; 305 } 306 } 307 308 index->entry_count = ntohl(header->entry_count); 309 index->checksum = header->checksum; 310 index->map_pos += header_size; 311 return 0; 312} 313 314static struct stored_bitmap *store_bitmap(struct bitmap_index *index, 315 struct ewah_bitmap *root, 316 const struct object_id *oid, 317 struct stored_bitmap *xor_with, 318 int flags, size_t map_pos) 319{ 320 struct stored_bitmap *stored; 321 khiter_t hash_pos; 322 int ret; 323 324 stored = xmalloc(sizeof(struct stored_bitmap)); 325 stored->map_pos = map_pos; 326 stored->root = root; 327 stored->xor = xor_with; 328 stored->flags = flags; 329 oidcpy(&stored->oid, oid); 330 331 hash_pos = kh_put_oid_map(index->bitmaps, stored->oid, &ret); 332 333 /* 334 * A 0 return code means the insertion succeeded with no changes, 335 * because the SHA1 already existed on the map. This is bad, there 336 * shouldn't be duplicated commits in the index. 337 */ 338 if (ret == 0) { 339 error(_("duplicate entry in bitmap index: '%s'"), oid_to_hex(oid)); 340 return NULL; 341 } 342 343 kh_value(index->bitmaps, hash_pos) = stored; 344 return stored; 345} 346 347static inline uint32_t read_be32(const unsigned char *buffer, size_t *pos) 348{ 349 uint32_t result = get_be32(buffer + *pos); 350 (*pos) += sizeof(result); 351 return result; 352} 353 354static inline uint8_t read_u8(const unsigned char *buffer, size_t *pos) 355{ 356 return buffer[(*pos)++]; 357} 358 359#define MAX_XOR_OFFSET 160 360 361static int nth_bitmap_object_oid(struct bitmap_index *index, 362 struct object_id *oid, 363 uint32_t n) 364{ 365 if (index->midx) 366 return nth_midxed_object_oid(oid, index->midx, n) ? 0 : -1; 367 return nth_packed_object_id(oid, index->pack, n); 368} 369 370static int load_bitmap_entries_v1(struct bitmap_index *index) 371{ 372 uint32_t i; 373 struct stored_bitmap *recent_bitmaps[MAX_XOR_OFFSET] = { NULL }; 374 375 for (i = 0; i < index->entry_count; ++i) { 376 int xor_offset, flags; 377 struct ewah_bitmap *bitmap = NULL; 378 struct stored_bitmap *xor_bitmap = NULL; 379 uint32_t commit_idx_pos; 380 struct object_id oid; 381 size_t entry_map_pos; 382 383 if (index->map_size - index->map_pos < 6) 384 return error(_("corrupt ewah bitmap: truncated header for entry %d"), i); 385 386 entry_map_pos = index->map_pos; 387 commit_idx_pos = read_be32(index->map, &index->map_pos); 388 xor_offset = read_u8(index->map, &index->map_pos); 389 flags = read_u8(index->map, &index->map_pos); 390 391 if (nth_bitmap_object_oid(index, &oid, commit_idx_pos) < 0) 392 return error(_("corrupt ewah bitmap: commit index %u out of range"), 393 (unsigned)commit_idx_pos); 394 395 if (xor_offset > MAX_XOR_OFFSET || xor_offset > i) 396 return error(_("corrupted bitmap pack index")); 397 398 if (xor_offset > 0) { 399 xor_bitmap = recent_bitmaps[(i - xor_offset) % MAX_XOR_OFFSET]; 400 401 if (!xor_bitmap) 402 return error(_("invalid XOR offset in bitmap pack index")); 403 } 404 405 bitmap = read_bitmap_1(index); 406 if (!bitmap) 407 return -1; 408 409 recent_bitmaps[i % MAX_XOR_OFFSET] = 410 store_bitmap(index, bitmap, &oid, xor_bitmap, flags, 411 entry_map_pos); 412 } 413 414 return 0; 415} 416 417char *midx_bitmap_filename(struct multi_pack_index *midx) 418{ 419 struct strbuf buf = STRBUF_INIT; 420 if (midx->has_chain) 421 get_split_midx_filename_ext(midx->source, &buf, 422 get_midx_checksum(midx), 423 MIDX_EXT_BITMAP); 424 else 425 get_midx_filename_ext(midx->source, &buf, 426 get_midx_checksum(midx), 427 MIDX_EXT_BITMAP); 428 429 return strbuf_detach(&buf, NULL); 430} 431 432char *pack_bitmap_filename(struct packed_git *p) 433{ 434 size_t len; 435 436 if (!strip_suffix(p->pack_name, ".pack", &len)) 437 BUG("pack_name does not end in .pack"); 438 return xstrfmt("%.*s.bitmap", (int)len, p->pack_name); 439} 440 441static int open_midx_bitmap_1(struct bitmap_index *bitmap_git, 442 struct multi_pack_index *midx) 443{ 444 struct stat st; 445 char *bitmap_name = midx_bitmap_filename(midx); 446 int fd = git_open(bitmap_name); 447 uint32_t i; 448 449 if (fd < 0) { 450 if (errno != ENOENT) 451 warning_errno("cannot open '%s'", bitmap_name); 452 free(bitmap_name); 453 return -1; 454 } 455 free(bitmap_name); 456 457 if (fstat(fd, &st)) { 458 error_errno(_("cannot fstat bitmap file")); 459 close(fd); 460 return -1; 461 } 462 463 if (bitmap_git->pack || bitmap_git->midx) { 464 struct strbuf buf = STRBUF_INIT; 465 get_midx_filename(midx->source, &buf); 466 trace2_data_string("bitmap", bitmap_repo(bitmap_git), 467 "ignoring extra midx bitmap file", buf.buf); 468 close(fd); 469 strbuf_release(&buf); 470 return -1; 471 } 472 473 bitmap_git->midx = midx; 474 bitmap_git->map_size = xsize_t(st.st_size); 475 bitmap_git->map_pos = 0; 476 bitmap_git->map = xmmap(NULL, bitmap_git->map_size, PROT_READ, 477 MAP_PRIVATE, fd, 0); 478 close(fd); 479 480 if (load_bitmap_header(bitmap_git) < 0) 481 goto cleanup; 482 483 if (!hasheq(get_midx_checksum(bitmap_git->midx), bitmap_git->checksum, 484 bitmap_repo(bitmap_git)->hash_algo)) { 485 error(_("checksum doesn't match in MIDX and bitmap")); 486 goto cleanup; 487 } 488 489 if (load_midx_revindex(bitmap_git->midx)) { 490 warning(_("multi-pack bitmap is missing required reverse index")); 491 goto cleanup; 492 } 493 494 for (i = 0; i < bitmap_git->midx->num_packs + bitmap_git->midx->num_packs_in_base; i++) { 495 if (prepare_midx_pack(bitmap_git->midx, i)) { 496 warning(_("could not open pack %s"), 497 bitmap_git->midx->pack_names[i]); 498 goto cleanup; 499 } 500 } 501 502 if (midx->base_midx) { 503 bitmap_git->base = prepare_midx_bitmap_git(midx->base_midx); 504 bitmap_git->base_nr = bitmap_git->base->base_nr + 1; 505 } else { 506 bitmap_git->base_nr = 0; 507 } 508 509 return 0; 510 511cleanup: 512 munmap(bitmap_git->map, bitmap_git->map_size); 513 bitmap_git->map_size = 0; 514 bitmap_git->map_pos = 0; 515 bitmap_git->map = NULL; 516 bitmap_git->midx = NULL; 517 return -1; 518} 519 520static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git *packfile) 521{ 522 int fd; 523 struct stat st; 524 char *bitmap_name; 525 526 bitmap_name = pack_bitmap_filename(packfile); 527 fd = git_open(bitmap_name); 528 529 if (fd < 0) { 530 if (errno != ENOENT) 531 warning_errno("cannot open '%s'", bitmap_name); 532 free(bitmap_name); 533 return -1; 534 } 535 free(bitmap_name); 536 537 if (fstat(fd, &st)) { 538 error_errno(_("cannot fstat bitmap file")); 539 close(fd); 540 return -1; 541 } 542 543 if (bitmap_git->pack || bitmap_git->midx) { 544 trace2_data_string("bitmap", bitmap_repo(bitmap_git), 545 "ignoring extra bitmap file", 546 packfile->pack_name); 547 close(fd); 548 return -1; 549 } 550 551 if (!is_pack_valid(packfile)) { 552 close(fd); 553 return -1; 554 } 555 556 bitmap_git->pack = packfile; 557 bitmap_git->map_size = xsize_t(st.st_size); 558 bitmap_git->map = xmmap(NULL, bitmap_git->map_size, PROT_READ, MAP_PRIVATE, fd, 0); 559 bitmap_git->map_pos = 0; 560 bitmap_git->base_nr = 0; 561 close(fd); 562 563 if (load_bitmap_header(bitmap_git) < 0) { 564 munmap(bitmap_git->map, bitmap_git->map_size); 565 bitmap_git->map = NULL; 566 bitmap_git->map_size = 0; 567 bitmap_git->map_pos = 0; 568 bitmap_git->pack = NULL; 569 return -1; 570 } 571 572 trace2_data_string("bitmap", bitmap_repo(bitmap_git), 573 "opened bitmap file", packfile->pack_name); 574 return 0; 575} 576 577static int load_reverse_index(struct repository *r, struct bitmap_index *bitmap_git) 578{ 579 if (bitmap_is_midx(bitmap_git)) { 580 struct multi_pack_index *m; 581 582 /* 583 * The multi-pack-index's .rev file is already loaded via 584 * open_pack_bitmap_1(). 585 * 586 * But we still need to open the individual pack .rev files, 587 * since we will need to make use of them in pack-objects. 588 */ 589 for (m = bitmap_git->midx; m; m = m->base_midx) { 590 uint32_t i; 591 int ret; 592 593 for (i = 0; i < m->num_packs; i++) { 594 ret = load_pack_revindex(r, m->packs[i]); 595 if (ret) 596 return ret; 597 } 598 } 599 return 0; 600 } 601 return load_pack_revindex(r, bitmap_git->pack); 602} 603 604static void load_all_type_bitmaps(struct bitmap_index *bitmap_git) 605{ 606 struct bitmap_index *curr = bitmap_git; 607 size_t i = bitmap_git->base_nr; 608 609 ALLOC_ARRAY(bitmap_git->commits_all, bitmap_git->base_nr + 1); 610 ALLOC_ARRAY(bitmap_git->trees_all, bitmap_git->base_nr + 1); 611 ALLOC_ARRAY(bitmap_git->blobs_all, bitmap_git->base_nr + 1); 612 ALLOC_ARRAY(bitmap_git->tags_all, bitmap_git->base_nr + 1); 613 614 while (curr) { 615 bitmap_git->commits_all[i] = curr->commits; 616 bitmap_git->trees_all[i] = curr->trees; 617 bitmap_git->blobs_all[i] = curr->blobs; 618 bitmap_git->tags_all[i] = curr->tags; 619 620 curr = curr->base; 621 if (curr && !i) 622 BUG("unexpected number of bitmap layers, expected %"PRIu32, 623 bitmap_git->base_nr + 1); 624 i -= 1; 625 } 626} 627 628static int load_bitmap(struct repository *r, struct bitmap_index *bitmap_git, 629 int recursing) 630{ 631 assert(bitmap_git->map); 632 633 bitmap_git->bitmaps = kh_init_oid_map(); 634 bitmap_git->ext_index.positions = kh_init_oid_pos(); 635 636 if (load_reverse_index(r, bitmap_git)) 637 return -1; 638 639 if (!(bitmap_git->commits = read_bitmap_1(bitmap_git)) || 640 !(bitmap_git->trees = read_bitmap_1(bitmap_git)) || 641 !(bitmap_git->blobs = read_bitmap_1(bitmap_git)) || 642 !(bitmap_git->tags = read_bitmap_1(bitmap_git))) 643 return -1; 644 645 if (!bitmap_git->table_lookup && load_bitmap_entries_v1(bitmap_git) < 0) 646 return -1; 647 648 if (bitmap_git->base) { 649 if (!bitmap_is_midx(bitmap_git)) 650 BUG("non-MIDX bitmap has non-NULL base bitmap index"); 651 if (load_bitmap(r, bitmap_git->base, 1) < 0) 652 return -1; 653 } 654 655 if (!recursing) 656 load_all_type_bitmaps(bitmap_git); 657 658 return 0; 659} 660 661static int open_pack_bitmap(struct repository *r, 662 struct bitmap_index *bitmap_git) 663{ 664 struct packed_git *p; 665 int ret = -1; 666 667 for (p = packfile_store_get_all_packs(r->objects->packfiles); p; p = p->next) { 668 if (open_pack_bitmap_1(bitmap_git, p) == 0) { 669 ret = 0; 670 /* 671 * The only reason to keep looking is to report 672 * duplicates. 673 */ 674 if (!trace2_is_enabled()) 675 break; 676 } 677 } 678 679 return ret; 680} 681 682static int open_midx_bitmap(struct repository *r, 683 struct bitmap_index *bitmap_git) 684{ 685 struct odb_source *source; 686 int ret = -1; 687 688 assert(!bitmap_git->map); 689 690 odb_prepare_alternates(r->objects); 691 for (source = r->objects->sources; source; source = source->next) { 692 struct multi_pack_index *midx = get_multi_pack_index(source); 693 if (midx && !open_midx_bitmap_1(bitmap_git, midx)) 694 ret = 0; 695 } 696 return ret; 697} 698 699static int open_bitmap(struct repository *r, 700 struct bitmap_index *bitmap_git) 701{ 702 int found; 703 704 assert(!bitmap_git->map); 705 706 found = !open_midx_bitmap(r, bitmap_git); 707 708 /* 709 * these will all be skipped if we opened a midx bitmap; but run it 710 * anyway if tracing is enabled to report the duplicates 711 */ 712 if (!found || trace2_is_enabled()) 713 found |= !open_pack_bitmap(r, bitmap_git); 714 715 return found ? 0 : -1; 716} 717 718struct bitmap_index *prepare_bitmap_git(struct repository *r) 719{ 720 struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git)); 721 722 if (!open_bitmap(r, bitmap_git) && !load_bitmap(r, bitmap_git, 0)) 723 return bitmap_git; 724 725 free_bitmap_index(bitmap_git); 726 return NULL; 727} 728 729struct bitmap_index *prepare_midx_bitmap_git(struct multi_pack_index *midx) 730{ 731 struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git)); 732 733 if (!open_midx_bitmap_1(bitmap_git, midx)) 734 return bitmap_git; 735 736 free_bitmap_index(bitmap_git); 737 return NULL; 738} 739 740int bitmap_index_contains_pack(struct bitmap_index *bitmap, struct packed_git *pack) 741{ 742 for (; bitmap; bitmap = bitmap->base) { 743 if (bitmap_is_midx(bitmap)) { 744 for (size_t i = 0; i < bitmap->midx->num_packs; i++) 745 if (bitmap->midx->packs[i] == pack) 746 return 1; 747 } else if (bitmap->pack == pack) { 748 return 1; 749 } 750 } 751 752 return 0; 753} 754 755struct include_data { 756 struct bitmap_index *bitmap_git; 757 struct bitmap *base; 758 struct bitmap *seen; 759}; 760 761struct bitmap_lookup_table_triplet { 762 uint32_t commit_pos; 763 uint64_t offset; 764 uint32_t xor_row; 765}; 766 767struct bitmap_lookup_table_xor_item { 768 struct object_id oid; 769 uint64_t offset; 770}; 771 772/* 773 * Given a `triplet` struct pointer and pointer `p`, this 774 * function reads the triplet beginning at `p` into the struct. 775 * Note that this function assumes that there is enough memory 776 * left for filling the `triplet` struct from `p`. 777 */ 778static int bitmap_lookup_table_get_triplet_by_pointer(struct bitmap_lookup_table_triplet *triplet, 779 const unsigned char *p) 780{ 781 if (!triplet) 782 return -1; 783 784 triplet->commit_pos = get_be32(p); 785 p += sizeof(uint32_t); 786 triplet->offset = get_be64(p); 787 p += sizeof(uint64_t); 788 triplet->xor_row = get_be32(p); 789 return 0; 790} 791 792/* 793 * This function gets the raw triplet from `row`'th row in the 794 * lookup table and fills that data to the `triplet`. 795 */ 796static int bitmap_lookup_table_get_triplet(struct bitmap_index *bitmap_git, 797 uint32_t pos, 798 struct bitmap_lookup_table_triplet *triplet) 799{ 800 unsigned char *p = NULL; 801 if (pos >= bitmap_git->entry_count) 802 return error(_("corrupt bitmap lookup table: triplet position out of index")); 803 804 p = bitmap_git->table_lookup + st_mult(pos, BITMAP_LOOKUP_TABLE_TRIPLET_WIDTH); 805 806 return bitmap_lookup_table_get_triplet_by_pointer(triplet, p); 807} 808 809/* 810 * Searches for a matching triplet. `commit_pos` is a pointer 811 * to the wanted commit position value. `table_entry` points to 812 * a triplet in lookup table. The first 4 bytes of each 813 * triplet (pointed by `table_entry`) are compared with `*commit_pos`. 814 */ 815static int triplet_cmp(const void *commit_pos, const void *table_entry) 816{ 817 818 uint32_t a = *(uint32_t *)commit_pos; 819 uint32_t b = get_be32(table_entry); 820 if (a > b) 821 return 1; 822 else if (a < b) 823 return -1; 824 825 return 0; 826} 827 828static uint32_t bitmap_bsearch_pos(struct bitmap_index *bitmap_git, 829 struct object_id *oid, 830 uint32_t *result) 831{ 832 int found; 833 834 if (bitmap_is_midx(bitmap_git)) 835 found = bsearch_midx(oid, bitmap_git->midx, result); 836 else 837 found = bsearch_pack(oid, bitmap_git->pack, result); 838 839 return found; 840} 841 842/* 843 * `bsearch_triplet_by_pos` function searches for the raw triplet 844 * having commit position same as `commit_pos` and fills `triplet` 845 * object from the raw triplet. Returns 1 on success and 0 on 846 * failure. 847 */ 848static int bitmap_bsearch_triplet_by_pos(uint32_t commit_pos, 849 struct bitmap_index *bitmap_git, 850 struct bitmap_lookup_table_triplet *triplet) 851{ 852 unsigned char *p = bsearch(&commit_pos, bitmap_git->table_lookup, bitmap_git->entry_count, 853 BITMAP_LOOKUP_TABLE_TRIPLET_WIDTH, triplet_cmp); 854 855 if (!p) 856 return -1; 857 858 return bitmap_lookup_table_get_triplet_by_pointer(triplet, p); 859} 860 861static struct stored_bitmap *lazy_bitmap_for_commit(struct bitmap_index *bitmap_git, 862 struct commit *commit) 863{ 864 uint32_t commit_pos, xor_row; 865 uint64_t offset; 866 int flags; 867 struct bitmap_lookup_table_triplet triplet; 868 struct object_id *oid = &commit->object.oid; 869 struct ewah_bitmap *bitmap; 870 struct stored_bitmap *xor_bitmap = NULL; 871 const int bitmap_header_size = 6; 872 static struct bitmap_lookup_table_xor_item *xor_items = NULL; 873 static size_t xor_items_nr = 0, xor_items_alloc = 0; 874 static int is_corrupt = 0; 875 int xor_flags; 876 khiter_t hash_pos; 877 struct bitmap_lookup_table_xor_item *xor_item; 878 size_t entry_map_pos; 879 880 if (is_corrupt) 881 return NULL; 882 883 if (!bitmap_bsearch_pos(bitmap_git, oid, &commit_pos)) 884 return NULL; 885 886 if (bitmap_bsearch_triplet_by_pos(commit_pos, bitmap_git, &triplet) < 0) 887 return NULL; 888 889 xor_items_nr = 0; 890 offset = triplet.offset; 891 xor_row = triplet.xor_row; 892 893 while (xor_row != 0xffffffff) { 894 ALLOC_GROW(xor_items, xor_items_nr + 1, xor_items_alloc); 895 896 if (xor_items_nr + 1 >= bitmap_git->entry_count) { 897 error(_("corrupt bitmap lookup table: xor chain exceeds entry count")); 898 goto corrupt; 899 } 900 901 if (bitmap_lookup_table_get_triplet(bitmap_git, xor_row, &triplet) < 0) 902 goto corrupt; 903 904 xor_item = &xor_items[xor_items_nr]; 905 xor_item->offset = triplet.offset; 906 907 if (nth_bitmap_object_oid(bitmap_git, &xor_item->oid, triplet.commit_pos) < 0) { 908 error(_("corrupt bitmap lookup table: commit index %u out of range"), 909 triplet.commit_pos); 910 goto corrupt; 911 } 912 913 hash_pos = kh_get_oid_map(bitmap_git->bitmaps, xor_item->oid); 914 915 /* 916 * If desired bitmap is already stored, we don't need 917 * to iterate further. Because we know that bitmaps 918 * that are needed to be parsed to parse this bitmap 919 * has already been stored. So, assign this stored bitmap 920 * to the xor_bitmap. 921 */ 922 if (hash_pos < kh_end(bitmap_git->bitmaps) && 923 (xor_bitmap = kh_value(bitmap_git->bitmaps, hash_pos))) 924 break; 925 xor_items_nr++; 926 xor_row = triplet.xor_row; 927 } 928 929 while (xor_items_nr) { 930 xor_item = &xor_items[xor_items_nr - 1]; 931 bitmap_git->map_pos = xor_item->offset; 932 if (bitmap_git->map_size - bitmap_git->map_pos < bitmap_header_size) { 933 error(_("corrupt ewah bitmap: truncated header for bitmap of commit \"%s\""), 934 oid_to_hex(&xor_item->oid)); 935 goto corrupt; 936 } 937 938 entry_map_pos = bitmap_git->map_pos; 939 bitmap_git->map_pos += sizeof(uint32_t) + sizeof(uint8_t); 940 xor_flags = read_u8(bitmap_git->map, &bitmap_git->map_pos); 941 bitmap = read_bitmap_1(bitmap_git); 942 943 if (!bitmap) 944 goto corrupt; 945 946 xor_bitmap = store_bitmap(bitmap_git, bitmap, &xor_item->oid, 947 xor_bitmap, xor_flags, entry_map_pos); 948 xor_items_nr--; 949 } 950 951 bitmap_git->map_pos = offset; 952 if (bitmap_git->map_size - bitmap_git->map_pos < bitmap_header_size) { 953 error(_("corrupt ewah bitmap: truncated header for bitmap of commit \"%s\""), 954 oid_to_hex(oid)); 955 goto corrupt; 956 } 957 958 /* 959 * Don't bother reading the commit's index position or its xor 960 * offset: 961 * 962 * - The commit's index position is irrelevant to us, since 963 * load_bitmap_entries_v1 only uses it to learn the object 964 * id which is used to compute the hashmap's key. We already 965 * have an object id, so no need to look it up again. 966 * 967 * - The xor_offset is unusable for us, since it specifies how 968 * many entries previous to ours we should look at. This 969 * makes sense when reading the bitmaps sequentially (as in 970 * load_bitmap_entries_v1()), since we can keep track of 971 * each bitmap as we read them. 972 * 973 * But it can't work for us, since the bitmap's don't have a 974 * fixed size. So we learn the position of the xor'd bitmap 975 * from the commit table (and resolve it to a bitmap in the 976 * above if-statement). 977 * 978 * Instead, we can skip ahead and immediately read the flags and 979 * ewah bitmap. 980 */ 981 entry_map_pos = bitmap_git->map_pos; 982 bitmap_git->map_pos += sizeof(uint32_t) + sizeof(uint8_t); 983 flags = read_u8(bitmap_git->map, &bitmap_git->map_pos); 984 bitmap = read_bitmap_1(bitmap_git); 985 986 if (!bitmap) 987 goto corrupt; 988 989 return store_bitmap(bitmap_git, bitmap, oid, xor_bitmap, flags, 990 entry_map_pos); 991 992corrupt: 993 free(xor_items); 994 is_corrupt = 1; 995 return NULL; 996} 997 998static struct ewah_bitmap *find_bitmap_for_commit(struct bitmap_index *bitmap_git, 999 struct commit *commit, 1000 struct bitmap_index **found) 1001{ 1002 khiter_t hash_pos; 1003 if (!bitmap_git) 1004 return NULL; 1005 1006 hash_pos = kh_get_oid_map(bitmap_git->bitmaps, commit->object.oid); 1007 if (hash_pos >= kh_end(bitmap_git->bitmaps)) { 1008 struct stored_bitmap *bitmap = NULL; 1009 if (!bitmap_git->table_lookup) 1010 return find_bitmap_for_commit(bitmap_git->base, commit, 1011 found); 1012 1013 /* this is a fairly hot codepath - no trace2_region please */ 1014 /* NEEDSWORK: cache misses aren't recorded */ 1015 bitmap = lazy_bitmap_for_commit(bitmap_git, commit); 1016 if (!bitmap) 1017 return find_bitmap_for_commit(bitmap_git->base, commit, 1018 found); 1019 if (found) 1020 *found = bitmap_git; 1021 return lookup_stored_bitmap(bitmap); 1022 } 1023 if (found) 1024 *found = bitmap_git; 1025 return lookup_stored_bitmap(kh_value(bitmap_git->bitmaps, hash_pos)); 1026} 1027 1028struct ewah_bitmap *bitmap_for_commit(struct bitmap_index *bitmap_git, 1029 struct commit *commit) 1030{ 1031 return find_bitmap_for_commit(bitmap_git, commit, NULL); 1032} 1033 1034static inline int bitmap_position_extended(struct bitmap_index *bitmap_git, 1035 const struct object_id *oid) 1036{ 1037 kh_oid_pos_t *positions = bitmap_git->ext_index.positions; 1038 khiter_t pos = kh_get_oid_pos(positions, *oid); 1039 1040 if (pos < kh_end(positions)) { 1041 int bitmap_pos = kh_value(positions, pos); 1042 return bitmap_pos + bitmap_num_objects_total(bitmap_git); 1043 } 1044 1045 return -1; 1046} 1047 1048static inline int bitmap_position_packfile(struct bitmap_index *bitmap_git, 1049 const struct object_id *oid) 1050{ 1051 uint32_t pos; 1052 off_t offset = find_pack_entry_one(oid, bitmap_git->pack); 1053 if (!offset) 1054 return -1; 1055 1056 if (offset_to_pack_pos(bitmap_git->pack, offset, &pos) < 0) 1057 return -1; 1058 return pos; 1059} 1060 1061static int bitmap_position_midx(struct bitmap_index *bitmap_git, 1062 const struct object_id *oid) 1063{ 1064 uint32_t want, got; 1065 if (!bsearch_midx(oid, bitmap_git->midx, &want)) 1066 return -1; 1067 1068 if (midx_to_pack_pos(bitmap_git->midx, want, &got) < 0) 1069 return -1; 1070 return got; 1071} 1072 1073static int bitmap_position(struct bitmap_index *bitmap_git, 1074 const struct object_id *oid) 1075{ 1076 int pos; 1077 if (bitmap_is_midx(bitmap_git)) 1078 pos = bitmap_position_midx(bitmap_git, oid); 1079 else 1080 pos = bitmap_position_packfile(bitmap_git, oid); 1081 return (pos >= 0) ? pos : bitmap_position_extended(bitmap_git, oid); 1082} 1083 1084static int ext_index_add_object(struct bitmap_index *bitmap_git, 1085 struct object *object, const char *name) 1086{ 1087 struct eindex *eindex = &bitmap_git->ext_index; 1088 1089 khiter_t hash_pos; 1090 int hash_ret; 1091 int bitmap_pos; 1092 1093 hash_pos = kh_put_oid_pos(eindex->positions, object->oid, &hash_ret); 1094 if (hash_ret > 0) { 1095 if (eindex->count >= eindex->alloc) { 1096 eindex->alloc = (eindex->alloc + 16) * 3 / 2; 1097 REALLOC_ARRAY(eindex->objects, eindex->alloc); 1098 REALLOC_ARRAY(eindex->hashes, eindex->alloc); 1099 } 1100 1101 bitmap_pos = eindex->count; 1102 eindex->objects[eindex->count] = object; 1103 eindex->hashes[eindex->count] = pack_name_hash(name); 1104 kh_value(eindex->positions, hash_pos) = bitmap_pos; 1105 eindex->count++; 1106 } else { 1107 bitmap_pos = kh_value(eindex->positions, hash_pos); 1108 } 1109 1110 return bitmap_pos + bitmap_num_objects_total(bitmap_git); 1111} 1112 1113struct bitmap_show_data { 1114 struct bitmap_index *bitmap_git; 1115 struct bitmap *base; 1116}; 1117 1118static void show_object(struct object *object, const char *name, void *data_) 1119{ 1120 struct bitmap_show_data *data = data_; 1121 int bitmap_pos; 1122 1123 bitmap_pos = bitmap_position(data->bitmap_git, &object->oid); 1124 1125 if (bitmap_pos < 0) 1126 bitmap_pos = ext_index_add_object(data->bitmap_git, object, 1127 name); 1128 1129 bitmap_set(data->base, bitmap_pos); 1130} 1131 1132static void show_commit(struct commit *commit UNUSED, 1133 void *data UNUSED) 1134{ 1135} 1136 1137static unsigned apply_pseudo_merges_for_commit_1(struct bitmap_index *bitmap_git, 1138 struct bitmap *result, 1139 struct commit *commit, 1140 uint32_t commit_pos) 1141{ 1142 struct bitmap_index *curr = bitmap_git; 1143 int ret = 0; 1144 1145 while (curr) { 1146 ret += apply_pseudo_merges_for_commit(&curr->pseudo_merges, 1147 result, commit, 1148 commit_pos); 1149 curr = curr->base; 1150 } 1151 1152 if (ret) 1153 pseudo_merges_satisfied_nr += ret; 1154 1155 return ret; 1156} 1157 1158static int add_to_include_set(struct bitmap_index *bitmap_git, 1159 struct include_data *data, 1160 struct commit *commit, 1161 int bitmap_pos) 1162{ 1163 struct ewah_bitmap *partial; 1164 1165 if (data->seen && bitmap_get(data->seen, bitmap_pos)) 1166 return 0; 1167 1168 if (bitmap_get(data->base, bitmap_pos)) 1169 return 0; 1170 1171 partial = bitmap_for_commit(bitmap_git, commit); 1172 if (partial) { 1173 existing_bitmaps_hits_nr++; 1174 1175 bitmap_or_ewah(data->base, partial); 1176 return 0; 1177 } 1178 1179 existing_bitmaps_misses_nr++; 1180 1181 bitmap_set(data->base, bitmap_pos); 1182 if (apply_pseudo_merges_for_commit_1(bitmap_git, data->base, commit, 1183 bitmap_pos)) 1184 return 0; 1185 1186 return 1; 1187} 1188 1189static int should_include(struct commit *commit, void *_data) 1190{ 1191 struct include_data *data = _data; 1192 int bitmap_pos; 1193 1194 bitmap_pos = bitmap_position(data->bitmap_git, &commit->object.oid); 1195 if (bitmap_pos < 0) 1196 bitmap_pos = ext_index_add_object(data->bitmap_git, 1197 (struct object *)commit, 1198 NULL); 1199 1200 if (!add_to_include_set(data->bitmap_git, data, commit, bitmap_pos)) { 1201 struct commit_list *parent = commit->parents; 1202 1203 while (parent) { 1204 parent->item->object.flags |= SEEN; 1205 parent = parent->next; 1206 } 1207 1208 return 0; 1209 } 1210 1211 return 1; 1212} 1213 1214static int should_include_obj(struct object *obj, void *_data) 1215{ 1216 struct include_data *data = _data; 1217 int bitmap_pos; 1218 1219 bitmap_pos = bitmap_position(data->bitmap_git, &obj->oid); 1220 if (bitmap_pos < 0) 1221 return 1; 1222 if ((data->seen && bitmap_get(data->seen, bitmap_pos)) || 1223 bitmap_get(data->base, bitmap_pos)) { 1224 obj->flags |= SEEN; 1225 return 0; 1226 } 1227 return 1; 1228} 1229 1230static int add_commit_to_bitmap(struct bitmap_index *bitmap_git, 1231 struct bitmap **base, 1232 struct commit *commit) 1233{ 1234 struct ewah_bitmap *or_with = bitmap_for_commit(bitmap_git, commit); 1235 1236 if (!or_with) { 1237 existing_bitmaps_misses_nr++; 1238 return 0; 1239 } 1240 1241 existing_bitmaps_hits_nr++; 1242 1243 if (!*base) 1244 *base = ewah_to_bitmap(or_with); 1245 else 1246 bitmap_or_ewah(*base, or_with); 1247 1248 return 1; 1249} 1250 1251static struct bitmap *fill_in_bitmap(struct bitmap_index *bitmap_git, 1252 struct rev_info *revs, 1253 struct bitmap *base, 1254 struct bitmap *seen) 1255{ 1256 struct include_data incdata; 1257 struct bitmap_show_data show_data; 1258 1259 if (!base) 1260 base = bitmap_new(); 1261 1262 incdata.bitmap_git = bitmap_git; 1263 incdata.base = base; 1264 incdata.seen = seen; 1265 1266 revs->include_check = should_include; 1267 revs->include_check_obj = should_include_obj; 1268 revs->include_check_data = &incdata; 1269 1270 if (prepare_revision_walk(revs)) 1271 die(_("revision walk setup failed")); 1272 1273 show_data.bitmap_git = bitmap_git; 1274 show_data.base = base; 1275 1276 traverse_commit_list(revs, show_commit, show_object, &show_data); 1277 1278 revs->include_check = NULL; 1279 revs->include_check_obj = NULL; 1280 revs->include_check_data = NULL; 1281 1282 return base; 1283} 1284 1285struct bitmap_boundary_cb { 1286 struct bitmap_index *bitmap_git; 1287 struct bitmap *base; 1288 1289 struct object_array boundary; 1290}; 1291 1292static void show_boundary_commit(struct commit *commit, void *_data) 1293{ 1294 struct bitmap_boundary_cb *data = _data; 1295 1296 if (commit->object.flags & BOUNDARY) 1297 add_object_array(&commit->object, "", &data->boundary); 1298 1299 if (commit->object.flags & UNINTERESTING) { 1300 if (bitmap_walk_contains(data->bitmap_git, data->base, 1301 &commit->object.oid)) 1302 return; 1303 1304 add_commit_to_bitmap(data->bitmap_git, &data->base, commit); 1305 } 1306} 1307 1308static void show_boundary_object(struct object *object UNUSED, 1309 const char *name UNUSED, 1310 void *data UNUSED) 1311{ 1312 BUG("should not be called"); 1313} 1314 1315static unsigned cascade_pseudo_merges_1(struct bitmap_index *bitmap_git, 1316 struct bitmap *result, 1317 struct bitmap *roots) 1318{ 1319 int ret = cascade_pseudo_merges(&bitmap_git->pseudo_merges, 1320 result, roots); 1321 if (ret) { 1322 pseudo_merges_cascades_nr++; 1323 pseudo_merges_satisfied_nr += ret; 1324 } 1325 1326 return ret; 1327} 1328 1329static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git, 1330 struct rev_info *revs, 1331 struct object_list *roots) 1332{ 1333 struct bitmap_boundary_cb cb; 1334 struct object_list *root; 1335 struct repository *repo; 1336 unsigned int i; 1337 unsigned int tmp_blobs, tmp_trees, tmp_tags; 1338 int any_missing = 0; 1339 int existing_bitmaps = 0; 1340 1341 cb.bitmap_git = bitmap_git; 1342 cb.base = bitmap_new(); 1343 object_array_init(&cb.boundary); 1344 1345 repo = bitmap_repo(bitmap_git); 1346 1347 revs->ignore_missing_links = 1; 1348 1349 if (bitmap_git->pseudo_merges.nr) { 1350 struct bitmap *roots_bitmap = bitmap_new(); 1351 struct object_list *objects = NULL; 1352 1353 for (objects = roots; objects; objects = objects->next) { 1354 struct object *object = objects->item; 1355 int pos; 1356 1357 pos = bitmap_position(bitmap_git, &object->oid); 1358 if (pos < 0) 1359 continue; 1360 1361 bitmap_set(roots_bitmap, pos); 1362 } 1363 1364 cascade_pseudo_merges_1(bitmap_git, cb.base, roots_bitmap); 1365 bitmap_free(roots_bitmap); 1366 } 1367 1368 /* 1369 * OR in any existing reachability bitmaps among `roots` into 1370 * `cb.base`. 1371 */ 1372 for (root = roots; root; root = root->next) { 1373 struct object *object = root->item; 1374 if (object->type != OBJ_COMMIT || 1375 bitmap_walk_contains(bitmap_git, cb.base, &object->oid)) 1376 continue; 1377 1378 if (add_commit_to_bitmap(bitmap_git, &cb.base, 1379 (struct commit *)object)) { 1380 existing_bitmaps = 1; 1381 continue; 1382 } 1383 1384 any_missing = 1; 1385 } 1386 1387 if (!any_missing) 1388 goto cleanup; 1389 1390 if (existing_bitmaps) 1391 cascade_pseudo_merges_1(bitmap_git, cb.base, NULL); 1392 1393 tmp_blobs = revs->blob_objects; 1394 tmp_trees = revs->tree_objects; 1395 tmp_tags = revs->tag_objects; 1396 revs->blob_objects = 0; 1397 revs->tree_objects = 0; 1398 revs->tag_objects = 0; 1399 1400 /* 1401 * We didn't have complete coverage of the roots. First setup a 1402 * revision walk to (a) OR in any bitmaps that are UNINTERESTING 1403 * between the tips and boundary, and (b) record the boundary. 1404 */ 1405 trace2_region_enter("pack-bitmap", "boundary-prepare", repo); 1406 if (prepare_revision_walk(revs)) 1407 die("revision walk setup failed"); 1408 trace2_region_leave("pack-bitmap", "boundary-prepare", repo); 1409 1410 trace2_region_enter("pack-bitmap", "boundary-traverse", repo); 1411 revs->boundary = 1; 1412 traverse_commit_list_filtered(revs, 1413 show_boundary_commit, 1414 show_boundary_object, 1415 &cb, NULL); 1416 revs->boundary = 0; 1417 trace2_region_leave("pack-bitmap", "boundary-traverse", repo); 1418 1419 revs->blob_objects = tmp_blobs; 1420 revs->tree_objects = tmp_trees; 1421 revs->tag_objects = tmp_tags; 1422 1423 reset_revision_walk(); 1424 clear_object_flags(repo, UNINTERESTING); 1425 1426 /* 1427 * Then add the boundary commit(s) as fill-in traversal tips. 1428 */ 1429 trace2_region_enter("pack-bitmap", "boundary-fill-in", repo); 1430 for (i = 0; i < cb.boundary.nr; i++) { 1431 struct object *obj = cb.boundary.objects[i].item; 1432 if (bitmap_walk_contains(bitmap_git, cb.base, &obj->oid)) 1433 obj->flags |= SEEN; 1434 else 1435 add_pending_object(revs, obj, ""); 1436 } 1437 if (revs->pending.nr) 1438 cb.base = fill_in_bitmap(bitmap_git, revs, cb.base, NULL); 1439 trace2_region_leave("pack-bitmap", "boundary-fill-in", repo); 1440 1441cleanup: 1442 object_array_clear(&cb.boundary); 1443 revs->ignore_missing_links = 0; 1444 1445 return cb.base; 1446} 1447 1448struct ewah_bitmap *pseudo_merge_bitmap_for_commit(struct bitmap_index *bitmap_git, 1449 struct commit *commit) 1450{ 1451 struct commit_list *p; 1452 struct bitmap *parents; 1453 struct pseudo_merge *match = NULL; 1454 1455 if (!bitmap_git->pseudo_merges.nr) 1456 return NULL; 1457 1458 parents = bitmap_new(); 1459 1460 for (p = commit->parents; p; p = p->next) { 1461 int pos = bitmap_position(bitmap_git, &p->item->object.oid); 1462 if (pos < 0 || pos >= bitmap_num_objects(bitmap_git)) 1463 goto done; 1464 1465 /* 1466 * Use bitmap-relative positions instead of offsetting 1467 * by bitmap_git->num_objects_in_base because we use 1468 * this to find a match in pseudo_merge_for_parents(), 1469 * and pseudo-merge groups cannot span multiple bitmap 1470 * layers. 1471 */ 1472 bitmap_set(parents, pos); 1473 } 1474 1475 match = pseudo_merge_for_parents(&bitmap_git->pseudo_merges, parents); 1476 1477done: 1478 bitmap_free(parents); 1479 if (match) 1480 return pseudo_merge_bitmap(&bitmap_git->pseudo_merges, match); 1481 1482 return NULL; 1483} 1484 1485static void unsatisfy_all_pseudo_merges(struct bitmap_index *bitmap_git) 1486{ 1487 uint32_t i; 1488 for (i = 0; i < bitmap_git->pseudo_merges.nr; i++) 1489 bitmap_git->pseudo_merges.v[i].satisfied = 0; 1490} 1491 1492static struct bitmap *find_objects(struct bitmap_index *bitmap_git, 1493 struct rev_info *revs, 1494 struct object_list *roots, 1495 struct bitmap *seen) 1496{ 1497 struct bitmap *base = NULL; 1498 int needs_walk = 0; 1499 unsigned existing_bitmaps = 0; 1500 1501 struct object_list *not_mapped = NULL; 1502 1503 unsatisfy_all_pseudo_merges(bitmap_git); 1504 1505 if (bitmap_git->pseudo_merges.nr) { 1506 struct bitmap *roots_bitmap = bitmap_new(); 1507 struct object_list *objects = NULL; 1508 1509 for (objects = roots; objects; objects = objects->next) { 1510 struct object *object = objects->item; 1511 int pos; 1512 1513 pos = bitmap_position(bitmap_git, &object->oid); 1514 if (pos < 0) 1515 continue; 1516 1517 bitmap_set(roots_bitmap, pos); 1518 } 1519 1520 base = bitmap_new(); 1521 cascade_pseudo_merges_1(bitmap_git, base, roots_bitmap); 1522 bitmap_free(roots_bitmap); 1523 } 1524 1525 /* 1526 * Go through all the roots for the walk. The ones that have bitmaps 1527 * on the bitmap index will be `or`ed together to form an initial 1528 * global reachability analysis. 1529 * 1530 * The ones without bitmaps in the index will be stored in the 1531 * `not_mapped_list` for further processing. 1532 */ 1533 while (roots) { 1534 struct object *object = roots->item; 1535 1536 roots = roots->next; 1537 1538 if (base) { 1539 int pos = bitmap_position(bitmap_git, &object->oid); 1540 if (pos > 0 && bitmap_get(base, pos)) { 1541 object->flags |= SEEN; 1542 continue; 1543 } 1544 } 1545 1546 if (object->type == OBJ_COMMIT && 1547 add_commit_to_bitmap(bitmap_git, &base, (struct commit *)object)) { 1548 object->flags |= SEEN; 1549 existing_bitmaps = 1; 1550 continue; 1551 } 1552 1553 object_list_insert(object, &not_mapped); 1554 } 1555 1556 /* 1557 * Best case scenario: We found bitmaps for all the roots, 1558 * so the resulting `or` bitmap has the full reachability analysis 1559 */ 1560 if (!not_mapped) 1561 return base; 1562 1563 roots = not_mapped; 1564 1565 if (existing_bitmaps) 1566 cascade_pseudo_merges_1(bitmap_git, base, NULL); 1567 1568 /* 1569 * Let's iterate through all the roots that don't have bitmaps to 1570 * check if we can determine them to be reachable from the existing 1571 * global bitmap. 1572 * 1573 * If we cannot find them in the existing global bitmap, we'll need 1574 * to push them to an actual walk and run it until we can confirm 1575 * they are reachable 1576 */ 1577 while (roots) { 1578 struct object *object = roots->item; 1579 int pos; 1580 1581 roots = roots->next; 1582 pos = bitmap_position(bitmap_git, &object->oid); 1583 1584 if (pos < 0 || base == NULL || !bitmap_get(base, pos)) { 1585 object->flags &= ~UNINTERESTING; 1586 add_pending_object(revs, object, ""); 1587 needs_walk = 1; 1588 1589 roots_without_bitmaps_nr++; 1590 } else { 1591 object->flags |= SEEN; 1592 1593 roots_with_bitmaps_nr++; 1594 } 1595 } 1596 1597 if (needs_walk) { 1598 /* 1599 * This fill-in traversal may walk over some objects 1600 * again, since we have already traversed in order to 1601 * find the boundary. 1602 * 1603 * But this extra walk should be extremely cheap, since 1604 * all commit objects are loaded into memory, and 1605 * because we skip walking to parents that are 1606 * UNINTERESTING, since it will be marked in the haves 1607 * bitmap already (or it has an on-disk bitmap, since 1608 * OR-ing it in covers all of its ancestors). 1609 */ 1610 base = fill_in_bitmap(bitmap_git, revs, base, seen); 1611 } 1612 1613 object_list_free(&not_mapped); 1614 1615 return base; 1616} 1617 1618static void show_extended_objects(struct bitmap_index *bitmap_git, 1619 struct rev_info *revs, 1620 show_reachable_fn show_reach) 1621{ 1622 struct bitmap *objects = bitmap_git->result; 1623 struct eindex *eindex = &bitmap_git->ext_index; 1624 uint32_t i; 1625 1626 for (i = 0; i < eindex->count; ++i) { 1627 struct object *obj; 1628 1629 if (!bitmap_get(objects, 1630 st_add(bitmap_num_objects_total(bitmap_git), 1631 i))) 1632 continue; 1633 1634 obj = eindex->objects[i]; 1635 if ((obj->type == OBJ_BLOB && !revs->blob_objects) || 1636 (obj->type == OBJ_TREE && !revs->tree_objects) || 1637 (obj->type == OBJ_TAG && !revs->tag_objects)) 1638 continue; 1639 1640 show_reach(&obj->oid, obj->type, 0, eindex->hashes[i], NULL, 0, NULL); 1641 } 1642} 1643 1644static void init_type_iterator(struct ewah_or_iterator *it, 1645 struct bitmap_index *bitmap_git, 1646 enum object_type type) 1647{ 1648 switch (type) { 1649 case OBJ_COMMIT: 1650 ewah_or_iterator_init(it, bitmap_git->commits_all, 1651 bitmap_git->base_nr + 1); 1652 break; 1653 1654 case OBJ_TREE: 1655 ewah_or_iterator_init(it, bitmap_git->trees_all, 1656 bitmap_git->base_nr + 1); 1657 break; 1658 1659 case OBJ_BLOB: 1660 ewah_or_iterator_init(it, bitmap_git->blobs_all, 1661 bitmap_git->base_nr + 1); 1662 break; 1663 1664 case OBJ_TAG: 1665 ewah_or_iterator_init(it, bitmap_git->tags_all, 1666 bitmap_git->base_nr + 1); 1667 break; 1668 1669 default: 1670 BUG("object type %d not stored by bitmap type index", type); 1671 break; 1672 } 1673} 1674 1675static void show_objects_for_type( 1676 struct bitmap_index *bitmap_git, 1677 struct bitmap *objects, 1678 enum object_type object_type, 1679 show_reachable_fn show_reach, 1680 void *payload) 1681{ 1682 size_t i = 0; 1683 uint32_t offset; 1684 1685 struct ewah_or_iterator it; 1686 eword_t filter; 1687 1688 init_type_iterator(&it, bitmap_git, object_type); 1689 1690 for (i = 0; i < objects->word_alloc && 1691 ewah_or_iterator_next(&filter, &it); i++) { 1692 eword_t word = objects->words[i] & filter; 1693 size_t pos = (i * BITS_IN_EWORD); 1694 1695 if (!word) 1696 continue; 1697 1698 for (offset = 0; offset < BITS_IN_EWORD; ++offset) { 1699 struct packed_git *pack; 1700 struct object_id oid; 1701 uint32_t hash = 0, index_pos; 1702 off_t ofs; 1703 1704 if ((word >> offset) == 0) 1705 break; 1706 1707 offset += ewah_bit_ctz64(word >> offset); 1708 1709 if (bitmap_is_midx(bitmap_git)) { 1710 struct multi_pack_index *m = bitmap_git->midx; 1711 uint32_t pack_id; 1712 1713 index_pos = pack_pos_to_midx(m, pos + offset); 1714 ofs = nth_midxed_offset(m, index_pos); 1715 nth_midxed_object_oid(&oid, m, index_pos); 1716 1717 pack_id = nth_midxed_pack_int_id(m, index_pos); 1718 pack = nth_midxed_pack(bitmap_git->midx, pack_id); 1719 } else { 1720 index_pos = pack_pos_to_index(bitmap_git->pack, pos + offset); 1721 ofs = pack_pos_to_offset(bitmap_git->pack, pos + offset); 1722 nth_bitmap_object_oid(bitmap_git, &oid, index_pos); 1723 1724 pack = bitmap_git->pack; 1725 } 1726 1727 if (bitmap_git->hashes) 1728 hash = get_be32(bitmap_git->hashes + index_pos); 1729 1730 show_reach(&oid, object_type, 0, hash, pack, ofs, payload); 1731 } 1732 } 1733 1734 ewah_or_iterator_release(&it); 1735} 1736 1737static int in_bitmapped_pack(struct bitmap_index *bitmap_git, 1738 struct object_list *roots) 1739{ 1740 while (roots) { 1741 struct object *object = roots->item; 1742 roots = roots->next; 1743 1744 if (bitmap_is_midx(bitmap_git)) { 1745 if (bsearch_midx(&object->oid, bitmap_git->midx, NULL)) 1746 return 1; 1747 } else { 1748 if (find_pack_entry_one(&object->oid, bitmap_git->pack) > 0) 1749 return 1; 1750 } 1751 } 1752 1753 return 0; 1754} 1755 1756static struct bitmap *find_tip_objects(struct bitmap_index *bitmap_git, 1757 struct object_list *tip_objects, 1758 enum object_type type) 1759{ 1760 struct bitmap *result = bitmap_new(); 1761 struct object_list *p; 1762 1763 for (p = tip_objects; p; p = p->next) { 1764 int pos; 1765 1766 if (p->item->type != type) 1767 continue; 1768 1769 pos = bitmap_position(bitmap_git, &p->item->oid); 1770 if (pos < 0) 1771 continue; 1772 1773 bitmap_set(result, pos); 1774 } 1775 1776 return result; 1777} 1778 1779static void filter_bitmap_exclude_type(struct bitmap_index *bitmap_git, 1780 struct object_list *tip_objects, 1781 struct bitmap *to_filter, 1782 enum object_type type) 1783{ 1784 struct eindex *eindex = &bitmap_git->ext_index; 1785 struct bitmap *tips; 1786 struct ewah_or_iterator it; 1787 eword_t mask; 1788 uint32_t i; 1789 1790 /* 1791 * The non-bitmap version of this filter never removes 1792 * objects which the other side specifically asked for, 1793 * so we must match that behavior. 1794 */ 1795 tips = find_tip_objects(bitmap_git, tip_objects, type); 1796 1797 /* 1798 * We can use the type-level bitmap for 'type' to work in whole 1799 * words for the objects that are actually in the bitmapped 1800 * packfile. 1801 */ 1802 for (i = 0, init_type_iterator(&it, bitmap_git, type); 1803 i < to_filter->word_alloc && ewah_or_iterator_next(&mask, &it); 1804 i++) { 1805 if (i < tips->word_alloc) 1806 mask &= ~tips->words[i]; 1807 to_filter->words[i] &= ~mask; 1808 } 1809 1810 /* 1811 * Clear any objects that weren't in the packfile (and so would 1812 * not have been caught by the loop above. We'll have to check 1813 * them individually. 1814 */ 1815 for (i = 0; i < eindex->count; i++) { 1816 size_t pos = st_add(i, bitmap_num_objects_total(bitmap_git)); 1817 if (eindex->objects[i]->type == type && 1818 bitmap_get(to_filter, pos) && 1819 !bitmap_get(tips, pos)) 1820 bitmap_unset(to_filter, pos); 1821 } 1822 1823 ewah_or_iterator_release(&it); 1824 bitmap_free(tips); 1825} 1826 1827static void filter_bitmap_blob_none(struct bitmap_index *bitmap_git, 1828 struct object_list *tip_objects, 1829 struct bitmap *to_filter) 1830{ 1831 filter_bitmap_exclude_type(bitmap_git, tip_objects, to_filter, 1832 OBJ_BLOB); 1833} 1834 1835static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git, 1836 uint32_t pos) 1837{ 1838 unsigned long size; 1839 struct object_info oi = OBJECT_INFO_INIT; 1840 1841 oi.sizep = &size; 1842 1843 if (pos < bitmap_num_objects_total(bitmap_git)) { 1844 struct packed_git *pack; 1845 off_t ofs; 1846 1847 if (bitmap_is_midx(bitmap_git)) { 1848 uint32_t midx_pos = pack_pos_to_midx(bitmap_git->midx, pos); 1849 uint32_t pack_id = nth_midxed_pack_int_id(bitmap_git->midx, midx_pos); 1850 1851 pack = nth_midxed_pack(bitmap_git->midx, pack_id); 1852 ofs = nth_midxed_offset(bitmap_git->midx, midx_pos); 1853 } else { 1854 pack = bitmap_git->pack; 1855 ofs = pack_pos_to_offset(pack, pos); 1856 } 1857 1858 if (packed_object_info(bitmap_repo(bitmap_git), pack, ofs, 1859 &oi) < 0) { 1860 struct object_id oid; 1861 nth_bitmap_object_oid(bitmap_git, &oid, 1862 pack_pos_to_index(pack, pos)); 1863 die(_("unable to get size of %s"), oid_to_hex(&oid)); 1864 } 1865 } else { 1866 size_t eindex_pos = pos - bitmap_num_objects_total(bitmap_git); 1867 struct eindex *eindex = &bitmap_git->ext_index; 1868 struct object *obj = eindex->objects[eindex_pos]; 1869 if (odb_read_object_info_extended(bitmap_repo(bitmap_git)->objects, &obj->oid, 1870 &oi, 0) < 0) 1871 die(_("unable to get size of %s"), oid_to_hex(&obj->oid)); 1872 } 1873 1874 return size; 1875} 1876 1877static void filter_bitmap_blob_limit(struct bitmap_index *bitmap_git, 1878 struct object_list *tip_objects, 1879 struct bitmap *to_filter, 1880 unsigned long limit) 1881{ 1882 struct eindex *eindex = &bitmap_git->ext_index; 1883 struct bitmap *tips; 1884 struct ewah_or_iterator it; 1885 eword_t mask; 1886 uint32_t i; 1887 1888 tips = find_tip_objects(bitmap_git, tip_objects, OBJ_BLOB); 1889 1890 for (i = 0, init_type_iterator(&it, bitmap_git, OBJ_BLOB); 1891 i < to_filter->word_alloc && ewah_or_iterator_next(&mask, &it); 1892 i++) { 1893 eword_t word = to_filter->words[i] & mask; 1894 unsigned offset; 1895 1896 for (offset = 0; offset < BITS_IN_EWORD; offset++) { 1897 uint32_t pos; 1898 1899 if ((word >> offset) == 0) 1900 break; 1901 offset += ewah_bit_ctz64(word >> offset); 1902 pos = i * BITS_IN_EWORD + offset; 1903 1904 if (!bitmap_get(tips, pos) && 1905 get_size_by_pos(bitmap_git, pos) >= limit) 1906 bitmap_unset(to_filter, pos); 1907 } 1908 } 1909 1910 for (i = 0; i < eindex->count; i++) { 1911 size_t pos = st_add(i, bitmap_num_objects(bitmap_git)); 1912 if (eindex->objects[i]->type == OBJ_BLOB && 1913 bitmap_get(to_filter, pos) && 1914 !bitmap_get(tips, pos) && 1915 get_size_by_pos(bitmap_git, pos) >= limit) 1916 bitmap_unset(to_filter, pos); 1917 } 1918 1919 ewah_or_iterator_release(&it); 1920 bitmap_free(tips); 1921} 1922 1923static void filter_bitmap_tree_depth(struct bitmap_index *bitmap_git, 1924 struct object_list *tip_objects, 1925 struct bitmap *to_filter, 1926 unsigned long limit) 1927{ 1928 if (limit) 1929 BUG("filter_bitmap_tree_depth given non-zero limit"); 1930 1931 filter_bitmap_exclude_type(bitmap_git, tip_objects, to_filter, 1932 OBJ_TREE); 1933 filter_bitmap_exclude_type(bitmap_git, tip_objects, to_filter, 1934 OBJ_BLOB); 1935} 1936 1937static void filter_bitmap_object_type(struct bitmap_index *bitmap_git, 1938 struct object_list *tip_objects, 1939 struct bitmap *to_filter, 1940 enum object_type object_type) 1941{ 1942 if (object_type < OBJ_COMMIT || object_type > OBJ_TAG) 1943 BUG("filter_bitmap_object_type given invalid object"); 1944 1945 if (object_type != OBJ_TAG) 1946 filter_bitmap_exclude_type(bitmap_git, tip_objects, to_filter, OBJ_TAG); 1947 if (object_type != OBJ_COMMIT) 1948 filter_bitmap_exclude_type(bitmap_git, tip_objects, to_filter, OBJ_COMMIT); 1949 if (object_type != OBJ_TREE) 1950 filter_bitmap_exclude_type(bitmap_git, tip_objects, to_filter, OBJ_TREE); 1951 if (object_type != OBJ_BLOB) 1952 filter_bitmap_exclude_type(bitmap_git, tip_objects, to_filter, OBJ_BLOB); 1953} 1954 1955static int filter_bitmap(struct bitmap_index *bitmap_git, 1956 struct object_list *tip_objects, 1957 struct bitmap *to_filter, 1958 struct list_objects_filter_options *filter) 1959{ 1960 if (!filter || filter->choice == LOFC_DISABLED) 1961 return 0; 1962 1963 if (filter->choice == LOFC_BLOB_NONE) { 1964 if (bitmap_git) 1965 filter_bitmap_blob_none(bitmap_git, tip_objects, 1966 to_filter); 1967 return 0; 1968 } 1969 1970 if (filter->choice == LOFC_BLOB_LIMIT) { 1971 if (bitmap_git) 1972 filter_bitmap_blob_limit(bitmap_git, tip_objects, 1973 to_filter, 1974 filter->blob_limit_value); 1975 return 0; 1976 } 1977 1978 if (filter->choice == LOFC_TREE_DEPTH && 1979 filter->tree_exclude_depth == 0) { 1980 if (bitmap_git) 1981 filter_bitmap_tree_depth(bitmap_git, tip_objects, 1982 to_filter, 1983 filter->tree_exclude_depth); 1984 return 0; 1985 } 1986 1987 if (filter->choice == LOFC_OBJECT_TYPE) { 1988 if (bitmap_git) 1989 filter_bitmap_object_type(bitmap_git, tip_objects, 1990 to_filter, 1991 filter->object_type); 1992 return 0; 1993 } 1994 1995 if (filter->choice == LOFC_COMBINE) { 1996 int i; 1997 for (i = 0; i < filter->sub_nr; i++) { 1998 if (filter_bitmap(bitmap_git, tip_objects, to_filter, 1999 &filter->sub[i]) < 0) 2000 return -1; 2001 } 2002 return 0; 2003 } 2004 2005 /* filter choice not handled */ 2006 return -1; 2007} 2008 2009static int can_filter_bitmap(struct list_objects_filter_options *filter) 2010{ 2011 return !filter_bitmap(NULL, NULL, NULL, filter); 2012} 2013 2014 2015static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git, 2016 struct bitmap *result) 2017{ 2018 struct eindex *eindex = &bitmap_git->ext_index; 2019 uint32_t objects_nr; 2020 size_t i, pos; 2021 2022 objects_nr = bitmap_num_objects_total(bitmap_git); 2023 pos = objects_nr / BITS_IN_EWORD; 2024 2025 if (pos > result->word_alloc) 2026 pos = result->word_alloc; 2027 2028 memset(result->words, 0x00, sizeof(eword_t) * pos); 2029 for (i = pos * BITS_IN_EWORD; i < objects_nr; i++) 2030 bitmap_unset(result, i); 2031 2032 for (i = 0; i < eindex->count; ++i) { 2033 if (has_object_pack(bitmap_repo(bitmap_git), 2034 &eindex->objects[i]->oid)) 2035 bitmap_unset(result, objects_nr + i); 2036 } 2037} 2038 2039int for_each_bitmapped_object(struct bitmap_index *bitmap_git, 2040 struct list_objects_filter_options *filter, 2041 show_reachable_fn show_reach, 2042 void *payload) 2043{ 2044 struct bitmap *filtered_bitmap = NULL; 2045 uint32_t objects_nr; 2046 size_t full_word_count; 2047 int ret; 2048 2049 if (!can_filter_bitmap(filter)) { 2050 ret = -1; 2051 goto out; 2052 } 2053 2054 objects_nr = bitmap_num_objects(bitmap_git); 2055 full_word_count = objects_nr / BITS_IN_EWORD; 2056 2057 /* We start from the all-1 bitmap and then filter down from there. */ 2058 filtered_bitmap = bitmap_word_alloc(full_word_count + !!(objects_nr % BITS_IN_EWORD)); 2059 memset(filtered_bitmap->words, 0xff, full_word_count * sizeof(*filtered_bitmap->words)); 2060 for (size_t i = full_word_count * BITS_IN_EWORD; i < objects_nr; i++) 2061 bitmap_set(filtered_bitmap, i); 2062 2063 if (filter_bitmap(bitmap_git, NULL, filtered_bitmap, filter) < 0) { 2064 ret = -1; 2065 goto out; 2066 } 2067 2068 show_objects_for_type(bitmap_git, filtered_bitmap, 2069 OBJ_COMMIT, show_reach, payload); 2070 show_objects_for_type(bitmap_git, filtered_bitmap, 2071 OBJ_TREE, show_reach, payload); 2072 show_objects_for_type(bitmap_git, filtered_bitmap, 2073 OBJ_BLOB, show_reach, payload); 2074 show_objects_for_type(bitmap_git, filtered_bitmap, 2075 OBJ_TAG, show_reach, payload); 2076 2077 ret = 0; 2078out: 2079 bitmap_free(filtered_bitmap); 2080 return ret; 2081} 2082 2083struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, 2084 int filter_provided_objects) 2085{ 2086 unsigned int i; 2087 int use_boundary_traversal; 2088 2089 struct object_list *wants = NULL; 2090 struct object_list *haves = NULL; 2091 2092 struct bitmap *wants_bitmap = NULL; 2093 struct bitmap *haves_bitmap = NULL; 2094 2095 struct bitmap_index *bitmap_git; 2096 struct repository *repo; 2097 2098 /* 2099 * We can't do pathspec limiting with bitmaps, because we don't know 2100 * which commits are associated with which object changes (let alone 2101 * even which objects are associated with which paths). 2102 */ 2103 if (revs->prune) 2104 return NULL; 2105 2106 if (!can_filter_bitmap(&revs->filter)) 2107 return NULL; 2108 2109 /* try to open a bitmapped pack, but don't parse it yet 2110 * because we may not need to use it */ 2111 CALLOC_ARRAY(bitmap_git, 1); 2112 if (open_bitmap(revs->repo, bitmap_git) < 0) 2113 goto cleanup; 2114 2115 for (i = 0; i < revs->pending.nr; ++i) { 2116 struct object *object = revs->pending.objects[i].item; 2117 2118 if (object->type == OBJ_NONE) 2119 parse_object_or_die(revs->repo, &object->oid, NULL); 2120 2121 while (object->type == OBJ_TAG) { 2122 struct tag *tag = (struct tag *) object; 2123 2124 if (object->flags & UNINTERESTING) 2125 object_list_insert(object, &haves); 2126 else 2127 object_list_insert(object, &wants); 2128 2129 object = parse_object_or_die(revs->repo, get_tagged_oid(tag), NULL); 2130 object->flags |= (tag->object.flags & UNINTERESTING); 2131 } 2132 2133 if (object->flags & UNINTERESTING) 2134 object_list_insert(object, &haves); 2135 else 2136 object_list_insert(object, &wants); 2137 } 2138 2139 use_boundary_traversal = git_env_bool(GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL, -1); 2140 if (use_boundary_traversal < 0) { 2141 prepare_repo_settings(revs->repo); 2142 use_boundary_traversal = revs->repo->settings.pack_use_bitmap_boundary_traversal; 2143 } 2144 2145 if (!use_boundary_traversal) { 2146 /* 2147 * if we have a HAVES list, but none of those haves is contained 2148 * in the packfile that has a bitmap, we don't have anything to 2149 * optimize here 2150 */ 2151 if (haves && !in_bitmapped_pack(bitmap_git, haves)) 2152 goto cleanup; 2153 } 2154 2155 /* if we don't want anything, we're done here */ 2156 if (!wants) 2157 goto cleanup; 2158 2159 /* 2160 * now we're going to use bitmaps, so load the actual bitmap entries 2161 * from disk. this is the point of no return; after this the rev_list 2162 * becomes invalidated and we must perform the revwalk through bitmaps 2163 */ 2164 if (load_bitmap(revs->repo, bitmap_git, 0) < 0) 2165 goto cleanup; 2166 2167 if (!use_boundary_traversal) 2168 object_array_clear(&revs->pending); 2169 2170 repo = bitmap_repo(bitmap_git); 2171 2172 if (haves) { 2173 if (use_boundary_traversal) { 2174 trace2_region_enter("pack-bitmap", "haves/boundary", repo); 2175 haves_bitmap = find_boundary_objects(bitmap_git, revs, haves); 2176 trace2_region_leave("pack-bitmap", "haves/boundary", repo); 2177 } else { 2178 trace2_region_enter("pack-bitmap", "haves/classic", repo); 2179 revs->ignore_missing_links = 1; 2180 haves_bitmap = find_objects(bitmap_git, revs, haves, NULL); 2181 reset_revision_walk(); 2182 revs->ignore_missing_links = 0; 2183 trace2_region_leave("pack-bitmap", "haves/classic", repo); 2184 } 2185 2186 if (!haves_bitmap) 2187 BUG("failed to perform bitmap walk"); 2188 } 2189 2190 if (use_boundary_traversal) { 2191 object_array_clear(&revs->pending); 2192 reset_revision_walk(); 2193 } 2194 2195 wants_bitmap = find_objects(bitmap_git, revs, wants, haves_bitmap); 2196 2197 if (!wants_bitmap) 2198 BUG("failed to perform bitmap walk"); 2199 2200 if (haves_bitmap) 2201 bitmap_and_not(wants_bitmap, haves_bitmap); 2202 2203 filter_bitmap(bitmap_git, 2204 (revs->filter.choice && filter_provided_objects) ? NULL : wants, 2205 wants_bitmap, 2206 &revs->filter); 2207 2208 if (revs->unpacked) 2209 filter_packed_objects_from_bitmap(bitmap_git, wants_bitmap); 2210 2211 bitmap_git->result = wants_bitmap; 2212 bitmap_git->haves = haves_bitmap; 2213 2214 object_list_free(&wants); 2215 object_list_free(&haves); 2216 2217 trace2_data_intmax("bitmap", repo, "pseudo_merges_satisfied", 2218 pseudo_merges_satisfied_nr); 2219 trace2_data_intmax("bitmap", repo, "pseudo_merges_cascades", 2220 pseudo_merges_cascades_nr); 2221 trace2_data_intmax("bitmap", repo, "bitmap/hits", 2222 existing_bitmaps_hits_nr); 2223 trace2_data_intmax("bitmap", repo, "bitmap/misses", 2224 existing_bitmaps_misses_nr); 2225 trace2_data_intmax("bitmap", repo, "bitmap/roots_with_bitmap", 2226 roots_with_bitmaps_nr); 2227 trace2_data_intmax("bitmap", repo, "bitmap/roots_without_bitmap", 2228 roots_without_bitmaps_nr); 2229 2230 return bitmap_git; 2231 2232cleanup: 2233 free_bitmap_index(bitmap_git); 2234 object_list_free(&wants); 2235 object_list_free(&haves); 2236 return NULL; 2237} 2238 2239/* 2240 * -1 means "stop trying further objects"; 0 means we may or may not have 2241 * reused, but you can keep feeding bits. 2242 */ 2243static int try_partial_reuse(struct bitmap_index *bitmap_git, 2244 struct bitmapped_pack *pack, 2245 size_t bitmap_pos, 2246 uint32_t pack_pos, 2247 off_t offset, 2248 struct bitmap *reuse, 2249 struct pack_window **w_curs) 2250{ 2251 off_t delta_obj_offset; 2252 enum object_type type; 2253 unsigned long size; 2254 2255 if (pack_pos >= pack->p->num_objects) 2256 return -1; /* not actually in the pack */ 2257 2258 delta_obj_offset = offset; 2259 type = unpack_object_header(pack->p, w_curs, &offset, &size); 2260 if (type < 0) 2261 return -1; /* broken packfile, punt */ 2262 2263 if (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA) { 2264 off_t base_offset; 2265 uint32_t base_pos; 2266 uint32_t base_bitmap_pos; 2267 2268 /* 2269 * Find the position of the base object so we can look it up 2270 * in our bitmaps. If we can't come up with an offset, or if 2271 * that offset is not in the revidx, the pack is corrupt. 2272 * There's nothing we can do, so just punt on this object, 2273 * and the normal slow path will complain about it in 2274 * more detail. 2275 */ 2276 base_offset = get_delta_base(pack->p, w_curs, &offset, type, 2277 delta_obj_offset); 2278 if (!base_offset) 2279 return 0; 2280 2281 offset_to_pack_pos(pack->p, base_offset, &base_pos); 2282 2283 if (bitmap_is_midx(bitmap_git)) { 2284 /* 2285 * Cross-pack deltas are rejected for now, but could 2286 * theoretically be supported in the future. 2287 * 2288 * We would need to ensure that we're sending both 2289 * halves of the delta/base pair, regardless of whether 2290 * or not the two cross a pack boundary. If they do, 2291 * then we must convert the delta to an REF_DELTA to 2292 * refer back to the base in the other pack. 2293 * */ 2294 if (midx_pair_to_pack_pos(bitmap_git->midx, 2295 pack->pack_int_id, 2296 base_offset, 2297 &base_bitmap_pos) < 0) { 2298 return 0; 2299 } 2300 } else { 2301 if (offset_to_pack_pos(pack->p, base_offset, 2302 &base_pos) < 0) 2303 return 0; 2304 /* 2305 * We assume delta dependencies always point backwards. 2306 * This lets us do a single pass, and is basically 2307 * always true due to the way OFS_DELTAs work. You would 2308 * not typically find REF_DELTA in a bitmapped pack, 2309 * since we only bitmap packs we write fresh, and 2310 * OFS_DELTA is the default). But let's double check to 2311 * make sure the pack wasn't written with odd 2312 * parameters. 2313 */ 2314 if (base_pos >= pack_pos) 2315 return 0; 2316 base_bitmap_pos = pack->bitmap_pos + base_pos; 2317 } 2318 2319 /* 2320 * And finally, if we're not sending the base as part of our 2321 * reuse chunk, then don't send this object either. The base 2322 * would come after us, along with other objects not 2323 * necessarily in the pack, which means we'd need to convert 2324 * to REF_DELTA on the fly. Better to just let the normal 2325 * object_entry code path handle it. 2326 */ 2327 if (!bitmap_get(reuse, base_bitmap_pos)) 2328 return 0; 2329 } 2330 2331 /* 2332 * If we got here, then the object is OK to reuse. Mark it. 2333 */ 2334 bitmap_set(reuse, bitmap_pos); 2335 return 0; 2336} 2337 2338static void reuse_partial_packfile_from_bitmap_1(struct bitmap_index *bitmap_git, 2339 struct bitmapped_pack *pack, 2340 struct bitmap *reuse) 2341{ 2342 struct bitmap *result = bitmap_git->result; 2343 struct pack_window *w_curs = NULL; 2344 size_t pos = pack->bitmap_pos / BITS_IN_EWORD; 2345 2346 if (!pack->bitmap_pos) { 2347 /* 2348 * If we're processing the first (in the case of a MIDX, the 2349 * preferred pack) or the only (in the case of single-pack 2350 * bitmaps) pack, then we can reuse whole words at a time. 2351 * 2352 * This is because we know that any deltas in this range *must* 2353 * have their bases chosen from the same pack, since: 2354 * 2355 * - In the single pack case, there is no other pack to choose 2356 * them from. 2357 * 2358 * - In the MIDX case, the first pack is the preferred pack, so 2359 * all ties are broken in favor of that pack (i.e. the one 2360 * we're currently processing). So any duplicate bases will be 2361 * resolved in favor of the pack we're processing. 2362 */ 2363 while (pos < result->word_alloc && 2364 pos < pack->bitmap_nr / BITS_IN_EWORD && 2365 result->words[pos] == (eword_t)~0) 2366 pos++; 2367 memset(reuse->words, 0xFF, pos * sizeof(eword_t)); 2368 } 2369 2370 for (; pos < result->word_alloc; pos++) { 2371 eword_t word = result->words[pos]; 2372 size_t offset; 2373 2374 for (offset = 0; offset < BITS_IN_EWORD; offset++) { 2375 size_t bit_pos; 2376 uint32_t pack_pos; 2377 off_t ofs; 2378 2379 if (word >> offset == 0) 2380 break; 2381 2382 offset += ewah_bit_ctz64(word >> offset); 2383 2384 bit_pos = pos * BITS_IN_EWORD + offset; 2385 if (bit_pos < pack->bitmap_pos) 2386 continue; 2387 if (bit_pos >= pack->bitmap_pos + pack->bitmap_nr) 2388 goto done; 2389 2390 if (bitmap_is_midx(bitmap_git)) { 2391 uint32_t midx_pos; 2392 2393 midx_pos = pack_pos_to_midx(bitmap_git->midx, bit_pos); 2394 ofs = nth_midxed_offset(bitmap_git->midx, midx_pos); 2395 2396 if (offset_to_pack_pos(pack->p, ofs, &pack_pos) < 0) 2397 BUG("could not find object in pack %s " 2398 "at offset %"PRIuMAX" in MIDX", 2399 pack_basename(pack->p), (uintmax_t)ofs); 2400 } else { 2401 pack_pos = cast_size_t_to_uint32_t(st_sub(bit_pos, pack->bitmap_pos)); 2402 if (pack_pos >= pack->p->num_objects) 2403 BUG("advanced beyond the end of pack %s (%"PRIuMAX" > %"PRIu32")", 2404 pack_basename(pack->p), (uintmax_t)pack_pos, 2405 pack->p->num_objects); 2406 2407 ofs = pack_pos_to_offset(pack->p, pack_pos); 2408 } 2409 2410 if (try_partial_reuse(bitmap_git, pack, bit_pos, 2411 pack_pos, ofs, reuse, &w_curs) < 0) { 2412 /* 2413 * try_partial_reuse indicated we couldn't reuse 2414 * any bits, so there is no point in trying more 2415 * bits in the current word, or any other words 2416 * in result. 2417 * 2418 * Jump out of both loops to avoid future 2419 * unnecessary calls to try_partial_reuse. 2420 */ 2421 goto done; 2422 } 2423 } 2424 } 2425 2426done: 2427 unuse_pack(&w_curs); 2428} 2429 2430static int bitmapped_pack_cmp(const void *va, const void *vb) 2431{ 2432 const struct bitmapped_pack *a = va; 2433 const struct bitmapped_pack *b = vb; 2434 2435 if (a->bitmap_pos < b->bitmap_pos) 2436 return -1; 2437 if (a->bitmap_pos > b->bitmap_pos) 2438 return 1; 2439 return 0; 2440} 2441 2442void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, 2443 struct bitmapped_pack **packs_out, 2444 size_t *packs_nr_out, 2445 struct bitmap **reuse_out, 2446 int multi_pack_reuse) 2447{ 2448 struct repository *r = bitmap_repo(bitmap_git); 2449 struct bitmapped_pack *packs = NULL; 2450 struct bitmap *result = bitmap_git->result; 2451 struct bitmap *reuse; 2452 size_t i; 2453 size_t packs_nr = 0, packs_alloc = 0; 2454 size_t word_alloc; 2455 uint32_t objects_nr = 0; 2456 2457 assert(result); 2458 2459 load_reverse_index(r, bitmap_git); 2460 2461 if (!bitmap_is_midx(bitmap_git) || !bitmap_git->midx->chunk_bitmapped_packs) 2462 multi_pack_reuse = 0; 2463 2464 if (multi_pack_reuse) { 2465 struct multi_pack_index *m = bitmap_git->midx; 2466 for (i = 0; i < m->num_packs + m->num_packs_in_base; i++) { 2467 struct bitmapped_pack pack; 2468 if (nth_bitmapped_pack(bitmap_git->midx, &pack, i) < 0) { 2469 warning(_("unable to load pack: '%s', disabling pack-reuse"), 2470 bitmap_git->midx->pack_names[i]); 2471 free(packs); 2472 return; 2473 } 2474 2475 if (!pack.bitmap_nr) 2476 continue; 2477 2478 if (is_pack_valid(pack.p)) { 2479 ALLOC_GROW(packs, packs_nr + 1, packs_alloc); 2480 memcpy(&packs[packs_nr++], &pack, sizeof(pack)); 2481 } 2482 2483 objects_nr += pack.p->num_objects; 2484 } 2485 2486 QSORT(packs, packs_nr, bitmapped_pack_cmp); 2487 } else { 2488 struct packed_git *pack; 2489 uint32_t pack_int_id; 2490 2491 if (bitmap_is_midx(bitmap_git)) { 2492 struct multi_pack_index *m = bitmap_git->midx; 2493 uint32_t preferred_pack_pos; 2494 2495 while (m->base_midx) 2496 m = m->base_midx; 2497 2498 if (midx_preferred_pack(m, &preferred_pack_pos) < 0) { 2499 warning(_("unable to compute preferred pack, disabling pack-reuse")); 2500 return; 2501 } 2502 2503 pack = nth_midxed_pack(m, preferred_pack_pos); 2504 pack_int_id = preferred_pack_pos; 2505 } else { 2506 pack = bitmap_git->pack; 2507 /* 2508 * Any value for 'pack_int_id' will do here. When we 2509 * process the pack via try_partial_reuse(), we won't 2510 * use the `pack_int_id` field since we have a non-MIDX 2511 * bitmap. 2512 * 2513 * Use '-1' as a sentinel value to make it clear 2514 * that we do not expect to read this field. 2515 */ 2516 pack_int_id = -1; 2517 } 2518 2519 if (is_pack_valid(pack)) { 2520 ALLOC_GROW(packs, packs_nr + 1, packs_alloc); 2521 packs[packs_nr].p = pack; 2522 packs[packs_nr].pack_int_id = pack_int_id; 2523 packs[packs_nr].bitmap_nr = pack->num_objects; 2524 packs[packs_nr].bitmap_pos = 0; 2525 packs[packs_nr].from_midx = bitmap_git->midx; 2526 packs_nr++; 2527 } 2528 2529 objects_nr = pack->num_objects; 2530 } 2531 2532 if (!packs_nr) 2533 return; 2534 2535 word_alloc = objects_nr / BITS_IN_EWORD; 2536 if (objects_nr % BITS_IN_EWORD) 2537 word_alloc++; 2538 reuse = bitmap_word_alloc(word_alloc); 2539 2540 for (i = 0; i < packs_nr; i++) 2541 reuse_partial_packfile_from_bitmap_1(bitmap_git, &packs[i], reuse); 2542 2543 if (bitmap_is_empty(reuse)) { 2544 free(packs); 2545 bitmap_free(reuse); 2546 return; 2547 } 2548 2549 /* 2550 * Drop any reused objects from the result, since they will not 2551 * need to be handled separately. 2552 */ 2553 bitmap_and_not(result, reuse); 2554 *packs_out = packs; 2555 *packs_nr_out = packs_nr; 2556 *reuse_out = reuse; 2557} 2558 2559int bitmap_walk_contains(struct bitmap_index *bitmap_git, 2560 struct bitmap *bitmap, const struct object_id *oid) 2561{ 2562 int idx; 2563 2564 if (!bitmap) 2565 return 0; 2566 2567 idx = bitmap_position(bitmap_git, oid); 2568 return idx >= 0 && bitmap_get(bitmap, idx); 2569} 2570 2571void traverse_bitmap_commit_list(struct bitmap_index *bitmap_git, 2572 struct rev_info *revs, 2573 show_reachable_fn show_reachable) 2574{ 2575 assert(bitmap_git->result); 2576 2577 show_objects_for_type(bitmap_git, bitmap_git->result, 2578 OBJ_COMMIT, show_reachable, NULL); 2579 if (revs->tree_objects) 2580 show_objects_for_type(bitmap_git, bitmap_git->result, 2581 OBJ_TREE, show_reachable, NULL); 2582 if (revs->blob_objects) 2583 show_objects_for_type(bitmap_git, bitmap_git->result, 2584 OBJ_BLOB, show_reachable, NULL); 2585 if (revs->tag_objects) 2586 show_objects_for_type(bitmap_git, bitmap_git->result, 2587 OBJ_TAG, show_reachable, NULL); 2588 2589 show_extended_objects(bitmap_git, revs, show_reachable); 2590} 2591 2592static uint32_t count_object_type(struct bitmap_index *bitmap_git, 2593 enum object_type type) 2594{ 2595 struct bitmap *objects = bitmap_git->result; 2596 struct eindex *eindex = &bitmap_git->ext_index; 2597 2598 uint32_t i = 0, count = 0; 2599 struct ewah_or_iterator it; 2600 eword_t filter; 2601 2602 init_type_iterator(&it, bitmap_git, type); 2603 2604 while (i < objects->word_alloc && ewah_or_iterator_next(&filter, &it)) { 2605 eword_t word = objects->words[i++] & filter; 2606 count += ewah_bit_popcount64(word); 2607 } 2608 2609 for (i = 0; i < eindex->count; ++i) { 2610 if (eindex->objects[i]->type == type && 2611 bitmap_get(objects, 2612 st_add(bitmap_num_objects_total(bitmap_git), i))) 2613 count++; 2614 } 2615 2616 ewah_or_iterator_release(&it); 2617 2618 return count; 2619} 2620 2621void count_bitmap_commit_list(struct bitmap_index *bitmap_git, 2622 uint32_t *commits, uint32_t *trees, 2623 uint32_t *blobs, uint32_t *tags) 2624{ 2625 assert(bitmap_git->result); 2626 2627 if (commits) 2628 *commits = count_object_type(bitmap_git, OBJ_COMMIT); 2629 2630 if (trees) 2631 *trees = count_object_type(bitmap_git, OBJ_TREE); 2632 2633 if (blobs) 2634 *blobs = count_object_type(bitmap_git, OBJ_BLOB); 2635 2636 if (tags) 2637 *tags = count_object_type(bitmap_git, OBJ_TAG); 2638} 2639 2640struct bitmap_test_data { 2641 struct bitmap_index *bitmap_git; 2642 struct bitmap *base; 2643 struct bitmap *commits; 2644 struct bitmap *trees; 2645 struct bitmap *blobs; 2646 struct bitmap *tags; 2647 struct progress *prg; 2648 size_t seen; 2649 2650 struct bitmap_test_data *base_tdata; 2651}; 2652 2653static void test_bitmap_type(struct bitmap_test_data *tdata, 2654 struct object *obj, int pos) 2655{ 2656 enum object_type bitmap_type = OBJ_NONE; 2657 int bitmaps_nr = 0; 2658 2659 if (bitmap_is_midx(tdata->bitmap_git)) { 2660 while (pos < tdata->bitmap_git->midx->num_objects_in_base) 2661 tdata = tdata->base_tdata; 2662 } 2663 2664 if (bitmap_get(tdata->commits, pos)) { 2665 bitmap_type = OBJ_COMMIT; 2666 bitmaps_nr++; 2667 } 2668 if (bitmap_get(tdata->trees, pos)) { 2669 bitmap_type = OBJ_TREE; 2670 bitmaps_nr++; 2671 } 2672 if (bitmap_get(tdata->blobs, pos)) { 2673 bitmap_type = OBJ_BLOB; 2674 bitmaps_nr++; 2675 } 2676 if (bitmap_get(tdata->tags, pos)) { 2677 bitmap_type = OBJ_TAG; 2678 bitmaps_nr++; 2679 } 2680 2681 if (bitmap_type == OBJ_NONE) 2682 die(_("object '%s' not found in type bitmaps"), 2683 oid_to_hex(&obj->oid)); 2684 2685 if (bitmaps_nr > 1) 2686 die(_("object '%s' does not have a unique type"), 2687 oid_to_hex(&obj->oid)); 2688 2689 if (bitmap_type != obj->type) 2690 die(_("object '%s': real type '%s', expected: '%s'"), 2691 oid_to_hex(&obj->oid), 2692 type_name(obj->type), 2693 type_name(bitmap_type)); 2694} 2695 2696static void test_show_object(struct object *object, 2697 const char *name UNUSED, 2698 void *data) 2699{ 2700 struct bitmap_test_data *tdata = data; 2701 int bitmap_pos; 2702 2703 bitmap_pos = bitmap_position(tdata->bitmap_git, &object->oid); 2704 if (bitmap_pos < 0) 2705 die(_("object not in bitmap: '%s'"), oid_to_hex(&object->oid)); 2706 test_bitmap_type(tdata, object, bitmap_pos); 2707 2708 bitmap_set(tdata->base, bitmap_pos); 2709 display_progress(tdata->prg, ++tdata->seen); 2710} 2711 2712static void test_show_commit(struct commit *commit, void *data) 2713{ 2714 struct bitmap_test_data *tdata = data; 2715 int bitmap_pos; 2716 2717 bitmap_pos = bitmap_position(tdata->bitmap_git, 2718 &commit->object.oid); 2719 if (bitmap_pos < 0) 2720 die(_("object not in bitmap: '%s'"), oid_to_hex(&commit->object.oid)); 2721 test_bitmap_type(tdata, &commit->object, bitmap_pos); 2722 2723 bitmap_set(tdata->base, bitmap_pos); 2724 display_progress(tdata->prg, ++tdata->seen); 2725} 2726 2727static uint32_t bitmap_total_entry_count(struct bitmap_index *bitmap_git) 2728{ 2729 uint32_t total = 0; 2730 do { 2731 total = st_add(total, bitmap_git->entry_count); 2732 bitmap_git = bitmap_git->base; 2733 } while (bitmap_git); 2734 2735 return total; 2736} 2737 2738static void bitmap_test_data_prepare(struct bitmap_test_data *tdata, 2739 struct bitmap_index *bitmap_git) 2740{ 2741 memset(tdata, 0, sizeof(struct bitmap_test_data)); 2742 2743 tdata->bitmap_git = bitmap_git; 2744 tdata->base = bitmap_new(); 2745 tdata->commits = ewah_to_bitmap(bitmap_git->commits); 2746 tdata->trees = ewah_to_bitmap(bitmap_git->trees); 2747 tdata->blobs = ewah_to_bitmap(bitmap_git->blobs); 2748 tdata->tags = ewah_to_bitmap(bitmap_git->tags); 2749 2750 if (bitmap_git->base) { 2751 tdata->base_tdata = xmalloc(sizeof(struct bitmap_test_data)); 2752 bitmap_test_data_prepare(tdata->base_tdata, bitmap_git->base); 2753 } 2754} 2755 2756static void bitmap_test_data_release(struct bitmap_test_data *tdata) 2757{ 2758 if (!tdata) 2759 return; 2760 2761 bitmap_test_data_release(tdata->base_tdata); 2762 free(tdata->base_tdata); 2763 2764 bitmap_free(tdata->base); 2765 bitmap_free(tdata->commits); 2766 bitmap_free(tdata->trees); 2767 bitmap_free(tdata->blobs); 2768 bitmap_free(tdata->tags); 2769} 2770 2771void test_bitmap_walk(struct rev_info *revs) 2772{ 2773 struct object *root; 2774 struct bitmap *result = NULL; 2775 size_t result_popcnt; 2776 struct bitmap_test_data tdata; 2777 struct bitmap_index *bitmap_git, *found; 2778 struct ewah_bitmap *bm; 2779 2780 if (!(bitmap_git = prepare_bitmap_git(revs->repo))) 2781 die(_("failed to load bitmap indexes")); 2782 2783 if (revs->pending.nr != 1) 2784 die(_("you must specify exactly one commit to test")); 2785 2786 fprintf_ln(stderr, "Bitmap v%d test (%d entries%s, %d total)", 2787 bitmap_git->version, 2788 bitmap_git->entry_count, 2789 bitmap_git->table_lookup ? "" : " loaded", 2790 bitmap_total_entry_count(bitmap_git)); 2791 2792 root = revs->pending.objects[0].item; 2793 bm = find_bitmap_for_commit(bitmap_git, (struct commit *)root, &found); 2794 2795 if (bm) { 2796 fprintf_ln(stderr, "Found bitmap for '%s'. %d bits / %08x checksum", 2797 oid_to_hex(&root->oid), 2798 (int)bm->bit_size, ewah_checksum(bm)); 2799 2800 if (bitmap_is_midx(found)) 2801 fprintf_ln(stderr, "Located via MIDX '%s'.", 2802 hash_to_hex_algop(get_midx_checksum(found->midx), 2803 revs->repo->hash_algo)); 2804 else 2805 fprintf_ln(stderr, "Located via pack '%s'.", 2806 hash_to_hex_algop(found->pack->hash, 2807 revs->repo->hash_algo)); 2808 2809 result = ewah_to_bitmap(bm); 2810 } 2811 2812 if (!result) 2813 die(_("commit '%s' doesn't have an indexed bitmap"), oid_to_hex(&root->oid)); 2814 2815 revs->tag_objects = 1; 2816 revs->tree_objects = 1; 2817 revs->blob_objects = 1; 2818 2819 result_popcnt = bitmap_popcount(result); 2820 2821 if (prepare_revision_walk(revs)) 2822 die(_("revision walk setup failed")); 2823 2824 bitmap_test_data_prepare(&tdata, bitmap_git); 2825 tdata.prg = start_progress(revs->repo, 2826 "Verifying bitmap entries", 2827 result_popcnt); 2828 2829 traverse_commit_list(revs, &test_show_commit, &test_show_object, &tdata); 2830 2831 stop_progress(&tdata.prg); 2832 2833 if (bitmap_equals(result, tdata.base)) 2834 fprintf_ln(stderr, "OK!"); 2835 else 2836 die(_("mismatch in bitmap results")); 2837 2838 bitmap_free(result); 2839 bitmap_test_data_release(&tdata); 2840 free_bitmap_index(bitmap_git); 2841} 2842 2843int test_bitmap_commits(struct repository *r) 2844{ 2845 struct object_id oid; 2846 MAYBE_UNUSED void *value; 2847 struct bitmap_index *bitmap_git = prepare_bitmap_git(r); 2848 2849 if (!bitmap_git) 2850 die(_("failed to load bitmap indexes")); 2851 2852 /* 2853 * Since this function needs to print the bitmapped 2854 * commits, bypass the commit lookup table (if one exists) 2855 * by forcing the bitmap to eagerly load its entries. 2856 */ 2857 if (bitmap_git->table_lookup) { 2858 if (load_bitmap_entries_v1(bitmap_git) < 0) 2859 die(_("failed to load bitmap indexes")); 2860 } 2861 2862 kh_foreach(bitmap_git->bitmaps, oid, value, { 2863 printf_ln("%s", oid_to_hex(&oid)); 2864 }); 2865 2866 free_bitmap_index(bitmap_git); 2867 2868 return 0; 2869} 2870 2871int test_bitmap_commits_with_offset(struct repository *r) 2872{ 2873 struct object_id oid; 2874 struct stored_bitmap *stored; 2875 struct bitmap_index *bitmap_git; 2876 size_t commit_idx_pos_map_pos, xor_offset_map_pos, flag_map_pos, 2877 ewah_bitmap_map_pos; 2878 2879 bitmap_git = prepare_bitmap_git(r); 2880 if (!bitmap_git) 2881 die(_("failed to load bitmap indexes")); 2882 2883 /* 2884 * Since this function needs to know the position of each individual 2885 * bitmap, bypass the commit lookup table (if one exists) by forcing 2886 * the bitmap to eagerly load its entries. 2887 */ 2888 if (bitmap_git->table_lookup) { 2889 if (load_bitmap_entries_v1(bitmap_git) < 0) 2890 die(_("failed to load bitmap indexes")); 2891 } 2892 2893 kh_foreach (bitmap_git->bitmaps, oid, stored, { 2894 commit_idx_pos_map_pos = stored->map_pos; 2895 xor_offset_map_pos = stored->map_pos + sizeof(uint32_t); 2896 flag_map_pos = xor_offset_map_pos + sizeof(uint8_t); 2897 ewah_bitmap_map_pos = flag_map_pos + sizeof(uint8_t); 2898 2899 printf_ln("%s %"PRIuMAX" %"PRIuMAX" %"PRIuMAX" %"PRIuMAX, 2900 oid_to_hex(&oid), 2901 (uintmax_t)commit_idx_pos_map_pos, 2902 (uintmax_t)xor_offset_map_pos, 2903 (uintmax_t)flag_map_pos, 2904 (uintmax_t)ewah_bitmap_map_pos); 2905 }) 2906 ; 2907 2908 free_bitmap_index(bitmap_git); 2909 2910 return 0; 2911} 2912 2913int test_bitmap_hashes(struct repository *r) 2914{ 2915 struct bitmap_index *bitmap_git = prepare_bitmap_git(r); 2916 struct object_id oid; 2917 uint32_t i, index_pos; 2918 2919 if (!bitmap_git || !bitmap_git->hashes) 2920 goto cleanup; 2921 2922 for (i = 0; i < bitmap_num_objects(bitmap_git); i++) { 2923 if (bitmap_is_midx(bitmap_git)) 2924 index_pos = pack_pos_to_midx(bitmap_git->midx, i); 2925 else 2926 index_pos = pack_pos_to_index(bitmap_git->pack, i); 2927 2928 nth_bitmap_object_oid(bitmap_git, &oid, index_pos); 2929 2930 printf_ln("%s %"PRIu32"", 2931 oid_to_hex(&oid), get_be32(bitmap_git->hashes + index_pos)); 2932 } 2933 2934cleanup: 2935 free_bitmap_index(bitmap_git); 2936 2937 return 0; 2938} 2939 2940static void bit_pos_to_object_id(struct bitmap_index *bitmap_git, 2941 uint32_t bit_pos, 2942 struct object_id *oid) 2943{ 2944 uint32_t index_pos; 2945 2946 if (bitmap_is_midx(bitmap_git)) 2947 index_pos = pack_pos_to_midx(bitmap_git->midx, bit_pos); 2948 else 2949 index_pos = pack_pos_to_index(bitmap_git->pack, bit_pos); 2950 2951 nth_bitmap_object_oid(bitmap_git, oid, index_pos); 2952} 2953 2954int test_bitmap_pseudo_merges(struct repository *r) 2955{ 2956 struct bitmap_index *bitmap_git; 2957 uint32_t i; 2958 2959 bitmap_git = prepare_bitmap_git(r); 2960 if (!bitmap_git || !bitmap_git->pseudo_merges.nr) 2961 goto cleanup; 2962 2963 for (i = 0; i < bitmap_git->pseudo_merges.nr; i++) { 2964 struct pseudo_merge *merge; 2965 struct ewah_bitmap *commits_bitmap, *merge_bitmap; 2966 2967 merge = use_pseudo_merge(&bitmap_git->pseudo_merges, 2968 &bitmap_git->pseudo_merges.v[i]); 2969 commits_bitmap = merge->commits; 2970 merge_bitmap = pseudo_merge_bitmap(&bitmap_git->pseudo_merges, 2971 merge); 2972 2973 printf("at=%"PRIuMAX", commits=%"PRIuMAX", objects=%"PRIuMAX"\n", 2974 (uintmax_t)merge->at, 2975 (uintmax_t)ewah_bitmap_popcount(commits_bitmap), 2976 (uintmax_t)ewah_bitmap_popcount(merge_bitmap)); 2977 } 2978 2979cleanup: 2980 free_bitmap_index(bitmap_git); 2981 return 0; 2982} 2983 2984static void dump_ewah_object_ids(struct bitmap_index *bitmap_git, 2985 struct ewah_bitmap *bitmap) 2986 2987{ 2988 struct ewah_iterator it; 2989 eword_t word; 2990 uint32_t pos = 0; 2991 2992 ewah_iterator_init(&it, bitmap); 2993 2994 while (ewah_iterator_next(&word, &it)) { 2995 struct object_id oid; 2996 uint32_t offset; 2997 2998 for (offset = 0; offset < BITS_IN_EWORD; offset++) { 2999 if (!(word >> offset)) 3000 break; 3001 3002 offset += ewah_bit_ctz64(word >> offset); 3003 3004 bit_pos_to_object_id(bitmap_git, pos + offset, &oid); 3005 printf("%s\n", oid_to_hex(&oid)); 3006 } 3007 pos += BITS_IN_EWORD; 3008 } 3009} 3010 3011int test_bitmap_pseudo_merge_commits(struct repository *r, uint32_t n) 3012{ 3013 struct bitmap_index *bitmap_git; 3014 struct pseudo_merge *merge; 3015 int ret = 0; 3016 3017 bitmap_git = prepare_bitmap_git(r); 3018 if (!bitmap_git || !bitmap_git->pseudo_merges.nr) 3019 goto cleanup; 3020 3021 if (n >= bitmap_git->pseudo_merges.nr) { 3022 ret = error(_("pseudo-merge index out of range " 3023 "(%"PRIu32" >= %"PRIuMAX")"), 3024 n, (uintmax_t)bitmap_git->pseudo_merges.nr); 3025 goto cleanup; 3026 } 3027 3028 merge = use_pseudo_merge(&bitmap_git->pseudo_merges, 3029 &bitmap_git->pseudo_merges.v[n]); 3030 dump_ewah_object_ids(bitmap_git, merge->commits); 3031 3032cleanup: 3033 free_bitmap_index(bitmap_git); 3034 return ret; 3035} 3036 3037int test_bitmap_pseudo_merge_objects(struct repository *r, uint32_t n) 3038{ 3039 struct bitmap_index *bitmap_git; 3040 struct pseudo_merge *merge; 3041 int ret = 0; 3042 3043 bitmap_git = prepare_bitmap_git(r); 3044 if (!bitmap_git || !bitmap_git->pseudo_merges.nr) 3045 goto cleanup; 3046 3047 if (n >= bitmap_git->pseudo_merges.nr) { 3048 ret = error(_("pseudo-merge index out of range " 3049 "(%"PRIu32" >= %"PRIuMAX")"), 3050 n, (uintmax_t)bitmap_git->pseudo_merges.nr); 3051 goto cleanup; 3052 } 3053 3054 merge = use_pseudo_merge(&bitmap_git->pseudo_merges, 3055 &bitmap_git->pseudo_merges.v[n]); 3056 3057 dump_ewah_object_ids(bitmap_git, 3058 pseudo_merge_bitmap(&bitmap_git->pseudo_merges, 3059 merge)); 3060 3061cleanup: 3062 free_bitmap_index(bitmap_git); 3063 return ret; 3064} 3065 3066int rebuild_bitmap(const uint32_t *reposition, 3067 struct ewah_bitmap *source, 3068 struct bitmap *dest) 3069{ 3070 uint32_t pos = 0; 3071 struct ewah_iterator it; 3072 eword_t word; 3073 3074 ewah_iterator_init(&it, source); 3075 3076 while (ewah_iterator_next(&word, &it)) { 3077 uint32_t offset, bit_pos; 3078 3079 for (offset = 0; offset < BITS_IN_EWORD; ++offset) { 3080 if ((word >> offset) == 0) 3081 break; 3082 3083 offset += ewah_bit_ctz64(word >> offset); 3084 3085 bit_pos = reposition[pos + offset]; 3086 if (bit_pos > 0) 3087 bitmap_set(dest, bit_pos - 1); 3088 else /* can't reuse, we don't have the object */ 3089 return -1; 3090 } 3091 3092 pos += BITS_IN_EWORD; 3093 } 3094 return 0; 3095} 3096 3097uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git, 3098 struct packing_data *mapping) 3099{ 3100 struct repository *r = bitmap_repo(bitmap_git); 3101 uint32_t i, num_objects; 3102 uint32_t *reposition; 3103 3104 if (!bitmap_is_midx(bitmap_git)) 3105 load_reverse_index(r, bitmap_git); 3106 else if (load_midx_revindex(bitmap_git->midx)) 3107 BUG("rebuild_existing_bitmaps: missing required rev-cache " 3108 "extension"); 3109 3110 num_objects = bitmap_num_objects_total(bitmap_git); 3111 CALLOC_ARRAY(reposition, num_objects); 3112 3113 for (i = 0; i < num_objects; ++i) { 3114 struct object_id oid; 3115 struct object_entry *oe; 3116 uint32_t index_pos; 3117 3118 if (bitmap_is_midx(bitmap_git)) 3119 index_pos = pack_pos_to_midx(bitmap_git->midx, i); 3120 else 3121 index_pos = pack_pos_to_index(bitmap_git->pack, i); 3122 nth_bitmap_object_oid(bitmap_git, &oid, index_pos); 3123 oe = packlist_find(mapping, &oid); 3124 3125 if (oe) { 3126 reposition[i] = oe_in_pack_pos(mapping, oe) + 1; 3127 if (bitmap_git->hashes && !oe->hash) 3128 oe->hash = get_be32(bitmap_git->hashes + index_pos); 3129 } 3130 } 3131 3132 return reposition; 3133} 3134 3135void free_bitmap_index(struct bitmap_index *b) 3136{ 3137 if (!b) 3138 return; 3139 3140 if (b->map) 3141 munmap(b->map, b->map_size); 3142 ewah_pool_free(b->commits); 3143 ewah_pool_free(b->trees); 3144 ewah_pool_free(b->blobs); 3145 ewah_pool_free(b->tags); 3146 free(b->commits_all); 3147 free(b->trees_all); 3148 free(b->blobs_all); 3149 free(b->tags_all); 3150 if (b->bitmaps) { 3151 struct stored_bitmap *sb; 3152 kh_foreach_value(b->bitmaps, sb, { 3153 ewah_pool_free(sb->root); 3154 free(sb); 3155 }); 3156 } 3157 kh_destroy_oid_map(b->bitmaps); 3158 free(b->ext_index.objects); 3159 free(b->ext_index.hashes); 3160 kh_destroy_oid_pos(b->ext_index.positions); 3161 bitmap_free(b->result); 3162 bitmap_free(b->haves); 3163 if (bitmap_is_midx(b)) { 3164 /* 3165 * Multi-pack bitmaps need to have resources associated with 3166 * their on-disk reverse indexes unmapped so that stale .rev and 3167 * .bitmap files can be removed. 3168 * 3169 * Unlike pack-based bitmaps, multi-pack bitmaps can be read and 3170 * written in the same 'git multi-pack-index write --bitmap' 3171 * process. Close resources so they can be removed safely on 3172 * platforms like Windows. 3173 */ 3174 close_midx_revindex(b->midx); 3175 } 3176 free_pseudo_merge_map(&b->pseudo_merges); 3177 free_bitmap_index(b->base); 3178 free(b); 3179} 3180 3181int bitmap_has_oid_in_uninteresting(struct bitmap_index *bitmap_git, 3182 const struct object_id *oid) 3183{ 3184 return bitmap_git && 3185 bitmap_walk_contains(bitmap_git, bitmap_git->haves, oid); 3186} 3187 3188static off_t get_disk_usage_for_type(struct bitmap_index *bitmap_git, 3189 enum object_type object_type) 3190{ 3191 struct bitmap *result = bitmap_git->result; 3192 off_t total = 0; 3193 struct ewah_or_iterator it; 3194 eword_t filter; 3195 size_t i; 3196 3197 init_type_iterator(&it, bitmap_git, object_type); 3198 for (i = 0; i < result->word_alloc && 3199 ewah_or_iterator_next(&filter, &it); i++) { 3200 eword_t word = result->words[i] & filter; 3201 size_t base = (i * BITS_IN_EWORD); 3202 unsigned offset; 3203 3204 if (!word) 3205 continue; 3206 3207 for (offset = 0; offset < BITS_IN_EWORD; offset++) { 3208 if ((word >> offset) == 0) 3209 break; 3210 3211 offset += ewah_bit_ctz64(word >> offset); 3212 3213 if (bitmap_is_midx(bitmap_git)) { 3214 uint32_t pack_pos; 3215 uint32_t midx_pos = pack_pos_to_midx(bitmap_git->midx, base + offset); 3216 off_t offset = nth_midxed_offset(bitmap_git->midx, midx_pos); 3217 3218 uint32_t pack_id = nth_midxed_pack_int_id(bitmap_git->midx, midx_pos); 3219 struct packed_git *pack = nth_midxed_pack(bitmap_git->midx, pack_id); 3220 3221 if (offset_to_pack_pos(pack, offset, &pack_pos) < 0) { 3222 struct object_id oid; 3223 nth_midxed_object_oid(&oid, bitmap_git->midx, midx_pos); 3224 3225 die(_("could not find '%s' in pack '%s' at offset %"PRIuMAX), 3226 oid_to_hex(&oid), 3227 pack->pack_name, 3228 (uintmax_t)offset); 3229 } 3230 3231 total += pack_pos_to_offset(pack, pack_pos + 1) - offset; 3232 } else { 3233 size_t pos = base + offset; 3234 total += pack_pos_to_offset(bitmap_git->pack, pos + 1) - 3235 pack_pos_to_offset(bitmap_git->pack, pos); 3236 } 3237 } 3238 } 3239 3240 ewah_or_iterator_release(&it); 3241 3242 return total; 3243} 3244 3245static off_t get_disk_usage_for_extended(struct bitmap_index *bitmap_git) 3246{ 3247 struct bitmap *result = bitmap_git->result; 3248 struct eindex *eindex = &bitmap_git->ext_index; 3249 off_t total = 0; 3250 struct object_info oi = OBJECT_INFO_INIT; 3251 off_t object_size; 3252 size_t i; 3253 3254 oi.disk_sizep = &object_size; 3255 3256 for (i = 0; i < eindex->count; i++) { 3257 struct object *obj = eindex->objects[i]; 3258 3259 if (!bitmap_get(result, 3260 st_add(bitmap_num_objects_total(bitmap_git), 3261 i))) 3262 continue; 3263 3264 if (odb_read_object_info_extended(bitmap_repo(bitmap_git)->objects, 3265 &obj->oid, &oi, 0) < 0) 3266 die(_("unable to get disk usage of '%s'"), 3267 oid_to_hex(&obj->oid)); 3268 3269 total += object_size; 3270 } 3271 return total; 3272} 3273 3274off_t get_disk_usage_from_bitmap(struct bitmap_index *bitmap_git, 3275 struct rev_info *revs) 3276{ 3277 off_t total = 0; 3278 3279 total += get_disk_usage_for_type(bitmap_git, OBJ_COMMIT); 3280 if (revs->tree_objects) 3281 total += get_disk_usage_for_type(bitmap_git, OBJ_TREE); 3282 if (revs->blob_objects) 3283 total += get_disk_usage_for_type(bitmap_git, OBJ_BLOB); 3284 if (revs->tag_objects) 3285 total += get_disk_usage_for_type(bitmap_git, OBJ_TAG); 3286 3287 total += get_disk_usage_for_extended(bitmap_git); 3288 3289 return total; 3290} 3291 3292int bitmap_is_midx(struct bitmap_index *bitmap_git) 3293{ 3294 return !!bitmap_git->midx; 3295} 3296 3297const struct string_list *bitmap_preferred_tips(struct repository *r) 3298{ 3299 const struct string_list *dest; 3300 3301 if (!repo_config_get_string_multi(r, "pack.preferbitmaptips", &dest)) 3302 return dest; 3303 return NULL; 3304} 3305 3306int bitmap_is_preferred_refname(struct repository *r, const char *refname) 3307{ 3308 const struct string_list *preferred_tips = bitmap_preferred_tips(r); 3309 struct string_list_item *item; 3310 3311 if (!preferred_tips) 3312 return 0; 3313 3314 for_each_string_list_item(item, preferred_tips) { 3315 if (starts_with(refname, item->string)) 3316 return 1; 3317 } 3318 3319 return 0; 3320} 3321 3322static int verify_bitmap_file(const struct git_hash_algo *algop, 3323 const char *name) 3324{ 3325 struct stat st; 3326 unsigned char *data; 3327 int fd = git_open(name); 3328 int res = 0; 3329 3330 /* It is OK to not have the file. */ 3331 if (fd < 0 || fstat(fd, &st)) { 3332 if (fd >= 0) 3333 close(fd); 3334 return 0; 3335 } 3336 3337 data = xmmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); 3338 close(fd); 3339 if (!hashfile_checksum_valid(algop, data, st.st_size)) 3340 res = error(_("bitmap file '%s' has invalid checksum"), 3341 name); 3342 3343 munmap(data, st.st_size); 3344 return res; 3345} 3346 3347int verify_bitmap_files(struct repository *r) 3348{ 3349 struct odb_source *source; 3350 int res = 0; 3351 3352 odb_prepare_alternates(r->objects); 3353 for (source = r->objects->sources; source; source = source->next) { 3354 struct multi_pack_index *m = get_multi_pack_index(source); 3355 char *midx_bitmap_name; 3356 3357 if (!m) 3358 continue; 3359 3360 midx_bitmap_name = midx_bitmap_filename(m); 3361 res |= verify_bitmap_file(r->hash_algo, midx_bitmap_name); 3362 free(midx_bitmap_name); 3363 } 3364 3365 for (struct packed_git *p = packfile_store_get_all_packs(r->objects->packfiles); 3366 p; p = p->next) { 3367 char *pack_bitmap_name = pack_bitmap_filename(p); 3368 res |= verify_bitmap_file(r->hash_algo, pack_bitmap_name); 3369 free(pack_bitmap_name); 3370 } 3371 3372 return res; 3373}