Git fork
1#define DISABLE_SIGN_COMPARE_WARNINGS
2
3#include "git-compat-util.h"
4#include "commit.h"
5#include "gettext.h"
6#include "hex.h"
7#include "strbuf.h"
8#include "tag.h"
9#include "diff.h"
10#include "revision.h"
11#include "progress.h"
12#include "list-objects.h"
13#include "pack.h"
14#include "pack-bitmap.h"
15#include "pack-revindex.h"
16#include "pack-objects.h"
17#include "packfile.h"
18#include "repository.h"
19#include "trace2.h"
20#include "odb.h"
21#include "list-objects-filter-options.h"
22#include "midx.h"
23#include "config.h"
24#include "pseudo-merge.h"
25
26/*
27 * An entry on the bitmap index, representing the bitmap for a given
28 * commit.
29 */
30struct stored_bitmap {
31 struct object_id oid;
32 struct ewah_bitmap *root;
33 struct stored_bitmap *xor;
34 size_t map_pos;
35 int flags;
36};
37
38/*
39 * The active bitmap index for a repository. By design, repositories only have
40 * a single bitmap index available (the index for the biggest packfile in
41 * the repository), since bitmap indexes need full closure.
42 *
43 * If there is more than one bitmap index available (e.g. because of alternates),
44 * the active bitmap index is the largest one.
45 */
46struct bitmap_index {
47 /*
48 * The pack or multi-pack index (MIDX) that this bitmap index belongs
49 * to.
50 *
51 * Exactly one of these must be non-NULL; this specifies the object
52 * order used to interpret this bitmap.
53 */
54 struct packed_git *pack;
55 struct multi_pack_index *midx;
56
57 /*
58 * If using a multi-pack index chain, 'base' points to the
59 * bitmap index corresponding to this bitmap's midx->base_midx.
60 *
61 * base_nr indicates how many layers precede this one, and is
62 * zero when base is NULL.
63 */
64 struct bitmap_index *base;
65 uint32_t base_nr;
66
67 /* mmapped buffer of the whole bitmap index */
68 unsigned char *map;
69 size_t map_size; /* size of the mmaped buffer */
70 size_t map_pos; /* current position when loading the index */
71
72 /*
73 * Type indexes.
74 *
75 * Each bitmap marks which objects in the packfile are of the given
76 * type. This provides type information when yielding the objects from
77 * the packfile during a walk, which allows for better delta bases.
78 */
79 struct ewah_bitmap *commits;
80 struct ewah_bitmap *trees;
81 struct ewah_bitmap *blobs;
82 struct ewah_bitmap *tags;
83
84 /*
85 * Type index arrays when this bitmap is associated with an
86 * incremental multi-pack index chain.
87 *
88 * If n is the number of unique layers in the MIDX chain, then
89 * commits_all[n-1] is this structs 'commits' field,
90 * commits_all[n-2] is the commits field of this bitmap's
91 * 'base', and so on.
92 *
93 * When associated either with a non-incremental MIDX or a
94 * single packfile, these arrays each contain a single element.
95 */
96 struct ewah_bitmap **commits_all;
97 struct ewah_bitmap **trees_all;
98 struct ewah_bitmap **blobs_all;
99 struct ewah_bitmap **tags_all;
100
101 /* Map from object ID -> `stored_bitmap` for all the bitmapped commits */
102 kh_oid_map_t *bitmaps;
103
104 /* Number of bitmapped commits */
105 uint32_t entry_count;
106
107 /* If not NULL, this is a name-hash cache pointing into map. */
108 uint32_t *hashes;
109
110 /* The checksum of the packfile or MIDX; points into map. */
111 const unsigned char *checksum;
112
113 /*
114 * If not NULL, this point into the commit table extension
115 * (within the memory mapped region `map`).
116 */
117 unsigned char *table_lookup;
118
119 /* This contains the pseudo-merge cache within 'map' (if found). */
120 struct pseudo_merge_map pseudo_merges;
121
122 /*
123 * Extended index.
124 *
125 * When trying to perform bitmap operations with objects that are not
126 * packed in `pack`, these objects are added to this "fake index" and
127 * are assumed to appear at the end of the packfile for all operations
128 */
129 struct eindex {
130 struct object **objects;
131 uint32_t *hashes;
132 uint32_t count, alloc;
133 kh_oid_pos_t *positions;
134 } ext_index;
135
136 /* Bitmap result of the last performed walk */
137 struct bitmap *result;
138
139 /* "have" bitmap from the last performed walk */
140 struct bitmap *haves;
141
142 /* Version of the bitmap index */
143 unsigned int version;
144};
145
146static int pseudo_merges_satisfied_nr;
147static int pseudo_merges_cascades_nr;
148static int existing_bitmaps_hits_nr;
149static int existing_bitmaps_misses_nr;
150static int roots_with_bitmaps_nr;
151static int roots_without_bitmaps_nr;
152
153static struct ewah_bitmap *lookup_stored_bitmap(struct stored_bitmap *st)
154{
155 struct ewah_bitmap *parent;
156 struct ewah_bitmap *composed;
157
158 if (!st->xor)
159 return st->root;
160
161 composed = ewah_pool_new();
162 parent = lookup_stored_bitmap(st->xor);
163 ewah_xor(st->root, parent, composed);
164
165 ewah_pool_free(st->root);
166 st->root = composed;
167 st->xor = NULL;
168
169 return composed;
170}
171
172struct ewah_bitmap *read_bitmap(const unsigned char *map,
173 size_t map_size, size_t *map_pos)
174{
175 struct ewah_bitmap *b = ewah_pool_new();
176
177 ssize_t bitmap_size = ewah_read_mmap(b, map + *map_pos,
178 map_size - *map_pos);
179
180 if (bitmap_size < 0) {
181 error(_("failed to load bitmap index (corrupted?)"));
182 ewah_pool_free(b);
183 return NULL;
184 }
185
186 *map_pos += bitmap_size;
187
188 return b;
189}
190
191/*
192 * Read a bitmap from the current read position on the mmaped
193 * index, and increase the read position accordingly
194 */
195static struct ewah_bitmap *read_bitmap_1(struct bitmap_index *index)
196{
197 return read_bitmap(index->map, index->map_size, &index->map_pos);
198}
199
200static uint32_t bitmap_num_objects_total(struct bitmap_index *index)
201{
202 if (index->midx) {
203 struct multi_pack_index *m = index->midx;
204 return m->num_objects + m->num_objects_in_base;
205 }
206 return index->pack->num_objects;
207}
208
209static uint32_t bitmap_num_objects(struct bitmap_index *index)
210{
211 if (index->midx)
212 return index->midx->num_objects;
213 return index->pack->num_objects;
214}
215
216static struct repository *bitmap_repo(struct bitmap_index *bitmap_git)
217{
218 if (bitmap_is_midx(bitmap_git))
219 return bitmap_git->midx->source->odb->repo;
220 return bitmap_git->pack->repo;
221}
222
223static int load_bitmap_header(struct bitmap_index *index)
224{
225 struct bitmap_disk_header *header = (void *)index->map;
226 const struct git_hash_algo *hash_algo = bitmap_repo(index)->hash_algo;
227
228 size_t header_size = sizeof(*header) - GIT_MAX_RAWSZ + hash_algo->rawsz;
229
230 if (index->map_size < header_size + hash_algo->rawsz)
231 return error(_("corrupted bitmap index (too small)"));
232
233 if (memcmp(header->magic, BITMAP_IDX_SIGNATURE, sizeof(BITMAP_IDX_SIGNATURE)) != 0)
234 return error(_("corrupted bitmap index file (wrong header)"));
235
236 index->version = ntohs(header->version);
237 if (index->version != 1)
238 return error(_("unsupported version '%d' for bitmap index file"), index->version);
239
240 /* Parse known bitmap format options */
241 {
242 uint32_t flags = ntohs(header->options);
243 size_t cache_size = st_mult(bitmap_num_objects(index), sizeof(uint32_t));
244 unsigned char *index_end = index->map + index->map_size - hash_algo->rawsz;
245
246 if ((flags & BITMAP_OPT_FULL_DAG) == 0)
247 BUG("unsupported options for bitmap index file "
248 "(Git requires BITMAP_OPT_FULL_DAG)");
249
250 if (flags & BITMAP_OPT_HASH_CACHE) {
251 if (cache_size > index_end - index->map - header_size)
252 return error(_("corrupted bitmap index file (too short to fit hash cache)"));
253 index->hashes = (void *)(index_end - cache_size);
254 index_end -= cache_size;
255 }
256
257 if (flags & BITMAP_OPT_LOOKUP_TABLE) {
258 size_t table_size = st_mult(ntohl(header->entry_count),
259 BITMAP_LOOKUP_TABLE_TRIPLET_WIDTH);
260 if (table_size > index_end - index->map - header_size)
261 return error(_("corrupted bitmap index file (too short to fit lookup table)"));
262 if (git_env_bool("GIT_TEST_READ_COMMIT_TABLE", 1))
263 index->table_lookup = (void *)(index_end - table_size);
264 index_end -= table_size;
265 }
266
267 if (flags & BITMAP_OPT_PSEUDO_MERGES) {
268 unsigned char *pseudo_merge_ofs;
269 size_t table_size;
270 uint32_t i;
271
272 if (sizeof(table_size) > index_end - index->map - header_size)
273 return error(_("corrupted bitmap index file (too short to fit pseudo-merge table header)"));
274
275 table_size = get_be64(index_end - 8);
276 if (table_size > index_end - index->map - header_size)
277 return error(_("corrupted bitmap index file (too short to fit pseudo-merge table)"));
278
279 if (git_env_bool("GIT_TEST_USE_PSEUDO_MERGES", 1)) {
280 const unsigned char *ext = (index_end - table_size);
281
282 index->pseudo_merges.map = index->map;
283 index->pseudo_merges.map_size = index->map_size;
284 index->pseudo_merges.commits = ext + get_be64(index_end - 16);
285 index->pseudo_merges.commits_nr = get_be32(index_end - 20);
286 index->pseudo_merges.nr = get_be32(index_end - 24);
287
288 if (st_add(st_mult(index->pseudo_merges.nr,
289 sizeof(uint64_t)),
290 24) > table_size)
291 return error(_("corrupted bitmap index file, pseudo-merge table too short"));
292
293 CALLOC_ARRAY(index->pseudo_merges.v,
294 index->pseudo_merges.nr);
295
296 pseudo_merge_ofs = index_end - 24 -
297 (index->pseudo_merges.nr * sizeof(uint64_t));
298 for (i = 0; i < index->pseudo_merges.nr; i++) {
299 index->pseudo_merges.v[i].at = get_be64(pseudo_merge_ofs);
300 pseudo_merge_ofs += sizeof(uint64_t);
301 }
302 }
303
304 index_end -= table_size;
305 }
306 }
307
308 index->entry_count = ntohl(header->entry_count);
309 index->checksum = header->checksum;
310 index->map_pos += header_size;
311 return 0;
312}
313
314static struct stored_bitmap *store_bitmap(struct bitmap_index *index,
315 struct ewah_bitmap *root,
316 const struct object_id *oid,
317 struct stored_bitmap *xor_with,
318 int flags, size_t map_pos)
319{
320 struct stored_bitmap *stored;
321 khiter_t hash_pos;
322 int ret;
323
324 stored = xmalloc(sizeof(struct stored_bitmap));
325 stored->map_pos = map_pos;
326 stored->root = root;
327 stored->xor = xor_with;
328 stored->flags = flags;
329 oidcpy(&stored->oid, oid);
330
331 hash_pos = kh_put_oid_map(index->bitmaps, stored->oid, &ret);
332
333 /*
334 * A 0 return code means the insertion succeeded with no changes,
335 * because the SHA1 already existed on the map. This is bad, there
336 * shouldn't be duplicated commits in the index.
337 */
338 if (ret == 0) {
339 error(_("duplicate entry in bitmap index: '%s'"), oid_to_hex(oid));
340 return NULL;
341 }
342
343 kh_value(index->bitmaps, hash_pos) = stored;
344 return stored;
345}
346
347static inline uint32_t read_be32(const unsigned char *buffer, size_t *pos)
348{
349 uint32_t result = get_be32(buffer + *pos);
350 (*pos) += sizeof(result);
351 return result;
352}
353
354static inline uint8_t read_u8(const unsigned char *buffer, size_t *pos)
355{
356 return buffer[(*pos)++];
357}
358
359#define MAX_XOR_OFFSET 160
360
361static int nth_bitmap_object_oid(struct bitmap_index *index,
362 struct object_id *oid,
363 uint32_t n)
364{
365 if (index->midx)
366 return nth_midxed_object_oid(oid, index->midx, n) ? 0 : -1;
367 return nth_packed_object_id(oid, index->pack, n);
368}
369
370static int load_bitmap_entries_v1(struct bitmap_index *index)
371{
372 uint32_t i;
373 struct stored_bitmap *recent_bitmaps[MAX_XOR_OFFSET] = { NULL };
374
375 for (i = 0; i < index->entry_count; ++i) {
376 int xor_offset, flags;
377 struct ewah_bitmap *bitmap = NULL;
378 struct stored_bitmap *xor_bitmap = NULL;
379 uint32_t commit_idx_pos;
380 struct object_id oid;
381 size_t entry_map_pos;
382
383 if (index->map_size - index->map_pos < 6)
384 return error(_("corrupt ewah bitmap: truncated header for entry %d"), i);
385
386 entry_map_pos = index->map_pos;
387 commit_idx_pos = read_be32(index->map, &index->map_pos);
388 xor_offset = read_u8(index->map, &index->map_pos);
389 flags = read_u8(index->map, &index->map_pos);
390
391 if (nth_bitmap_object_oid(index, &oid, commit_idx_pos) < 0)
392 return error(_("corrupt ewah bitmap: commit index %u out of range"),
393 (unsigned)commit_idx_pos);
394
395 if (xor_offset > MAX_XOR_OFFSET || xor_offset > i)
396 return error(_("corrupted bitmap pack index"));
397
398 if (xor_offset > 0) {
399 xor_bitmap = recent_bitmaps[(i - xor_offset) % MAX_XOR_OFFSET];
400
401 if (!xor_bitmap)
402 return error(_("invalid XOR offset in bitmap pack index"));
403 }
404
405 bitmap = read_bitmap_1(index);
406 if (!bitmap)
407 return -1;
408
409 recent_bitmaps[i % MAX_XOR_OFFSET] =
410 store_bitmap(index, bitmap, &oid, xor_bitmap, flags,
411 entry_map_pos);
412 }
413
414 return 0;
415}
416
417char *midx_bitmap_filename(struct multi_pack_index *midx)
418{
419 struct strbuf buf = STRBUF_INIT;
420 if (midx->has_chain)
421 get_split_midx_filename_ext(midx->source, &buf,
422 get_midx_checksum(midx),
423 MIDX_EXT_BITMAP);
424 else
425 get_midx_filename_ext(midx->source, &buf,
426 get_midx_checksum(midx),
427 MIDX_EXT_BITMAP);
428
429 return strbuf_detach(&buf, NULL);
430}
431
432char *pack_bitmap_filename(struct packed_git *p)
433{
434 size_t len;
435
436 if (!strip_suffix(p->pack_name, ".pack", &len))
437 BUG("pack_name does not end in .pack");
438 return xstrfmt("%.*s.bitmap", (int)len, p->pack_name);
439}
440
441static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
442 struct multi_pack_index *midx)
443{
444 struct stat st;
445 char *bitmap_name = midx_bitmap_filename(midx);
446 int fd = git_open(bitmap_name);
447 uint32_t i;
448
449 if (fd < 0) {
450 if (errno != ENOENT)
451 warning_errno("cannot open '%s'", bitmap_name);
452 free(bitmap_name);
453 return -1;
454 }
455 free(bitmap_name);
456
457 if (fstat(fd, &st)) {
458 error_errno(_("cannot fstat bitmap file"));
459 close(fd);
460 return -1;
461 }
462
463 if (bitmap_git->pack || bitmap_git->midx) {
464 struct strbuf buf = STRBUF_INIT;
465 get_midx_filename(midx->source, &buf);
466 trace2_data_string("bitmap", bitmap_repo(bitmap_git),
467 "ignoring extra midx bitmap file", buf.buf);
468 close(fd);
469 strbuf_release(&buf);
470 return -1;
471 }
472
473 bitmap_git->midx = midx;
474 bitmap_git->map_size = xsize_t(st.st_size);
475 bitmap_git->map_pos = 0;
476 bitmap_git->map = xmmap(NULL, bitmap_git->map_size, PROT_READ,
477 MAP_PRIVATE, fd, 0);
478 close(fd);
479
480 if (load_bitmap_header(bitmap_git) < 0)
481 goto cleanup;
482
483 if (!hasheq(get_midx_checksum(bitmap_git->midx), bitmap_git->checksum,
484 bitmap_repo(bitmap_git)->hash_algo)) {
485 error(_("checksum doesn't match in MIDX and bitmap"));
486 goto cleanup;
487 }
488
489 if (load_midx_revindex(bitmap_git->midx)) {
490 warning(_("multi-pack bitmap is missing required reverse index"));
491 goto cleanup;
492 }
493
494 for (i = 0; i < bitmap_git->midx->num_packs + bitmap_git->midx->num_packs_in_base; i++) {
495 if (prepare_midx_pack(bitmap_git->midx, i)) {
496 warning(_("could not open pack %s"),
497 bitmap_git->midx->pack_names[i]);
498 goto cleanup;
499 }
500 }
501
502 if (midx->base_midx) {
503 bitmap_git->base = prepare_midx_bitmap_git(midx->base_midx);
504 bitmap_git->base_nr = bitmap_git->base->base_nr + 1;
505 } else {
506 bitmap_git->base_nr = 0;
507 }
508
509 return 0;
510
511cleanup:
512 munmap(bitmap_git->map, bitmap_git->map_size);
513 bitmap_git->map_size = 0;
514 bitmap_git->map_pos = 0;
515 bitmap_git->map = NULL;
516 bitmap_git->midx = NULL;
517 return -1;
518}
519
520static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git *packfile)
521{
522 int fd;
523 struct stat st;
524 char *bitmap_name;
525
526 bitmap_name = pack_bitmap_filename(packfile);
527 fd = git_open(bitmap_name);
528
529 if (fd < 0) {
530 if (errno != ENOENT)
531 warning_errno("cannot open '%s'", bitmap_name);
532 free(bitmap_name);
533 return -1;
534 }
535 free(bitmap_name);
536
537 if (fstat(fd, &st)) {
538 error_errno(_("cannot fstat bitmap file"));
539 close(fd);
540 return -1;
541 }
542
543 if (bitmap_git->pack || bitmap_git->midx) {
544 trace2_data_string("bitmap", bitmap_repo(bitmap_git),
545 "ignoring extra bitmap file",
546 packfile->pack_name);
547 close(fd);
548 return -1;
549 }
550
551 if (!is_pack_valid(packfile)) {
552 close(fd);
553 return -1;
554 }
555
556 bitmap_git->pack = packfile;
557 bitmap_git->map_size = xsize_t(st.st_size);
558 bitmap_git->map = xmmap(NULL, bitmap_git->map_size, PROT_READ, MAP_PRIVATE, fd, 0);
559 bitmap_git->map_pos = 0;
560 bitmap_git->base_nr = 0;
561 close(fd);
562
563 if (load_bitmap_header(bitmap_git) < 0) {
564 munmap(bitmap_git->map, bitmap_git->map_size);
565 bitmap_git->map = NULL;
566 bitmap_git->map_size = 0;
567 bitmap_git->map_pos = 0;
568 bitmap_git->pack = NULL;
569 return -1;
570 }
571
572 trace2_data_string("bitmap", bitmap_repo(bitmap_git),
573 "opened bitmap file", packfile->pack_name);
574 return 0;
575}
576
577static int load_reverse_index(struct repository *r, struct bitmap_index *bitmap_git)
578{
579 if (bitmap_is_midx(bitmap_git)) {
580 struct multi_pack_index *m;
581
582 /*
583 * The multi-pack-index's .rev file is already loaded via
584 * open_pack_bitmap_1().
585 *
586 * But we still need to open the individual pack .rev files,
587 * since we will need to make use of them in pack-objects.
588 */
589 for (m = bitmap_git->midx; m; m = m->base_midx) {
590 uint32_t i;
591 int ret;
592
593 for (i = 0; i < m->num_packs; i++) {
594 ret = load_pack_revindex(r, m->packs[i]);
595 if (ret)
596 return ret;
597 }
598 }
599 return 0;
600 }
601 return load_pack_revindex(r, bitmap_git->pack);
602}
603
604static void load_all_type_bitmaps(struct bitmap_index *bitmap_git)
605{
606 struct bitmap_index *curr = bitmap_git;
607 size_t i = bitmap_git->base_nr;
608
609 ALLOC_ARRAY(bitmap_git->commits_all, bitmap_git->base_nr + 1);
610 ALLOC_ARRAY(bitmap_git->trees_all, bitmap_git->base_nr + 1);
611 ALLOC_ARRAY(bitmap_git->blobs_all, bitmap_git->base_nr + 1);
612 ALLOC_ARRAY(bitmap_git->tags_all, bitmap_git->base_nr + 1);
613
614 while (curr) {
615 bitmap_git->commits_all[i] = curr->commits;
616 bitmap_git->trees_all[i] = curr->trees;
617 bitmap_git->blobs_all[i] = curr->blobs;
618 bitmap_git->tags_all[i] = curr->tags;
619
620 curr = curr->base;
621 if (curr && !i)
622 BUG("unexpected number of bitmap layers, expected %"PRIu32,
623 bitmap_git->base_nr + 1);
624 i -= 1;
625 }
626}
627
628static int load_bitmap(struct repository *r, struct bitmap_index *bitmap_git,
629 int recursing)
630{
631 assert(bitmap_git->map);
632
633 bitmap_git->bitmaps = kh_init_oid_map();
634 bitmap_git->ext_index.positions = kh_init_oid_pos();
635
636 if (load_reverse_index(r, bitmap_git))
637 return -1;
638
639 if (!(bitmap_git->commits = read_bitmap_1(bitmap_git)) ||
640 !(bitmap_git->trees = read_bitmap_1(bitmap_git)) ||
641 !(bitmap_git->blobs = read_bitmap_1(bitmap_git)) ||
642 !(bitmap_git->tags = read_bitmap_1(bitmap_git)))
643 return -1;
644
645 if (!bitmap_git->table_lookup && load_bitmap_entries_v1(bitmap_git) < 0)
646 return -1;
647
648 if (bitmap_git->base) {
649 if (!bitmap_is_midx(bitmap_git))
650 BUG("non-MIDX bitmap has non-NULL base bitmap index");
651 if (load_bitmap(r, bitmap_git->base, 1) < 0)
652 return -1;
653 }
654
655 if (!recursing)
656 load_all_type_bitmaps(bitmap_git);
657
658 return 0;
659}
660
661static int open_pack_bitmap(struct repository *r,
662 struct bitmap_index *bitmap_git)
663{
664 struct packed_git *p;
665 int ret = -1;
666
667 for (p = packfile_store_get_all_packs(r->objects->packfiles); p; p = p->next) {
668 if (open_pack_bitmap_1(bitmap_git, p) == 0) {
669 ret = 0;
670 /*
671 * The only reason to keep looking is to report
672 * duplicates.
673 */
674 if (!trace2_is_enabled())
675 break;
676 }
677 }
678
679 return ret;
680}
681
682static int open_midx_bitmap(struct repository *r,
683 struct bitmap_index *bitmap_git)
684{
685 struct odb_source *source;
686 int ret = -1;
687
688 assert(!bitmap_git->map);
689
690 odb_prepare_alternates(r->objects);
691 for (source = r->objects->sources; source; source = source->next) {
692 struct multi_pack_index *midx = get_multi_pack_index(source);
693 if (midx && !open_midx_bitmap_1(bitmap_git, midx))
694 ret = 0;
695 }
696 return ret;
697}
698
699static int open_bitmap(struct repository *r,
700 struct bitmap_index *bitmap_git)
701{
702 int found;
703
704 assert(!bitmap_git->map);
705
706 found = !open_midx_bitmap(r, bitmap_git);
707
708 /*
709 * these will all be skipped if we opened a midx bitmap; but run it
710 * anyway if tracing is enabled to report the duplicates
711 */
712 if (!found || trace2_is_enabled())
713 found |= !open_pack_bitmap(r, bitmap_git);
714
715 return found ? 0 : -1;
716}
717
718struct bitmap_index *prepare_bitmap_git(struct repository *r)
719{
720 struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git));
721
722 if (!open_bitmap(r, bitmap_git) && !load_bitmap(r, bitmap_git, 0))
723 return bitmap_git;
724
725 free_bitmap_index(bitmap_git);
726 return NULL;
727}
728
729struct bitmap_index *prepare_midx_bitmap_git(struct multi_pack_index *midx)
730{
731 struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git));
732
733 if (!open_midx_bitmap_1(bitmap_git, midx))
734 return bitmap_git;
735
736 free_bitmap_index(bitmap_git);
737 return NULL;
738}
739
740int bitmap_index_contains_pack(struct bitmap_index *bitmap, struct packed_git *pack)
741{
742 for (; bitmap; bitmap = bitmap->base) {
743 if (bitmap_is_midx(bitmap)) {
744 for (size_t i = 0; i < bitmap->midx->num_packs; i++)
745 if (bitmap->midx->packs[i] == pack)
746 return 1;
747 } else if (bitmap->pack == pack) {
748 return 1;
749 }
750 }
751
752 return 0;
753}
754
755struct include_data {
756 struct bitmap_index *bitmap_git;
757 struct bitmap *base;
758 struct bitmap *seen;
759};
760
761struct bitmap_lookup_table_triplet {
762 uint32_t commit_pos;
763 uint64_t offset;
764 uint32_t xor_row;
765};
766
767struct bitmap_lookup_table_xor_item {
768 struct object_id oid;
769 uint64_t offset;
770};
771
772/*
773 * Given a `triplet` struct pointer and pointer `p`, this
774 * function reads the triplet beginning at `p` into the struct.
775 * Note that this function assumes that there is enough memory
776 * left for filling the `triplet` struct from `p`.
777 */
778static int bitmap_lookup_table_get_triplet_by_pointer(struct bitmap_lookup_table_triplet *triplet,
779 const unsigned char *p)
780{
781 if (!triplet)
782 return -1;
783
784 triplet->commit_pos = get_be32(p);
785 p += sizeof(uint32_t);
786 triplet->offset = get_be64(p);
787 p += sizeof(uint64_t);
788 triplet->xor_row = get_be32(p);
789 return 0;
790}
791
792/*
793 * This function gets the raw triplet from `row`'th row in the
794 * lookup table and fills that data to the `triplet`.
795 */
796static int bitmap_lookup_table_get_triplet(struct bitmap_index *bitmap_git,
797 uint32_t pos,
798 struct bitmap_lookup_table_triplet *triplet)
799{
800 unsigned char *p = NULL;
801 if (pos >= bitmap_git->entry_count)
802 return error(_("corrupt bitmap lookup table: triplet position out of index"));
803
804 p = bitmap_git->table_lookup + st_mult(pos, BITMAP_LOOKUP_TABLE_TRIPLET_WIDTH);
805
806 return bitmap_lookup_table_get_triplet_by_pointer(triplet, p);
807}
808
809/*
810 * Searches for a matching triplet. `commit_pos` is a pointer
811 * to the wanted commit position value. `table_entry` points to
812 * a triplet in lookup table. The first 4 bytes of each
813 * triplet (pointed by `table_entry`) are compared with `*commit_pos`.
814 */
815static int triplet_cmp(const void *commit_pos, const void *table_entry)
816{
817
818 uint32_t a = *(uint32_t *)commit_pos;
819 uint32_t b = get_be32(table_entry);
820 if (a > b)
821 return 1;
822 else if (a < b)
823 return -1;
824
825 return 0;
826}
827
828static uint32_t bitmap_bsearch_pos(struct bitmap_index *bitmap_git,
829 struct object_id *oid,
830 uint32_t *result)
831{
832 int found;
833
834 if (bitmap_is_midx(bitmap_git))
835 found = bsearch_midx(oid, bitmap_git->midx, result);
836 else
837 found = bsearch_pack(oid, bitmap_git->pack, result);
838
839 return found;
840}
841
842/*
843 * `bsearch_triplet_by_pos` function searches for the raw triplet
844 * having commit position same as `commit_pos` and fills `triplet`
845 * object from the raw triplet. Returns 1 on success and 0 on
846 * failure.
847 */
848static int bitmap_bsearch_triplet_by_pos(uint32_t commit_pos,
849 struct bitmap_index *bitmap_git,
850 struct bitmap_lookup_table_triplet *triplet)
851{
852 unsigned char *p = bsearch(&commit_pos, bitmap_git->table_lookup, bitmap_git->entry_count,
853 BITMAP_LOOKUP_TABLE_TRIPLET_WIDTH, triplet_cmp);
854
855 if (!p)
856 return -1;
857
858 return bitmap_lookup_table_get_triplet_by_pointer(triplet, p);
859}
860
861static struct stored_bitmap *lazy_bitmap_for_commit(struct bitmap_index *bitmap_git,
862 struct commit *commit)
863{
864 uint32_t commit_pos, xor_row;
865 uint64_t offset;
866 int flags;
867 struct bitmap_lookup_table_triplet triplet;
868 struct object_id *oid = &commit->object.oid;
869 struct ewah_bitmap *bitmap;
870 struct stored_bitmap *xor_bitmap = NULL;
871 const int bitmap_header_size = 6;
872 static struct bitmap_lookup_table_xor_item *xor_items = NULL;
873 static size_t xor_items_nr = 0, xor_items_alloc = 0;
874 static int is_corrupt = 0;
875 int xor_flags;
876 khiter_t hash_pos;
877 struct bitmap_lookup_table_xor_item *xor_item;
878 size_t entry_map_pos;
879
880 if (is_corrupt)
881 return NULL;
882
883 if (!bitmap_bsearch_pos(bitmap_git, oid, &commit_pos))
884 return NULL;
885
886 if (bitmap_bsearch_triplet_by_pos(commit_pos, bitmap_git, &triplet) < 0)
887 return NULL;
888
889 xor_items_nr = 0;
890 offset = triplet.offset;
891 xor_row = triplet.xor_row;
892
893 while (xor_row != 0xffffffff) {
894 ALLOC_GROW(xor_items, xor_items_nr + 1, xor_items_alloc);
895
896 if (xor_items_nr + 1 >= bitmap_git->entry_count) {
897 error(_("corrupt bitmap lookup table: xor chain exceeds entry count"));
898 goto corrupt;
899 }
900
901 if (bitmap_lookup_table_get_triplet(bitmap_git, xor_row, &triplet) < 0)
902 goto corrupt;
903
904 xor_item = &xor_items[xor_items_nr];
905 xor_item->offset = triplet.offset;
906
907 if (nth_bitmap_object_oid(bitmap_git, &xor_item->oid, triplet.commit_pos) < 0) {
908 error(_("corrupt bitmap lookup table: commit index %u out of range"),
909 triplet.commit_pos);
910 goto corrupt;
911 }
912
913 hash_pos = kh_get_oid_map(bitmap_git->bitmaps, xor_item->oid);
914
915 /*
916 * If desired bitmap is already stored, we don't need
917 * to iterate further. Because we know that bitmaps
918 * that are needed to be parsed to parse this bitmap
919 * has already been stored. So, assign this stored bitmap
920 * to the xor_bitmap.
921 */
922 if (hash_pos < kh_end(bitmap_git->bitmaps) &&
923 (xor_bitmap = kh_value(bitmap_git->bitmaps, hash_pos)))
924 break;
925 xor_items_nr++;
926 xor_row = triplet.xor_row;
927 }
928
929 while (xor_items_nr) {
930 xor_item = &xor_items[xor_items_nr - 1];
931 bitmap_git->map_pos = xor_item->offset;
932 if (bitmap_git->map_size - bitmap_git->map_pos < bitmap_header_size) {
933 error(_("corrupt ewah bitmap: truncated header for bitmap of commit \"%s\""),
934 oid_to_hex(&xor_item->oid));
935 goto corrupt;
936 }
937
938 entry_map_pos = bitmap_git->map_pos;
939 bitmap_git->map_pos += sizeof(uint32_t) + sizeof(uint8_t);
940 xor_flags = read_u8(bitmap_git->map, &bitmap_git->map_pos);
941 bitmap = read_bitmap_1(bitmap_git);
942
943 if (!bitmap)
944 goto corrupt;
945
946 xor_bitmap = store_bitmap(bitmap_git, bitmap, &xor_item->oid,
947 xor_bitmap, xor_flags, entry_map_pos);
948 xor_items_nr--;
949 }
950
951 bitmap_git->map_pos = offset;
952 if (bitmap_git->map_size - bitmap_git->map_pos < bitmap_header_size) {
953 error(_("corrupt ewah bitmap: truncated header for bitmap of commit \"%s\""),
954 oid_to_hex(oid));
955 goto corrupt;
956 }
957
958 /*
959 * Don't bother reading the commit's index position or its xor
960 * offset:
961 *
962 * - The commit's index position is irrelevant to us, since
963 * load_bitmap_entries_v1 only uses it to learn the object
964 * id which is used to compute the hashmap's key. We already
965 * have an object id, so no need to look it up again.
966 *
967 * - The xor_offset is unusable for us, since it specifies how
968 * many entries previous to ours we should look at. This
969 * makes sense when reading the bitmaps sequentially (as in
970 * load_bitmap_entries_v1()), since we can keep track of
971 * each bitmap as we read them.
972 *
973 * But it can't work for us, since the bitmap's don't have a
974 * fixed size. So we learn the position of the xor'd bitmap
975 * from the commit table (and resolve it to a bitmap in the
976 * above if-statement).
977 *
978 * Instead, we can skip ahead and immediately read the flags and
979 * ewah bitmap.
980 */
981 entry_map_pos = bitmap_git->map_pos;
982 bitmap_git->map_pos += sizeof(uint32_t) + sizeof(uint8_t);
983 flags = read_u8(bitmap_git->map, &bitmap_git->map_pos);
984 bitmap = read_bitmap_1(bitmap_git);
985
986 if (!bitmap)
987 goto corrupt;
988
989 return store_bitmap(bitmap_git, bitmap, oid, xor_bitmap, flags,
990 entry_map_pos);
991
992corrupt:
993 free(xor_items);
994 is_corrupt = 1;
995 return NULL;
996}
997
998static struct ewah_bitmap *find_bitmap_for_commit(struct bitmap_index *bitmap_git,
999 struct commit *commit,
1000 struct bitmap_index **found)
1001{
1002 khiter_t hash_pos;
1003 if (!bitmap_git)
1004 return NULL;
1005
1006 hash_pos = kh_get_oid_map(bitmap_git->bitmaps, commit->object.oid);
1007 if (hash_pos >= kh_end(bitmap_git->bitmaps)) {
1008 struct stored_bitmap *bitmap = NULL;
1009 if (!bitmap_git->table_lookup)
1010 return find_bitmap_for_commit(bitmap_git->base, commit,
1011 found);
1012
1013 /* this is a fairly hot codepath - no trace2_region please */
1014 /* NEEDSWORK: cache misses aren't recorded */
1015 bitmap = lazy_bitmap_for_commit(bitmap_git, commit);
1016 if (!bitmap)
1017 return find_bitmap_for_commit(bitmap_git->base, commit,
1018 found);
1019 if (found)
1020 *found = bitmap_git;
1021 return lookup_stored_bitmap(bitmap);
1022 }
1023 if (found)
1024 *found = bitmap_git;
1025 return lookup_stored_bitmap(kh_value(bitmap_git->bitmaps, hash_pos));
1026}
1027
1028struct ewah_bitmap *bitmap_for_commit(struct bitmap_index *bitmap_git,
1029 struct commit *commit)
1030{
1031 return find_bitmap_for_commit(bitmap_git, commit, NULL);
1032}
1033
1034static inline int bitmap_position_extended(struct bitmap_index *bitmap_git,
1035 const struct object_id *oid)
1036{
1037 kh_oid_pos_t *positions = bitmap_git->ext_index.positions;
1038 khiter_t pos = kh_get_oid_pos(positions, *oid);
1039
1040 if (pos < kh_end(positions)) {
1041 int bitmap_pos = kh_value(positions, pos);
1042 return bitmap_pos + bitmap_num_objects_total(bitmap_git);
1043 }
1044
1045 return -1;
1046}
1047
1048static inline int bitmap_position_packfile(struct bitmap_index *bitmap_git,
1049 const struct object_id *oid)
1050{
1051 uint32_t pos;
1052 off_t offset = find_pack_entry_one(oid, bitmap_git->pack);
1053 if (!offset)
1054 return -1;
1055
1056 if (offset_to_pack_pos(bitmap_git->pack, offset, &pos) < 0)
1057 return -1;
1058 return pos;
1059}
1060
1061static int bitmap_position_midx(struct bitmap_index *bitmap_git,
1062 const struct object_id *oid)
1063{
1064 uint32_t want, got;
1065 if (!bsearch_midx(oid, bitmap_git->midx, &want))
1066 return -1;
1067
1068 if (midx_to_pack_pos(bitmap_git->midx, want, &got) < 0)
1069 return -1;
1070 return got;
1071}
1072
1073static int bitmap_position(struct bitmap_index *bitmap_git,
1074 const struct object_id *oid)
1075{
1076 int pos;
1077 if (bitmap_is_midx(bitmap_git))
1078 pos = bitmap_position_midx(bitmap_git, oid);
1079 else
1080 pos = bitmap_position_packfile(bitmap_git, oid);
1081 return (pos >= 0) ? pos : bitmap_position_extended(bitmap_git, oid);
1082}
1083
1084static int ext_index_add_object(struct bitmap_index *bitmap_git,
1085 struct object *object, const char *name)
1086{
1087 struct eindex *eindex = &bitmap_git->ext_index;
1088
1089 khiter_t hash_pos;
1090 int hash_ret;
1091 int bitmap_pos;
1092
1093 hash_pos = kh_put_oid_pos(eindex->positions, object->oid, &hash_ret);
1094 if (hash_ret > 0) {
1095 if (eindex->count >= eindex->alloc) {
1096 eindex->alloc = (eindex->alloc + 16) * 3 / 2;
1097 REALLOC_ARRAY(eindex->objects, eindex->alloc);
1098 REALLOC_ARRAY(eindex->hashes, eindex->alloc);
1099 }
1100
1101 bitmap_pos = eindex->count;
1102 eindex->objects[eindex->count] = object;
1103 eindex->hashes[eindex->count] = pack_name_hash(name);
1104 kh_value(eindex->positions, hash_pos) = bitmap_pos;
1105 eindex->count++;
1106 } else {
1107 bitmap_pos = kh_value(eindex->positions, hash_pos);
1108 }
1109
1110 return bitmap_pos + bitmap_num_objects_total(bitmap_git);
1111}
1112
1113struct bitmap_show_data {
1114 struct bitmap_index *bitmap_git;
1115 struct bitmap *base;
1116};
1117
1118static void show_object(struct object *object, const char *name, void *data_)
1119{
1120 struct bitmap_show_data *data = data_;
1121 int bitmap_pos;
1122
1123 bitmap_pos = bitmap_position(data->bitmap_git, &object->oid);
1124
1125 if (bitmap_pos < 0)
1126 bitmap_pos = ext_index_add_object(data->bitmap_git, object,
1127 name);
1128
1129 bitmap_set(data->base, bitmap_pos);
1130}
1131
1132static void show_commit(struct commit *commit UNUSED,
1133 void *data UNUSED)
1134{
1135}
1136
1137static unsigned apply_pseudo_merges_for_commit_1(struct bitmap_index *bitmap_git,
1138 struct bitmap *result,
1139 struct commit *commit,
1140 uint32_t commit_pos)
1141{
1142 struct bitmap_index *curr = bitmap_git;
1143 int ret = 0;
1144
1145 while (curr) {
1146 ret += apply_pseudo_merges_for_commit(&curr->pseudo_merges,
1147 result, commit,
1148 commit_pos);
1149 curr = curr->base;
1150 }
1151
1152 if (ret)
1153 pseudo_merges_satisfied_nr += ret;
1154
1155 return ret;
1156}
1157
1158static int add_to_include_set(struct bitmap_index *bitmap_git,
1159 struct include_data *data,
1160 struct commit *commit,
1161 int bitmap_pos)
1162{
1163 struct ewah_bitmap *partial;
1164
1165 if (data->seen && bitmap_get(data->seen, bitmap_pos))
1166 return 0;
1167
1168 if (bitmap_get(data->base, bitmap_pos))
1169 return 0;
1170
1171 partial = bitmap_for_commit(bitmap_git, commit);
1172 if (partial) {
1173 existing_bitmaps_hits_nr++;
1174
1175 bitmap_or_ewah(data->base, partial);
1176 return 0;
1177 }
1178
1179 existing_bitmaps_misses_nr++;
1180
1181 bitmap_set(data->base, bitmap_pos);
1182 if (apply_pseudo_merges_for_commit_1(bitmap_git, data->base, commit,
1183 bitmap_pos))
1184 return 0;
1185
1186 return 1;
1187}
1188
1189static int should_include(struct commit *commit, void *_data)
1190{
1191 struct include_data *data = _data;
1192 int bitmap_pos;
1193
1194 bitmap_pos = bitmap_position(data->bitmap_git, &commit->object.oid);
1195 if (bitmap_pos < 0)
1196 bitmap_pos = ext_index_add_object(data->bitmap_git,
1197 (struct object *)commit,
1198 NULL);
1199
1200 if (!add_to_include_set(data->bitmap_git, data, commit, bitmap_pos)) {
1201 struct commit_list *parent = commit->parents;
1202
1203 while (parent) {
1204 parent->item->object.flags |= SEEN;
1205 parent = parent->next;
1206 }
1207
1208 return 0;
1209 }
1210
1211 return 1;
1212}
1213
1214static int should_include_obj(struct object *obj, void *_data)
1215{
1216 struct include_data *data = _data;
1217 int bitmap_pos;
1218
1219 bitmap_pos = bitmap_position(data->bitmap_git, &obj->oid);
1220 if (bitmap_pos < 0)
1221 return 1;
1222 if ((data->seen && bitmap_get(data->seen, bitmap_pos)) ||
1223 bitmap_get(data->base, bitmap_pos)) {
1224 obj->flags |= SEEN;
1225 return 0;
1226 }
1227 return 1;
1228}
1229
1230static int add_commit_to_bitmap(struct bitmap_index *bitmap_git,
1231 struct bitmap **base,
1232 struct commit *commit)
1233{
1234 struct ewah_bitmap *or_with = bitmap_for_commit(bitmap_git, commit);
1235
1236 if (!or_with) {
1237 existing_bitmaps_misses_nr++;
1238 return 0;
1239 }
1240
1241 existing_bitmaps_hits_nr++;
1242
1243 if (!*base)
1244 *base = ewah_to_bitmap(or_with);
1245 else
1246 bitmap_or_ewah(*base, or_with);
1247
1248 return 1;
1249}
1250
1251static struct bitmap *fill_in_bitmap(struct bitmap_index *bitmap_git,
1252 struct rev_info *revs,
1253 struct bitmap *base,
1254 struct bitmap *seen)
1255{
1256 struct include_data incdata;
1257 struct bitmap_show_data show_data;
1258
1259 if (!base)
1260 base = bitmap_new();
1261
1262 incdata.bitmap_git = bitmap_git;
1263 incdata.base = base;
1264 incdata.seen = seen;
1265
1266 revs->include_check = should_include;
1267 revs->include_check_obj = should_include_obj;
1268 revs->include_check_data = &incdata;
1269
1270 if (prepare_revision_walk(revs))
1271 die(_("revision walk setup failed"));
1272
1273 show_data.bitmap_git = bitmap_git;
1274 show_data.base = base;
1275
1276 traverse_commit_list(revs, show_commit, show_object, &show_data);
1277
1278 revs->include_check = NULL;
1279 revs->include_check_obj = NULL;
1280 revs->include_check_data = NULL;
1281
1282 return base;
1283}
1284
1285struct bitmap_boundary_cb {
1286 struct bitmap_index *bitmap_git;
1287 struct bitmap *base;
1288
1289 struct object_array boundary;
1290};
1291
1292static void show_boundary_commit(struct commit *commit, void *_data)
1293{
1294 struct bitmap_boundary_cb *data = _data;
1295
1296 if (commit->object.flags & BOUNDARY)
1297 add_object_array(&commit->object, "", &data->boundary);
1298
1299 if (commit->object.flags & UNINTERESTING) {
1300 if (bitmap_walk_contains(data->bitmap_git, data->base,
1301 &commit->object.oid))
1302 return;
1303
1304 add_commit_to_bitmap(data->bitmap_git, &data->base, commit);
1305 }
1306}
1307
1308static void show_boundary_object(struct object *object UNUSED,
1309 const char *name UNUSED,
1310 void *data UNUSED)
1311{
1312 BUG("should not be called");
1313}
1314
1315static unsigned cascade_pseudo_merges_1(struct bitmap_index *bitmap_git,
1316 struct bitmap *result,
1317 struct bitmap *roots)
1318{
1319 int ret = cascade_pseudo_merges(&bitmap_git->pseudo_merges,
1320 result, roots);
1321 if (ret) {
1322 pseudo_merges_cascades_nr++;
1323 pseudo_merges_satisfied_nr += ret;
1324 }
1325
1326 return ret;
1327}
1328
1329static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
1330 struct rev_info *revs,
1331 struct object_list *roots)
1332{
1333 struct bitmap_boundary_cb cb;
1334 struct object_list *root;
1335 struct repository *repo;
1336 unsigned int i;
1337 unsigned int tmp_blobs, tmp_trees, tmp_tags;
1338 int any_missing = 0;
1339 int existing_bitmaps = 0;
1340
1341 cb.bitmap_git = bitmap_git;
1342 cb.base = bitmap_new();
1343 object_array_init(&cb.boundary);
1344
1345 repo = bitmap_repo(bitmap_git);
1346
1347 revs->ignore_missing_links = 1;
1348
1349 if (bitmap_git->pseudo_merges.nr) {
1350 struct bitmap *roots_bitmap = bitmap_new();
1351 struct object_list *objects = NULL;
1352
1353 for (objects = roots; objects; objects = objects->next) {
1354 struct object *object = objects->item;
1355 int pos;
1356
1357 pos = bitmap_position(bitmap_git, &object->oid);
1358 if (pos < 0)
1359 continue;
1360
1361 bitmap_set(roots_bitmap, pos);
1362 }
1363
1364 cascade_pseudo_merges_1(bitmap_git, cb.base, roots_bitmap);
1365 bitmap_free(roots_bitmap);
1366 }
1367
1368 /*
1369 * OR in any existing reachability bitmaps among `roots` into
1370 * `cb.base`.
1371 */
1372 for (root = roots; root; root = root->next) {
1373 struct object *object = root->item;
1374 if (object->type != OBJ_COMMIT ||
1375 bitmap_walk_contains(bitmap_git, cb.base, &object->oid))
1376 continue;
1377
1378 if (add_commit_to_bitmap(bitmap_git, &cb.base,
1379 (struct commit *)object)) {
1380 existing_bitmaps = 1;
1381 continue;
1382 }
1383
1384 any_missing = 1;
1385 }
1386
1387 if (!any_missing)
1388 goto cleanup;
1389
1390 if (existing_bitmaps)
1391 cascade_pseudo_merges_1(bitmap_git, cb.base, NULL);
1392
1393 tmp_blobs = revs->blob_objects;
1394 tmp_trees = revs->tree_objects;
1395 tmp_tags = revs->tag_objects;
1396 revs->blob_objects = 0;
1397 revs->tree_objects = 0;
1398 revs->tag_objects = 0;
1399
1400 /*
1401 * We didn't have complete coverage of the roots. First setup a
1402 * revision walk to (a) OR in any bitmaps that are UNINTERESTING
1403 * between the tips and boundary, and (b) record the boundary.
1404 */
1405 trace2_region_enter("pack-bitmap", "boundary-prepare", repo);
1406 if (prepare_revision_walk(revs))
1407 die("revision walk setup failed");
1408 trace2_region_leave("pack-bitmap", "boundary-prepare", repo);
1409
1410 trace2_region_enter("pack-bitmap", "boundary-traverse", repo);
1411 revs->boundary = 1;
1412 traverse_commit_list_filtered(revs,
1413 show_boundary_commit,
1414 show_boundary_object,
1415 &cb, NULL);
1416 revs->boundary = 0;
1417 trace2_region_leave("pack-bitmap", "boundary-traverse", repo);
1418
1419 revs->blob_objects = tmp_blobs;
1420 revs->tree_objects = tmp_trees;
1421 revs->tag_objects = tmp_tags;
1422
1423 reset_revision_walk();
1424 clear_object_flags(repo, UNINTERESTING);
1425
1426 /*
1427 * Then add the boundary commit(s) as fill-in traversal tips.
1428 */
1429 trace2_region_enter("pack-bitmap", "boundary-fill-in", repo);
1430 for (i = 0; i < cb.boundary.nr; i++) {
1431 struct object *obj = cb.boundary.objects[i].item;
1432 if (bitmap_walk_contains(bitmap_git, cb.base, &obj->oid))
1433 obj->flags |= SEEN;
1434 else
1435 add_pending_object(revs, obj, "");
1436 }
1437 if (revs->pending.nr)
1438 cb.base = fill_in_bitmap(bitmap_git, revs, cb.base, NULL);
1439 trace2_region_leave("pack-bitmap", "boundary-fill-in", repo);
1440
1441cleanup:
1442 object_array_clear(&cb.boundary);
1443 revs->ignore_missing_links = 0;
1444
1445 return cb.base;
1446}
1447
1448struct ewah_bitmap *pseudo_merge_bitmap_for_commit(struct bitmap_index *bitmap_git,
1449 struct commit *commit)
1450{
1451 struct commit_list *p;
1452 struct bitmap *parents;
1453 struct pseudo_merge *match = NULL;
1454
1455 if (!bitmap_git->pseudo_merges.nr)
1456 return NULL;
1457
1458 parents = bitmap_new();
1459
1460 for (p = commit->parents; p; p = p->next) {
1461 int pos = bitmap_position(bitmap_git, &p->item->object.oid);
1462 if (pos < 0 || pos >= bitmap_num_objects(bitmap_git))
1463 goto done;
1464
1465 /*
1466 * Use bitmap-relative positions instead of offsetting
1467 * by bitmap_git->num_objects_in_base because we use
1468 * this to find a match in pseudo_merge_for_parents(),
1469 * and pseudo-merge groups cannot span multiple bitmap
1470 * layers.
1471 */
1472 bitmap_set(parents, pos);
1473 }
1474
1475 match = pseudo_merge_for_parents(&bitmap_git->pseudo_merges, parents);
1476
1477done:
1478 bitmap_free(parents);
1479 if (match)
1480 return pseudo_merge_bitmap(&bitmap_git->pseudo_merges, match);
1481
1482 return NULL;
1483}
1484
1485static void unsatisfy_all_pseudo_merges(struct bitmap_index *bitmap_git)
1486{
1487 uint32_t i;
1488 for (i = 0; i < bitmap_git->pseudo_merges.nr; i++)
1489 bitmap_git->pseudo_merges.v[i].satisfied = 0;
1490}
1491
1492static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
1493 struct rev_info *revs,
1494 struct object_list *roots,
1495 struct bitmap *seen)
1496{
1497 struct bitmap *base = NULL;
1498 int needs_walk = 0;
1499 unsigned existing_bitmaps = 0;
1500
1501 struct object_list *not_mapped = NULL;
1502
1503 unsatisfy_all_pseudo_merges(bitmap_git);
1504
1505 if (bitmap_git->pseudo_merges.nr) {
1506 struct bitmap *roots_bitmap = bitmap_new();
1507 struct object_list *objects = NULL;
1508
1509 for (objects = roots; objects; objects = objects->next) {
1510 struct object *object = objects->item;
1511 int pos;
1512
1513 pos = bitmap_position(bitmap_git, &object->oid);
1514 if (pos < 0)
1515 continue;
1516
1517 bitmap_set(roots_bitmap, pos);
1518 }
1519
1520 base = bitmap_new();
1521 cascade_pseudo_merges_1(bitmap_git, base, roots_bitmap);
1522 bitmap_free(roots_bitmap);
1523 }
1524
1525 /*
1526 * Go through all the roots for the walk. The ones that have bitmaps
1527 * on the bitmap index will be `or`ed together to form an initial
1528 * global reachability analysis.
1529 *
1530 * The ones without bitmaps in the index will be stored in the
1531 * `not_mapped_list` for further processing.
1532 */
1533 while (roots) {
1534 struct object *object = roots->item;
1535
1536 roots = roots->next;
1537
1538 if (base) {
1539 int pos = bitmap_position(bitmap_git, &object->oid);
1540 if (pos > 0 && bitmap_get(base, pos)) {
1541 object->flags |= SEEN;
1542 continue;
1543 }
1544 }
1545
1546 if (object->type == OBJ_COMMIT &&
1547 add_commit_to_bitmap(bitmap_git, &base, (struct commit *)object)) {
1548 object->flags |= SEEN;
1549 existing_bitmaps = 1;
1550 continue;
1551 }
1552
1553 object_list_insert(object, ¬_mapped);
1554 }
1555
1556 /*
1557 * Best case scenario: We found bitmaps for all the roots,
1558 * so the resulting `or` bitmap has the full reachability analysis
1559 */
1560 if (!not_mapped)
1561 return base;
1562
1563 roots = not_mapped;
1564
1565 if (existing_bitmaps)
1566 cascade_pseudo_merges_1(bitmap_git, base, NULL);
1567
1568 /*
1569 * Let's iterate through all the roots that don't have bitmaps to
1570 * check if we can determine them to be reachable from the existing
1571 * global bitmap.
1572 *
1573 * If we cannot find them in the existing global bitmap, we'll need
1574 * to push them to an actual walk and run it until we can confirm
1575 * they are reachable
1576 */
1577 while (roots) {
1578 struct object *object = roots->item;
1579 int pos;
1580
1581 roots = roots->next;
1582 pos = bitmap_position(bitmap_git, &object->oid);
1583
1584 if (pos < 0 || base == NULL || !bitmap_get(base, pos)) {
1585 object->flags &= ~UNINTERESTING;
1586 add_pending_object(revs, object, "");
1587 needs_walk = 1;
1588
1589 roots_without_bitmaps_nr++;
1590 } else {
1591 object->flags |= SEEN;
1592
1593 roots_with_bitmaps_nr++;
1594 }
1595 }
1596
1597 if (needs_walk) {
1598 /*
1599 * This fill-in traversal may walk over some objects
1600 * again, since we have already traversed in order to
1601 * find the boundary.
1602 *
1603 * But this extra walk should be extremely cheap, since
1604 * all commit objects are loaded into memory, and
1605 * because we skip walking to parents that are
1606 * UNINTERESTING, since it will be marked in the haves
1607 * bitmap already (or it has an on-disk bitmap, since
1608 * OR-ing it in covers all of its ancestors).
1609 */
1610 base = fill_in_bitmap(bitmap_git, revs, base, seen);
1611 }
1612
1613 object_list_free(¬_mapped);
1614
1615 return base;
1616}
1617
1618static void show_extended_objects(struct bitmap_index *bitmap_git,
1619 struct rev_info *revs,
1620 show_reachable_fn show_reach)
1621{
1622 struct bitmap *objects = bitmap_git->result;
1623 struct eindex *eindex = &bitmap_git->ext_index;
1624 uint32_t i;
1625
1626 for (i = 0; i < eindex->count; ++i) {
1627 struct object *obj;
1628
1629 if (!bitmap_get(objects,
1630 st_add(bitmap_num_objects_total(bitmap_git),
1631 i)))
1632 continue;
1633
1634 obj = eindex->objects[i];
1635 if ((obj->type == OBJ_BLOB && !revs->blob_objects) ||
1636 (obj->type == OBJ_TREE && !revs->tree_objects) ||
1637 (obj->type == OBJ_TAG && !revs->tag_objects))
1638 continue;
1639
1640 show_reach(&obj->oid, obj->type, 0, eindex->hashes[i], NULL, 0, NULL);
1641 }
1642}
1643
1644static void init_type_iterator(struct ewah_or_iterator *it,
1645 struct bitmap_index *bitmap_git,
1646 enum object_type type)
1647{
1648 switch (type) {
1649 case OBJ_COMMIT:
1650 ewah_or_iterator_init(it, bitmap_git->commits_all,
1651 bitmap_git->base_nr + 1);
1652 break;
1653
1654 case OBJ_TREE:
1655 ewah_or_iterator_init(it, bitmap_git->trees_all,
1656 bitmap_git->base_nr + 1);
1657 break;
1658
1659 case OBJ_BLOB:
1660 ewah_or_iterator_init(it, bitmap_git->blobs_all,
1661 bitmap_git->base_nr + 1);
1662 break;
1663
1664 case OBJ_TAG:
1665 ewah_or_iterator_init(it, bitmap_git->tags_all,
1666 bitmap_git->base_nr + 1);
1667 break;
1668
1669 default:
1670 BUG("object type %d not stored by bitmap type index", type);
1671 break;
1672 }
1673}
1674
1675static void show_objects_for_type(
1676 struct bitmap_index *bitmap_git,
1677 struct bitmap *objects,
1678 enum object_type object_type,
1679 show_reachable_fn show_reach,
1680 void *payload)
1681{
1682 size_t i = 0;
1683 uint32_t offset;
1684
1685 struct ewah_or_iterator it;
1686 eword_t filter;
1687
1688 init_type_iterator(&it, bitmap_git, object_type);
1689
1690 for (i = 0; i < objects->word_alloc &&
1691 ewah_or_iterator_next(&filter, &it); i++) {
1692 eword_t word = objects->words[i] & filter;
1693 size_t pos = (i * BITS_IN_EWORD);
1694
1695 if (!word)
1696 continue;
1697
1698 for (offset = 0; offset < BITS_IN_EWORD; ++offset) {
1699 struct packed_git *pack;
1700 struct object_id oid;
1701 uint32_t hash = 0, index_pos;
1702 off_t ofs;
1703
1704 if ((word >> offset) == 0)
1705 break;
1706
1707 offset += ewah_bit_ctz64(word >> offset);
1708
1709 if (bitmap_is_midx(bitmap_git)) {
1710 struct multi_pack_index *m = bitmap_git->midx;
1711 uint32_t pack_id;
1712
1713 index_pos = pack_pos_to_midx(m, pos + offset);
1714 ofs = nth_midxed_offset(m, index_pos);
1715 nth_midxed_object_oid(&oid, m, index_pos);
1716
1717 pack_id = nth_midxed_pack_int_id(m, index_pos);
1718 pack = nth_midxed_pack(bitmap_git->midx, pack_id);
1719 } else {
1720 index_pos = pack_pos_to_index(bitmap_git->pack, pos + offset);
1721 ofs = pack_pos_to_offset(bitmap_git->pack, pos + offset);
1722 nth_bitmap_object_oid(bitmap_git, &oid, index_pos);
1723
1724 pack = bitmap_git->pack;
1725 }
1726
1727 if (bitmap_git->hashes)
1728 hash = get_be32(bitmap_git->hashes + index_pos);
1729
1730 show_reach(&oid, object_type, 0, hash, pack, ofs, payload);
1731 }
1732 }
1733
1734 ewah_or_iterator_release(&it);
1735}
1736
1737static int in_bitmapped_pack(struct bitmap_index *bitmap_git,
1738 struct object_list *roots)
1739{
1740 while (roots) {
1741 struct object *object = roots->item;
1742 roots = roots->next;
1743
1744 if (bitmap_is_midx(bitmap_git)) {
1745 if (bsearch_midx(&object->oid, bitmap_git->midx, NULL))
1746 return 1;
1747 } else {
1748 if (find_pack_entry_one(&object->oid, bitmap_git->pack) > 0)
1749 return 1;
1750 }
1751 }
1752
1753 return 0;
1754}
1755
1756static struct bitmap *find_tip_objects(struct bitmap_index *bitmap_git,
1757 struct object_list *tip_objects,
1758 enum object_type type)
1759{
1760 struct bitmap *result = bitmap_new();
1761 struct object_list *p;
1762
1763 for (p = tip_objects; p; p = p->next) {
1764 int pos;
1765
1766 if (p->item->type != type)
1767 continue;
1768
1769 pos = bitmap_position(bitmap_git, &p->item->oid);
1770 if (pos < 0)
1771 continue;
1772
1773 bitmap_set(result, pos);
1774 }
1775
1776 return result;
1777}
1778
1779static void filter_bitmap_exclude_type(struct bitmap_index *bitmap_git,
1780 struct object_list *tip_objects,
1781 struct bitmap *to_filter,
1782 enum object_type type)
1783{
1784 struct eindex *eindex = &bitmap_git->ext_index;
1785 struct bitmap *tips;
1786 struct ewah_or_iterator it;
1787 eword_t mask;
1788 uint32_t i;
1789
1790 /*
1791 * The non-bitmap version of this filter never removes
1792 * objects which the other side specifically asked for,
1793 * so we must match that behavior.
1794 */
1795 tips = find_tip_objects(bitmap_git, tip_objects, type);
1796
1797 /*
1798 * We can use the type-level bitmap for 'type' to work in whole
1799 * words for the objects that are actually in the bitmapped
1800 * packfile.
1801 */
1802 for (i = 0, init_type_iterator(&it, bitmap_git, type);
1803 i < to_filter->word_alloc && ewah_or_iterator_next(&mask, &it);
1804 i++) {
1805 if (i < tips->word_alloc)
1806 mask &= ~tips->words[i];
1807 to_filter->words[i] &= ~mask;
1808 }
1809
1810 /*
1811 * Clear any objects that weren't in the packfile (and so would
1812 * not have been caught by the loop above. We'll have to check
1813 * them individually.
1814 */
1815 for (i = 0; i < eindex->count; i++) {
1816 size_t pos = st_add(i, bitmap_num_objects_total(bitmap_git));
1817 if (eindex->objects[i]->type == type &&
1818 bitmap_get(to_filter, pos) &&
1819 !bitmap_get(tips, pos))
1820 bitmap_unset(to_filter, pos);
1821 }
1822
1823 ewah_or_iterator_release(&it);
1824 bitmap_free(tips);
1825}
1826
1827static void filter_bitmap_blob_none(struct bitmap_index *bitmap_git,
1828 struct object_list *tip_objects,
1829 struct bitmap *to_filter)
1830{
1831 filter_bitmap_exclude_type(bitmap_git, tip_objects, to_filter,
1832 OBJ_BLOB);
1833}
1834
1835static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
1836 uint32_t pos)
1837{
1838 unsigned long size;
1839 struct object_info oi = OBJECT_INFO_INIT;
1840
1841 oi.sizep = &size;
1842
1843 if (pos < bitmap_num_objects_total(bitmap_git)) {
1844 struct packed_git *pack;
1845 off_t ofs;
1846
1847 if (bitmap_is_midx(bitmap_git)) {
1848 uint32_t midx_pos = pack_pos_to_midx(bitmap_git->midx, pos);
1849 uint32_t pack_id = nth_midxed_pack_int_id(bitmap_git->midx, midx_pos);
1850
1851 pack = nth_midxed_pack(bitmap_git->midx, pack_id);
1852 ofs = nth_midxed_offset(bitmap_git->midx, midx_pos);
1853 } else {
1854 pack = bitmap_git->pack;
1855 ofs = pack_pos_to_offset(pack, pos);
1856 }
1857
1858 if (packed_object_info(bitmap_repo(bitmap_git), pack, ofs,
1859 &oi) < 0) {
1860 struct object_id oid;
1861 nth_bitmap_object_oid(bitmap_git, &oid,
1862 pack_pos_to_index(pack, pos));
1863 die(_("unable to get size of %s"), oid_to_hex(&oid));
1864 }
1865 } else {
1866 size_t eindex_pos = pos - bitmap_num_objects_total(bitmap_git);
1867 struct eindex *eindex = &bitmap_git->ext_index;
1868 struct object *obj = eindex->objects[eindex_pos];
1869 if (odb_read_object_info_extended(bitmap_repo(bitmap_git)->objects, &obj->oid,
1870 &oi, 0) < 0)
1871 die(_("unable to get size of %s"), oid_to_hex(&obj->oid));
1872 }
1873
1874 return size;
1875}
1876
1877static void filter_bitmap_blob_limit(struct bitmap_index *bitmap_git,
1878 struct object_list *tip_objects,
1879 struct bitmap *to_filter,
1880 unsigned long limit)
1881{
1882 struct eindex *eindex = &bitmap_git->ext_index;
1883 struct bitmap *tips;
1884 struct ewah_or_iterator it;
1885 eword_t mask;
1886 uint32_t i;
1887
1888 tips = find_tip_objects(bitmap_git, tip_objects, OBJ_BLOB);
1889
1890 for (i = 0, init_type_iterator(&it, bitmap_git, OBJ_BLOB);
1891 i < to_filter->word_alloc && ewah_or_iterator_next(&mask, &it);
1892 i++) {
1893 eword_t word = to_filter->words[i] & mask;
1894 unsigned offset;
1895
1896 for (offset = 0; offset < BITS_IN_EWORD; offset++) {
1897 uint32_t pos;
1898
1899 if ((word >> offset) == 0)
1900 break;
1901 offset += ewah_bit_ctz64(word >> offset);
1902 pos = i * BITS_IN_EWORD + offset;
1903
1904 if (!bitmap_get(tips, pos) &&
1905 get_size_by_pos(bitmap_git, pos) >= limit)
1906 bitmap_unset(to_filter, pos);
1907 }
1908 }
1909
1910 for (i = 0; i < eindex->count; i++) {
1911 size_t pos = st_add(i, bitmap_num_objects(bitmap_git));
1912 if (eindex->objects[i]->type == OBJ_BLOB &&
1913 bitmap_get(to_filter, pos) &&
1914 !bitmap_get(tips, pos) &&
1915 get_size_by_pos(bitmap_git, pos) >= limit)
1916 bitmap_unset(to_filter, pos);
1917 }
1918
1919 ewah_or_iterator_release(&it);
1920 bitmap_free(tips);
1921}
1922
1923static void filter_bitmap_tree_depth(struct bitmap_index *bitmap_git,
1924 struct object_list *tip_objects,
1925 struct bitmap *to_filter,
1926 unsigned long limit)
1927{
1928 if (limit)
1929 BUG("filter_bitmap_tree_depth given non-zero limit");
1930
1931 filter_bitmap_exclude_type(bitmap_git, tip_objects, to_filter,
1932 OBJ_TREE);
1933 filter_bitmap_exclude_type(bitmap_git, tip_objects, to_filter,
1934 OBJ_BLOB);
1935}
1936
1937static void filter_bitmap_object_type(struct bitmap_index *bitmap_git,
1938 struct object_list *tip_objects,
1939 struct bitmap *to_filter,
1940 enum object_type object_type)
1941{
1942 if (object_type < OBJ_COMMIT || object_type > OBJ_TAG)
1943 BUG("filter_bitmap_object_type given invalid object");
1944
1945 if (object_type != OBJ_TAG)
1946 filter_bitmap_exclude_type(bitmap_git, tip_objects, to_filter, OBJ_TAG);
1947 if (object_type != OBJ_COMMIT)
1948 filter_bitmap_exclude_type(bitmap_git, tip_objects, to_filter, OBJ_COMMIT);
1949 if (object_type != OBJ_TREE)
1950 filter_bitmap_exclude_type(bitmap_git, tip_objects, to_filter, OBJ_TREE);
1951 if (object_type != OBJ_BLOB)
1952 filter_bitmap_exclude_type(bitmap_git, tip_objects, to_filter, OBJ_BLOB);
1953}
1954
1955static int filter_bitmap(struct bitmap_index *bitmap_git,
1956 struct object_list *tip_objects,
1957 struct bitmap *to_filter,
1958 struct list_objects_filter_options *filter)
1959{
1960 if (!filter || filter->choice == LOFC_DISABLED)
1961 return 0;
1962
1963 if (filter->choice == LOFC_BLOB_NONE) {
1964 if (bitmap_git)
1965 filter_bitmap_blob_none(bitmap_git, tip_objects,
1966 to_filter);
1967 return 0;
1968 }
1969
1970 if (filter->choice == LOFC_BLOB_LIMIT) {
1971 if (bitmap_git)
1972 filter_bitmap_blob_limit(bitmap_git, tip_objects,
1973 to_filter,
1974 filter->blob_limit_value);
1975 return 0;
1976 }
1977
1978 if (filter->choice == LOFC_TREE_DEPTH &&
1979 filter->tree_exclude_depth == 0) {
1980 if (bitmap_git)
1981 filter_bitmap_tree_depth(bitmap_git, tip_objects,
1982 to_filter,
1983 filter->tree_exclude_depth);
1984 return 0;
1985 }
1986
1987 if (filter->choice == LOFC_OBJECT_TYPE) {
1988 if (bitmap_git)
1989 filter_bitmap_object_type(bitmap_git, tip_objects,
1990 to_filter,
1991 filter->object_type);
1992 return 0;
1993 }
1994
1995 if (filter->choice == LOFC_COMBINE) {
1996 int i;
1997 for (i = 0; i < filter->sub_nr; i++) {
1998 if (filter_bitmap(bitmap_git, tip_objects, to_filter,
1999 &filter->sub[i]) < 0)
2000 return -1;
2001 }
2002 return 0;
2003 }
2004
2005 /* filter choice not handled */
2006 return -1;
2007}
2008
2009static int can_filter_bitmap(struct list_objects_filter_options *filter)
2010{
2011 return !filter_bitmap(NULL, NULL, NULL, filter);
2012}
2013
2014
2015static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git,
2016 struct bitmap *result)
2017{
2018 struct eindex *eindex = &bitmap_git->ext_index;
2019 uint32_t objects_nr;
2020 size_t i, pos;
2021
2022 objects_nr = bitmap_num_objects_total(bitmap_git);
2023 pos = objects_nr / BITS_IN_EWORD;
2024
2025 if (pos > result->word_alloc)
2026 pos = result->word_alloc;
2027
2028 memset(result->words, 0x00, sizeof(eword_t) * pos);
2029 for (i = pos * BITS_IN_EWORD; i < objects_nr; i++)
2030 bitmap_unset(result, i);
2031
2032 for (i = 0; i < eindex->count; ++i) {
2033 if (has_object_pack(bitmap_repo(bitmap_git),
2034 &eindex->objects[i]->oid))
2035 bitmap_unset(result, objects_nr + i);
2036 }
2037}
2038
2039int for_each_bitmapped_object(struct bitmap_index *bitmap_git,
2040 struct list_objects_filter_options *filter,
2041 show_reachable_fn show_reach,
2042 void *payload)
2043{
2044 struct bitmap *filtered_bitmap = NULL;
2045 uint32_t objects_nr;
2046 size_t full_word_count;
2047 int ret;
2048
2049 if (!can_filter_bitmap(filter)) {
2050 ret = -1;
2051 goto out;
2052 }
2053
2054 objects_nr = bitmap_num_objects(bitmap_git);
2055 full_word_count = objects_nr / BITS_IN_EWORD;
2056
2057 /* We start from the all-1 bitmap and then filter down from there. */
2058 filtered_bitmap = bitmap_word_alloc(full_word_count + !!(objects_nr % BITS_IN_EWORD));
2059 memset(filtered_bitmap->words, 0xff, full_word_count * sizeof(*filtered_bitmap->words));
2060 for (size_t i = full_word_count * BITS_IN_EWORD; i < objects_nr; i++)
2061 bitmap_set(filtered_bitmap, i);
2062
2063 if (filter_bitmap(bitmap_git, NULL, filtered_bitmap, filter) < 0) {
2064 ret = -1;
2065 goto out;
2066 }
2067
2068 show_objects_for_type(bitmap_git, filtered_bitmap,
2069 OBJ_COMMIT, show_reach, payload);
2070 show_objects_for_type(bitmap_git, filtered_bitmap,
2071 OBJ_TREE, show_reach, payload);
2072 show_objects_for_type(bitmap_git, filtered_bitmap,
2073 OBJ_BLOB, show_reach, payload);
2074 show_objects_for_type(bitmap_git, filtered_bitmap,
2075 OBJ_TAG, show_reach, payload);
2076
2077 ret = 0;
2078out:
2079 bitmap_free(filtered_bitmap);
2080 return ret;
2081}
2082
2083struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
2084 int filter_provided_objects)
2085{
2086 unsigned int i;
2087 int use_boundary_traversal;
2088
2089 struct object_list *wants = NULL;
2090 struct object_list *haves = NULL;
2091
2092 struct bitmap *wants_bitmap = NULL;
2093 struct bitmap *haves_bitmap = NULL;
2094
2095 struct bitmap_index *bitmap_git;
2096 struct repository *repo;
2097
2098 /*
2099 * We can't do pathspec limiting with bitmaps, because we don't know
2100 * which commits are associated with which object changes (let alone
2101 * even which objects are associated with which paths).
2102 */
2103 if (revs->prune)
2104 return NULL;
2105
2106 if (!can_filter_bitmap(&revs->filter))
2107 return NULL;
2108
2109 /* try to open a bitmapped pack, but don't parse it yet
2110 * because we may not need to use it */
2111 CALLOC_ARRAY(bitmap_git, 1);
2112 if (open_bitmap(revs->repo, bitmap_git) < 0)
2113 goto cleanup;
2114
2115 for (i = 0; i < revs->pending.nr; ++i) {
2116 struct object *object = revs->pending.objects[i].item;
2117
2118 if (object->type == OBJ_NONE)
2119 parse_object_or_die(revs->repo, &object->oid, NULL);
2120
2121 while (object->type == OBJ_TAG) {
2122 struct tag *tag = (struct tag *) object;
2123
2124 if (object->flags & UNINTERESTING)
2125 object_list_insert(object, &haves);
2126 else
2127 object_list_insert(object, &wants);
2128
2129 object = parse_object_or_die(revs->repo, get_tagged_oid(tag), NULL);
2130 object->flags |= (tag->object.flags & UNINTERESTING);
2131 }
2132
2133 if (object->flags & UNINTERESTING)
2134 object_list_insert(object, &haves);
2135 else
2136 object_list_insert(object, &wants);
2137 }
2138
2139 use_boundary_traversal = git_env_bool(GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL, -1);
2140 if (use_boundary_traversal < 0) {
2141 prepare_repo_settings(revs->repo);
2142 use_boundary_traversal = revs->repo->settings.pack_use_bitmap_boundary_traversal;
2143 }
2144
2145 if (!use_boundary_traversal) {
2146 /*
2147 * if we have a HAVES list, but none of those haves is contained
2148 * in the packfile that has a bitmap, we don't have anything to
2149 * optimize here
2150 */
2151 if (haves && !in_bitmapped_pack(bitmap_git, haves))
2152 goto cleanup;
2153 }
2154
2155 /* if we don't want anything, we're done here */
2156 if (!wants)
2157 goto cleanup;
2158
2159 /*
2160 * now we're going to use bitmaps, so load the actual bitmap entries
2161 * from disk. this is the point of no return; after this the rev_list
2162 * becomes invalidated and we must perform the revwalk through bitmaps
2163 */
2164 if (load_bitmap(revs->repo, bitmap_git, 0) < 0)
2165 goto cleanup;
2166
2167 if (!use_boundary_traversal)
2168 object_array_clear(&revs->pending);
2169
2170 repo = bitmap_repo(bitmap_git);
2171
2172 if (haves) {
2173 if (use_boundary_traversal) {
2174 trace2_region_enter("pack-bitmap", "haves/boundary", repo);
2175 haves_bitmap = find_boundary_objects(bitmap_git, revs, haves);
2176 trace2_region_leave("pack-bitmap", "haves/boundary", repo);
2177 } else {
2178 trace2_region_enter("pack-bitmap", "haves/classic", repo);
2179 revs->ignore_missing_links = 1;
2180 haves_bitmap = find_objects(bitmap_git, revs, haves, NULL);
2181 reset_revision_walk();
2182 revs->ignore_missing_links = 0;
2183 trace2_region_leave("pack-bitmap", "haves/classic", repo);
2184 }
2185
2186 if (!haves_bitmap)
2187 BUG("failed to perform bitmap walk");
2188 }
2189
2190 if (use_boundary_traversal) {
2191 object_array_clear(&revs->pending);
2192 reset_revision_walk();
2193 }
2194
2195 wants_bitmap = find_objects(bitmap_git, revs, wants, haves_bitmap);
2196
2197 if (!wants_bitmap)
2198 BUG("failed to perform bitmap walk");
2199
2200 if (haves_bitmap)
2201 bitmap_and_not(wants_bitmap, haves_bitmap);
2202
2203 filter_bitmap(bitmap_git,
2204 (revs->filter.choice && filter_provided_objects) ? NULL : wants,
2205 wants_bitmap,
2206 &revs->filter);
2207
2208 if (revs->unpacked)
2209 filter_packed_objects_from_bitmap(bitmap_git, wants_bitmap);
2210
2211 bitmap_git->result = wants_bitmap;
2212 bitmap_git->haves = haves_bitmap;
2213
2214 object_list_free(&wants);
2215 object_list_free(&haves);
2216
2217 trace2_data_intmax("bitmap", repo, "pseudo_merges_satisfied",
2218 pseudo_merges_satisfied_nr);
2219 trace2_data_intmax("bitmap", repo, "pseudo_merges_cascades",
2220 pseudo_merges_cascades_nr);
2221 trace2_data_intmax("bitmap", repo, "bitmap/hits",
2222 existing_bitmaps_hits_nr);
2223 trace2_data_intmax("bitmap", repo, "bitmap/misses",
2224 existing_bitmaps_misses_nr);
2225 trace2_data_intmax("bitmap", repo, "bitmap/roots_with_bitmap",
2226 roots_with_bitmaps_nr);
2227 trace2_data_intmax("bitmap", repo, "bitmap/roots_without_bitmap",
2228 roots_without_bitmaps_nr);
2229
2230 return bitmap_git;
2231
2232cleanup:
2233 free_bitmap_index(bitmap_git);
2234 object_list_free(&wants);
2235 object_list_free(&haves);
2236 return NULL;
2237}
2238
2239/*
2240 * -1 means "stop trying further objects"; 0 means we may or may not have
2241 * reused, but you can keep feeding bits.
2242 */
2243static int try_partial_reuse(struct bitmap_index *bitmap_git,
2244 struct bitmapped_pack *pack,
2245 size_t bitmap_pos,
2246 uint32_t pack_pos,
2247 off_t offset,
2248 struct bitmap *reuse,
2249 struct pack_window **w_curs)
2250{
2251 off_t delta_obj_offset;
2252 enum object_type type;
2253 unsigned long size;
2254
2255 if (pack_pos >= pack->p->num_objects)
2256 return -1; /* not actually in the pack */
2257
2258 delta_obj_offset = offset;
2259 type = unpack_object_header(pack->p, w_curs, &offset, &size);
2260 if (type < 0)
2261 return -1; /* broken packfile, punt */
2262
2263 if (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA) {
2264 off_t base_offset;
2265 uint32_t base_pos;
2266 uint32_t base_bitmap_pos;
2267
2268 /*
2269 * Find the position of the base object so we can look it up
2270 * in our bitmaps. If we can't come up with an offset, or if
2271 * that offset is not in the revidx, the pack is corrupt.
2272 * There's nothing we can do, so just punt on this object,
2273 * and the normal slow path will complain about it in
2274 * more detail.
2275 */
2276 base_offset = get_delta_base(pack->p, w_curs, &offset, type,
2277 delta_obj_offset);
2278 if (!base_offset)
2279 return 0;
2280
2281 offset_to_pack_pos(pack->p, base_offset, &base_pos);
2282
2283 if (bitmap_is_midx(bitmap_git)) {
2284 /*
2285 * Cross-pack deltas are rejected for now, but could
2286 * theoretically be supported in the future.
2287 *
2288 * We would need to ensure that we're sending both
2289 * halves of the delta/base pair, regardless of whether
2290 * or not the two cross a pack boundary. If they do,
2291 * then we must convert the delta to an REF_DELTA to
2292 * refer back to the base in the other pack.
2293 * */
2294 if (midx_pair_to_pack_pos(bitmap_git->midx,
2295 pack->pack_int_id,
2296 base_offset,
2297 &base_bitmap_pos) < 0) {
2298 return 0;
2299 }
2300 } else {
2301 if (offset_to_pack_pos(pack->p, base_offset,
2302 &base_pos) < 0)
2303 return 0;
2304 /*
2305 * We assume delta dependencies always point backwards.
2306 * This lets us do a single pass, and is basically
2307 * always true due to the way OFS_DELTAs work. You would
2308 * not typically find REF_DELTA in a bitmapped pack,
2309 * since we only bitmap packs we write fresh, and
2310 * OFS_DELTA is the default). But let's double check to
2311 * make sure the pack wasn't written with odd
2312 * parameters.
2313 */
2314 if (base_pos >= pack_pos)
2315 return 0;
2316 base_bitmap_pos = pack->bitmap_pos + base_pos;
2317 }
2318
2319 /*
2320 * And finally, if we're not sending the base as part of our
2321 * reuse chunk, then don't send this object either. The base
2322 * would come after us, along with other objects not
2323 * necessarily in the pack, which means we'd need to convert
2324 * to REF_DELTA on the fly. Better to just let the normal
2325 * object_entry code path handle it.
2326 */
2327 if (!bitmap_get(reuse, base_bitmap_pos))
2328 return 0;
2329 }
2330
2331 /*
2332 * If we got here, then the object is OK to reuse. Mark it.
2333 */
2334 bitmap_set(reuse, bitmap_pos);
2335 return 0;
2336}
2337
2338static void reuse_partial_packfile_from_bitmap_1(struct bitmap_index *bitmap_git,
2339 struct bitmapped_pack *pack,
2340 struct bitmap *reuse)
2341{
2342 struct bitmap *result = bitmap_git->result;
2343 struct pack_window *w_curs = NULL;
2344 size_t pos = pack->bitmap_pos / BITS_IN_EWORD;
2345
2346 if (!pack->bitmap_pos) {
2347 /*
2348 * If we're processing the first (in the case of a MIDX, the
2349 * preferred pack) or the only (in the case of single-pack
2350 * bitmaps) pack, then we can reuse whole words at a time.
2351 *
2352 * This is because we know that any deltas in this range *must*
2353 * have their bases chosen from the same pack, since:
2354 *
2355 * - In the single pack case, there is no other pack to choose
2356 * them from.
2357 *
2358 * - In the MIDX case, the first pack is the preferred pack, so
2359 * all ties are broken in favor of that pack (i.e. the one
2360 * we're currently processing). So any duplicate bases will be
2361 * resolved in favor of the pack we're processing.
2362 */
2363 while (pos < result->word_alloc &&
2364 pos < pack->bitmap_nr / BITS_IN_EWORD &&
2365 result->words[pos] == (eword_t)~0)
2366 pos++;
2367 memset(reuse->words, 0xFF, pos * sizeof(eword_t));
2368 }
2369
2370 for (; pos < result->word_alloc; pos++) {
2371 eword_t word = result->words[pos];
2372 size_t offset;
2373
2374 for (offset = 0; offset < BITS_IN_EWORD; offset++) {
2375 size_t bit_pos;
2376 uint32_t pack_pos;
2377 off_t ofs;
2378
2379 if (word >> offset == 0)
2380 break;
2381
2382 offset += ewah_bit_ctz64(word >> offset);
2383
2384 bit_pos = pos * BITS_IN_EWORD + offset;
2385 if (bit_pos < pack->bitmap_pos)
2386 continue;
2387 if (bit_pos >= pack->bitmap_pos + pack->bitmap_nr)
2388 goto done;
2389
2390 if (bitmap_is_midx(bitmap_git)) {
2391 uint32_t midx_pos;
2392
2393 midx_pos = pack_pos_to_midx(bitmap_git->midx, bit_pos);
2394 ofs = nth_midxed_offset(bitmap_git->midx, midx_pos);
2395
2396 if (offset_to_pack_pos(pack->p, ofs, &pack_pos) < 0)
2397 BUG("could not find object in pack %s "
2398 "at offset %"PRIuMAX" in MIDX",
2399 pack_basename(pack->p), (uintmax_t)ofs);
2400 } else {
2401 pack_pos = cast_size_t_to_uint32_t(st_sub(bit_pos, pack->bitmap_pos));
2402 if (pack_pos >= pack->p->num_objects)
2403 BUG("advanced beyond the end of pack %s (%"PRIuMAX" > %"PRIu32")",
2404 pack_basename(pack->p), (uintmax_t)pack_pos,
2405 pack->p->num_objects);
2406
2407 ofs = pack_pos_to_offset(pack->p, pack_pos);
2408 }
2409
2410 if (try_partial_reuse(bitmap_git, pack, bit_pos,
2411 pack_pos, ofs, reuse, &w_curs) < 0) {
2412 /*
2413 * try_partial_reuse indicated we couldn't reuse
2414 * any bits, so there is no point in trying more
2415 * bits in the current word, or any other words
2416 * in result.
2417 *
2418 * Jump out of both loops to avoid future
2419 * unnecessary calls to try_partial_reuse.
2420 */
2421 goto done;
2422 }
2423 }
2424 }
2425
2426done:
2427 unuse_pack(&w_curs);
2428}
2429
2430static int bitmapped_pack_cmp(const void *va, const void *vb)
2431{
2432 const struct bitmapped_pack *a = va;
2433 const struct bitmapped_pack *b = vb;
2434
2435 if (a->bitmap_pos < b->bitmap_pos)
2436 return -1;
2437 if (a->bitmap_pos > b->bitmap_pos)
2438 return 1;
2439 return 0;
2440}
2441
2442void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
2443 struct bitmapped_pack **packs_out,
2444 size_t *packs_nr_out,
2445 struct bitmap **reuse_out,
2446 int multi_pack_reuse)
2447{
2448 struct repository *r = bitmap_repo(bitmap_git);
2449 struct bitmapped_pack *packs = NULL;
2450 struct bitmap *result = bitmap_git->result;
2451 struct bitmap *reuse;
2452 size_t i;
2453 size_t packs_nr = 0, packs_alloc = 0;
2454 size_t word_alloc;
2455 uint32_t objects_nr = 0;
2456
2457 assert(result);
2458
2459 load_reverse_index(r, bitmap_git);
2460
2461 if (!bitmap_is_midx(bitmap_git) || !bitmap_git->midx->chunk_bitmapped_packs)
2462 multi_pack_reuse = 0;
2463
2464 if (multi_pack_reuse) {
2465 struct multi_pack_index *m = bitmap_git->midx;
2466 for (i = 0; i < m->num_packs + m->num_packs_in_base; i++) {
2467 struct bitmapped_pack pack;
2468 if (nth_bitmapped_pack(bitmap_git->midx, &pack, i) < 0) {
2469 warning(_("unable to load pack: '%s', disabling pack-reuse"),
2470 bitmap_git->midx->pack_names[i]);
2471 free(packs);
2472 return;
2473 }
2474
2475 if (!pack.bitmap_nr)
2476 continue;
2477
2478 if (is_pack_valid(pack.p)) {
2479 ALLOC_GROW(packs, packs_nr + 1, packs_alloc);
2480 memcpy(&packs[packs_nr++], &pack, sizeof(pack));
2481 }
2482
2483 objects_nr += pack.p->num_objects;
2484 }
2485
2486 QSORT(packs, packs_nr, bitmapped_pack_cmp);
2487 } else {
2488 struct packed_git *pack;
2489 uint32_t pack_int_id;
2490
2491 if (bitmap_is_midx(bitmap_git)) {
2492 struct multi_pack_index *m = bitmap_git->midx;
2493 uint32_t preferred_pack_pos;
2494
2495 while (m->base_midx)
2496 m = m->base_midx;
2497
2498 if (midx_preferred_pack(m, &preferred_pack_pos) < 0) {
2499 warning(_("unable to compute preferred pack, disabling pack-reuse"));
2500 return;
2501 }
2502
2503 pack = nth_midxed_pack(m, preferred_pack_pos);
2504 pack_int_id = preferred_pack_pos;
2505 } else {
2506 pack = bitmap_git->pack;
2507 /*
2508 * Any value for 'pack_int_id' will do here. When we
2509 * process the pack via try_partial_reuse(), we won't
2510 * use the `pack_int_id` field since we have a non-MIDX
2511 * bitmap.
2512 *
2513 * Use '-1' as a sentinel value to make it clear
2514 * that we do not expect to read this field.
2515 */
2516 pack_int_id = -1;
2517 }
2518
2519 if (is_pack_valid(pack)) {
2520 ALLOC_GROW(packs, packs_nr + 1, packs_alloc);
2521 packs[packs_nr].p = pack;
2522 packs[packs_nr].pack_int_id = pack_int_id;
2523 packs[packs_nr].bitmap_nr = pack->num_objects;
2524 packs[packs_nr].bitmap_pos = 0;
2525 packs[packs_nr].from_midx = bitmap_git->midx;
2526 packs_nr++;
2527 }
2528
2529 objects_nr = pack->num_objects;
2530 }
2531
2532 if (!packs_nr)
2533 return;
2534
2535 word_alloc = objects_nr / BITS_IN_EWORD;
2536 if (objects_nr % BITS_IN_EWORD)
2537 word_alloc++;
2538 reuse = bitmap_word_alloc(word_alloc);
2539
2540 for (i = 0; i < packs_nr; i++)
2541 reuse_partial_packfile_from_bitmap_1(bitmap_git, &packs[i], reuse);
2542
2543 if (bitmap_is_empty(reuse)) {
2544 free(packs);
2545 bitmap_free(reuse);
2546 return;
2547 }
2548
2549 /*
2550 * Drop any reused objects from the result, since they will not
2551 * need to be handled separately.
2552 */
2553 bitmap_and_not(result, reuse);
2554 *packs_out = packs;
2555 *packs_nr_out = packs_nr;
2556 *reuse_out = reuse;
2557}
2558
2559int bitmap_walk_contains(struct bitmap_index *bitmap_git,
2560 struct bitmap *bitmap, const struct object_id *oid)
2561{
2562 int idx;
2563
2564 if (!bitmap)
2565 return 0;
2566
2567 idx = bitmap_position(bitmap_git, oid);
2568 return idx >= 0 && bitmap_get(bitmap, idx);
2569}
2570
2571void traverse_bitmap_commit_list(struct bitmap_index *bitmap_git,
2572 struct rev_info *revs,
2573 show_reachable_fn show_reachable)
2574{
2575 assert(bitmap_git->result);
2576
2577 show_objects_for_type(bitmap_git, bitmap_git->result,
2578 OBJ_COMMIT, show_reachable, NULL);
2579 if (revs->tree_objects)
2580 show_objects_for_type(bitmap_git, bitmap_git->result,
2581 OBJ_TREE, show_reachable, NULL);
2582 if (revs->blob_objects)
2583 show_objects_for_type(bitmap_git, bitmap_git->result,
2584 OBJ_BLOB, show_reachable, NULL);
2585 if (revs->tag_objects)
2586 show_objects_for_type(bitmap_git, bitmap_git->result,
2587 OBJ_TAG, show_reachable, NULL);
2588
2589 show_extended_objects(bitmap_git, revs, show_reachable);
2590}
2591
2592static uint32_t count_object_type(struct bitmap_index *bitmap_git,
2593 enum object_type type)
2594{
2595 struct bitmap *objects = bitmap_git->result;
2596 struct eindex *eindex = &bitmap_git->ext_index;
2597
2598 uint32_t i = 0, count = 0;
2599 struct ewah_or_iterator it;
2600 eword_t filter;
2601
2602 init_type_iterator(&it, bitmap_git, type);
2603
2604 while (i < objects->word_alloc && ewah_or_iterator_next(&filter, &it)) {
2605 eword_t word = objects->words[i++] & filter;
2606 count += ewah_bit_popcount64(word);
2607 }
2608
2609 for (i = 0; i < eindex->count; ++i) {
2610 if (eindex->objects[i]->type == type &&
2611 bitmap_get(objects,
2612 st_add(bitmap_num_objects_total(bitmap_git), i)))
2613 count++;
2614 }
2615
2616 ewah_or_iterator_release(&it);
2617
2618 return count;
2619}
2620
2621void count_bitmap_commit_list(struct bitmap_index *bitmap_git,
2622 uint32_t *commits, uint32_t *trees,
2623 uint32_t *blobs, uint32_t *tags)
2624{
2625 assert(bitmap_git->result);
2626
2627 if (commits)
2628 *commits = count_object_type(bitmap_git, OBJ_COMMIT);
2629
2630 if (trees)
2631 *trees = count_object_type(bitmap_git, OBJ_TREE);
2632
2633 if (blobs)
2634 *blobs = count_object_type(bitmap_git, OBJ_BLOB);
2635
2636 if (tags)
2637 *tags = count_object_type(bitmap_git, OBJ_TAG);
2638}
2639
2640struct bitmap_test_data {
2641 struct bitmap_index *bitmap_git;
2642 struct bitmap *base;
2643 struct bitmap *commits;
2644 struct bitmap *trees;
2645 struct bitmap *blobs;
2646 struct bitmap *tags;
2647 struct progress *prg;
2648 size_t seen;
2649
2650 struct bitmap_test_data *base_tdata;
2651};
2652
2653static void test_bitmap_type(struct bitmap_test_data *tdata,
2654 struct object *obj, int pos)
2655{
2656 enum object_type bitmap_type = OBJ_NONE;
2657 int bitmaps_nr = 0;
2658
2659 if (bitmap_is_midx(tdata->bitmap_git)) {
2660 while (pos < tdata->bitmap_git->midx->num_objects_in_base)
2661 tdata = tdata->base_tdata;
2662 }
2663
2664 if (bitmap_get(tdata->commits, pos)) {
2665 bitmap_type = OBJ_COMMIT;
2666 bitmaps_nr++;
2667 }
2668 if (bitmap_get(tdata->trees, pos)) {
2669 bitmap_type = OBJ_TREE;
2670 bitmaps_nr++;
2671 }
2672 if (bitmap_get(tdata->blobs, pos)) {
2673 bitmap_type = OBJ_BLOB;
2674 bitmaps_nr++;
2675 }
2676 if (bitmap_get(tdata->tags, pos)) {
2677 bitmap_type = OBJ_TAG;
2678 bitmaps_nr++;
2679 }
2680
2681 if (bitmap_type == OBJ_NONE)
2682 die(_("object '%s' not found in type bitmaps"),
2683 oid_to_hex(&obj->oid));
2684
2685 if (bitmaps_nr > 1)
2686 die(_("object '%s' does not have a unique type"),
2687 oid_to_hex(&obj->oid));
2688
2689 if (bitmap_type != obj->type)
2690 die(_("object '%s': real type '%s', expected: '%s'"),
2691 oid_to_hex(&obj->oid),
2692 type_name(obj->type),
2693 type_name(bitmap_type));
2694}
2695
2696static void test_show_object(struct object *object,
2697 const char *name UNUSED,
2698 void *data)
2699{
2700 struct bitmap_test_data *tdata = data;
2701 int bitmap_pos;
2702
2703 bitmap_pos = bitmap_position(tdata->bitmap_git, &object->oid);
2704 if (bitmap_pos < 0)
2705 die(_("object not in bitmap: '%s'"), oid_to_hex(&object->oid));
2706 test_bitmap_type(tdata, object, bitmap_pos);
2707
2708 bitmap_set(tdata->base, bitmap_pos);
2709 display_progress(tdata->prg, ++tdata->seen);
2710}
2711
2712static void test_show_commit(struct commit *commit, void *data)
2713{
2714 struct bitmap_test_data *tdata = data;
2715 int bitmap_pos;
2716
2717 bitmap_pos = bitmap_position(tdata->bitmap_git,
2718 &commit->object.oid);
2719 if (bitmap_pos < 0)
2720 die(_("object not in bitmap: '%s'"), oid_to_hex(&commit->object.oid));
2721 test_bitmap_type(tdata, &commit->object, bitmap_pos);
2722
2723 bitmap_set(tdata->base, bitmap_pos);
2724 display_progress(tdata->prg, ++tdata->seen);
2725}
2726
2727static uint32_t bitmap_total_entry_count(struct bitmap_index *bitmap_git)
2728{
2729 uint32_t total = 0;
2730 do {
2731 total = st_add(total, bitmap_git->entry_count);
2732 bitmap_git = bitmap_git->base;
2733 } while (bitmap_git);
2734
2735 return total;
2736}
2737
2738static void bitmap_test_data_prepare(struct bitmap_test_data *tdata,
2739 struct bitmap_index *bitmap_git)
2740{
2741 memset(tdata, 0, sizeof(struct bitmap_test_data));
2742
2743 tdata->bitmap_git = bitmap_git;
2744 tdata->base = bitmap_new();
2745 tdata->commits = ewah_to_bitmap(bitmap_git->commits);
2746 tdata->trees = ewah_to_bitmap(bitmap_git->trees);
2747 tdata->blobs = ewah_to_bitmap(bitmap_git->blobs);
2748 tdata->tags = ewah_to_bitmap(bitmap_git->tags);
2749
2750 if (bitmap_git->base) {
2751 tdata->base_tdata = xmalloc(sizeof(struct bitmap_test_data));
2752 bitmap_test_data_prepare(tdata->base_tdata, bitmap_git->base);
2753 }
2754}
2755
2756static void bitmap_test_data_release(struct bitmap_test_data *tdata)
2757{
2758 if (!tdata)
2759 return;
2760
2761 bitmap_test_data_release(tdata->base_tdata);
2762 free(tdata->base_tdata);
2763
2764 bitmap_free(tdata->base);
2765 bitmap_free(tdata->commits);
2766 bitmap_free(tdata->trees);
2767 bitmap_free(tdata->blobs);
2768 bitmap_free(tdata->tags);
2769}
2770
2771void test_bitmap_walk(struct rev_info *revs)
2772{
2773 struct object *root;
2774 struct bitmap *result = NULL;
2775 size_t result_popcnt;
2776 struct bitmap_test_data tdata;
2777 struct bitmap_index *bitmap_git, *found;
2778 struct ewah_bitmap *bm;
2779
2780 if (!(bitmap_git = prepare_bitmap_git(revs->repo)))
2781 die(_("failed to load bitmap indexes"));
2782
2783 if (revs->pending.nr != 1)
2784 die(_("you must specify exactly one commit to test"));
2785
2786 fprintf_ln(stderr, "Bitmap v%d test (%d entries%s, %d total)",
2787 bitmap_git->version,
2788 bitmap_git->entry_count,
2789 bitmap_git->table_lookup ? "" : " loaded",
2790 bitmap_total_entry_count(bitmap_git));
2791
2792 root = revs->pending.objects[0].item;
2793 bm = find_bitmap_for_commit(bitmap_git, (struct commit *)root, &found);
2794
2795 if (bm) {
2796 fprintf_ln(stderr, "Found bitmap for '%s'. %d bits / %08x checksum",
2797 oid_to_hex(&root->oid),
2798 (int)bm->bit_size, ewah_checksum(bm));
2799
2800 if (bitmap_is_midx(found))
2801 fprintf_ln(stderr, "Located via MIDX '%s'.",
2802 hash_to_hex_algop(get_midx_checksum(found->midx),
2803 revs->repo->hash_algo));
2804 else
2805 fprintf_ln(stderr, "Located via pack '%s'.",
2806 hash_to_hex_algop(found->pack->hash,
2807 revs->repo->hash_algo));
2808
2809 result = ewah_to_bitmap(bm);
2810 }
2811
2812 if (!result)
2813 die(_("commit '%s' doesn't have an indexed bitmap"), oid_to_hex(&root->oid));
2814
2815 revs->tag_objects = 1;
2816 revs->tree_objects = 1;
2817 revs->blob_objects = 1;
2818
2819 result_popcnt = bitmap_popcount(result);
2820
2821 if (prepare_revision_walk(revs))
2822 die(_("revision walk setup failed"));
2823
2824 bitmap_test_data_prepare(&tdata, bitmap_git);
2825 tdata.prg = start_progress(revs->repo,
2826 "Verifying bitmap entries",
2827 result_popcnt);
2828
2829 traverse_commit_list(revs, &test_show_commit, &test_show_object, &tdata);
2830
2831 stop_progress(&tdata.prg);
2832
2833 if (bitmap_equals(result, tdata.base))
2834 fprintf_ln(stderr, "OK!");
2835 else
2836 die(_("mismatch in bitmap results"));
2837
2838 bitmap_free(result);
2839 bitmap_test_data_release(&tdata);
2840 free_bitmap_index(bitmap_git);
2841}
2842
2843int test_bitmap_commits(struct repository *r)
2844{
2845 struct object_id oid;
2846 MAYBE_UNUSED void *value;
2847 struct bitmap_index *bitmap_git = prepare_bitmap_git(r);
2848
2849 if (!bitmap_git)
2850 die(_("failed to load bitmap indexes"));
2851
2852 /*
2853 * Since this function needs to print the bitmapped
2854 * commits, bypass the commit lookup table (if one exists)
2855 * by forcing the bitmap to eagerly load its entries.
2856 */
2857 if (bitmap_git->table_lookup) {
2858 if (load_bitmap_entries_v1(bitmap_git) < 0)
2859 die(_("failed to load bitmap indexes"));
2860 }
2861
2862 kh_foreach(bitmap_git->bitmaps, oid, value, {
2863 printf_ln("%s", oid_to_hex(&oid));
2864 });
2865
2866 free_bitmap_index(bitmap_git);
2867
2868 return 0;
2869}
2870
2871int test_bitmap_commits_with_offset(struct repository *r)
2872{
2873 struct object_id oid;
2874 struct stored_bitmap *stored;
2875 struct bitmap_index *bitmap_git;
2876 size_t commit_idx_pos_map_pos, xor_offset_map_pos, flag_map_pos,
2877 ewah_bitmap_map_pos;
2878
2879 bitmap_git = prepare_bitmap_git(r);
2880 if (!bitmap_git)
2881 die(_("failed to load bitmap indexes"));
2882
2883 /*
2884 * Since this function needs to know the position of each individual
2885 * bitmap, bypass the commit lookup table (if one exists) by forcing
2886 * the bitmap to eagerly load its entries.
2887 */
2888 if (bitmap_git->table_lookup) {
2889 if (load_bitmap_entries_v1(bitmap_git) < 0)
2890 die(_("failed to load bitmap indexes"));
2891 }
2892
2893 kh_foreach (bitmap_git->bitmaps, oid, stored, {
2894 commit_idx_pos_map_pos = stored->map_pos;
2895 xor_offset_map_pos = stored->map_pos + sizeof(uint32_t);
2896 flag_map_pos = xor_offset_map_pos + sizeof(uint8_t);
2897 ewah_bitmap_map_pos = flag_map_pos + sizeof(uint8_t);
2898
2899 printf_ln("%s %"PRIuMAX" %"PRIuMAX" %"PRIuMAX" %"PRIuMAX,
2900 oid_to_hex(&oid),
2901 (uintmax_t)commit_idx_pos_map_pos,
2902 (uintmax_t)xor_offset_map_pos,
2903 (uintmax_t)flag_map_pos,
2904 (uintmax_t)ewah_bitmap_map_pos);
2905 })
2906 ;
2907
2908 free_bitmap_index(bitmap_git);
2909
2910 return 0;
2911}
2912
2913int test_bitmap_hashes(struct repository *r)
2914{
2915 struct bitmap_index *bitmap_git = prepare_bitmap_git(r);
2916 struct object_id oid;
2917 uint32_t i, index_pos;
2918
2919 if (!bitmap_git || !bitmap_git->hashes)
2920 goto cleanup;
2921
2922 for (i = 0; i < bitmap_num_objects(bitmap_git); i++) {
2923 if (bitmap_is_midx(bitmap_git))
2924 index_pos = pack_pos_to_midx(bitmap_git->midx, i);
2925 else
2926 index_pos = pack_pos_to_index(bitmap_git->pack, i);
2927
2928 nth_bitmap_object_oid(bitmap_git, &oid, index_pos);
2929
2930 printf_ln("%s %"PRIu32"",
2931 oid_to_hex(&oid), get_be32(bitmap_git->hashes + index_pos));
2932 }
2933
2934cleanup:
2935 free_bitmap_index(bitmap_git);
2936
2937 return 0;
2938}
2939
2940static void bit_pos_to_object_id(struct bitmap_index *bitmap_git,
2941 uint32_t bit_pos,
2942 struct object_id *oid)
2943{
2944 uint32_t index_pos;
2945
2946 if (bitmap_is_midx(bitmap_git))
2947 index_pos = pack_pos_to_midx(bitmap_git->midx, bit_pos);
2948 else
2949 index_pos = pack_pos_to_index(bitmap_git->pack, bit_pos);
2950
2951 nth_bitmap_object_oid(bitmap_git, oid, index_pos);
2952}
2953
2954int test_bitmap_pseudo_merges(struct repository *r)
2955{
2956 struct bitmap_index *bitmap_git;
2957 uint32_t i;
2958
2959 bitmap_git = prepare_bitmap_git(r);
2960 if (!bitmap_git || !bitmap_git->pseudo_merges.nr)
2961 goto cleanup;
2962
2963 for (i = 0; i < bitmap_git->pseudo_merges.nr; i++) {
2964 struct pseudo_merge *merge;
2965 struct ewah_bitmap *commits_bitmap, *merge_bitmap;
2966
2967 merge = use_pseudo_merge(&bitmap_git->pseudo_merges,
2968 &bitmap_git->pseudo_merges.v[i]);
2969 commits_bitmap = merge->commits;
2970 merge_bitmap = pseudo_merge_bitmap(&bitmap_git->pseudo_merges,
2971 merge);
2972
2973 printf("at=%"PRIuMAX", commits=%"PRIuMAX", objects=%"PRIuMAX"\n",
2974 (uintmax_t)merge->at,
2975 (uintmax_t)ewah_bitmap_popcount(commits_bitmap),
2976 (uintmax_t)ewah_bitmap_popcount(merge_bitmap));
2977 }
2978
2979cleanup:
2980 free_bitmap_index(bitmap_git);
2981 return 0;
2982}
2983
2984static void dump_ewah_object_ids(struct bitmap_index *bitmap_git,
2985 struct ewah_bitmap *bitmap)
2986
2987{
2988 struct ewah_iterator it;
2989 eword_t word;
2990 uint32_t pos = 0;
2991
2992 ewah_iterator_init(&it, bitmap);
2993
2994 while (ewah_iterator_next(&word, &it)) {
2995 struct object_id oid;
2996 uint32_t offset;
2997
2998 for (offset = 0; offset < BITS_IN_EWORD; offset++) {
2999 if (!(word >> offset))
3000 break;
3001
3002 offset += ewah_bit_ctz64(word >> offset);
3003
3004 bit_pos_to_object_id(bitmap_git, pos + offset, &oid);
3005 printf("%s\n", oid_to_hex(&oid));
3006 }
3007 pos += BITS_IN_EWORD;
3008 }
3009}
3010
3011int test_bitmap_pseudo_merge_commits(struct repository *r, uint32_t n)
3012{
3013 struct bitmap_index *bitmap_git;
3014 struct pseudo_merge *merge;
3015 int ret = 0;
3016
3017 bitmap_git = prepare_bitmap_git(r);
3018 if (!bitmap_git || !bitmap_git->pseudo_merges.nr)
3019 goto cleanup;
3020
3021 if (n >= bitmap_git->pseudo_merges.nr) {
3022 ret = error(_("pseudo-merge index out of range "
3023 "(%"PRIu32" >= %"PRIuMAX")"),
3024 n, (uintmax_t)bitmap_git->pseudo_merges.nr);
3025 goto cleanup;
3026 }
3027
3028 merge = use_pseudo_merge(&bitmap_git->pseudo_merges,
3029 &bitmap_git->pseudo_merges.v[n]);
3030 dump_ewah_object_ids(bitmap_git, merge->commits);
3031
3032cleanup:
3033 free_bitmap_index(bitmap_git);
3034 return ret;
3035}
3036
3037int test_bitmap_pseudo_merge_objects(struct repository *r, uint32_t n)
3038{
3039 struct bitmap_index *bitmap_git;
3040 struct pseudo_merge *merge;
3041 int ret = 0;
3042
3043 bitmap_git = prepare_bitmap_git(r);
3044 if (!bitmap_git || !bitmap_git->pseudo_merges.nr)
3045 goto cleanup;
3046
3047 if (n >= bitmap_git->pseudo_merges.nr) {
3048 ret = error(_("pseudo-merge index out of range "
3049 "(%"PRIu32" >= %"PRIuMAX")"),
3050 n, (uintmax_t)bitmap_git->pseudo_merges.nr);
3051 goto cleanup;
3052 }
3053
3054 merge = use_pseudo_merge(&bitmap_git->pseudo_merges,
3055 &bitmap_git->pseudo_merges.v[n]);
3056
3057 dump_ewah_object_ids(bitmap_git,
3058 pseudo_merge_bitmap(&bitmap_git->pseudo_merges,
3059 merge));
3060
3061cleanup:
3062 free_bitmap_index(bitmap_git);
3063 return ret;
3064}
3065
3066int rebuild_bitmap(const uint32_t *reposition,
3067 struct ewah_bitmap *source,
3068 struct bitmap *dest)
3069{
3070 uint32_t pos = 0;
3071 struct ewah_iterator it;
3072 eword_t word;
3073
3074 ewah_iterator_init(&it, source);
3075
3076 while (ewah_iterator_next(&word, &it)) {
3077 uint32_t offset, bit_pos;
3078
3079 for (offset = 0; offset < BITS_IN_EWORD; ++offset) {
3080 if ((word >> offset) == 0)
3081 break;
3082
3083 offset += ewah_bit_ctz64(word >> offset);
3084
3085 bit_pos = reposition[pos + offset];
3086 if (bit_pos > 0)
3087 bitmap_set(dest, bit_pos - 1);
3088 else /* can't reuse, we don't have the object */
3089 return -1;
3090 }
3091
3092 pos += BITS_IN_EWORD;
3093 }
3094 return 0;
3095}
3096
3097uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git,
3098 struct packing_data *mapping)
3099{
3100 struct repository *r = bitmap_repo(bitmap_git);
3101 uint32_t i, num_objects;
3102 uint32_t *reposition;
3103
3104 if (!bitmap_is_midx(bitmap_git))
3105 load_reverse_index(r, bitmap_git);
3106 else if (load_midx_revindex(bitmap_git->midx))
3107 BUG("rebuild_existing_bitmaps: missing required rev-cache "
3108 "extension");
3109
3110 num_objects = bitmap_num_objects_total(bitmap_git);
3111 CALLOC_ARRAY(reposition, num_objects);
3112
3113 for (i = 0; i < num_objects; ++i) {
3114 struct object_id oid;
3115 struct object_entry *oe;
3116 uint32_t index_pos;
3117
3118 if (bitmap_is_midx(bitmap_git))
3119 index_pos = pack_pos_to_midx(bitmap_git->midx, i);
3120 else
3121 index_pos = pack_pos_to_index(bitmap_git->pack, i);
3122 nth_bitmap_object_oid(bitmap_git, &oid, index_pos);
3123 oe = packlist_find(mapping, &oid);
3124
3125 if (oe) {
3126 reposition[i] = oe_in_pack_pos(mapping, oe) + 1;
3127 if (bitmap_git->hashes && !oe->hash)
3128 oe->hash = get_be32(bitmap_git->hashes + index_pos);
3129 }
3130 }
3131
3132 return reposition;
3133}
3134
3135void free_bitmap_index(struct bitmap_index *b)
3136{
3137 if (!b)
3138 return;
3139
3140 if (b->map)
3141 munmap(b->map, b->map_size);
3142 ewah_pool_free(b->commits);
3143 ewah_pool_free(b->trees);
3144 ewah_pool_free(b->blobs);
3145 ewah_pool_free(b->tags);
3146 free(b->commits_all);
3147 free(b->trees_all);
3148 free(b->blobs_all);
3149 free(b->tags_all);
3150 if (b->bitmaps) {
3151 struct stored_bitmap *sb;
3152 kh_foreach_value(b->bitmaps, sb, {
3153 ewah_pool_free(sb->root);
3154 free(sb);
3155 });
3156 }
3157 kh_destroy_oid_map(b->bitmaps);
3158 free(b->ext_index.objects);
3159 free(b->ext_index.hashes);
3160 kh_destroy_oid_pos(b->ext_index.positions);
3161 bitmap_free(b->result);
3162 bitmap_free(b->haves);
3163 if (bitmap_is_midx(b)) {
3164 /*
3165 * Multi-pack bitmaps need to have resources associated with
3166 * their on-disk reverse indexes unmapped so that stale .rev and
3167 * .bitmap files can be removed.
3168 *
3169 * Unlike pack-based bitmaps, multi-pack bitmaps can be read and
3170 * written in the same 'git multi-pack-index write --bitmap'
3171 * process. Close resources so they can be removed safely on
3172 * platforms like Windows.
3173 */
3174 close_midx_revindex(b->midx);
3175 }
3176 free_pseudo_merge_map(&b->pseudo_merges);
3177 free_bitmap_index(b->base);
3178 free(b);
3179}
3180
3181int bitmap_has_oid_in_uninteresting(struct bitmap_index *bitmap_git,
3182 const struct object_id *oid)
3183{
3184 return bitmap_git &&
3185 bitmap_walk_contains(bitmap_git, bitmap_git->haves, oid);
3186}
3187
3188static off_t get_disk_usage_for_type(struct bitmap_index *bitmap_git,
3189 enum object_type object_type)
3190{
3191 struct bitmap *result = bitmap_git->result;
3192 off_t total = 0;
3193 struct ewah_or_iterator it;
3194 eword_t filter;
3195 size_t i;
3196
3197 init_type_iterator(&it, bitmap_git, object_type);
3198 for (i = 0; i < result->word_alloc &&
3199 ewah_or_iterator_next(&filter, &it); i++) {
3200 eword_t word = result->words[i] & filter;
3201 size_t base = (i * BITS_IN_EWORD);
3202 unsigned offset;
3203
3204 if (!word)
3205 continue;
3206
3207 for (offset = 0; offset < BITS_IN_EWORD; offset++) {
3208 if ((word >> offset) == 0)
3209 break;
3210
3211 offset += ewah_bit_ctz64(word >> offset);
3212
3213 if (bitmap_is_midx(bitmap_git)) {
3214 uint32_t pack_pos;
3215 uint32_t midx_pos = pack_pos_to_midx(bitmap_git->midx, base + offset);
3216 off_t offset = nth_midxed_offset(bitmap_git->midx, midx_pos);
3217
3218 uint32_t pack_id = nth_midxed_pack_int_id(bitmap_git->midx, midx_pos);
3219 struct packed_git *pack = nth_midxed_pack(bitmap_git->midx, pack_id);
3220
3221 if (offset_to_pack_pos(pack, offset, &pack_pos) < 0) {
3222 struct object_id oid;
3223 nth_midxed_object_oid(&oid, bitmap_git->midx, midx_pos);
3224
3225 die(_("could not find '%s' in pack '%s' at offset %"PRIuMAX),
3226 oid_to_hex(&oid),
3227 pack->pack_name,
3228 (uintmax_t)offset);
3229 }
3230
3231 total += pack_pos_to_offset(pack, pack_pos + 1) - offset;
3232 } else {
3233 size_t pos = base + offset;
3234 total += pack_pos_to_offset(bitmap_git->pack, pos + 1) -
3235 pack_pos_to_offset(bitmap_git->pack, pos);
3236 }
3237 }
3238 }
3239
3240 ewah_or_iterator_release(&it);
3241
3242 return total;
3243}
3244
3245static off_t get_disk_usage_for_extended(struct bitmap_index *bitmap_git)
3246{
3247 struct bitmap *result = bitmap_git->result;
3248 struct eindex *eindex = &bitmap_git->ext_index;
3249 off_t total = 0;
3250 struct object_info oi = OBJECT_INFO_INIT;
3251 off_t object_size;
3252 size_t i;
3253
3254 oi.disk_sizep = &object_size;
3255
3256 for (i = 0; i < eindex->count; i++) {
3257 struct object *obj = eindex->objects[i];
3258
3259 if (!bitmap_get(result,
3260 st_add(bitmap_num_objects_total(bitmap_git),
3261 i)))
3262 continue;
3263
3264 if (odb_read_object_info_extended(bitmap_repo(bitmap_git)->objects,
3265 &obj->oid, &oi, 0) < 0)
3266 die(_("unable to get disk usage of '%s'"),
3267 oid_to_hex(&obj->oid));
3268
3269 total += object_size;
3270 }
3271 return total;
3272}
3273
3274off_t get_disk_usage_from_bitmap(struct bitmap_index *bitmap_git,
3275 struct rev_info *revs)
3276{
3277 off_t total = 0;
3278
3279 total += get_disk_usage_for_type(bitmap_git, OBJ_COMMIT);
3280 if (revs->tree_objects)
3281 total += get_disk_usage_for_type(bitmap_git, OBJ_TREE);
3282 if (revs->blob_objects)
3283 total += get_disk_usage_for_type(bitmap_git, OBJ_BLOB);
3284 if (revs->tag_objects)
3285 total += get_disk_usage_for_type(bitmap_git, OBJ_TAG);
3286
3287 total += get_disk_usage_for_extended(bitmap_git);
3288
3289 return total;
3290}
3291
3292int bitmap_is_midx(struct bitmap_index *bitmap_git)
3293{
3294 return !!bitmap_git->midx;
3295}
3296
3297const struct string_list *bitmap_preferred_tips(struct repository *r)
3298{
3299 const struct string_list *dest;
3300
3301 if (!repo_config_get_string_multi(r, "pack.preferbitmaptips", &dest))
3302 return dest;
3303 return NULL;
3304}
3305
3306int bitmap_is_preferred_refname(struct repository *r, const char *refname)
3307{
3308 const struct string_list *preferred_tips = bitmap_preferred_tips(r);
3309 struct string_list_item *item;
3310
3311 if (!preferred_tips)
3312 return 0;
3313
3314 for_each_string_list_item(item, preferred_tips) {
3315 if (starts_with(refname, item->string))
3316 return 1;
3317 }
3318
3319 return 0;
3320}
3321
3322static int verify_bitmap_file(const struct git_hash_algo *algop,
3323 const char *name)
3324{
3325 struct stat st;
3326 unsigned char *data;
3327 int fd = git_open(name);
3328 int res = 0;
3329
3330 /* It is OK to not have the file. */
3331 if (fd < 0 || fstat(fd, &st)) {
3332 if (fd >= 0)
3333 close(fd);
3334 return 0;
3335 }
3336
3337 data = xmmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
3338 close(fd);
3339 if (!hashfile_checksum_valid(algop, data, st.st_size))
3340 res = error(_("bitmap file '%s' has invalid checksum"),
3341 name);
3342
3343 munmap(data, st.st_size);
3344 return res;
3345}
3346
3347int verify_bitmap_files(struct repository *r)
3348{
3349 struct odb_source *source;
3350 int res = 0;
3351
3352 odb_prepare_alternates(r->objects);
3353 for (source = r->objects->sources; source; source = source->next) {
3354 struct multi_pack_index *m = get_multi_pack_index(source);
3355 char *midx_bitmap_name;
3356
3357 if (!m)
3358 continue;
3359
3360 midx_bitmap_name = midx_bitmap_filename(m);
3361 res |= verify_bitmap_file(r->hash_algo, midx_bitmap_name);
3362 free(midx_bitmap_name);
3363 }
3364
3365 for (struct packed_git *p = packfile_store_get_all_packs(r->objects->packfiles);
3366 p; p = p->next) {
3367 char *pack_bitmap_name = pack_bitmap_filename(p);
3368 res |= verify_bitmap_file(r->hash_algo, pack_bitmap_name);
3369 free(pack_bitmap_name);
3370 }
3371
3372 return res;
3373}