Git fork

midx: implement writing incremental MIDX bitmaps

Now that the pack-bitmap machinery has learned how to read and interact
with an incremental MIDX bitmap, teach the pack-bitmap-write.c machinery
(and relevant callers from within the MIDX machinery) to write such
bitmaps.

The details for doing so are mostly straightforward. The main changes
are as follows:

- find_object_pos() now makes use of an extra MIDX parameter which is
  used to locate the bit positions of objects which are from previous
  layers (and thus do not exist in the current layer's pack_order
  field).

  (Note also that the pack_order field is moved into struct
  write_midx_context to further simplify the callers for
  write_midx_bitmap()).

- bitmap_writer_build_type_index() first determines how many objects
  precede the current bitmap layer and offsets the bits it sets in
  each respective type-level bitmap by that amount so they can be OR'd
  together.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Acked-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

Authored by Taylor Blau and committed by Junio C Hamano
27afc272 5999b44f

+179 -37
+2 -1
builtin/pack-objects.c
··· 1397 1397 1398 1398 if (write_bitmap_index) { 1399 1399 bitmap_writer_init(&bitmap_writer, 1400 - the_repository, &to_pack); 1400 + the_repository, &to_pack, 1401 + NULL); 1401 1402 bitmap_writer_set_checksum(&bitmap_writer, hash); 1402 1403 bitmap_writer_build_type_index(&bitmap_writer, 1403 1404 written_list);
+38 -19
midx-write.c
··· 647 647 return pack_order; 648 648 } 649 649 650 - static void write_midx_reverse_index(char *midx_name, unsigned char *midx_hash, 651 - struct write_midx_context *ctx) 650 + static void write_midx_reverse_index(struct write_midx_context *ctx, 651 + const char *object_dir, 652 + unsigned char *midx_hash) 652 653 { 653 654 struct strbuf buf = STRBUF_INIT; 654 655 char *tmp_file; 655 656 656 657 trace2_region_enter("midx", "write_midx_reverse_index", ctx->repo); 657 658 658 - strbuf_addf(&buf, "%s-%s.rev", midx_name, hash_to_hex_algop(midx_hash, 659 - ctx->repo->hash_algo)); 659 + if (ctx->incremental) 660 + get_split_midx_filename_ext(ctx->repo->hash_algo, &buf, 661 + object_dir, midx_hash, 662 + MIDX_EXT_REV); 663 + else 664 + get_midx_filename_ext(ctx->repo->hash_algo, &buf, object_dir, 665 + midx_hash, MIDX_EXT_REV); 660 666 661 667 tmp_file = write_rev_file_order(ctx->repo->hash_algo, NULL, ctx->pack_order, 662 668 ctx->entries_nr, midx_hash, WRITE_REV); ··· 829 835 return cb.commits; 830 836 } 831 837 832 - static int write_midx_bitmap(struct repository *r, const char *midx_name, 838 + static int write_midx_bitmap(struct write_midx_context *ctx, 839 + const char *object_dir, 833 840 const unsigned char *midx_hash, 834 841 struct packing_data *pdata, 835 842 struct commit **commits, 836 843 uint32_t commits_nr, 837 - uint32_t *pack_order, 838 844 unsigned flags) 839 845 { 840 846 int ret, i; 841 847 uint16_t options = 0; 842 848 struct bitmap_writer writer; 843 849 struct pack_idx_entry **index; 844 - char *bitmap_name = xstrfmt("%s-%s.bitmap", midx_name, 845 - hash_to_hex_algop(midx_hash, r->hash_algo)); 850 + struct strbuf bitmap_name = STRBUF_INIT; 851 + 852 + trace2_region_enter("midx", "write_midx_bitmap", ctx->repo); 846 853 847 - trace2_region_enter("midx", "write_midx_bitmap", r); 854 + if (ctx->incremental) 855 + get_split_midx_filename_ext(ctx->repo->hash_algo, &bitmap_name, 856 + object_dir, midx_hash, 857 + MIDX_EXT_BITMAP); 858 + else 859 + 
get_midx_filename_ext(ctx->repo->hash_algo, &bitmap_name, 860 + object_dir, midx_hash, MIDX_EXT_BITMAP); 848 861 849 862 if (flags & MIDX_WRITE_BITMAP_HASH_CACHE) 850 863 options |= BITMAP_OPT_HASH_CACHE; ··· 861 874 for (i = 0; i < pdata->nr_objects; i++) 862 875 index[i] = &pdata->objects[i].idx; 863 876 864 - bitmap_writer_init(&writer, r, pdata); 877 + bitmap_writer_init(&writer, ctx->repo, pdata, 878 + ctx->incremental ? ctx->base_midx : NULL); 865 879 bitmap_writer_show_progress(&writer, flags & MIDX_PROGRESS); 866 880 bitmap_writer_build_type_index(&writer, index); 867 881 ··· 879 893 * bitmap_writer_finish(). 880 894 */ 881 895 for (i = 0; i < pdata->nr_objects; i++) 882 - index[pack_order[i]] = &pdata->objects[i].idx; 896 + index[ctx->pack_order[i]] = &pdata->objects[i].idx; 883 897 884 898 bitmap_writer_select_commits(&writer, commits, commits_nr); 885 899 ret = bitmap_writer_build(&writer); ··· 887 901 goto cleanup; 888 902 889 903 bitmap_writer_set_checksum(&writer, midx_hash); 890 - bitmap_writer_finish(&writer, index, bitmap_name, options); 904 + bitmap_writer_finish(&writer, index, bitmap_name.buf, options); 891 905 892 906 cleanup: 893 907 free(index); 894 - free(bitmap_name); 908 + strbuf_release(&bitmap_name); 895 909 bitmap_writer_free(&writer); 896 910 897 - trace2_region_leave("midx", "write_midx_bitmap", r); 911 + trace2_region_leave("midx", "write_midx_bitmap", ctx->repo); 898 912 899 913 return ret; 900 914 } ··· 1077 1091 ctx.repo = r; 1078 1092 1079 1093 ctx.incremental = !!(flags & MIDX_WRITE_INCREMENTAL); 1080 - if (ctx.incremental && (flags & MIDX_WRITE_BITMAP)) 1081 - die(_("cannot write incremental MIDX with bitmap")); 1082 1094 1083 1095 if (ctx.incremental) 1084 1096 strbuf_addf(&midx_name, ··· 1119 1131 if (ctx.incremental) { 1120 1132 struct multi_pack_index *m = ctx.base_midx; 1121 1133 while (m) { 1134 + if (flags & MIDX_WRITE_BITMAP && load_midx_revindex(m)) { 1135 + error(_("could not load reverse index for MIDX %s"), 1136 + 
hash_to_hex_algop(get_midx_checksum(m), 1137 + m->repo->hash_algo)); 1138 + result = 1; 1139 + goto cleanup; 1140 + } 1122 1141 ctx.num_multi_pack_indexes_before++; 1123 1142 m = m->base_midx; 1124 1143 } ··· 1387 1406 1388 1407 if (flags & MIDX_WRITE_REV_INDEX && 1389 1408 git_env_bool("GIT_TEST_MIDX_WRITE_REV", 0)) 1390 - write_midx_reverse_index(midx_name.buf, midx_hash, &ctx); 1409 + write_midx_reverse_index(&ctx, object_dir, midx_hash); 1391 1410 1392 1411 if (flags & MIDX_WRITE_BITMAP) { 1393 1412 struct packing_data pdata; ··· 1410 1429 FREE_AND_NULL(ctx.entries); 1411 1430 ctx.entries_nr = 0; 1412 1431 1413 - if (write_midx_bitmap(r, midx_name.buf, midx_hash, &pdata, 1414 - commits, commits_nr, ctx.pack_order, 1432 + if (write_midx_bitmap(&ctx, object_dir, 1433 + midx_hash, &pdata, commits, commits_nr, 1415 1434 flags) < 0) { 1416 1435 error(_("could not write multi-pack bitmap")); 1417 1436 result = 1;
+49 -16
pack-bitmap-write.c
··· 26 26 #include "alloc.h" 27 27 #include "refs.h" 28 28 #include "strmap.h" 29 + #include "midx.h" 30 + #include "pack-revindex.h" 29 31 30 32 struct bitmapped_commit { 31 33 struct commit *commit; ··· 43 45 } 44 46 45 47 void bitmap_writer_init(struct bitmap_writer *writer, struct repository *r, 46 - struct packing_data *pdata) 48 + struct packing_data *pdata, 49 + struct multi_pack_index *midx) 47 50 { 48 51 memset(writer, 0, sizeof(struct bitmap_writer)); 49 52 if (writer->bitmaps) ··· 51 54 writer->bitmaps = kh_init_oid_map(); 52 55 writer->pseudo_merge_commits = kh_init_oid_map(); 53 56 writer->to_pack = pdata; 57 + writer->midx = midx; 54 58 55 59 string_list_init_dup(&writer->pseudo_merge_groups); 56 60 ··· 113 117 struct pack_idx_entry **index) 114 118 { 115 119 uint32_t i; 120 + uint32_t base_objects = 0; 121 + 122 + if (writer->midx) 123 + base_objects = writer->midx->num_objects + 124 + writer->midx->num_objects_in_base; 116 125 117 126 writer->commits = ewah_new(); 118 127 writer->trees = ewah_new(); ··· 142 151 143 152 switch (real_type) { 144 153 case OBJ_COMMIT: 145 - ewah_set(writer->commits, i); 154 + ewah_set(writer->commits, i + base_objects); 146 155 break; 147 156 148 157 case OBJ_TREE: 149 - ewah_set(writer->trees, i); 158 + ewah_set(writer->trees, i + base_objects); 150 159 break; 151 160 152 161 case OBJ_BLOB: 153 - ewah_set(writer->blobs, i); 162 + ewah_set(writer->blobs, i + base_objects); 154 163 break; 155 164 156 165 case OBJ_TAG: 157 - ewah_set(writer->tags, i); 166 + ewah_set(writer->tags, i + base_objects); 158 167 break; 159 168 160 169 default: ··· 207 216 static uint32_t find_object_pos(struct bitmap_writer *writer, 208 217 const struct object_id *oid, int *found) 209 218 { 210 - struct object_entry *entry = packlist_find(writer->to_pack, oid); 219 + struct object_entry *entry; 220 + 221 + entry = packlist_find(writer->to_pack, oid); 222 + if (entry) { 223 + uint32_t base_objects = 0; 224 + if (writer->midx) 225 + base_objects 
= writer->midx->num_objects + 226 + writer->midx->num_objects_in_base; 227 + 228 + if (found) 229 + *found = 1; 230 + return oe_in_pack_pos(writer->to_pack, entry) + base_objects; 231 + } else if (writer->midx) { 232 + uint32_t at, pos; 211 233 212 - if (!entry) { 234 + if (!bsearch_midx(oid, writer->midx, &at)) 235 + goto missing; 236 + if (midx_to_pack_pos(writer->midx, at, &pos) < 0) 237 + goto missing; 238 + 213 239 if (found) 214 - *found = 0; 215 - warning("Failed to write bitmap index. Packfile doesn't have full closure " 216 - "(object %s is missing)", oid_to_hex(oid)); 217 - return 0; 240 + *found = 1; 241 + return pos; 218 242 } 219 243 244 + missing: 220 245 if (found) 221 - *found = 1; 222 - return oe_in_pack_pos(writer->to_pack, entry); 246 + *found = 0; 247 + warning("Failed to write bitmap index. Packfile doesn't have full closure " 248 + "(object %s is missing)", oid_to_hex(oid)); 249 + return 0; 223 250 } 224 251 225 252 static void compute_xor_offsets(struct bitmap_writer *writer) ··· 586 613 struct prio_queue queue = { compare_commits_by_gen_then_commit_date }; 587 614 struct prio_queue tree_queue = { NULL }; 588 615 struct bitmap_index *old_bitmap; 589 - uint32_t *mapping; 616 + uint32_t *mapping = NULL; 590 617 int closed = 1; /* until proven otherwise */ 591 618 592 619 if (writer->show_progress) ··· 1021 1048 struct strbuf tmp_file = STRBUF_INIT; 1022 1049 struct hashfile *f; 1023 1050 off_t *offsets = NULL; 1024 - uint32_t i; 1051 + uint32_t i, base_objects; 1025 1052 1026 1053 struct bitmap_disk_header header; 1027 1054 ··· 1047 1074 if (options & BITMAP_OPT_LOOKUP_TABLE) 1048 1075 CALLOC_ARRAY(offsets, writer->to_pack->nr_objects); 1049 1076 1077 + if (writer->midx) 1078 + base_objects = writer->midx->num_objects + 1079 + writer->midx->num_objects_in_base; 1080 + else 1081 + base_objects = 0; 1082 + 1050 1083 for (i = 0; i < bitmap_writer_nr_selected_commits(writer); i++) { 1051 1084 struct bitmapped_commit *stored = &writer->selected[i]; 
1052 1085 int commit_pos = oid_pos(&stored->commit->object.oid, index, ··· 1055 1088 1056 1089 if (commit_pos < 0) 1057 1090 BUG(_("trying to write commit not in index")); 1058 - stored->commit_pos = commit_pos; 1091 + stored->commit_pos = commit_pos + base_objects; 1059 1092 } 1060 1093 1061 1094 write_selected_commits_v1(writer, f, offsets);
+3 -1
pack-bitmap.h
··· 111 111 112 112 kh_oid_map_t *bitmaps; 113 113 struct packing_data *to_pack; 114 + struct multi_pack_index *midx; /* if appending to a MIDX chain */ 114 115 115 116 struct bitmapped_commit *selected; 116 117 unsigned int selected_nr, selected_alloc; ··· 125 126 }; 126 127 127 128 void bitmap_writer_init(struct bitmap_writer *writer, struct repository *r, 128 - struct packing_data *pdata); 129 + struct packing_data *pdata, 130 + struct multi_pack_index *midx); 129 131 void bitmap_writer_show_progress(struct bitmap_writer *writer, int show); 130 132 void bitmap_writer_set_checksum(struct bitmap_writer *writer, 131 133 const unsigned char *sha1);
+87
t/t5334-incremental-multi-pack-index.sh
··· 44 44 45 45 compare_results_with_midx 'non-incremental MIDX conversion' 46 46 47 + write_midx_layer () { 48 + n=1 49 + if test -f $midx_chain 50 + then 51 + n="$(($(wc -l <$midx_chain) + 1))" 52 + fi 53 + 54 + for i in 1 2 55 + do 56 + test_commit $n.$i && 57 + git repack -d || return 1 58 + done && 59 + git multi-pack-index write --bitmap --incremental 60 + } 61 + 62 + test_expect_success 'write initial MIDX layer' ' 63 + git repack -ad && 64 + write_midx_layer 65 + ' 66 + 67 + test_expect_success 'read bitmap from first MIDX layer' ' 68 + git rev-list --test-bitmap 1.2 69 + ' 70 + 71 + test_expect_success 'write another MIDX layer' ' 72 + write_midx_layer 73 + ' 74 + 75 + test_expect_success 'midx verify with multiple layers' ' 76 + test_path_is_file "$midx_chain" && 77 + test_line_count = 2 "$midx_chain" && 78 + 79 + git multi-pack-index verify 80 + ' 81 + 82 + test_expect_success 'read bitmap from second MIDX layer' ' 83 + git rev-list --test-bitmap 2.2 84 + ' 85 + 86 + test_expect_success 'read earlier bitmap from second MIDX layer' ' 87 + git rev-list --test-bitmap 1.2 88 + ' 89 + 90 + test_expect_success 'show object from first pack' ' 91 + git cat-file -p 1.1 92 + ' 93 + 94 + test_expect_success 'show object from second pack' ' 95 + git cat-file -p 2.2 96 + ' 97 + 98 + for reuse in false single multi 99 + do 100 + test_expect_success "full clone (pack.allowPackReuse=$reuse)" ' 101 + rm -fr clone.git && 102 + 103 + git config pack.allowPackReuse $reuse && 104 + git clone --no-local --bare . 
clone.git 105 + ' 106 + done 107 + 108 + test_expect_success 'relink existing MIDX layer' ' 109 + rm -fr "$midxdir" && 110 + 111 + GIT_TEST_MIDX_WRITE_REV=1 git multi-pack-index write --bitmap && 112 + 113 + midx_hash="$(test-tool read-midx --checksum $objdir)" && 114 + 115 + test_path_is_file "$packdir/multi-pack-index" && 116 + test_path_is_file "$packdir/multi-pack-index-$midx_hash.bitmap" && 117 + test_path_is_file "$packdir/multi-pack-index-$midx_hash.rev" && 118 + 119 + test_commit another && 120 + git repack -d && 121 + git multi-pack-index write --bitmap --incremental && 122 + 123 + test_path_is_missing "$packdir/multi-pack-index" && 124 + test_path_is_missing "$packdir/multi-pack-index-$midx_hash.bitmap" && 125 + test_path_is_missing "$packdir/multi-pack-index-$midx_hash.rev" && 126 + 127 + test_path_is_file "$midxdir/multi-pack-index-$midx_hash.midx" && 128 + test_path_is_file "$midxdir/multi-pack-index-$midx_hash.bitmap" && 129 + test_path_is_file "$midxdir/multi-pack-index-$midx_hash.rev" && 130 + test_line_count = 2 "$midx_chain" 131 + 132 + ' 133 + 47 134 test_done