Git fork

Merge branch 'ds/fix-thin-fix'

"git index-pack --fix-thin" used to abort to prevent a cycle in
delta chains from forming in a corner case even when there is no
such cycle.

* ds/fix-thin-fix:
index-pack: allow revisiting REF_DELTA chains
t5309: create failing test for 'git index-pack'
test-tool: add pack-deltas helper

+216 -28
+1
Makefile
··· 819 819 TEST_BUILTINS_OBJS += test-mktemp.o 820 820 TEST_BUILTINS_OBJS += test-name-hash.o 821 821 TEST_BUILTINS_OBJS += test-online-cpus.o 822 + TEST_BUILTINS_OBJS += test-pack-deltas.o 822 823 TEST_BUILTINS_OBJS += test-pack-mtimes.o 823 824 TEST_BUILTINS_OBJS += test-parse-options.o 824 825 TEST_BUILTINS_OBJS += test-parse-pathspec-file.o
+32 -26
builtin/index-pack.c
··· 1108 1108 set_thread_data(data); 1109 1109 for (;;) { 1110 1110 struct base_data *parent = NULL; 1111 - struct object_entry *child_obj; 1112 - struct base_data *child; 1111 + struct object_entry *child_obj = NULL; 1112 + struct base_data *child = NULL; 1113 1113 1114 1114 counter_lock(); 1115 1115 display_progress(progress, nr_resolved_deltas); ··· 1136 1136 parent = list_first_entry(&work_head, struct base_data, 1137 1137 list); 1138 1138 1139 - if (parent->ref_first <= parent->ref_last) { 1139 + while (parent->ref_first <= parent->ref_last) { 1140 1140 int offset = ref_deltas[parent->ref_first++].obj_no; 1141 1141 child_obj = objects + offset; 1142 - if (child_obj->real_type != OBJ_REF_DELTA) 1143 - die("REF_DELTA at offset %"PRIuMAX" already resolved (duplicate base %s?)", 1144 - (uintmax_t) child_obj->idx.offset, 1145 - oid_to_hex(&parent->obj->idx.oid)); 1142 + if (child_obj->real_type != OBJ_REF_DELTA) { 1143 + child_obj = NULL; 1144 + continue; 1145 + } 1146 1146 child_obj->real_type = parent->obj->real_type; 1147 - } else { 1147 + break; 1148 + } 1149 + 1150 + if (!child_obj && parent->ofs_first <= parent->ofs_last) { 1148 1151 child_obj = objects + 1149 1152 ofs_deltas[parent->ofs_first++].obj_no; 1150 1153 assert(child_obj->real_type == OBJ_OFS_DELTA); ··· 1177 1180 } 1178 1181 work_unlock(); 1179 1182 1180 - if (parent) { 1181 - child = resolve_delta(child_obj, parent); 1182 - if (!child->children_remaining) 1183 - FREE_AND_NULL(child->data); 1184 - } else { 1185 - child = make_base(child_obj, NULL); 1186 - if (child->children_remaining) { 1187 - /* 1188 - * Since this child has its own delta children, 1189 - * we will need this data in the future. 1190 - * Inflate now so that future iterations will 1191 - * have access to this object's data while 1192 - * outside the work mutex. 1193 - */ 1194 - child->data = get_data_from_pack(child_obj); 1195 - child->size = child_obj->size; 1183 + if (child_obj) { 1184 + if (parent) { 1185 + child = resolve_delta(child_obj, parent); 1186 + if (!child->children_remaining) 1187 + FREE_AND_NULL(child->data); 1188 + } else{ 1189 + child = make_base(child_obj, NULL); 1190 + if (child->children_remaining) { 1191 + /* 1192 + * Since this child has its own delta children, 1193 + * we will need this data in the future. 1194 + * Inflate now so that future iterations will 1195 + * have access to this object's data while 1196 + * outside the work mutex. 1197 + */ 1198 + child->data = get_data_from_pack(child_obj); 1199 + child->size = child_obj->size; 1200 + } 1196 1201 } 1197 1202 } 1198 1203 1199 1204 work_lock(); 1200 1205 if (parent) 1201 1206 parent->retain_data--; 1202 - if (child->data) { 1207 + 1208 + if (child && child->data) { 1203 1209 /* 1204 1210 * This child has its own children, so add it to 1205 1211 * work_head. ··· 1208 1214 base_cache_used += child->size; 1209 1215 prune_base_data(NULL); 1210 1216 free_base_data(child); 1211 - } else { 1217 + } else if (child) { 1212 1218 /* 1213 1219 * This child does not have its own children. It may be 1214 1220 * the last descendant of its ancestors; free those
+1
t/helper/meson.build
··· 36 36 'test-mktemp.c', 37 37 'test-name-hash.c', 38 38 'test-online-cpus.c', 39 + 'test-pack-deltas.c', 39 40 'test-pack-mtimes.c', 40 41 'test-parse-options.c', 41 42 'test-parse-pathspec-file.c',
+148
t/helper/test-pack-deltas.c
··· 1 + #define USE_THE_REPOSITORY_VARIABLE 2 + 3 + #include "test-tool.h" 4 + #include "git-compat-util.h" 5 + #include "delta.h" 6 + #include "git-zlib.h" 7 + #include "hash.h" 8 + #include "hex.h" 9 + #include "pack.h" 10 + #include "pack-objects.h" 11 + #include "parse-options.h" 12 + #include "setup.h" 13 + #include "strbuf.h" 14 + #include "string-list.h" 15 + 16 + static const char *usage_str[] = { 17 + "test-tool pack-deltas --num-objects <num-objects>", 18 + NULL 19 + }; 20 + 21 + static unsigned long do_compress(void **pptr, unsigned long size) 22 + { 23 + git_zstream stream; 24 + void *in, *out; 25 + unsigned long maxsize; 26 + 27 + git_deflate_init(&stream, 1); 28 + maxsize = git_deflate_bound(&stream, size); 29 + 30 + in = *pptr; 31 + out = xmalloc(maxsize); 32 + *pptr = out; 33 + 34 + stream.next_in = in; 35 + stream.avail_in = size; 36 + stream.next_out = out; 37 + stream.avail_out = maxsize; 38 + while (git_deflate(&stream, Z_FINISH) == Z_OK) 39 + ; /* nothing */ 40 + git_deflate_end(&stream); 41 + 42 + free(in); 43 + return stream.total_out; 44 + } 45 + 46 + static void write_ref_delta(struct hashfile *f, 47 + struct object_id *oid, 48 + struct object_id *base) 49 + { 50 + unsigned char header[MAX_PACK_OBJECT_HEADER]; 51 + unsigned long size, base_size, delta_size, compressed_size, hdrlen; 52 + enum object_type type; 53 + void *base_buf, *delta_buf; 54 + void *buf = repo_read_object_file(the_repository, 55 + oid, &type, 56 + &size); 57 + 58 + if (!buf) 59 + die("unable to read %s", oid_to_hex(oid)); 60 + 61 + base_buf = repo_read_object_file(the_repository, 62 + base, &type, 63 + &base_size); 64 + 65 + if (!base_buf) 66 + die("unable to read %s", oid_to_hex(base)); 67 + 68 + delta_buf = diff_delta(base_buf, base_size, 69 + buf, size, &delta_size, 0); 70 + 71 + compressed_size = do_compress(&delta_buf, delta_size); 72 + 73 + hdrlen = encode_in_pack_object_header(header, sizeof(header), 74 + OBJ_REF_DELTA, delta_size); 75 + hashwrite(f, header, hdrlen); 76 + hashwrite(f, base->hash, the_repository->hash_algo->rawsz); 77 + hashwrite(f, delta_buf, compressed_size); 78 + 79 + free(buf); 80 + free(base_buf); 81 + free(delta_buf); 82 + } 83 + 84 + int cmd__pack_deltas(int argc, const char **argv) 85 + { 86 + int num_objects = -1; 87 + struct hashfile *f; 88 + struct strbuf line = STRBUF_INIT; 89 + struct option options[] = { 90 + OPT_INTEGER('n', "num-objects", &num_objects, N_("the number of objects to write")), 91 + OPT_END() 92 + }; 93 + 94 + argc = parse_options(argc, argv, NULL, 95 + options, usage_str, 0); 96 + 97 + if (argc || num_objects < 0) 98 + usage_with_options(usage_str, options); 99 + 100 + setup_git_directory(); 101 + 102 + f = hashfd(the_repository->hash_algo, 1, "<stdout>"); 103 + write_pack_header(f, num_objects); 104 + 105 + /* Read each line from stdin into 'line' */ 106 + while (strbuf_getline_lf(&line, stdin) != EOF) { 107 + const char *type_str, *content_oid_str, *base_oid_str = NULL; 108 + struct object_id content_oid, base_oid; 109 + struct string_list items = STRING_LIST_INIT_NODUP; 110 + /* 111 + * Tokenize into two or three parts: 112 + * 1. REF_DELTA, OFS_DELTA, or FULL. 113 + * 2. The object ID for the content object. 114 + * 3. The object ID for the base object (optional). 115 + */ 116 + if (string_list_split_in_place(&items, line.buf, " ", 3) < 0) 117 + die("invalid input format: %s", line.buf); 118 + 119 + if (items.nr < 2) 120 + die("invalid input format: %s", line.buf); 121 + 122 + type_str = items.items[0].string; 123 + content_oid_str = items.items[1].string; 124 + 125 + if (get_oid_hex(content_oid_str, &content_oid)) 126 + die("invalid object: %s", content_oid_str); 127 + if (items.nr >= 3) { 128 + base_oid_str = items.items[2].string; 129 + if (get_oid_hex(base_oid_str, &base_oid)) 130 + die("invalid object: %s", base_oid_str); 131 + } 132 + string_list_clear(&items, 0); 133 + 134 + if (!strcmp(type_str, "REF_DELTA")) 135 + write_ref_delta(f, &content_oid, &base_oid); 136 + else if (!strcmp(type_str, "OFS_DELTA")) 137 + die("OFS_DELTA not implemented"); 138 + else if (!strcmp(type_str, "FULL")) 139 + die("FULL not implemented"); 140 + else 141 + die("unknown pack type: %s", type_str); 142 + } 143 + 144 + finalize_hashfile(f, NULL, FSYNC_COMPONENT_PACK, 145 + CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE); 146 + strbuf_release(&line); 147 + return 0; 148 + }
+1
t/helper/test-tool.c
··· 46 46 { "mktemp", cmd__mktemp }, 47 47 { "name-hash", cmd__name_hash }, 48 48 { "online-cpus", cmd__online_cpus }, 49 + { "pack-deltas", cmd__pack_deltas }, 49 50 { "pack-mtimes", cmd__pack_mtimes }, 50 51 { "parse-options", cmd__parse_options }, 51 52 { "parse-options-flags", cmd__parse_options_flags },
+1
t/helper/test-tool.h
··· 39 39 int cmd__mktemp(int argc, const char **argv); 40 40 int cmd__name_hash(int argc, const char **argv); 41 41 int cmd__online_cpus(int argc, const char **argv); 42 + int cmd__pack_deltas(int argc, const char **argv); 42 43 int cmd__pack_mtimes(int argc, const char **argv); 43 44 int cmd__parse_options(int argc, const char **argv); 44 45 int cmd__parse_options_flags(int argc, const char **argv);
+32 -2
t/t5309-pack-delta-cycles.sh
··· 60 60 test_expect_success 'failover to an object in another pack' ' 61 61 clear_packs && 62 62 git index-pack --stdin <ab.pack && 63 - test_must_fail git index-pack --stdin --fix-thin <cycle.pack 63 + 64 + # This cycle does not fail since the existence of A & B in 65 + # the repo allows us to resolve the cycle. 66 + git index-pack --stdin --fix-thin <cycle.pack 64 67 ' 65 68 66 69 test_expect_success 'failover to a duplicate object in the same pack' ' ··· 72 75 pack_obj $A 73 76 } >recoverable.pack && 74 77 pack_trailer recoverable.pack && 75 - test_must_fail git index-pack --fix-thin --stdin <recoverable.pack 78 + 79 + # This cycle does not fail since the existence of a full copy 80 + # of A in the pack allows us to resolve the cycle. 81 + git index-pack --fix-thin --stdin <recoverable.pack 82 + ' 83 + 84 + test_expect_success 'index-pack works with thin pack A->B->C with B on disk' ' 85 + git init server && 86 + ( 87 + cd server && 88 + test_commit_bulk 4 89 + ) && 90 + 91 + A=$(git -C server rev-parse HEAD^{tree}) && 92 + B=$(git -C server rev-parse HEAD~1^{tree}) && 93 + C=$(git -C server rev-parse HEAD~2^{tree}) && 94 + git -C server reset --hard HEAD~1 && 95 + 96 + test-tool -C server pack-deltas --num-objects=2 >thin.pack <<-EOF && 97 + REF_DELTA $A $B 98 + REF_DELTA $B $C 99 + EOF 100 + 101 + git clone "file://$(pwd)/server" client && 102 + ( 103 + cd client && 104 + git index-pack --fix-thin --stdin <../thin.pack 105 + ) 76 106 ' 77 107 78 108 test_done