Git fork

Merge branch 'jh/midx-verify-too-many-packs'

"git multi-pack-index verify" did not scale well with the number of
packfiles; this topic speeds it up by grouping objects by packfile.

* jh/midx-verify-too-many-packs:
midx: during verify group objects by packfile to speed verification
midx: add progress indicators in multi-pack-index verify
trace2:data: add trace2 data to midx
progress: add sparse mode to force 100% complete message

+118 -9
+3
builtin/multi-pack-index.c
··· 3 3 #include "config.h" 4 4 #include "parse-options.h" 5 5 #include "midx.h" 6 + #include "trace2.h" 6 7 7 8 static char const * const builtin_multi_pack_index_usage[] = { 8 9 N_("git multi-pack-index [--object-dir=<dir>] (write|verify)"), ··· 39 40 die(_("too many arguments")); 40 41 return 1; 41 42 } 43 + 44 + trace2_cmd_mode(argv[0]); 42 45 43 46 if (!strcmp(argv[0], "write")) 44 47 return write_midx_file(opts.object_dir);
+74 -5
midx.c
··· 8 8 #include "sha1-lookup.h" 9 9 #include "midx.h" 10 10 #include "progress.h" 11 + #include "trace2.h" 11 12 12 13 #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ 13 14 #define MIDX_VERSION 1 ··· 163 164 m->pack_names[i - 1], 164 165 m->pack_names[i]); 165 166 } 167 + 168 + trace2_data_intmax("midx", the_repository, "load/num_packs", m->num_packs); 169 + trace2_data_intmax("midx", the_repository, "load/num_objects", m->num_objects); 166 170 167 171 return m; 168 172 ··· 958 962 va_end(ap); 959 963 } 960 964 965 + struct pair_pos_vs_id 966 + { 967 + uint32_t pos; 968 + uint32_t pack_int_id; 969 + }; 970 + 971 + static int compare_pair_pos_vs_id(const void *_a, const void *_b) 972 + { 973 + struct pair_pos_vs_id *a = (struct pair_pos_vs_id *)_a; 974 + struct pair_pos_vs_id *b = (struct pair_pos_vs_id *)_b; 975 + 976 + return b->pack_int_id - a->pack_int_id; 977 + } 978 + 979 + /* 980 + * Limit calls to display_progress() for performance reasons. 981 + * The interval here was arbitrarily chosen. 
982 + */ 983 + #define SPARSE_PROGRESS_INTERVAL (1 << 12) 984 + #define midx_display_sparse_progress(progress, n) \ 985 + do { \ 986 + uint64_t _n = (n); \ 987 + if ((_n & (SPARSE_PROGRESS_INTERVAL - 1)) == 0) \ 988 + display_progress(progress, _n); \ 989 + } while (0) 990 + 961 991 int verify_midx_file(const char *object_dir) 962 992 { 993 + struct pair_pos_vs_id *pairs = NULL; 963 994 uint32_t i; 964 995 struct progress *progress; 965 996 struct multi_pack_index *m = load_multi_pack_index(object_dir, 1); ··· 968 999 if (!m) 969 1000 return 0; 970 1001 1002 + progress = start_progress(_("Looking for referenced packfiles"), 1003 + m->num_packs); 971 1004 for (i = 0; i < m->num_packs; i++) { 972 1005 if (prepare_midx_pack(m, i)) 973 1006 midx_report("failed to load pack in position %d", i); 1007 + 1008 + display_progress(progress, i + 1); 974 1009 } 1010 + stop_progress(&progress); 975 1011 976 1012 for (i = 0; i < 255; i++) { 977 1013 uint32_t oid_fanout1 = ntohl(m->chunk_oid_fanout[i]); ··· 982 1018 i, oid_fanout1, oid_fanout2, i + 1); 983 1019 } 984 1020 1021 + progress = start_sparse_progress(_("Verifying OID order in MIDX"), 1022 + m->num_objects - 1); 985 1023 for (i = 0; i < m->num_objects - 1; i++) { 986 1024 struct object_id oid1, oid2; 987 1025 ··· 991 1029 if (oidcmp(&oid1, &oid2) >= 0) 992 1030 midx_report(_("oid lookup out of order: oid[%d] = %s >= %s = oid[%d]"), 993 1031 i, oid_to_hex(&oid1), oid_to_hex(&oid2), i + 1); 1032 + 1033 + midx_display_sparse_progress(progress, i + 1); 994 1034 } 1035 + stop_progress(&progress); 995 1036 996 - progress = start_progress(_("Verifying object offsets"), m->num_objects); 1037 + /* 1038 + * Create an array mapping each object to its packfile id. Sort it 1039 + * to group the objects by packfile. Use this permutation to visit 1040 + * each of the objects and only require 1 packfile to be open at a 1041 + * time. 
1042 + */ 1043 + ALLOC_ARRAY(pairs, m->num_objects); 1044 + for (i = 0; i < m->num_objects; i++) { 1045 + pairs[i].pos = i; 1046 + pairs[i].pack_int_id = nth_midxed_pack_int_id(m, i); 1047 + } 1048 + 1049 + progress = start_sparse_progress(_("Sorting objects by packfile"), 1050 + m->num_objects); 1051 + display_progress(progress, 0); /* TODO: Measure QSORT() progress */ 1052 + QSORT(pairs, m->num_objects, compare_pair_pos_vs_id); 1053 + stop_progress(&progress); 1054 + 1055 + progress = start_sparse_progress(_("Verifying object offsets"), m->num_objects); 997 1056 for (i = 0; i < m->num_objects; i++) { 998 1057 struct object_id oid; 999 1058 struct pack_entry e; 1000 1059 off_t m_offset, p_offset; 1001 1060 1002 - nth_midxed_object_oid(&oid, m, i); 1061 + if (i > 0 && pairs[i-1].pack_int_id != pairs[i].pack_int_id && 1062 + m->packs[pairs[i-1].pack_int_id]) 1063 + { 1064 + close_pack_fd(m->packs[pairs[i-1].pack_int_id]); 1065 + close_pack_index(m->packs[pairs[i-1].pack_int_id]); 1066 + } 1067 + 1068 + nth_midxed_object_oid(&oid, m, pairs[i].pos); 1069 + 1003 1070 if (!fill_midx_entry(&oid, &e, m)) { 1004 1071 midx_report(_("failed to load pack entry for oid[%d] = %s"), 1005 - i, oid_to_hex(&oid)); 1072 + pairs[i].pos, oid_to_hex(&oid)); 1006 1073 continue; 1007 1074 } 1008 1075 ··· 1017 1084 1018 1085 if (m_offset != p_offset) 1019 1086 midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64), 1020 - i, oid_to_hex(&oid), m_offset, p_offset); 1087 + pairs[i].pos, oid_to_hex(&oid), m_offset, p_offset); 1021 1088 1022 - display_progress(progress, i + 1); 1089 + midx_display_sparse_progress(progress, i + 1); 1023 1090 } 1024 1091 stop_progress(&progress); 1092 + 1093 + free(pairs); 1025 1094 1026 1095 return verify_midx_error; 1027 1096 }
+1 -1
packfile.c
··· 309 309 } 310 310 } 311 311 312 - static int close_pack_fd(struct packed_git *p) 312 + int close_pack_fd(struct packed_git *p) 313 313 { 314 314 if (p->pack_fd < 0) 315 315 return 0;
+2
packfile.h
··· 76 76 */ 77 77 extern void close_pack_index(struct packed_git *); 78 78 79 + int close_pack_fd(struct packed_git *p); 80 + 79 81 extern uint32_t get_pack_fanout(struct packed_git *p, uint32_t value); 80 82 81 83 extern unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *);
+35 -3
progress.c
··· 34 34 uint64_t total; 35 35 unsigned last_percent; 36 36 unsigned delay; 37 + unsigned sparse; 37 38 struct throughput *throughput; 38 39 uint64_t start_ns; 39 40 }; ··· 194 195 } 195 196 196 197 static struct progress *start_progress_delay(const char *title, uint64_t total, 197 - unsigned delay) 198 + unsigned delay, unsigned sparse) 198 199 { 199 200 struct progress *progress = malloc(sizeof(*progress)); 200 201 if (!progress) { ··· 208 209 progress->last_value = -1; 209 210 progress->last_percent = -1; 210 211 progress->delay = delay; 212 + progress->sparse = sparse; 211 213 progress->throughput = NULL; 212 214 progress->start_ns = getnanotime(); 213 215 set_progress_signal(); ··· 216 218 217 219 struct progress *start_delayed_progress(const char *title, uint64_t total) 218 220 { 219 - return start_progress_delay(title, total, 2); 221 + return start_progress_delay(title, total, 2, 0); 220 222 } 221 223 222 224 struct progress *start_progress(const char *title, uint64_t total) 223 225 { 224 - return start_progress_delay(title, total, 0); 226 + return start_progress_delay(title, total, 0, 0); 227 + } 228 + 229 + /* 230 + * Here "sparse" means that the caller might use some sampling criteria to 231 + * decide when to call display_progress() rather than calling it for every 232 + * integer value in[0 .. total). In particular, the caller might not call 233 + * display_progress() for the last value in the range. 234 + * 235 + * When "sparse" is set, stop_progress() will automatically force the done 236 + * message to show 100%. 
237 + */ 238 + struct progress *start_sparse_progress(const char *title, uint64_t total) 239 + { 240 + return start_progress_delay(title, total, 0, 1); 241 + } 242 + 243 + struct progress *start_delayed_sparse_progress(const char *title, 244 + uint64_t total) 245 + { 246 + return start_progress_delay(title, total, 2, 1); 247 + } 248 + 249 + static void finish_if_sparse(struct progress *progress) 250 + { 251 + if (progress && 252 + progress->sparse && 253 + progress->last_value != progress->total) 254 + display_progress(progress, progress->total); 225 255 } 226 256 227 257 void stop_progress(struct progress **p_progress) 228 258 { 259 + finish_if_sparse(*p_progress); 260 + 229 261 stop_progress_msg(p_progress, _("done")); 230 262 } 231 263
+3
progress.h
··· 6 6 void display_throughput(struct progress *progress, uint64_t total); 7 7 int display_progress(struct progress *progress, uint64_t n); 8 8 struct progress *start_progress(const char *title, uint64_t total); 9 + struct progress *start_sparse_progress(const char *title, uint64_t total); 9 10 struct progress *start_delayed_progress(const char *title, uint64_t total); 11 + struct progress *start_delayed_sparse_progress(const char *title, 12 + uint64_t total); 10 13 void stop_progress(struct progress **progress); 11 14 void stop_progress_msg(struct progress **progress, const char *msg); 12 15