Git fork

environment: move access to "core.bigFileThreshold" into repo settings

The "core.bigFileThreshold" setting is stored in a global variable and
populated via `git_default_core_config()`. This may cause issues when a
single process handles multiple repositories that have different values
for that config key, as the global variable may then hold the value of
a repository other than the one being operated on. Furthermore, global
state blocks our path towards libification.

Refactor the code so that we instead store the value in `struct
repo_settings`, where the value is computed as-needed and cached.

Note that this change requires us to adapt one test in t1050 that
verifies that we die when parsing an invalid "core.bigFileThreshold"
value. The previously exercised command, git-rev-parse(1), doesn't use
the value at all, and thus it won't hit the new code path that parses
the value. This is addressed by using git-hash-object(1) instead, which
does read the value.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

Authored by Patrick Steinhardt and committed by Junio C Hamano
7835ee75 2582846f

+52 -22
+1 -1
archive.c
··· 216 /* Stream it? */ 217 if (S_ISREG(mode) && !args->convert && 218 oid_object_info(args->repo, oid, &size) == OBJ_BLOB && 219 - size > big_file_threshold) 220 return write_entry(args, oid, path.buf, path.len, mode, NULL, size); 221 222 buffer = object_file_to_archive(args, path.buf, oid, mode, &type, &size);
··· 216 /* Stream it? */ 217 if (S_ISREG(mode) && !args->convert && 218 oid_object_info(args->repo, oid, &size) == OBJ_BLOB && 219 + size > repo_settings_get_big_file_threshold(the_repository)) 220 return write_entry(args, oid, path.buf, path.len, mode, NULL, size); 221 222 buffer = object_file_to_archive(args, path.buf, oid, mode, &type, &size);
+2 -2
builtin/fast-import.c
··· 2021 static struct strbuf buf = STRBUF_INIT; 2022 uintmax_t len; 2023 2024 - if (parse_data(&buf, big_file_threshold, &len)) 2025 store_object(OBJ_BLOB, &buf, last, oidout, mark); 2026 else { 2027 if (last) { ··· 3402 unsigned long v; 3403 if (!git_parse_ulong(option, &v)) 3404 return 0; 3405 - big_file_threshold = v; 3406 } else if (skip_prefix(option, "depth=", &option)) { 3407 option_depth(option); 3408 } else if (skip_prefix(option, "active-branches=", &option)) {
··· 2021 static struct strbuf buf = STRBUF_INIT; 2022 uintmax_t len; 2023 2024 + if (parse_data(&buf, repo_settings_get_big_file_threshold(the_repository), &len)) 2025 store_object(OBJ_BLOB, &buf, last, oidout, mark); 2026 else { 2027 if (last) { ··· 3402 unsigned long v; 3403 if (!git_parse_ulong(option, &v)) 3404 return 0; 3405 + repo_settings_set_big_file_threshold(the_repository, v); 3406 } else if (skip_prefix(option, "depth=", &option)) { 3407 option_depth(option); 3408 } else if (skip_prefix(option, "active-branches=", &option)) {
+4 -2
builtin/index-pack.c
··· 485 git_hash_update(&c, hdr, hdrlen); 486 } else 487 oid = NULL; 488 - if (type == OBJ_BLOB && size > big_file_threshold) 489 buf = fixed_buf; 490 else 491 buf = xmallocz(size); ··· 799 enum object_type type; 800 unsigned long size; 801 802 - if (entry->size <= big_file_threshold || entry->type != OBJ_BLOB) 803 return -1; 804 805 memset(&data, 0, sizeof(data));
··· 485 git_hash_update(&c, hdr, hdrlen); 486 } else 487 oid = NULL; 488 + if (type == OBJ_BLOB && 489 + size > repo_settings_get_big_file_threshold(the_repository)) 490 buf = fixed_buf; 491 else 492 buf = xmallocz(size); ··· 800 enum object_type type; 801 unsigned long size; 802 803 + if (entry->size <= repo_settings_get_big_file_threshold(the_repository) || 804 + entry->type != OBJ_BLOB) 805 return -1; 806 807 memset(&data, 0, sizeof(data));
+4 -2
builtin/pack-objects.c
··· 499 500 if (!usable_delta) { 501 if (oe_type(entry) == OBJ_BLOB && 502 - oe_size_greater_than(&to_pack, entry, big_file_threshold) && 503 (st = open_istream(the_repository, &entry->idx.oid, &type, 504 &size, NULL)) != NULL) 505 buf = NULL; ··· 2454 struct object_entry *entry = sorted_by_offset[i]; 2455 check_object(entry, i); 2456 if (entry->type_valid && 2457 - oe_size_greater_than(&to_pack, entry, big_file_threshold)) 2458 entry->no_try_delta = 1; 2459 display_progress(progress_state, i + 1); 2460 }
··· 499 500 if (!usable_delta) { 501 if (oe_type(entry) == OBJ_BLOB && 502 + oe_size_greater_than(&to_pack, entry, 503 + repo_settings_get_big_file_threshold(the_repository)) && 504 (st = open_istream(the_repository, &entry->idx.oid, &type, 505 &size, NULL)) != NULL) 506 buf = NULL; ··· 2455 struct object_entry *entry = sorted_by_offset[i]; 2456 check_object(entry, i); 2457 if (entry->type_valid && 2458 + oe_size_greater_than(&to_pack, entry, 2459 + repo_settings_get_big_file_threshold(the_repository))) 2460 entry->no_try_delta = 1; 2461 display_progress(progress_state, i + 1); 2462 }
+2 -1
builtin/unpack-objects.c
··· 553 554 switch (type) { 555 case OBJ_BLOB: 556 - if (!dry_run && size > big_file_threshold) { 557 stream_blob(size, nr); 558 return; 559 }
··· 553 554 switch (type) { 555 case OBJ_BLOB: 556 + if (!dry_run && 557 + size > repo_settings_get_big_file_threshold(the_repository)) { 558 stream_blob(size, nr); 559 return; 560 }
-5
config.c
··· 1490 return 0; 1491 } 1492 1493 - if (!strcmp(var, "core.bigfilethreshold")) { 1494 - big_file_threshold = git_config_ulong(var, value, ctx->kvi); 1495 - return 0; 1496 - } 1497 - 1498 if (!strcmp(var, "core.autocrlf")) { 1499 if (value && !strcasecmp(value, "input")) { 1500 auto_crlf = AUTO_CRLF_INPUT;
··· 1490 return 0; 1491 } 1492 1493 if (!strcmp(var, "core.autocrlf")) { 1494 if (value && !strcasecmp(value, "input")) { 1495 auto_crlf = AUTO_CRLF_INPUT;
+4 -2
diff.c
··· 4193 * is probably fine. 4194 */ 4195 if (check_binary && 4196 - s->size > big_file_threshold && s->is_binary == -1) { 4197 s->is_binary = 1; 4198 return 0; 4199 } ··· 4243 if (size_only || check_binary) { 4244 if (size_only) 4245 return 0; 4246 - if (s->size > big_file_threshold && s->is_binary == -1) { 4247 s->is_binary = 1; 4248 return 0; 4249 }
··· 4193 * is probably fine. 4194 */ 4195 if (check_binary && 4196 + s->size > repo_settings_get_big_file_threshold(the_repository) && 4197 + s->is_binary == -1) { 4198 s->is_binary = 1; 4199 return 0; 4200 } ··· 4244 if (size_only || check_binary) { 4245 if (size_only) 4246 return 0; 4247 + if (s->size > repo_settings_get_big_file_threshold(the_repository) && 4248 + s->is_binary == -1) { 4249 s->is_binary = 1; 4250 return 0; 4251 }
-1
environment.c
··· 49 int use_fsync = -1; 50 enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT; 51 enum fsync_component fsync_components = FSYNC_COMPONENTS_DEFAULT; 52 - unsigned long big_file_threshold = 512 * 1024 * 1024; 53 char *editor_program; 54 char *askpass_program; 55 char *excludes_file;
··· 49 int use_fsync = -1; 50 enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT; 51 enum fsync_component fsync_components = FSYNC_COMPONENTS_DEFAULT; 52 char *editor_program; 53 char *askpass_program; 54 char *excludes_file;
-1
environment.h
··· 154 extern int pack_compression_level; 155 extern size_t packed_git_window_size; 156 extern size_t packed_git_limit; 157 - extern unsigned long big_file_threshold; 158 extern unsigned long pack_size_limit_cfg; 159 extern int max_allowed_tree_depth; 160
··· 154 extern int pack_compression_level; 155 extern size_t packed_git_window_size; 156 extern size_t packed_git_limit; 157 extern unsigned long pack_size_limit_cfg; 158 extern int max_allowed_tree_depth; 159
+4 -2
object-file.c
··· 2803 ret = index_stream_convert_blob(istate, oid, fd, path, flags); 2804 else if (!S_ISREG(st->st_mode)) 2805 ret = index_pipe(istate, oid, fd, type, path, flags); 2806 - else if (st->st_size <= big_file_threshold || type != OBJ_BLOB || 2807 (path && would_convert_to_git(istate, path))) 2808 ret = index_core(istate, oid, fd, xsize_t(st->st_size), 2809 type, path, flags); ··· 3137 goto out; 3138 } 3139 3140 - if (*oi->typep == OBJ_BLOB && *size > big_file_threshold) { 3141 if (check_stream_oid(&stream, hdr, *size, path, expected_oid) < 0) 3142 goto out; 3143 } else {
··· 2803 ret = index_stream_convert_blob(istate, oid, fd, path, flags); 2804 else if (!S_ISREG(st->st_mode)) 2805 ret = index_pipe(istate, oid, fd, type, path, flags); 2806 + else if (st->st_size <= repo_settings_get_big_file_threshold(the_repository) || 2807 + type != OBJ_BLOB || 2808 (path && would_convert_to_git(istate, path))) 2809 ret = index_core(istate, oid, fd, xsize_t(st->st_size), 2810 type, path, flags); ··· 3138 goto out; 3139 } 3140 3141 + if (*oi->typep == OBJ_BLOB && 3142 + *size > repo_settings_get_big_file_threshold(the_repository)) { 3143 if (check_stream_oid(&stream, hdr, *size, path, expected_oid) < 0) 3144 goto out; 3145 } else {
+2 -1
pack-check.c
··· 131 type = unpack_object_header(p, w_curs, &curpos, &size); 132 unuse_pack(w_curs); 133 134 - if (type == OBJ_BLOB && big_file_threshold <= size) { 135 /* 136 * Let stream_object_signature() check it with 137 * the streaming interface; no point slurping
··· 131 type = unpack_object_header(p, w_curs, &curpos, &size); 132 unuse_pack(w_curs); 133 134 + if (type == OBJ_BLOB && 135 + repo_settings_get_big_file_threshold(the_repository) <= size) { 136 /* 137 * Let stream_object_signature() check it with 138 * the streaming interface; no point slurping
+20
repo-settings.c
··· 20 *dest = def; 21 } 22 23 void prepare_repo_settings(struct repository *r) 24 { 25 int experimental; ··· 149 FREE_AND_NULL(r->settings.fsmonitor); 150 FREE_AND_NULL(r->settings.hooks_path); 151 r->settings = empty; 152 } 153 154 enum log_refs_config repo_settings_get_log_all_ref_updates(struct repository *repo)
··· 20 *dest = def; 21 } 22 23 + static void repo_cfg_ulong(struct repository *r, const char *key, unsigned long *dest, 24 + unsigned long def) 25 + { 26 + if (repo_config_get_ulong(r, key, dest)) 27 + *dest = def; 28 + } 29 + 30 void prepare_repo_settings(struct repository *r) 31 { 32 int experimental; ··· 156 FREE_AND_NULL(r->settings.fsmonitor); 157 FREE_AND_NULL(r->settings.hooks_path); 158 r->settings = empty; 159 + } 160 + 161 + unsigned long repo_settings_get_big_file_threshold(struct repository *repo) 162 + { 163 + if (!repo->settings.big_file_threshold) 164 + repo_cfg_ulong(repo, "core.bigfilethreshold", 165 + &repo->settings.big_file_threshold, 512 * 1024 * 1024); 166 + return repo->settings.big_file_threshold; 167 + } 168 + 169 + void repo_settings_set_big_file_threshold(struct repository *repo, unsigned long value) 170 + { 171 + repo->settings.big_file_threshold = value; 172 } 173 174 enum log_refs_config repo_settings_get_log_all_ref_updates(struct repository *repo)
+5
repo-settings.h
··· 64 size_t delta_base_cache_limit; 65 size_t packed_git_window_size; 66 size_t packed_git_limit; 67 68 char *hooks_path; 69 }; ··· 87 int repo_settings_get_warn_ambiguous_refs(struct repository *repo); 88 /* Read the value for "core.hooksPath". */ 89 const char *repo_settings_get_hooks_path(struct repository *repo); 90 91 /* Read, set or reset the value for "core.sharedRepository". */ 92 int repo_settings_get_shared_repository(struct repository *repo);
··· 64 size_t delta_base_cache_limit; 65 size_t packed_git_window_size; 66 size_t packed_git_limit; 67 + unsigned long big_file_threshold; 68 69 char *hooks_path; 70 }; ··· 88 int repo_settings_get_warn_ambiguous_refs(struct repository *repo); 89 /* Read the value for "core.hooksPath". */ 90 const char *repo_settings_get_hooks_path(struct repository *repo); 91 + 92 + /* Read and set the value for "core.bigFileThreshold". */ 93 + unsigned long repo_settings_get_big_file_threshold(struct repository *repo); 94 + void repo_settings_set_big_file_threshold(struct repository *repo, unsigned long value); 95 96 /* Read, set or reset the value for "core.sharedRepository". */ 97 int repo_settings_get_shared_repository(struct repository *repo);
+2 -1
streaming.c
··· 431 st->open = open_istream_loose; 432 return 0; 433 case OI_PACKED: 434 - if (!oi.u.packed.is_delta && big_file_threshold < size) { 435 st->u.in_pack.pack = oi.u.packed.pack; 436 st->u.in_pack.pos = oi.u.packed.offset; 437 st->open = open_istream_pack_non_delta;
··· 431 st->open = open_istream_loose; 432 return 0; 433 case OI_PACKED: 434 + if (!oi.u.packed.is_delta && 435 + repo_settings_get_big_file_threshold(the_repository) < size) { 436 st->u.in_pack.pack = oi.u.packed.pack; 437 st->u.in_pack.pos = oi.u.packed.offset; 438 st->open = open_istream_pack_non_delta;
+2 -1
t/t1050-large.sh
··· 6 . ./test-lib.sh 7 8 test_expect_success 'core.bigFileThreshold must be non-negative' ' 9 - test_must_fail git -c core.bigFileThreshold=-1 rev-parse >out 2>err && 10 grep "bad numeric config value" err && 11 test_must_be_empty out 12 '
··· 6 . ./test-lib.sh 7 8 test_expect_success 'core.bigFileThreshold must be non-negative' ' 9 + : >input && 10 + test_must_fail git -c core.bigFileThreshold=-1 hash-object input >out 2>err && 11 grep "bad numeric config value" err && 12 test_must_be_empty out 13 '