Git fork
1/* We need this macro to access core_apply_sparse_checkout */
2#define USE_THE_REPOSITORY_VARIABLE
3
4#include "builtin.h"
5#include "git-compat-util.h"
6#include "config.h"
7#include "parse-options.h"
8#include "repository.h"
9#include "commit.h"
10#include "dir.h"
11#include "environment.h"
12#include "hex.h"
13#include "tree.h"
14#include "tree-walk.h"
15#include "object.h"
16#include "odb.h"
17#include "oid-array.h"
18#include "oidset.h"
19#include "promisor-remote.h"
20#include "strmap.h"
21#include "string-list.h"
22#include "revision.h"
23#include "trace2.h"
24#include "progress.h"
25#include "packfile.h"
26#include "path-walk.h"
27
28static const char * const builtin_backfill_usage[] = {
29 N_("git backfill [--min-batch-size=<n>] [--[no-]sparse]"),
30 NULL
31};
32
33struct backfill_context {
34 struct repository *repo;
35 struct oid_array current_batch;
36 size_t min_batch_size;
37 int sparse;
38};
39
40static void backfill_context_clear(struct backfill_context *ctx)
41{
42 oid_array_clear(&ctx->current_batch);
43}
44
45static void download_batch(struct backfill_context *ctx)
46{
47 promisor_remote_get_direct(ctx->repo,
48 ctx->current_batch.oid,
49 ctx->current_batch.nr);
50 oid_array_clear(&ctx->current_batch);
51
52 /*
53 * We likely have a new packfile. Add it to the packed list to
54 * avoid possible duplicate downloads of the same objects.
55 */
56 odb_reprepare(ctx->repo->objects);
57}
58
59static int fill_missing_blobs(const char *path UNUSED,
60 struct oid_array *list,
61 enum object_type type,
62 void *data)
63{
64 struct backfill_context *ctx = data;
65
66 if (type != OBJ_BLOB)
67 return 0;
68
69 for (size_t i = 0; i < list->nr; i++) {
70 if (!odb_has_object(ctx->repo->objects, &list->oid[i],
71 OBJECT_INFO_FOR_PREFETCH))
72 oid_array_append(&ctx->current_batch, &list->oid[i]);
73 }
74
75 if (ctx->current_batch.nr >= ctx->min_batch_size)
76 download_batch(ctx);
77
78 return 0;
79}
80
81static int do_backfill(struct backfill_context *ctx)
82{
83 struct rev_info revs;
84 struct path_walk_info info = PATH_WALK_INFO_INIT;
85 int ret;
86
87 if (ctx->sparse) {
88 CALLOC_ARRAY(info.pl, 1);
89 if (get_sparse_checkout_patterns(info.pl)) {
90 path_walk_info_clear(&info);
91 return error(_("problem loading sparse-checkout"));
92 }
93 }
94
95 repo_init_revisions(ctx->repo, &revs, "");
96 handle_revision_arg("HEAD", &revs, 0, 0);
97
98 info.blobs = 1;
99 info.tags = info.commits = info.trees = 0;
100
101 info.revs = &revs;
102 info.path_fn = fill_missing_blobs;
103 info.path_fn_data = ctx;
104
105 ret = walk_objects_by_path(&info);
106
107 /* Download the objects that did not fill a batch. */
108 if (!ret)
109 download_batch(ctx);
110
111 path_walk_info_clear(&info);
112 release_revisions(&revs);
113 return ret;
114}
115
116int cmd_backfill(int argc, const char **argv, const char *prefix, struct repository *repo)
117{
118 int result;
119 struct backfill_context ctx = {
120 .repo = repo,
121 .current_batch = OID_ARRAY_INIT,
122 .min_batch_size = 50000,
123 .sparse = 0,
124 };
125 struct option options[] = {
126 OPT_UNSIGNED(0, "min-batch-size", &ctx.min_batch_size,
127 N_("Minimum number of objects to request at a time")),
128 OPT_BOOL(0, "sparse", &ctx.sparse,
129 N_("Restrict the missing objects to the current sparse-checkout")),
130 OPT_END(),
131 };
132
133 show_usage_with_options_if_asked(argc, argv,
134 builtin_backfill_usage, options);
135
136 argc = parse_options(argc, argv, prefix, options, builtin_backfill_usage,
137 0);
138
139 repo_config(repo, git_default_config, NULL);
140
141 if (ctx.sparse < 0)
142 ctx.sparse = core_apply_sparse_checkout;
143
144 result = do_backfill(&ctx);
145 backfill_context_clear(&ctx);
146 return result;
147}