Git fork
1/*
2 * path-walk.c: implementation for path-based walks of the object graph.
3 */
4#include "git-compat-util.h"
5#include "path-walk.h"
6#include "blob.h"
7#include "commit.h"
8#include "dir.h"
9#include "hashmap.h"
10#include "hex.h"
11#include "list-objects.h"
12#include "object.h"
13#include "oid-array.h"
14#include "prio-queue.h"
15#include "repository.h"
16#include "revision.h"
17#include "string-list.h"
18#include "strmap.h"
19#include "tag.h"
20#include "trace2.h"
21#include "tree.h"
22#include "tree-walk.h"
23
24static const char *root_path = "";
25
26struct type_and_oid_list {
27 enum object_type type;
28 struct oid_array oids;
29 int maybe_interesting;
30};
31
32#define TYPE_AND_OID_LIST_INIT { \
33 .type = OBJ_NONE, \
34 .oids = OID_ARRAY_INIT \
35}
36
37struct path_walk_context {
38 /**
39 * Repeats of data in 'struct path_walk_info' for
40 * access with fewer characters.
41 */
42 struct repository *repo;
43 struct rev_info *revs;
44 struct path_walk_info *info;
45
46 /**
47 * Map a path to a 'struct type_and_oid_list'
48 * containing the objects discovered at that
49 * path.
50 */
51 struct strmap paths_to_lists;
52
53 /**
54 * Store the current list of paths in a priority queue,
55 * using object type as a sorting mechanism, mostly to
56 * make sure blobs are popped off the stack first. No
57 * other sort is made, so within each object type it acts
58 * like a stack and performs a DFS within the trees.
59 *
60 * Use path_stack_pushed to indicate whether a path
61 * was previously added to path_stack.
62 */
63 struct prio_queue path_stack;
64 struct strset path_stack_pushed;
65};
66
67static int compare_by_type(const void *one, const void *two, void *cb_data)
68{
69 struct type_and_oid_list *list1, *list2;
70 const char *str1 = one;
71 const char *str2 = two;
72 struct path_walk_context *ctx = cb_data;
73
74 list1 = strmap_get(&ctx->paths_to_lists, str1);
75 list2 = strmap_get(&ctx->paths_to_lists, str2);
76
77 /*
78 * If object types are equal, then use path comparison.
79 */
80 if (!list1 || !list2 || list1->type == list2->type)
81 return strcmp(str1, str2);
82
83 /* Prefer tags to be popped off first. */
84 if (list1->type == OBJ_TAG)
85 return -1;
86 if (list2->type == OBJ_TAG)
87 return 1;
88
89 /* Prefer blobs to be popped off second. */
90 if (list1->type == OBJ_BLOB)
91 return -1;
92 if (list2->type == OBJ_BLOB)
93 return 1;
94
95 return 0;
96}
97
98static void push_to_stack(struct path_walk_context *ctx,
99 const char *path)
100{
101 if (strset_contains(&ctx->path_stack_pushed, path))
102 return;
103
104 strset_add(&ctx->path_stack_pushed, path);
105 prio_queue_put(&ctx->path_stack, xstrdup(path));
106}
107
108static void add_path_to_list(struct path_walk_context *ctx,
109 const char *path,
110 enum object_type type,
111 struct object_id *oid,
112 int interesting)
113{
114 struct type_and_oid_list *list = strmap_get(&ctx->paths_to_lists, path);
115
116 if (!list) {
117 CALLOC_ARRAY(list, 1);
118 list->type = type;
119 strmap_put(&ctx->paths_to_lists, path, list);
120 }
121
122 list->maybe_interesting |= interesting;
123 oid_array_append(&list->oids, oid);
124}
125
126static int add_tree_entries(struct path_walk_context *ctx,
127 const char *base_path,
128 struct object_id *oid)
129{
130 struct tree_desc desc;
131 struct name_entry entry;
132 struct strbuf path = STRBUF_INIT;
133 size_t base_len;
134 struct tree *tree = lookup_tree(ctx->repo, oid);
135
136 if (!tree) {
137 error(_("failed to walk children of tree %s: not found"),
138 oid_to_hex(oid));
139 return -1;
140 } else if (parse_tree_gently(tree, 1)) {
141 error("bad tree object %s", oid_to_hex(oid));
142 return -1;
143 }
144
145 strbuf_addstr(&path, base_path);
146 base_len = path.len;
147
148 init_tree_desc(&desc, &tree->object.oid, tree->buffer, tree->size);
149 while (tree_entry(&desc, &entry)) {
150 struct object *o;
151 /* Not actually true, but we will ignore submodules later. */
152 enum object_type type = S_ISDIR(entry.mode) ? OBJ_TREE : OBJ_BLOB;
153
154 /* Skip submodules. */
155 if (S_ISGITLINK(entry.mode))
156 continue;
157
158 /* If the caller doesn't want blobs, then don't bother. */
159 if (!ctx->info->blobs && type == OBJ_BLOB)
160 continue;
161
162 if (type == OBJ_TREE) {
163 struct tree *child = lookup_tree(ctx->repo, &entry.oid);
164 o = child ? &child->object : NULL;
165 } else if (type == OBJ_BLOB) {
166 struct blob *child = lookup_blob(ctx->repo, &entry.oid);
167 o = child ? &child->object : NULL;
168 } else {
169 BUG("invalid type for tree entry: %d", type);
170 }
171
172 if (!o) {
173 error(_("failed to find object %s"),
174 oid_to_hex(&o->oid));
175 return -1;
176 }
177
178 /* Skip this object if already seen. */
179 if (o->flags & SEEN)
180 continue;
181 o->flags |= SEEN;
182
183 strbuf_setlen(&path, base_len);
184 strbuf_add(&path, entry.path, entry.pathlen);
185
186 /*
187 * Trees will end with "/" for concatenation and distinction
188 * from blobs at the same path.
189 */
190 if (type == OBJ_TREE)
191 strbuf_addch(&path, '/');
192
193 if (ctx->info->pl) {
194 int dtype;
195 enum pattern_match_result match;
196 match = path_matches_pattern_list(path.buf, path.len,
197 path.buf + base_len, &dtype,
198 ctx->info->pl,
199 ctx->repo->index);
200
201 if (ctx->info->pl->use_cone_patterns &&
202 match == NOT_MATCHED)
203 continue;
204 else if (!ctx->info->pl->use_cone_patterns &&
205 type == OBJ_BLOB &&
206 match != MATCHED)
207 continue;
208 }
209
210 add_path_to_list(ctx, path.buf, type, &entry.oid,
211 !(o->flags & UNINTERESTING));
212
213 push_to_stack(ctx, path.buf);
214 }
215
216 free_tree_buffer(tree);
217 strbuf_release(&path);
218 return 0;
219}
220
221/*
222 * For each path in paths_to_explore, walk the trees another level
223 * and add any found blobs to the batch (but only if they exist and
224 * haven't been added yet).
225 */
226static int walk_path(struct path_walk_context *ctx,
227 const char *path)
228{
229 struct type_and_oid_list *list;
230 int ret = 0;
231
232 list = strmap_get(&ctx->paths_to_lists, path);
233
234 if (!list)
235 BUG("provided path '%s' that had no associated list", path);
236
237 if (!list->oids.nr)
238 return 0;
239
240 if (ctx->info->prune_all_uninteresting) {
241 /*
242 * This is true if all objects were UNINTERESTING
243 * when added to the list.
244 */
245 if (!list->maybe_interesting)
246 return 0;
247
248 /*
249 * But it's still possible that the objects were set
250 * as UNINTERESTING after being added. Do a quick check.
251 */
252 list->maybe_interesting = 0;
253 for (size_t i = 0;
254 !list->maybe_interesting && i < list->oids.nr;
255 i++) {
256 if (list->type == OBJ_TREE) {
257 struct tree *t = lookup_tree(ctx->repo,
258 &list->oids.oid[i]);
259 if (t && !(t->object.flags & UNINTERESTING))
260 list->maybe_interesting = 1;
261 } else if (list->type == OBJ_BLOB) {
262 struct blob *b = lookup_blob(ctx->repo,
263 &list->oids.oid[i]);
264 if (b && !(b->object.flags & UNINTERESTING))
265 list->maybe_interesting = 1;
266 } else {
267 /* Tags are always interesting if visited. */
268 list->maybe_interesting = 1;
269 }
270 }
271
272 /* We have confirmed that all objects are UNINTERESTING. */
273 if (!list->maybe_interesting)
274 return 0;
275 }
276
277 /* Evaluate function pointer on this data, if requested. */
278 if ((list->type == OBJ_TREE && ctx->info->trees) ||
279 (list->type == OBJ_BLOB && ctx->info->blobs) ||
280 (list->type == OBJ_TAG && ctx->info->tags))
281 ret = ctx->info->path_fn(path, &list->oids, list->type,
282 ctx->info->path_fn_data);
283
284 /* Expand data for children. */
285 if (list->type == OBJ_TREE) {
286 for (size_t i = 0; i < list->oids.nr; i++) {
287 ret |= add_tree_entries(ctx,
288 path,
289 &list->oids.oid[i]);
290 }
291 }
292
293 oid_array_clear(&list->oids);
294 strmap_remove(&ctx->paths_to_lists, path, 1);
295 return ret;
296}
297
298static void clear_paths_to_lists(struct strmap *map)
299{
300 struct hashmap_iter iter;
301 struct strmap_entry *e;
302
303 hashmap_for_each_entry(&map->map, &iter, e, ent) {
304 struct type_and_oid_list *list = e->value;
305 oid_array_clear(&list->oids);
306 }
307 strmap_clear(map, 1);
308 strmap_init(map);
309}
310
311static struct repository *edge_repo;
312static struct type_and_oid_list *edge_tree_list;
313
314static void show_edge(struct commit *commit)
315{
316 struct tree *t = repo_get_commit_tree(edge_repo, commit);
317
318 if (!t)
319 return;
320
321 if (commit->object.flags & UNINTERESTING)
322 t->object.flags |= UNINTERESTING;
323
324 if (t->object.flags & SEEN)
325 return;
326 t->object.flags |= SEEN;
327
328 oid_array_append(&edge_tree_list->oids, &t->object.oid);
329}
330
331static int setup_pending_objects(struct path_walk_info *info,
332 struct path_walk_context *ctx)
333{
334 struct type_and_oid_list *tags = NULL;
335 struct type_and_oid_list *tagged_blobs = NULL;
336 struct type_and_oid_list *root_tree_list = NULL;
337
338 if (info->tags)
339 CALLOC_ARRAY(tags, 1);
340 if (info->blobs)
341 CALLOC_ARRAY(tagged_blobs, 1);
342 if (info->trees)
343 root_tree_list = strmap_get(&ctx->paths_to_lists, root_path);
344
345 /*
346 * Pending objects include:
347 * * Commits at branch tips.
348 * * Annotated tags at tag tips.
349 * * Any kind of object at lightweight tag tips.
350 * * Trees and blobs in the index (with an associated path).
351 */
352 for (size_t i = 0; i < info->revs->pending.nr; i++) {
353 struct object_array_entry *pending = info->revs->pending.objects + i;
354 struct object *obj = pending->item;
355
356 /* Commits will be picked up by revision walk. */
357 if (obj->type == OBJ_COMMIT)
358 continue;
359
360 /* Navigate annotated tag object chains. */
361 while (obj->type == OBJ_TAG) {
362 struct tag *tag = lookup_tag(info->revs->repo, &obj->oid);
363 if (!tag) {
364 error(_("failed to find tag %s"),
365 oid_to_hex(&obj->oid));
366 return -1;
367 }
368 if (tag->object.flags & SEEN)
369 break;
370 tag->object.flags |= SEEN;
371
372 if (tags)
373 oid_array_append(&tags->oids, &obj->oid);
374 obj = tag->tagged;
375 }
376
377 if (obj->type == OBJ_TAG)
378 continue;
379
380 /* We are now at a non-tag object. */
381 if (obj->flags & SEEN)
382 continue;
383 obj->flags |= SEEN;
384
385 switch (obj->type) {
386 case OBJ_TREE:
387 if (!info->trees)
388 continue;
389 if (pending->path) {
390 char *path = *pending->path ? xstrfmt("%s/", pending->path)
391 : xstrdup("");
392 add_path_to_list(ctx, path, OBJ_TREE, &obj->oid, 1);
393 free(path);
394 } else {
395 /* assume a root tree, such as a lightweight tag. */
396 oid_array_append(&root_tree_list->oids, &obj->oid);
397 }
398 break;
399
400 case OBJ_BLOB:
401 if (!info->blobs)
402 continue;
403 if (pending->path)
404 add_path_to_list(ctx, pending->path, OBJ_BLOB, &obj->oid, 1);
405 else
406 oid_array_append(&tagged_blobs->oids, &obj->oid);
407 break;
408
409 case OBJ_COMMIT:
410 /* Make sure it is in the object walk */
411 if (obj != pending->item)
412 add_pending_object(info->revs, obj, "");
413 break;
414
415 default:
416 BUG("should not see any other type here");
417 }
418 }
419
420 /*
421 * Add tag objects and tagged blobs if they exist.
422 */
423 if (tagged_blobs) {
424 if (tagged_blobs->oids.nr) {
425 const char *tagged_blob_path = "/tagged-blobs";
426 tagged_blobs->type = OBJ_BLOB;
427 tagged_blobs->maybe_interesting = 1;
428 strmap_put(&ctx->paths_to_lists, tagged_blob_path, tagged_blobs);
429 push_to_stack(ctx, tagged_blob_path);
430 } else {
431 oid_array_clear(&tagged_blobs->oids);
432 free(tagged_blobs);
433 }
434 }
435 if (tags) {
436 if (tags->oids.nr) {
437 const char *tag_path = "/tags";
438 tags->type = OBJ_TAG;
439 tags->maybe_interesting = 1;
440 strmap_put(&ctx->paths_to_lists, tag_path, tags);
441 push_to_stack(ctx, tag_path);
442 } else {
443 oid_array_clear(&tags->oids);
444 free(tags);
445 }
446 }
447
448 return 0;
449}
450
451/**
452 * Given the configuration of 'info', walk the commits based on 'info->revs' and
453 * call 'info->path_fn' on each discovered path.
454 *
455 * Returns nonzero on an error.
456 */
457int walk_objects_by_path(struct path_walk_info *info)
458{
459 int ret;
460 size_t commits_nr = 0, paths_nr = 0;
461 struct commit *c;
462 struct type_and_oid_list *root_tree_list;
463 struct type_and_oid_list *commit_list;
464 struct path_walk_context ctx = {
465 .repo = info->revs->repo,
466 .revs = info->revs,
467 .info = info,
468 .path_stack = {
469 .compare = compare_by_type,
470 .cb_data = &ctx
471 },
472 .path_stack_pushed = STRSET_INIT,
473 .paths_to_lists = STRMAP_INIT
474 };
475
476 trace2_region_enter("path-walk", "commit-walk", info->revs->repo);
477
478 CALLOC_ARRAY(commit_list, 1);
479 commit_list->type = OBJ_COMMIT;
480
481 if (info->tags)
482 info->revs->tag_objects = 1;
483
484 /* Insert a single list for the root tree into the paths. */
485 CALLOC_ARRAY(root_tree_list, 1);
486 root_tree_list->type = OBJ_TREE;
487 root_tree_list->maybe_interesting = 1;
488 strmap_put(&ctx.paths_to_lists, root_path, root_tree_list);
489 push_to_stack(&ctx, root_path);
490
491 /*
492 * Set these values before preparing the walk to catch
493 * lightweight tags pointing to non-commits and indexed objects.
494 */
495 info->revs->blob_objects = info->blobs;
496 info->revs->tree_objects = info->trees;
497
498 if (prepare_revision_walk(info->revs))
499 die(_("failed to setup revision walk"));
500
501 /*
502 * Walk trees to mark them as UNINTERESTING.
503 * This is particularly important when 'edge_aggressive' is set.
504 */
505 info->revs->edge_hint_aggressive = info->edge_aggressive;
506 edge_repo = info->revs->repo;
507 edge_tree_list = root_tree_list;
508 mark_edges_uninteresting(info->revs, show_edge,
509 info->prune_all_uninteresting);
510 edge_repo = NULL;
511 edge_tree_list = NULL;
512
513 info->revs->blob_objects = info->revs->tree_objects = 0;
514
515 trace2_region_enter("path-walk", "pending-walk", info->revs->repo);
516 ret = setup_pending_objects(info, &ctx);
517 trace2_region_leave("path-walk", "pending-walk", info->revs->repo);
518
519 if (ret)
520 return ret;
521
522 while ((c = get_revision(info->revs))) {
523 struct object_id *oid;
524 struct tree *t;
525 commits_nr++;
526
527 if (info->commits)
528 oid_array_append(&commit_list->oids,
529 &c->object.oid);
530
531 /* If we only care about commits, then skip trees. */
532 if (!info->trees && !info->blobs)
533 continue;
534
535 oid = get_commit_tree_oid(c);
536 t = lookup_tree(info->revs->repo, oid);
537
538 if (!t) {
539 error("could not find tree %s", oid_to_hex(oid));
540 return -1;
541 }
542
543 if (t->object.flags & SEEN)
544 continue;
545 t->object.flags |= SEEN;
546 oid_array_append(&root_tree_list->oids, oid);
547 }
548
549 trace2_data_intmax("path-walk", ctx.repo, "commits", commits_nr);
550 trace2_region_leave("path-walk", "commit-walk", info->revs->repo);
551
552 /* Track all commits. */
553 if (info->commits && commit_list->oids.nr)
554 ret = info->path_fn("", &commit_list->oids, OBJ_COMMIT,
555 info->path_fn_data);
556 oid_array_clear(&commit_list->oids);
557 free(commit_list);
558
559 trace2_region_enter("path-walk", "path-walk", info->revs->repo);
560 while (!ret && ctx.path_stack.nr) {
561 char *path = prio_queue_get(&ctx.path_stack);
562 paths_nr++;
563
564 ret = walk_path(&ctx, path);
565
566 free(path);
567 }
568
569 /* Are there paths remaining? Likely they are from indexed objects. */
570 if (!strmap_empty(&ctx.paths_to_lists)) {
571 struct hashmap_iter iter;
572 struct strmap_entry *entry;
573
574 strmap_for_each_entry(&ctx.paths_to_lists, &iter, entry)
575 push_to_stack(&ctx, entry->key);
576
577 while (!ret && ctx.path_stack.nr) {
578 char *path = prio_queue_get(&ctx.path_stack);
579 paths_nr++;
580
581 ret = walk_path(&ctx, path);
582
583 free(path);
584 }
585 }
586
587 trace2_data_intmax("path-walk", ctx.repo, "paths", paths_nr);
588 trace2_region_leave("path-walk", "path-walk", info->revs->repo);
589
590 clear_paths_to_lists(&ctx.paths_to_lists);
591 strset_clear(&ctx.path_stack_pushed);
592 clear_prio_queue(&ctx.path_stack);
593 return ret;
594}
595
596void path_walk_info_init(struct path_walk_info *info)
597{
598 struct path_walk_info empty = PATH_WALK_INFO_INIT;
599 memcpy(info, &empty, sizeof(empty));
600}
601
602void path_walk_info_clear(struct path_walk_info *info)
603{
604 if (info->pl) {
605 clear_pattern_list(info->pl);
606 free(info->pl);
607 }
608}