Git fork
at reftables-rust 608 lines 15 kB view raw
1/* 2 * path-walk.c: implementation for path-based walks of the object graph. 3 */ 4#include "git-compat-util.h" 5#include "path-walk.h" 6#include "blob.h" 7#include "commit.h" 8#include "dir.h" 9#include "hashmap.h" 10#include "hex.h" 11#include "list-objects.h" 12#include "object.h" 13#include "oid-array.h" 14#include "prio-queue.h" 15#include "repository.h" 16#include "revision.h" 17#include "string-list.h" 18#include "strmap.h" 19#include "tag.h" 20#include "trace2.h" 21#include "tree.h" 22#include "tree-walk.h" 23 24static const char *root_path = ""; 25 26struct type_and_oid_list { 27 enum object_type type; 28 struct oid_array oids; 29 int maybe_interesting; 30}; 31 32#define TYPE_AND_OID_LIST_INIT { \ 33 .type = OBJ_NONE, \ 34 .oids = OID_ARRAY_INIT \ 35} 36 37struct path_walk_context { 38 /** 39 * Repeats of data in 'struct path_walk_info' for 40 * access with fewer characters. 41 */ 42 struct repository *repo; 43 struct rev_info *revs; 44 struct path_walk_info *info; 45 46 /** 47 * Map a path to a 'struct type_and_oid_list' 48 * containing the objects discovered at that 49 * path. 50 */ 51 struct strmap paths_to_lists; 52 53 /** 54 * Store the current list of paths in a priority queue, 55 * using object type as a sorting mechanism, mostly to 56 * make sure blobs are popped off the stack first. No 57 * other sort is made, so within each object type it acts 58 * like a stack and performs a DFS within the trees. 59 * 60 * Use path_stack_pushed to indicate whether a path 61 * was previously added to path_stack. 62 */ 63 struct prio_queue path_stack; 64 struct strset path_stack_pushed; 65}; 66 67static int compare_by_type(const void *one, const void *two, void *cb_data) 68{ 69 struct type_and_oid_list *list1, *list2; 70 const char *str1 = one; 71 const char *str2 = two; 72 struct path_walk_context *ctx = cb_data; 73 74 list1 = strmap_get(&ctx->paths_to_lists, str1); 75 list2 = strmap_get(&ctx->paths_to_lists, str2); 76 77 /* 78 * If object types are equal, then use path comparison. 79 */ 80 if (!list1 || !list2 || list1->type == list2->type) 81 return strcmp(str1, str2); 82 83 /* Prefer tags to be popped off first. */ 84 if (list1->type == OBJ_TAG) 85 return -1; 86 if (list2->type == OBJ_TAG) 87 return 1; 88 89 /* Prefer blobs to be popped off second. */ 90 if (list1->type == OBJ_BLOB) 91 return -1; 92 if (list2->type == OBJ_BLOB) 93 return 1; 94 95 return 0; 96} 97 98static void push_to_stack(struct path_walk_context *ctx, 99 const char *path) 100{ 101 if (strset_contains(&ctx->path_stack_pushed, path)) 102 return; 103 104 strset_add(&ctx->path_stack_pushed, path); 105 prio_queue_put(&ctx->path_stack, xstrdup(path)); 106} 107 108static void add_path_to_list(struct path_walk_context *ctx, 109 const char *path, 110 enum object_type type, 111 struct object_id *oid, 112 int interesting) 113{ 114 struct type_and_oid_list *list = strmap_get(&ctx->paths_to_lists, path); 115 116 if (!list) { 117 CALLOC_ARRAY(list, 1); 118 list->type = type; 119 strmap_put(&ctx->paths_to_lists, path, list); 120 } 121 122 list->maybe_interesting |= interesting; 123 oid_array_append(&list->oids, oid); 124} 125 126static int add_tree_entries(struct path_walk_context *ctx, 127 const char *base_path, 128 struct object_id *oid) 129{ 130 struct tree_desc desc; 131 struct name_entry entry; 132 struct strbuf path = STRBUF_INIT; 133 size_t base_len; 134 struct tree *tree = lookup_tree(ctx->repo, oid); 135 136 if (!tree) { 137 error(_("failed to walk children of tree %s: not found"), 138 oid_to_hex(oid)); 139 return -1; 140 } else if (parse_tree_gently(tree, 1)) { 141 error("bad tree object %s", oid_to_hex(oid)); 142 return -1; 143 } 144 145 strbuf_addstr(&path, base_path); 146 base_len = path.len; 147 148 init_tree_desc(&desc, &tree->object.oid, tree->buffer, tree->size); 149 while (tree_entry(&desc, &entry)) { 150 struct object *o; 151 /* Not actually true, but we will ignore submodules later. */ 152 enum object_type type = S_ISDIR(entry.mode) ? OBJ_TREE : OBJ_BLOB; 153 154 /* Skip submodules. */ 155 if (S_ISGITLINK(entry.mode)) 156 continue; 157 158 /* If the caller doesn't want blobs, then don't bother. */ 159 if (!ctx->info->blobs && type == OBJ_BLOB) 160 continue; 161 162 if (type == OBJ_TREE) { 163 struct tree *child = lookup_tree(ctx->repo, &entry.oid); 164 o = child ? &child->object : NULL; 165 } else if (type == OBJ_BLOB) { 166 struct blob *child = lookup_blob(ctx->repo, &entry.oid); 167 o = child ? &child->object : NULL; 168 } else { 169 BUG("invalid type for tree entry: %d", type); 170 } 171 172 if (!o) { 173 error(_("failed to find object %s"), 174 oid_to_hex(&o->oid)); 175 return -1; 176 } 177 178 /* Skip this object if already seen. */ 179 if (o->flags & SEEN) 180 continue; 181 o->flags |= SEEN; 182 183 strbuf_setlen(&path, base_len); 184 strbuf_add(&path, entry.path, entry.pathlen); 185 186 /* 187 * Trees will end with "/" for concatenation and distinction 188 * from blobs at the same path. 189 */ 190 if (type == OBJ_TREE) 191 strbuf_addch(&path, '/'); 192 193 if (ctx->info->pl) { 194 int dtype; 195 enum pattern_match_result match; 196 match = path_matches_pattern_list(path.buf, path.len, 197 path.buf + base_len, &dtype, 198 ctx->info->pl, 199 ctx->repo->index); 200 201 if (ctx->info->pl->use_cone_patterns && 202 match == NOT_MATCHED) 203 continue; 204 else if (!ctx->info->pl->use_cone_patterns && 205 type == OBJ_BLOB && 206 match != MATCHED) 207 continue; 208 } 209 210 add_path_to_list(ctx, path.buf, type, &entry.oid, 211 !(o->flags & UNINTERESTING)); 212 213 push_to_stack(ctx, path.buf); 214 } 215 216 free_tree_buffer(tree); 217 strbuf_release(&path); 218 return 0; 219} 220 221/* 222 * For each path in paths_to_explore, walk the trees another level 223 * and add any found blobs to the batch (but only if they exist and 224 * haven't been added yet). 225 */ 226static int walk_path(struct path_walk_context *ctx, 227 const char *path) 228{ 229 struct type_and_oid_list *list; 230 int ret = 0; 231 232 list = strmap_get(&ctx->paths_to_lists, path); 233 234 if (!list) 235 BUG("provided path '%s' that had no associated list", path); 236 237 if (!list->oids.nr) 238 return 0; 239 240 if (ctx->info->prune_all_uninteresting) { 241 /* 242 * This is true if all objects were UNINTERESTING 243 * when added to the list. 244 */ 245 if (!list->maybe_interesting) 246 return 0; 247 248 /* 249 * But it's still possible that the objects were set 250 * as UNINTERESTING after being added. Do a quick check. 251 */ 252 list->maybe_interesting = 0; 253 for (size_t i = 0; 254 !list->maybe_interesting && i < list->oids.nr; 255 i++) { 256 if (list->type == OBJ_TREE) { 257 struct tree *t = lookup_tree(ctx->repo, 258 &list->oids.oid[i]); 259 if (t && !(t->object.flags & UNINTERESTING)) 260 list->maybe_interesting = 1; 261 } else if (list->type == OBJ_BLOB) { 262 struct blob *b = lookup_blob(ctx->repo, 263 &list->oids.oid[i]); 264 if (b && !(b->object.flags & UNINTERESTING)) 265 list->maybe_interesting = 1; 266 } else { 267 /* Tags are always interesting if visited. */ 268 list->maybe_interesting = 1; 269 } 270 } 271 272 /* We have confirmed that all objects are UNINTERESTING. */ 273 if (!list->maybe_interesting) 274 return 0; 275 } 276 277 /* Evaluate function pointer on this data, if requested. */ 278 if ((list->type == OBJ_TREE && ctx->info->trees) || 279 (list->type == OBJ_BLOB && ctx->info->blobs) || 280 (list->type == OBJ_TAG && ctx->info->tags)) 281 ret = ctx->info->path_fn(path, &list->oids, list->type, 282 ctx->info->path_fn_data); 283 284 /* Expand data for children. */ 285 if (list->type == OBJ_TREE) { 286 for (size_t i = 0; i < list->oids.nr; i++) { 287 ret |= add_tree_entries(ctx, 288 path, 289 &list->oids.oid[i]); 290 } 291 } 292 293 oid_array_clear(&list->oids); 294 strmap_remove(&ctx->paths_to_lists, path, 1); 295 return ret; 296} 297 298static void clear_paths_to_lists(struct strmap *map) 299{ 300 struct hashmap_iter iter; 301 struct strmap_entry *e; 302 303 hashmap_for_each_entry(&map->map, &iter, e, ent) { 304 struct type_and_oid_list *list = e->value; 305 oid_array_clear(&list->oids); 306 } 307 strmap_clear(map, 1); 308 strmap_init(map); 309} 310 311static struct repository *edge_repo; 312static struct type_and_oid_list *edge_tree_list; 313 314static void show_edge(struct commit *commit) 315{ 316 struct tree *t = repo_get_commit_tree(edge_repo, commit); 317 318 if (!t) 319 return; 320 321 if (commit->object.flags & UNINTERESTING) 322 t->object.flags |= UNINTERESTING; 323 324 if (t->object.flags & SEEN) 325 return; 326 t->object.flags |= SEEN; 327 328 oid_array_append(&edge_tree_list->oids, &t->object.oid); 329} 330 331static int setup_pending_objects(struct path_walk_info *info, 332 struct path_walk_context *ctx) 333{ 334 struct type_and_oid_list *tags = NULL; 335 struct type_and_oid_list *tagged_blobs = NULL; 336 struct type_and_oid_list *root_tree_list = NULL; 337 338 if (info->tags) 339 CALLOC_ARRAY(tags, 1); 340 if (info->blobs) 341 CALLOC_ARRAY(tagged_blobs, 1); 342 if (info->trees) 343 root_tree_list = strmap_get(&ctx->paths_to_lists, root_path); 344 345 /* 346 * Pending objects include: 347 * * Commits at branch tips. 348 * * Annotated tags at tag tips. 349 * * Any kind of object at lightweight tag tips. 350 * * Trees and blobs in the index (with an associated path). 351 */ 352 for (size_t i = 0; i < info->revs->pending.nr; i++) { 353 struct object_array_entry *pending = info->revs->pending.objects + i; 354 struct object *obj = pending->item; 355 356 /* Commits will be picked up by revision walk. */ 357 if (obj->type == OBJ_COMMIT) 358 continue; 359 360 /* Navigate annotated tag object chains. */ 361 while (obj->type == OBJ_TAG) { 362 struct tag *tag = lookup_tag(info->revs->repo, &obj->oid); 363 if (!tag) { 364 error(_("failed to find tag %s"), 365 oid_to_hex(&obj->oid)); 366 return -1; 367 } 368 if (tag->object.flags & SEEN) 369 break; 370 tag->object.flags |= SEEN; 371 372 if (tags) 373 oid_array_append(&tags->oids, &obj->oid); 374 obj = tag->tagged; 375 } 376 377 if (obj->type == OBJ_TAG) 378 continue; 379 380 /* We are now at a non-tag object. */ 381 if (obj->flags & SEEN) 382 continue; 383 obj->flags |= SEEN; 384 385 switch (obj->type) { 386 case OBJ_TREE: 387 if (!info->trees) 388 continue; 389 if (pending->path) { 390 char *path = *pending->path ? xstrfmt("%s/", pending->path) 391 : xstrdup(""); 392 add_path_to_list(ctx, path, OBJ_TREE, &obj->oid, 1); 393 free(path); 394 } else { 395 /* assume a root tree, such as a lightweight tag. */ 396 oid_array_append(&root_tree_list->oids, &obj->oid); 397 } 398 break; 399 400 case OBJ_BLOB: 401 if (!info->blobs) 402 continue; 403 if (pending->path) 404 add_path_to_list(ctx, pending->path, OBJ_BLOB, &obj->oid, 1); 405 else 406 oid_array_append(&tagged_blobs->oids, &obj->oid); 407 break; 408 409 case OBJ_COMMIT: 410 /* Make sure it is in the object walk */ 411 if (obj != pending->item) 412 add_pending_object(info->revs, obj, ""); 413 break; 414 415 default: 416 BUG("should not see any other type here"); 417 } 418 } 419 420 /* 421 * Add tag objects and tagged blobs if they exist. 422 */ 423 if (tagged_blobs) { 424 if (tagged_blobs->oids.nr) { 425 const char *tagged_blob_path = "/tagged-blobs"; 426 tagged_blobs->type = OBJ_BLOB; 427 tagged_blobs->maybe_interesting = 1; 428 strmap_put(&ctx->paths_to_lists, tagged_blob_path, tagged_blobs); 429 push_to_stack(ctx, tagged_blob_path); 430 } else { 431 oid_array_clear(&tagged_blobs->oids); 432 free(tagged_blobs); 433 } 434 } 435 if (tags) { 436 if (tags->oids.nr) { 437 const char *tag_path = "/tags"; 438 tags->type = OBJ_TAG; 439 tags->maybe_interesting = 1; 440 strmap_put(&ctx->paths_to_lists, tag_path, tags); 441 push_to_stack(ctx, tag_path); 442 } else { 443 oid_array_clear(&tags->oids); 444 free(tags); 445 } 446 } 447 448 return 0; 449} 450 451/** 452 * Given the configuration of 'info', walk the commits based on 'info->revs' and 453 * call 'info->path_fn' on each discovered path. 454 * 455 * Returns nonzero on an error. 456 */ 457int walk_objects_by_path(struct path_walk_info *info) 458{ 459 int ret; 460 size_t commits_nr = 0, paths_nr = 0; 461 struct commit *c; 462 struct type_and_oid_list *root_tree_list; 463 struct type_and_oid_list *commit_list; 464 struct path_walk_context ctx = { 465 .repo = info->revs->repo, 466 .revs = info->revs, 467 .info = info, 468 .path_stack = { 469 .compare = compare_by_type, 470 .cb_data = &ctx 471 }, 472 .path_stack_pushed = STRSET_INIT, 473 .paths_to_lists = STRMAP_INIT 474 }; 475 476 trace2_region_enter("path-walk", "commit-walk", info->revs->repo); 477 478 CALLOC_ARRAY(commit_list, 1); 479 commit_list->type = OBJ_COMMIT; 480 481 if (info->tags) 482 info->revs->tag_objects = 1; 483 484 /* Insert a single list for the root tree into the paths. */ 485 CALLOC_ARRAY(root_tree_list, 1); 486 root_tree_list->type = OBJ_TREE; 487 root_tree_list->maybe_interesting = 1; 488 strmap_put(&ctx.paths_to_lists, root_path, root_tree_list); 489 push_to_stack(&ctx, root_path); 490 491 /* 492 * Set these values before preparing the walk to catch 493 * lightweight tags pointing to non-commits and indexed objects. 494 */ 495 info->revs->blob_objects = info->blobs; 496 info->revs->tree_objects = info->trees; 497 498 if (prepare_revision_walk(info->revs)) 499 die(_("failed to setup revision walk")); 500 501 /* 502 * Walk trees to mark them as UNINTERESTING. 503 * This is particularly important when 'edge_aggressive' is set. 504 */ 505 info->revs->edge_hint_aggressive = info->edge_aggressive; 506 edge_repo = info->revs->repo; 507 edge_tree_list = root_tree_list; 508 mark_edges_uninteresting(info->revs, show_edge, 509 info->prune_all_uninteresting); 510 edge_repo = NULL; 511 edge_tree_list = NULL; 512 513 info->revs->blob_objects = info->revs->tree_objects = 0; 514 515 trace2_region_enter("path-walk", "pending-walk", info->revs->repo); 516 ret = setup_pending_objects(info, &ctx); 517 trace2_region_leave("path-walk", "pending-walk", info->revs->repo); 518 519 if (ret) 520 return ret; 521 522 while ((c = get_revision(info->revs))) { 523 struct object_id *oid; 524 struct tree *t; 525 commits_nr++; 526 527 if (info->commits) 528 oid_array_append(&commit_list->oids, 529 &c->object.oid); 530 531 /* If we only care about commits, then skip trees. */ 532 if (!info->trees && !info->blobs) 533 continue; 534 535 oid = get_commit_tree_oid(c); 536 t = lookup_tree(info->revs->repo, oid); 537 538 if (!t) { 539 error("could not find tree %s", oid_to_hex(oid)); 540 return -1; 541 } 542 543 if (t->object.flags & SEEN) 544 continue; 545 t->object.flags |= SEEN; 546 oid_array_append(&root_tree_list->oids, oid); 547 } 548 549 trace2_data_intmax("path-walk", ctx.repo, "commits", commits_nr); 550 trace2_region_leave("path-walk", "commit-walk", info->revs->repo); 551 552 /* Track all commits. */ 553 if (info->commits && commit_list->oids.nr) 554 ret = info->path_fn("", &commit_list->oids, OBJ_COMMIT, 555 info->path_fn_data); 556 oid_array_clear(&commit_list->oids); 557 free(commit_list); 558 559 trace2_region_enter("path-walk", "path-walk", info->revs->repo); 560 while (!ret && ctx.path_stack.nr) { 561 char *path = prio_queue_get(&ctx.path_stack); 562 paths_nr++; 563 564 ret = walk_path(&ctx, path); 565 566 free(path); 567 } 568 569 /* Are there paths remaining? Likely they are from indexed objects. */ 570 if (!strmap_empty(&ctx.paths_to_lists)) { 571 struct hashmap_iter iter; 572 struct strmap_entry *entry; 573 574 strmap_for_each_entry(&ctx.paths_to_lists, &iter, entry) 575 push_to_stack(&ctx, entry->key); 576 577 while (!ret && ctx.path_stack.nr) { 578 char *path = prio_queue_get(&ctx.path_stack); 579 paths_nr++; 580 581 ret = walk_path(&ctx, path); 582 583 free(path); 584 } 585 } 586 587 trace2_data_intmax("path-walk", ctx.repo, "paths", paths_nr); 588 trace2_region_leave("path-walk", "path-walk", info->revs->repo); 589 590 clear_paths_to_lists(&ctx.paths_to_lists); 591 strset_clear(&ctx.path_stack_pushed); 592 clear_prio_queue(&ctx.path_stack); 593 return ret; 594} 595 596void path_walk_info_init(struct path_walk_info *info) 597{ 598 struct path_walk_info empty = PATH_WALK_INFO_INIT; 599 memcpy(info, &empty, sizeof(empty)); 600} 601 602void path_walk_info_clear(struct path_walk_info *info) 603{ 604 if (info->pl) { 605 clear_pattern_list(info->pl); 606 free(info->pl); 607 } 608}