Git fork

builtin/pack-objects.c: --cruft with expiration

In a previous patch, pack-objects learned how to generate a cruft pack
so long as no objects are dropped.

This patch teaches pack-objects to handle the case where a non-never
`--cruft-expiration` value is passed. This case is slightly more
complicated than before, because we want pack-objects to save
unreachable objects which would otherwise have been pruned whenever
another recent (i.e., non-prunable) unreachable object reaches them.
We'll call these objects "unreachable but reachable-from-recent".

Here is how pack-objects handles `--cruft-expiration`:

- Instead of adding all objects outside of the kept pack(s) into the
packing list, only handle the ones whose mtime is within the grace
period.

- Construct a reachability traversal whose tips are the
unreachable-but-recent objects.

- Then, walk along that traversal, stopping if we reach an object in
the kept pack. At each step along the traversal, we add the object
we are visiting to the packing list.

In the majority of these cases, any object we visit in this traversal
will already be in our packing list. But we will sometimes encounter
reachable-from-recent cruft objects, which we want to retain even if
they aged out of the grace period.

The most subtle point of this process is that we actually don't need to
bother to update the rescued object's mtime. Even though we will write
an .mtimes file with a value that is older than the expiration window,
it will continue to survive cruft repacks so long as any objects which
reach it haven't aged out.

That is, a future repack will also exclude that object from the initial
packing list, only to discover it later on when doing the reachability
traversal.

Finally, stopping early once an object is found in a kept pack is safe
to do because the kept packs ordinarily represent which packs will
survive after repacking. Assuming that it _isn't_ safe to halt a
traversal early would mean that there is some ancestor object which is
missing, which implies repository corruption (i.e., the complete set of
reachable objects isn't present).

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

authored by

Taylor Blau and committed by
Junio C Hamano
a7d49383 fb546d6e

+228 -3
+83 -1
builtin/pack-objects.c
··· 3447 3447 return; 3448 3448 } 3449 3449 3450 + static void show_cruft_object(struct object *obj, const char *name, void *data) 3451 + { 3452 + /* 3453 + * if we did not record it earlier, it's at least as old as our 3454 + * expiration value. Rather than find it exactly, just use that 3455 + * value. This may bump it forward from its real mtime, but it 3456 + * will still be "too old" next time we run with the same 3457 + * expiration. 3458 + * 3459 + * if obj does appear in the packing list, this call is a noop (or may 3460 + * set the namehash). 3461 + */ 3462 + add_cruft_object_entry(&obj->oid, obj->type, NULL, 0, name, cruft_expiration); 3463 + } 3464 + 3465 + static void show_cruft_commit(struct commit *commit, void *data) 3466 + { 3467 + show_cruft_object((struct object*)commit, NULL, data); 3468 + } 3469 + 3470 + static int cruft_include_check_obj(struct object *obj, void *data) 3471 + { 3472 + return !has_object_kept_pack(&obj->oid, IN_CORE_KEEP_PACKS); 3473 + } 3474 + 3475 + static int cruft_include_check(struct commit *commit, void *data) 3476 + { 3477 + return cruft_include_check_obj((struct object*)commit, data); 3478 + } 3479 + 3480 + static void set_cruft_mtime(const struct object *object, 3481 + struct packed_git *pack, 3482 + off_t offset, time_t mtime) 3483 + { 3484 + add_cruft_object_entry(&object->oid, object->type, pack, offset, NULL, 3485 + mtime); 3486 + } 3487 + 3450 3488 static void mark_pack_kept_in_core(struct string_list *packs, unsigned keep) 3451 3489 { 3452 3490 struct string_list_item *item = NULL; ··· 3472 3510 stop_progress(&progress_state); 3473 3511 } 3474 3512 3513 + static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs) 3514 + { 3515 + struct packed_git *p; 3516 + struct rev_info revs; 3517 + int ret; 3518 + 3519 + repo_init_revisions(the_repository, &revs, NULL); 3520 + 3521 + revs.tag_objects = 1; 3522 + revs.tree_objects = 1; 3523 + revs.blob_objects = 1; 3524 + 3525 + revs.include_check = 
cruft_include_check; 3526 + revs.include_check_obj = cruft_include_check_obj; 3527 + 3528 + revs.ignore_missing_links = 1; 3529 + 3530 + if (progress) 3531 + progress_state = start_progress(_("Enumerating cruft objects"), 0); 3532 + ret = add_unseen_recent_objects_to_traversal(&revs, cruft_expiration, 3533 + set_cruft_mtime, 1); 3534 + stop_progress(&progress_state); 3535 + 3536 + if (ret) 3537 + die(_("unable to add cruft objects")); 3538 + 3539 + /* 3540 + * Re-mark only the fresh packs as kept so that objects in 3541 + * unknown packs do not halt the reachability traversal early. 3542 + */ 3543 + for (p = get_all_packs(the_repository); p; p = p->next) 3544 + p->pack_keep_in_core = 0; 3545 + mark_pack_kept_in_core(fresh_packs, 1); 3546 + 3547 + if (prepare_revision_walk(&revs)) 3548 + die(_("revision walk setup failed")); 3549 + if (progress) 3550 + progress_state = start_progress(_("Traversing cruft objects"), 0); 3551 + nr_seen = 0; 3552 + traverse_commit_list(&revs, show_cruft_commit, show_cruft_object, NULL); 3553 + 3554 + stop_progress(&progress_state); 3555 + } 3556 + 3475 3557 static void read_cruft_objects(void) 3476 3558 { 3477 3559 struct strbuf buf = STRBUF_INIT; ··· 3523 3605 mark_pack_kept_in_core(&discard_packs, 0); 3524 3606 3525 3607 if (cruft_expiration) 3526 - die("--cruft-expiration not yet implemented"); 3608 + enumerate_and_traverse_cruft_objects(&fresh_packs); 3527 3609 else 3528 3610 enumerate_cruft_objects(); 3529 3611
+2 -2
reachable.h
··· 1 1 #ifndef REACHEABLE_H 2 2 #define REACHEABLE_H 3 3 4 - #include "object.h" 5 - 6 4 struct progress; 7 5 struct rev_info; 6 + struct object; 7 + struct packed_git; 8 8 9 9 typedef void report_recent_object_fn(const struct object *, struct packed_git *, 10 10 off_t, time_t);
+143
t/t5329-pack-objects-cruft.sh
··· 214 214 } 215 215 216 216 basic_cruft_pack_tests never 217 + basic_cruft_pack_tests 2.weeks.ago 218 + 219 + test_expect_success 'cruft tags rescue tagged objects' ' 220 + git init repo && 221 + test_when_finished "rm -fr repo" && 222 + ( 223 + cd repo && 224 + 225 + test_commit packed && 226 + git repack -Ad && 227 + 228 + test_commit tagged && 229 + git tag -a annotated -m tag && 230 + 231 + git rev-list --objects --no-object-names packed.. >objects && 232 + while read oid 233 + do 234 + test-tool chmtime -1000 \ 235 + "$objdir/$(test_oid_to_path $oid)" 236 + done <objects && 237 + 238 + test-tool chmtime -500 \ 239 + "$objdir/$(test_oid_to_path $(git rev-parse annotated))" && 240 + 241 + keep="$(basename "$(ls $packdir/pack-*.pack)")" && 242 + cruft="$(echo $keep | git pack-objects --cruft \ 243 + --cruft-expiration=750.seconds.ago \ 244 + $packdir/pack)" && 245 + test-tool pack-mtimes "pack-$cruft.mtimes" >actual.raw && 246 + cut -f1 -d" " <actual.raw | sort >actual && 247 + 248 + ( 249 + cat objects && 250 + git rev-parse annotated 251 + ) >expect.raw && 252 + sort <expect.raw >expect && 253 + 254 + test_cmp expect actual && 255 + cat actual 256 + ) 257 + ' 258 + 259 + test_expect_success 'cruft commits rescue parents, trees' ' 260 + git init repo && 261 + test_when_finished "rm -fr repo" && 262 + ( 263 + cd repo && 264 + 265 + test_commit packed && 266 + git repack -Ad && 267 + 268 + test_commit old && 269 + test_commit new && 270 + 271 + git rev-list --objects --no-object-names packed..new >objects && 272 + while read object 273 + do 274 + test-tool chmtime -1000 \ 275 + "$objdir/$(test_oid_to_path $object)" 276 + done <objects && 277 + test-tool chmtime +500 "$objdir/$(test_oid_to_path \ 278 + $(git rev-parse HEAD))" && 279 + 280 + keep="$(basename "$(ls $packdir/pack-*.pack)")" && 281 + cruft="$(echo $keep | git pack-objects --cruft \ 282 + --cruft-expiration=750.seconds.ago \ 283 + $packdir/pack)" && 284 + test-tool pack-mtimes "pack-$cruft.mtimes" 
>actual.raw && 285 + 286 + cut -d" " -f1 <actual.raw | sort >actual && 287 + sort <objects >expect && 288 + 289 + test_cmp expect actual 290 + ) 291 + ' 292 + 293 + test_expect_success 'cruft trees rescue sub-trees, blobs' ' 294 + git init repo && 295 + test_when_finished "rm -fr repo" && 296 + ( 297 + cd repo && 298 + 299 + test_commit packed && 300 + git repack -Ad && 301 + 302 + mkdir -p dir/sub && 303 + echo foo >foo && 304 + echo bar >dir/bar && 305 + echo baz >dir/sub/baz && 306 + 307 + test_tick && 308 + git add . && 309 + git commit -m "pruned" && 310 + 311 + test-tool chmtime -1000 "$objdir/$(test_oid_to_path $(git rev-parse HEAD))" && 312 + test-tool chmtime -1000 "$objdir/$(test_oid_to_path $(git rev-parse HEAD^{tree}))" && 313 + test-tool chmtime -1000 "$objdir/$(test_oid_to_path $(git rev-parse HEAD:foo))" && 314 + test-tool chmtime -500 "$objdir/$(test_oid_to_path $(git rev-parse HEAD:dir))" && 315 + test-tool chmtime -1000 "$objdir/$(test_oid_to_path $(git rev-parse HEAD:dir/bar))" && 316 + test-tool chmtime -1000 "$objdir/$(test_oid_to_path $(git rev-parse HEAD:dir/sub))" && 317 + test-tool chmtime -1000 "$objdir/$(test_oid_to_path $(git rev-parse HEAD:dir/sub/baz))" && 318 + 319 + keep="$(basename "$(ls $packdir/pack-*.pack)")" && 320 + cruft="$(echo $keep | git pack-objects --cruft \ 321 + --cruft-expiration=750.seconds.ago \ 322 + $packdir/pack)" && 323 + test-tool pack-mtimes "pack-$cruft.mtimes" >actual.raw && 324 + cut -f1 -d" " <actual.raw | sort >actual && 325 + 326 + git rev-parse HEAD:dir HEAD:dir/bar HEAD:dir/sub HEAD:dir/sub/baz >expect.raw && 327 + sort <expect.raw >expect && 328 + 329 + test_cmp expect actual 330 + ) 331 + ' 332 + 333 + test_expect_success 'expired objects are pruned' ' 334 + git init repo && 335 + test_when_finished "rm -fr repo" && 336 + ( 337 + cd repo && 338 + 339 + test_commit packed && 340 + git repack -Ad && 341 + 342 + test_commit pruned && 343 + 344 + git rev-list --objects --no-object-names packed..pruned 
>objects && 345 + while read object 346 + do 347 + test-tool chmtime -1000 \ 348 + "$objdir/$(test_oid_to_path $object)" 349 + done <objects && 350 + 351 + keep="$(basename "$(ls $packdir/pack-*.pack)")" && 352 + cruft="$(echo $keep | git pack-objects --cruft \ 353 + --cruft-expiration=750.seconds.ago \ 354 + $packdir/pack)" && 355 + 356 + test-tool pack-mtimes "pack-$cruft.mtimes" >actual && 357 + test_must_be_empty actual 358 + ) 359 + ' 217 360 218 361 test_done