Git fork

parallel-checkout: add tests related to path collisions

Add tests to confirm that path collisions are properly detected by
checkout workers, both to avoid race conditions and to report colliding
entries on clone.

Co-authored-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

authored by

Matheus Tavares
Jeff Hostetler
and committed by
Junio C Hamano
6a7bc9d1 d0e5d357

+168 -2
+4
parallel-checkout.c
··· 8 #include "sigchain.h" 9 #include "streaming.h" 10 #include "thread-utils.h" 11 12 struct pc_worker { 13 struct child_process cp; ··· 326 if (dir_sep && !has_dirs_only_path(path.buf, dir_sep - path.buf, 327 state->base_dir_len)) { 328 pc_item->status = PC_ITEM_COLLIDED; 329 goto out; 330 } 331 ··· 341 * call should have already caught these cases. 342 */ 343 pc_item->status = PC_ITEM_COLLIDED; 344 } else { 345 error_errno("failed to open file '%s'", path.buf); 346 pc_item->status = PC_ITEM_FAILED;
··· 8 #include "sigchain.h" 9 #include "streaming.h" 10 #include "thread-utils.h" 11 + #include "trace2.h" 12 13 struct pc_worker { 14 struct child_process cp; ··· 327 if (dir_sep && !has_dirs_only_path(path.buf, dir_sep - path.buf, 328 state->base_dir_len)) { 329 pc_item->status = PC_ITEM_COLLIDED; 330 + trace2_data_string("pcheckout", NULL, "collision/dirname", path.buf); 331 goto out; 332 } 333 ··· 343 * call should have already caught these cases. 344 */ 345 pc_item->status = PC_ITEM_COLLIDED; 346 + trace2_data_string("pcheckout", NULL, 347 + "collision/basename", path.buf); 348 } else { 349 error_errno("failed to open file '%s'", path.buf); 350 pc_item->status = PC_ITEM_FAILED;
+2 -2
t/lib-parallel-checkout.sh
··· 22 23 local trace_file=trace-test-checkout-workers && 24 rm -f "$trace_file" && 25 - GIT_TRACE2="$(pwd)/$trace_file" "$@" && 26 27 local workers=$(grep "child_start\[..*\] git checkout--worker" "$trace_file" | wc -l) && 28 test $workers -eq $expected_workers && 29 rm "$trace_file" 30 - } 31 32 # Verify that both the working tree and the index were created correctly 33 verify_checkout () {
··· 22 23 local trace_file=trace-test-checkout-workers && 24 rm -f "$trace_file" && 25 + GIT_TRACE2="$(pwd)/$trace_file" "$@" 2>&8 && 26 27 local workers=$(grep "child_start\[..*\] git checkout--worker" "$trace_file" | wc -l) && 28 test $workers -eq $expected_workers && 29 rm "$trace_file" 30 + } 8>&2 2>&4 31 32 # Verify that both the working tree and the index were created correctly 33 verify_checkout () {
+162
t/t2081-parallel-checkout-collisions.sh
···
··· 1 + #!/bin/sh 2 + 3 + test_description="path collisions during parallel checkout 4 + 5 + Parallel checkout must detect path collisions to: 6 + 7 + 1) Avoid racily writing to different paths that represent the same file on disk. 8 + 2) Report the colliding entries on clone. 9 + 10 + The tests in this file exercise parallel checkout's collision detection code in 11 + both these mechanics. 12 + " 13 + 14 + . ./test-lib.sh 15 + . "$TEST_DIRECTORY/lib-parallel-checkout.sh" 16 + 17 + TEST_ROOT="$PWD" 18 + 19 + test_expect_success CASE_INSENSITIVE_FS 'setup' ' 20 + empty_oid=$(git hash-object -w --stdin </dev/null) && 21 + cat >objs <<-EOF && 22 + 100644 $empty_oid FILE_X 23 + 100644 $empty_oid FILE_x 24 + 100644 $empty_oid file_X 25 + 100644 $empty_oid file_x 26 + EOF 27 + git update-index --index-info <objs && 28 + git commit -m "colliding files" && 29 + git tag basename_collision && 30 + 31 + write_script "$TEST_ROOT"/logger_script <<-\EOF 32 + echo "$@" >>filter.log 33 + EOF 34 + ' 35 + 36 + test_workers_in_event_trace () 37 + { 38 + test $1 -eq $(grep ".event.:.child_start..*checkout--worker" $2 | wc -l) 39 + } 40 + 41 + test_expect_success CASE_INSENSITIVE_FS 'worker detects basename collision' ' 42 + GIT_TRACE2_EVENT="$(pwd)/trace" git \ 43 + -c checkout.workers=2 -c checkout.thresholdForParallelism=0 \ 44 + checkout . && 45 + 46 + test_workers_in_event_trace 2 trace && 47 + collisions=$(grep -i "category.:.pcheckout.,.key.:.collision/basename.,.value.:.file_x.}" trace | wc -l) && 48 + test $collisions -eq 3 49 + ' 50 + 51 + test_expect_success CASE_INSENSITIVE_FS 'worker detects dirname collision' ' 52 + test_config filter.logger.smudge "\"$TEST_ROOT/logger_script\" %f" && 53 + empty_oid=$(git hash-object -w --stdin </dev/null) && 54 + 55 + # By setting a filter command to "a", we make it ineligible for parallel 56 + # checkout, and thus it is checked out *first*. This way we can ensure 57 + # that "A/B" and "A/C" will both collide with the regular file "a". 58 + # 59 + attr_oid=$(echo "a filter=logger" | git hash-object -w --stdin) && 60 + 61 + cat >objs <<-EOF && 62 + 100644 $empty_oid A/B 63 + 100644 $empty_oid A/C 64 + 100644 $empty_oid a 65 + 100644 $attr_oid .gitattributes 66 + EOF 67 + git rm -rf . && 68 + git update-index --index-info <objs && 69 + 70 + rm -f trace filter.log && 71 + GIT_TRACE2_EVENT="$(pwd)/trace" git \ 72 + -c checkout.workers=2 -c checkout.thresholdForParallelism=0 \ 73 + checkout . && 74 + 75 + # Check that "a" (and only "a") was filtered 76 + echo a >expected.log && 77 + test_cmp filter.log expected.log && 78 + 79 + # Check that it used the right number of workers and detected the collisions 80 + test_workers_in_event_trace 2 trace && 81 + grep "category.:.pcheckout.,.key.:.collision/dirname.,.value.:.A/B.}" trace && 82 + grep "category.:.pcheckout.,.key.:.collision/dirname.,.value.:.A/C.}" trace 83 + ' 84 + 85 + test_expect_success SYMLINKS,CASE_INSENSITIVE_FS 'do not follow symlinks colliding with leading dir' ' 86 + empty_oid=$(git hash-object -w --stdin </dev/null) && 87 + symlink_oid=$(echo "./e" | git hash-object -w --stdin) && 88 + mkdir e && 89 + 90 + cat >objs <<-EOF && 91 + 120000 $symlink_oid D 92 + 100644 $empty_oid d/x 93 + 100644 $empty_oid e/y 94 + EOF 95 + git rm -rf . && 96 + git update-index --index-info <objs && 97 + 98 + set_checkout_config 2 0 && 99 + test_checkout_workers 2 git checkout . && 100 + test_path_is_dir e && 101 + test_path_is_missing e/x 102 + ' 103 + 104 + # The two following tests check that parallel checkout correctly reports 105 + # colliding entries on clone. The sequential code detects a collision by 106 + # calling lstat() before trying to open(O_CREAT) a file. (Note that this only 107 + # works for clone.) Then, to find the pair of a colliding item k, it searches 108 + # cache_entry[0, k-1]. This is not sufficient in parallel checkout because: 109 + # 110 + # - A colliding file may be created between the lstat() and open() calls; 111 + # - A colliding entry might appear in the second half of the cache_entry array. 112 + # 113 + test_expect_success CASE_INSENSITIVE_FS 'collision report on clone (w/ racy file creation)' ' 114 + git reset --hard basename_collision && 115 + set_checkout_config 2 0 && 116 + test_checkout_workers 2 git clone . clone-repo 2>stderr && 117 + 118 + grep FILE_X stderr && 119 + grep FILE_x stderr && 120 + grep file_X stderr && 121 + grep file_x stderr && 122 + grep "the following paths have collided" stderr 123 + ' 124 + 125 + # This test ensures that the collision report code is correctly looking for 126 + # colliding peers in the second half of the cache_entry array. This is done by 127 + # defining a smudge command for the *last* array entry, which makes it 128 + # non-eligible for parallel-checkout. Thus, it is checked out *first*, before 129 + # spawning the workers. 130 + # 131 + # Note: this test doesn't work on Windows because, on this system, the 132 + # collision report code uses strcmp() to find the colliding pairs when 133 + # core.ignoreCase is false. And we need this setting for this test so that only 134 + # 'file_x' matches the pattern of the filter attribute. But the test works on 135 + # OSX, where the colliding pairs are found using inode. 136 + # 137 + test_expect_success CASE_INSENSITIVE_FS,!MINGW,!CYGWIN \ 138 + 'collision report on clone (w/ colliding peer after the detected entry)' ' 139 + 140 + test_config_global filter.logger.smudge "\"$TEST_ROOT/logger_script\" %f" && 141 + git reset --hard basename_collision && 142 + echo "file_x filter=logger" >.gitattributes && 143 + git add .gitattributes && 144 + git commit -m "filter for file_x" && 145 + 146 + rm -rf clone-repo && 147 + set_checkout_config 2 0 && 148 + test_checkout_workers 2 \ 149 + git -c core.ignoreCase=false clone . clone-repo 2>stderr && 150 + 151 + grep FILE_X stderr && 152 + grep FILE_x stderr && 153 + grep file_X stderr && 154 + grep file_x stderr && 155 + grep "the following paths have collided" stderr && 156 + 157 + # Check that only "file_x" was filtered 158 + echo file_x >expected.log && 159 + test_cmp clone-repo/filter.log expected.log 160 + ' 161 + 162 + test_done