Git fork

Merge branch 'jt/diff-pairs'

A post-processing filter for "diff --raw" output has been
introduced.

* jt/diff-pairs:
builtin/diff-pairs: allow explicit diff queue flush
builtin: introduce diff-pairs command
diff: add option to skip resolving diff statuses
diff: return diff_filepair from diff queue helpers

+449 -21
+1
.gitignore
··· 55 55 /git-diff 56 56 /git-diff-files 57 57 /git-diff-index 58 + /git-diff-pairs 58 59 /git-diff-tree 59 60 /git-difftool 60 61 /git-difftool--helper
+60
Documentation/git-diff-pairs.adoc
··· 1 + git-diff-pairs(1) 2 + ================= 3 + 4 + NAME 5 + ---- 6 + git-diff-pairs - Compare the content and mode of provided blob pairs 7 + 8 + SYNOPSIS 9 + -------- 10 + [synopsis] 11 + git diff-pairs -z [<diff-options>] 12 + 13 + DESCRIPTION 14 + ----------- 15 + Show changes for file pairs provided on stdin. Input for this command must be 16 + in the NUL-terminated raw output format as generated by commands such as `git 17 + diff-tree -z -r --raw`. By default, the outputted diffs are computed and shown 18 + in the patch format when stdin closes. 19 + 20 + A single NUL byte may be written to stdin between raw input lines to compute 21 + file pair diffs up to that point instead of waiting for stdin to close. A NUL 22 + byte is also written to the output to delimit between these batches of diffs. 23 + 24 + Usage of this command enables the traditional diff pipeline to be broken up 25 + into separate stages where `diff-pairs` acts as the output phase. Other 26 + commands, such as `diff-tree`, may serve as a frontend to compute the raw 27 + diff format used as input. 28 + 29 + Instead of computing diffs via `git diff-tree -p -M` in one step, `diff-tree` 30 + can compute the file pairs and rename information without the blob diffs. This 31 + output can be fed to `diff-pairs` to generate the underlying blob diffs as done 32 + in the following example: 33 + 34 + ----------------------------- 35 + git diff-tree -z -r -M $a $b | 36 + git diff-pairs -z 37 + ----------------------------- 38 + 39 + Computing the tree diff upfront with rename information allows patch output 40 + from `diff-pairs` to be progressively computed over the course of potentially 41 + multiple invocations. 42 + 43 + Pathspecs are not currently supported by `diff-pairs`. Pathspec limiting should 44 + be performed by the upstream command generating the raw diffs used as input. 45 + 46 + Tree objects are not currently supported as input and are rejected. 
47 + 48 + Abbreviated object IDs in the `diff-pairs` input are not supported. Outputted 49 + object IDs can be abbreviated using the `--abbrev` option. 50 + 51 + OPTIONS 52 + ------- 53 + 54 + include::diff-options.adoc[] 55 + 56 + include::diff-generate-patch.adoc[] 57 + 58 + GIT 59 + --- 60 + Part of the linkgit:git[1] suite
+1
Documentation/meson.build
··· 42 42 'git-diagnose.adoc' : 1, 43 43 'git-diff-files.adoc' : 1, 44 44 'git-diff-index.adoc' : 1, 45 + 'git-diff-pairs.adoc' : 1, 45 46 'git-difftool.adoc' : 1, 46 47 'git-diff-tree.adoc' : 1, 47 48 'git-diff.adoc' : 1,
+1
Makefile
··· 1242 1242 BUILTIN_OBJS += builtin/diagnose.o 1243 1243 BUILTIN_OBJS += builtin/diff-files.o 1244 1244 BUILTIN_OBJS += builtin/diff-index.o 1245 + BUILTIN_OBJS += builtin/diff-pairs.o 1245 1246 BUILTIN_OBJS += builtin/diff-tree.o 1246 1247 BUILTIN_OBJS += builtin/diff.o 1247 1248 BUILTIN_OBJS += builtin/difftool.o
+1
builtin.h
··· 153 153 int cmd_diff_files(int argc, const char **argv, const char *prefix, struct repository *repo); 154 154 int cmd_diff_index(int argc, const char **argv, const char *prefix, struct repository *repo); 155 155 int cmd_diff(int argc, const char **argv, const char *prefix, struct repository *repo); 156 + int cmd_diff_pairs(int argc, const char **argv, const char *prefix, struct repository *repo); 156 157 int cmd_diff_tree(int argc, const char **argv, const char *prefix, struct repository *repo); 157 158 int cmd_difftool(int argc, const char **argv, const char *prefix, struct repository *repo); 158 159 int cmd_env__helper(int argc, const char **argv, const char *prefix, struct repository *repo);
+207
builtin/diff-pairs.c
··· 1 + #include "builtin.h" 2 + #include "config.h" 3 + #include "diff.h" 4 + #include "diffcore.h" 5 + #include "gettext.h" 6 + #include "hash.h" 7 + #include "hex.h" 8 + #include "object.h" 9 + #include "parse-options.h" 10 + #include "revision.h" 11 + #include "strbuf.h" 12 + 13 + static unsigned parse_mode_or_die(const char *mode, const char **end) 14 + { 15 + uint16_t ret; 16 + 17 + *end = parse_mode(mode, &ret); 18 + if (!*end) 19 + die(_("unable to parse mode: %s"), mode); 20 + return ret; 21 + } 22 + 23 + static void parse_oid_or_die(const char *hex, struct object_id *oid, 24 + const char **end, const struct git_hash_algo *algop) 25 + { 26 + if (parse_oid_hex_algop(hex, oid, end, algop) || *(*end)++ != ' ') 27 + die(_("unable to parse object id: %s"), hex); 28 + } 29 + 30 + int cmd_diff_pairs(int argc, const char **argv, const char *prefix, 31 + struct repository *repo) 32 + { 33 + struct strbuf path_dst = STRBUF_INIT; 34 + struct strbuf path = STRBUF_INIT; 35 + struct strbuf meta = STRBUF_INIT; 36 + struct option *parseopts; 37 + struct rev_info revs; 38 + int line_term = '\0'; 39 + int ret; 40 + 41 + const char * const builtin_diff_pairs_usage[] = { 42 + N_("git diff-pairs -z [<diff-options>]"), 43 + NULL 44 + }; 45 + struct option builtin_diff_pairs_options[] = { 46 + OPT_END() 47 + }; 48 + 49 + repo_init_revisions(repo, &revs, prefix); 50 + 51 + /* 52 + * Diff options are usually parsed implicitly as part of 53 + * setup_revisions(). Explicitly handle parsing to ensure options are 54 + * printed in the usage message. 
55 + */ 56 + parseopts = add_diff_options(builtin_diff_pairs_options, &revs.diffopt); 57 + show_usage_with_options_if_asked(argc, argv, builtin_diff_pairs_usage, parseopts); 58 + 59 + repo_config(repo, git_diff_basic_config, NULL); 60 + revs.diffopt.no_free = 1; 61 + revs.disable_stdin = 1; 62 + revs.abbrev = 0; 63 + revs.diff = 1; 64 + 65 + argc = parse_options(argc, argv, prefix, parseopts, builtin_diff_pairs_usage, 66 + PARSE_OPT_KEEP_ARGV0 | PARSE_OPT_KEEP_DASHDASH); 67 + 68 + if (setup_revisions(argc, argv, &revs, NULL) > 1) 69 + usagef(_("unrecognized argument: %s"), argv[0]); 70 + 71 + /* 72 + * With the -z option, both command input and raw output are 73 + * NUL-delimited (this mode does not affect patch output). At present 74 + * only NUL-delimited raw diff formatted input is supported. 75 + */ 76 + if (revs.diffopt.line_termination) 77 + usage(_("working without -z is not supported")); 78 + 79 + if (revs.prune_data.nr) 80 + usage(_("pathspec arguments not supported")); 81 + 82 + if (revs.pending.nr || revs.max_count != -1 || 83 + revs.min_age != (timestamp_t)-1 || 84 + revs.max_age != (timestamp_t)-1) 85 + usage(_("revision arguments not allowed")); 86 + 87 + if (!revs.diffopt.output_format) 88 + revs.diffopt.output_format = DIFF_FORMAT_PATCH; 89 + 90 + /* 91 + * If rename detection is not requested, use rename information from the 92 + * raw diff formatted input. Setting skip_resolving_statuses ensures 93 + * diffcore_std() does not mess with rename information already present 94 + * in queued filepairs. 
95 + */ 96 + if (!revs.diffopt.detect_rename) 97 + revs.diffopt.skip_resolving_statuses = 1; 98 + 99 + while (1) { 100 + struct object_id oid_a, oid_b; 101 + struct diff_filepair *pair; 102 + unsigned mode_a, mode_b; 103 + const char *p; 104 + char status; 105 + 106 + if (strbuf_getwholeline(&meta, stdin, line_term) == EOF) 107 + break; 108 + 109 + p = meta.buf; 110 + if (!*p) { 111 + diffcore_std(&revs.diffopt); 112 + diff_flush(&revs.diffopt); 113 + /* 114 + * When the diff queue is explicitly flushed, append a 115 + * NUL byte to separate batches of diffs. 116 + */ 117 + fputc('\0', revs.diffopt.file); 118 + fflush(revs.diffopt.file); 119 + continue; 120 + } 121 + 122 + if (*p != ':') 123 + die(_("invalid raw diff input")); 124 + p++; 125 + 126 + mode_a = parse_mode_or_die(p, &p); 127 + mode_b = parse_mode_or_die(p, &p); 128 + 129 + if (S_ISDIR(mode_a) || S_ISDIR(mode_b)) 130 + die(_("tree objects not supported")); 131 + 132 + parse_oid_or_die(p, &oid_a, &p, repo->hash_algo); 133 + parse_oid_or_die(p, &oid_b, &p, repo->hash_algo); 134 + 135 + status = *p++; 136 + 137 + if (strbuf_getwholeline(&path, stdin, line_term) == EOF) 138 + die(_("got EOF while reading path")); 139 + 140 + switch (status) { 141 + case DIFF_STATUS_ADDED: 142 + pair = diff_queue_addremove(&diff_queued_diff, 143 + &revs.diffopt, '+', mode_b, 144 + &oid_b, 1, path.buf, 0); 145 + if (pair) 146 + pair->status = status; 147 + break; 148 + 149 + case DIFF_STATUS_DELETED: 150 + pair = diff_queue_addremove(&diff_queued_diff, 151 + &revs.diffopt, '-', mode_a, 152 + &oid_a, 1, path.buf, 0); 153 + if (pair) 154 + pair->status = status; 155 + break; 156 + 157 + case DIFF_STATUS_TYPE_CHANGED: 158 + case DIFF_STATUS_MODIFIED: 159 + pair = diff_queue_change(&diff_queued_diff, &revs.diffopt, 160 + mode_a, mode_b, &oid_a, &oid_b, 161 + 1, 1, path.buf, 0, 0); 162 + if (pair) 163 + pair->status = status; 164 + break; 165 + 166 + case DIFF_STATUS_RENAMED: 167 + case DIFF_STATUS_COPIED: { 168 + struct 
diff_filespec *a, *b; 169 + unsigned int score; 170 + 171 + if (strbuf_getwholeline(&path_dst, stdin, line_term) == EOF) 172 + die(_("got EOF while reading destination path")); 173 + 174 + a = alloc_filespec(path.buf); 175 + b = alloc_filespec(path_dst.buf); 176 + fill_filespec(a, &oid_a, 1, mode_a); 177 + fill_filespec(b, &oid_b, 1, mode_b); 178 + 179 + pair = diff_queue(&diff_queued_diff, a, b); 180 + 181 + if (strtoul_ui(p, 10, &score)) 182 + die(_("unable to parse rename/copy score: %s"), p); 183 + 184 + pair->score = score * MAX_SCORE / 100; 185 + pair->status = status; 186 + pair->renamed_pair = 1; 187 + } 188 + break; 189 + 190 + default: 191 + die(_("unknown diff status: %c"), status); 192 + } 193 + } 194 + 195 + revs.diffopt.no_free = 0; 196 + diffcore_std(&revs.diffopt); 197 + diff_flush(&revs.diffopt); 198 + ret = diff_result_code(&revs); 199 + 200 + strbuf_release(&path_dst); 201 + strbuf_release(&path); 202 + strbuf_release(&meta); 203 + release_revisions(&revs); 204 + FREE_AND_NULL(parseopts); 205 + 206 + return ret; 207 + }
+1
command-list.txt
··· 96 96 git-diff mainporcelain info 97 97 git-diff-files plumbinginterrogators 98 98 git-diff-index plumbinginterrogators 99 + git-diff-pairs plumbinginterrogators 99 100 git-diff-tree plumbinginterrogators 100 101 git-difftool ancillaryinterrogators complete 101 102 git-fast-export ancillarymanipulators
+51 -21
diff.c
··· 7085 7085 diffcore_order(options->orderfile); 7086 7086 if (options->rotate_to) 7087 7087 diffcore_rotate(options); 7088 - if (!options->found_follow) 7088 + if (!options->found_follow && !options->skip_resolving_statuses) 7089 7089 /* See try_to_follow_renames() in tree-diff.c */ 7090 7090 diff_resolve_rename_copy(); 7091 7091 diffcore_apply_filter(options); ··· 7161 7161 options->found_changes = !!diffstat->nr; 7162 7162 } 7163 7163 7164 - void diff_addremove(struct diff_options *options, 7165 - int addremove, unsigned mode, 7166 - const struct object_id *oid, 7167 - int oid_valid, 7168 - const char *concatpath, unsigned dirty_submodule) 7164 + struct diff_filepair *diff_queue_addremove(struct diff_queue_struct *queue, 7165 + struct diff_options *options, 7166 + int addremove, unsigned mode, 7167 + const struct object_id *oid, 7168 + int oid_valid, 7169 + const char *concatpath, 7170 + unsigned dirty_submodule) 7169 7171 { 7170 7172 struct diff_filespec *one, *two; 7173 + struct diff_filepair *pair; 7171 7174 7172 7175 if (S_ISGITLINK(mode) && is_submodule_ignored(concatpath, options)) 7173 - return; 7176 + return NULL; 7174 7177 7175 7178 /* This may look odd, but it is a preparation for 7176 7179 * feeding "there are unchanged files which should ··· 7190 7193 7191 7194 if (options->prefix && 7192 7195 strncmp(concatpath, options->prefix, options->prefix_length)) 7193 - return; 7196 + return NULL; 7194 7197 7195 7198 one = alloc_filespec(concatpath); 7196 7199 two = alloc_filespec(concatpath); ··· 7202 7205 two->dirty_submodule = dirty_submodule; 7203 7206 } 7204 7207 7205 - diff_queue(&diff_queued_diff, one, two); 7208 + pair = diff_queue(queue, one, two); 7206 7209 if (!options->flags.diff_from_contents) 7207 7210 options->flags.has_changes = 1; 7211 + 7212 + return pair; 7208 7213 } 7209 7214 7210 - void diff_change(struct diff_options *options, 7211 - unsigned old_mode, unsigned new_mode, 7212 - const struct object_id *old_oid, 7213 - const struct 
object_id *new_oid, 7214 - int old_oid_valid, int new_oid_valid, 7215 - const char *concatpath, 7216 - unsigned old_dirty_submodule, unsigned new_dirty_submodule) 7215 + struct diff_filepair *diff_queue_change(struct diff_queue_struct *queue, 7216 + struct diff_options *options, 7217 + unsigned old_mode, unsigned new_mode, 7218 + const struct object_id *old_oid, 7219 + const struct object_id *new_oid, 7220 + int old_oid_valid, int new_oid_valid, 7221 + const char *concatpath, 7222 + unsigned old_dirty_submodule, 7223 + unsigned new_dirty_submodule) 7217 7224 { 7218 7225 struct diff_filespec *one, *two; 7219 7226 struct diff_filepair *p; 7220 7227 7221 7228 if (S_ISGITLINK(old_mode) && S_ISGITLINK(new_mode) && 7222 7229 is_submodule_ignored(concatpath, options)) 7223 - return; 7230 + return NULL; 7224 7231 7225 7232 if (options->flags.reverse_diff) { 7226 7233 SWAP(old_mode, new_mode); ··· 7231 7238 7232 7239 if (options->prefix && 7233 7240 strncmp(concatpath, options->prefix, options->prefix_length)) 7234 - return; 7241 + return NULL; 7235 7242 7236 7243 one = alloc_filespec(concatpath); 7237 7244 two = alloc_filespec(concatpath); ··· 7239 7246 fill_filespec(two, new_oid, new_oid_valid, new_mode); 7240 7247 one->dirty_submodule = old_dirty_submodule; 7241 7248 two->dirty_submodule = new_dirty_submodule; 7242 - p = diff_queue(&diff_queued_diff, one, two); 7249 + p = diff_queue(queue, one, two); 7243 7250 7244 7251 if (options->flags.diff_from_contents) 7245 - return; 7252 + return p; 7246 7253 7247 7254 if (options->flags.quick && options->skip_stat_unmatch && 7248 7255 !diff_filespec_check_stat_unmatch(options->repo, p)) { 7249 7256 diff_free_filespec_data(p->one); 7250 7257 diff_free_filespec_data(p->two); 7251 - return; 7258 + return p; 7252 7259 } 7253 7260 7254 7261 options->flags.has_changes = 1; 7262 + 7263 + return p; 7264 + } 7265 + 7266 + void diff_addremove(struct diff_options *options, int addremove, unsigned mode, 7267 + const struct object_id *oid, 
int oid_valid, 7268 + const char *concatpath, unsigned dirty_submodule) 7269 + { 7270 + diff_queue_addremove(&diff_queued_diff, options, addremove, mode, oid, 7271 + oid_valid, concatpath, dirty_submodule); 7272 + } 7273 + 7274 + void diff_change(struct diff_options *options, 7275 + unsigned old_mode, unsigned new_mode, 7276 + const struct object_id *old_oid, 7277 + const struct object_id *new_oid, 7278 + int old_oid_valid, int new_oid_valid, 7279 + const char *concatpath, 7280 + unsigned old_dirty_submodule, unsigned new_dirty_submodule) 7281 + { 7282 + diff_queue_change(&diff_queued_diff, options, old_mode, new_mode, 7283 + old_oid, new_oid, old_oid_valid, new_oid_valid, 7284 + concatpath, old_dirty_submodule, new_dirty_submodule); 7255 7285 } 7256 7286 7257 7287 struct diff_filepair *diff_unmerge(struct diff_options *options, const char *path)
+33
diff.h
··· 353 353 /* to support internal diff recursion by --follow hack*/ 354 354 int found_follow; 355 355 356 + /* 357 + * By default, diffcore_std() resolves the statuses for queued diff file 358 + * pairs by calling diff_resolve_rename_copy(). If status information 359 + * has already been manually set, this option prevents diffcore_std() 360 + * from resetting statuses. 361 + */ 362 + int skip_resolving_statuses; 363 + 356 364 /* Callback which allows tweaking the options in diff_setup_done(). */ 357 365 void (*set_default)(struct diff_options *); 358 366 ··· 507 515 void diff_set_default_prefix(struct diff_options *options); 508 516 509 517 int diff_can_quit_early(struct diff_options *); 518 + 519 + /* 520 + * Stages changes in the provided diff queue for file additions and deletions. 521 + * If a file pair gets queued, it is returned. 522 + */ 523 + struct diff_filepair *diff_queue_addremove(struct diff_queue_struct *queue, 524 + struct diff_options *, 525 + int addremove, unsigned mode, 526 + const struct object_id *oid, 527 + int oid_valid, const char *fullpath, 528 + unsigned dirty_submodule); 529 + 530 + /* 531 + * Stages changes in the provided diff queue for file modifications. 532 + * If a file pair gets queued, it is returned. 533 + */ 534 + struct diff_filepair *diff_queue_change(struct diff_queue_struct *queue, 535 + struct diff_options *, 536 + unsigned mode1, unsigned mode2, 537 + const struct object_id *old_oid, 538 + const struct object_id *new_oid, 539 + int old_oid_valid, int new_oid_valid, 540 + const char *fullpath, 541 + unsigned dirty_submodule1, 542 + unsigned dirty_submodule2); 510 543 511 544 void diff_addremove(struct diff_options *, 512 545 int addremove,
+1
git.c
··· 541 541 { "diff", cmd_diff, NO_PARSEOPT }, 542 542 { "diff-files", cmd_diff_files, RUN_SETUP | NEED_WORK_TREE | NO_PARSEOPT }, 543 543 { "diff-index", cmd_diff_index, RUN_SETUP | NO_PARSEOPT }, 544 + { "diff-pairs", cmd_diff_pairs, RUN_SETUP | NO_PARSEOPT }, 544 545 { "diff-tree", cmd_diff_tree, RUN_SETUP | NO_PARSEOPT }, 545 546 { "difftool", cmd_difftool, RUN_SETUP_GENTLY }, 546 547 { "fast-export", cmd_fast_export, RUN_SETUP },
+1
meson.build
··· 540 540 'builtin/diagnose.c', 541 541 'builtin/diff-files.c', 542 542 'builtin/diff-index.c', 543 + 'builtin/diff-pairs.c', 543 544 'builtin/diff-tree.c', 544 545 'builtin/diff.c', 545 546 'builtin/difftool.c',
+1
t/meson.build
··· 500 500 't4067-diff-partial-clone.sh', 501 501 't4068-diff-symmetric-merge-base.sh', 502 502 't4069-remerge-diff.sh', 503 + 't4070-diff-pairs.sh', 503 504 't4100-apply-stat.sh', 504 505 't4101-apply-nonl.sh', 505 506 't4102-apply-rename.sh',
+90
t/t4070-diff-pairs.sh
#!/bin/sh

test_description='basic diff-pairs tests'
. ./test-lib.sh

# This creates a diff with added, modified, deleted, renamed, copied, and
# typechange entries. This includes a submodule to test submodule diff support.
#
# Note that the setup intentionally leaves the working directory inside
# "main"; every subsequent test runs from there.
test_expect_success 'setup' '
	test_config_global protocol.file.allow always &&
	git init sub &&
	test_commit -C sub initial &&

	git init main &&
	cd main &&
	echo to-be-gone >deleted &&
	echo original >modified &&
	echo now-a-file >symlink &&
	test_seq 200 >two-hundred &&
	test_seq 201 500 >five-hundred &&
	git add . &&
	test_tick &&
	git commit -m base &&
	git tag base &&

	git submodule add ../sub &&
	echo now-here >added &&
	echo new >modified &&
	rm deleted &&
	mkdir subdir &&
	echo content >subdir/file &&
	mv two-hundred renamed &&
	test_seq 201 500 | sed s/300/modified/ >copied &&
	rm symlink &&
	git add -A . &&
	test_ln_s_add dest symlink &&
	test_tick &&
	git commit -m new &&
	git tag new
'

# Feeding raw output back through diff-pairs must reproduce it unchanged.
test_expect_success 'diff-pairs recreates --raw' '
	git diff-tree -r -M -C -C -z base new >expect &&
	git diff-pairs --raw -z >actual <expect &&
	test_cmp expect actual
'

# The raw diff is written to a file rather than piped so that a failure of
# diff-tree is noticed by the && chain instead of being hidden by the pipe.
test_expect_success 'diff-pairs can create -p output' '
	git diff-tree -p -M -C -C base new >expect &&
	git diff-tree -r -M -C -C -z base new >raw &&
	git diff-pairs -p -z <raw >actual &&
	test_cmp expect actual
'

# Newline-terminated raw input (no -z) is rejected with a usage error.
test_expect_success 'diff-pairs does not support normal raw diff input' '
	git diff-tree -r base new >raw &&
	test_must_fail git diff-pairs <raw >out 2>err &&

	echo "usage: working without -z is not supported" >expect &&
	test_must_be_empty out &&
	test_cmp expect err
'

# Without -r, diff-tree emits tree entries, which diff-pairs refuses.
test_expect_success 'diff-pairs does not support tree objects as input' '
	git diff-tree -z base new >raw &&
	test_must_fail git diff-pairs -z <raw >out 2>err &&

	echo "fatal: tree objects not supported" >expect &&
	test_must_be_empty out &&
	test_cmp expect err
'

# Pathspec limiting has to happen in the command producing the raw diff.
test_expect_success 'diff-pairs does not support pathspec arguments' '
	git diff-tree -r -z base new >raw &&
	test_must_fail git diff-pairs -z -- new >out 2>err <raw &&

	echo "usage: pathspec arguments not supported" >expect &&
	test_must_be_empty out &&
	test_cmp expect err
'

# A lone NUL between records flushes the queue and is echoed to the output
# as a batch delimiter, so two batches of raw input round-trip unchanged.
test_expect_success 'diff-pairs explicit queue flush' '
	git diff-tree -r -M -C -C -z base new >expect &&
	printf "\0" >>expect &&
	git diff-tree -r -M -C -C -z base new >>expect &&

	git diff-pairs --raw -z <expect >actual &&
	test_cmp expect actual
'

test_done