Git fork
at reftables-rust 1423 lines 38 kB view raw
1/* 2 * "git fast-export" builtin command 3 * 4 * Copyright (C) 2007 Johannes E. Schindelin 5 */ 6 7#define USE_THE_REPOSITORY_VARIABLE 8#define DISABLE_SIGN_COMPARE_WARNINGS 9 10#include "builtin.h" 11#include "config.h" 12#include "environment.h" 13#include "gettext.h" 14#include "hex.h" 15#include "refs.h" 16#include "refspec.h" 17#include "object-file.h" 18#include "odb.h" 19#include "commit.h" 20#include "object.h" 21#include "tag.h" 22#include "diff.h" 23#include "diffcore.h" 24#include "log-tree.h" 25#include "revision.h" 26#include "decorate.h" 27#include "string-list.h" 28#include "utf8.h" 29#include "parse-options.h" 30#include "quote.h" 31#include "remote.h" 32#include "blob.h" 33#include "gpg-interface.h" 34 35static const char *const fast_export_usage[] = { 36 N_("git fast-export [<rev-list-opts>]"), 37 NULL 38}; 39 40static int progress; 41static enum sign_mode signed_tag_mode = SIGN_ABORT; 42static enum sign_mode signed_commit_mode = SIGN_STRIP; 43static enum tag_of_filtered_mode { TAG_FILTERING_ABORT, DROP, REWRITE } tag_of_filtered_mode = TAG_FILTERING_ABORT; 44static enum reencode_mode { REENCODE_ABORT, REENCODE_YES, REENCODE_NO } reencode_mode = REENCODE_ABORT; 45static int fake_missing_tagger; 46static int use_done_feature; 47static int no_data; 48static int full_tree; 49static int reference_excluded_commits; 50static int show_original_ids; 51static int mark_tags; 52static struct string_list extra_refs = STRING_LIST_INIT_DUP; 53static struct string_list tag_refs = STRING_LIST_INIT_DUP; 54static struct refspec refspecs = REFSPEC_INIT_FETCH; 55static int anonymize; 56static struct hashmap anonymized_seeds; 57static struct revision_sources revision_sources; 58 59static int parse_opt_sign_mode(const struct option *opt, 60 const char *arg, int unset) 61{ 62 enum sign_mode *val = opt->value; 63 64 if (unset) 65 return 0; 66 67 if (parse_sign_mode(arg, val)) 68 return error("Unknown %s mode: %s", opt->long_name, arg); 69 70 return 0; 71} 72 73static int parse_opt_tag_of_filtered_mode(const struct option *opt, 74 const char *arg, int unset) 75{ 76 enum tag_of_filtered_mode *val = opt->value; 77 78 if (unset || !strcmp(arg, "abort")) 79 *val = TAG_FILTERING_ABORT; 80 else if (!strcmp(arg, "drop")) 81 *val = DROP; 82 else if (!strcmp(arg, "rewrite")) 83 *val = REWRITE; 84 else 85 return error("Unknown tag-of-filtered mode: %s", arg); 86 return 0; 87} 88 89static int parse_opt_reencode_mode(const struct option *opt, 90 const char *arg, int unset) 91{ 92 enum reencode_mode *val = opt->value; 93 94 if (unset) { 95 *val = REENCODE_ABORT; 96 return 0; 97 } 98 99 switch (git_parse_maybe_bool(arg)) { 100 case 0: 101 *val = REENCODE_NO; 102 break; 103 case 1: 104 *val = REENCODE_YES; 105 break; 106 default: 107 if (!strcasecmp(arg, "abort")) 108 *val = REENCODE_ABORT; 109 else 110 return error("Unknown reencoding mode: %s", arg); 111 } 112 113 return 0; 114} 115 116static struct decoration idnums; 117static uint32_t last_idnum; 118struct anonymized_entry { 119 struct hashmap_entry hash; 120 char *anon; 121 const char orig[FLEX_ARRAY]; 122}; 123 124struct anonymized_entry_key { 125 struct hashmap_entry hash; 126 const char *orig; 127 size_t orig_len; 128}; 129 130static int anonymized_entry_cmp(const void *cmp_data UNUSED, 131 const struct hashmap_entry *eptr, 132 const struct hashmap_entry *entry_or_key, 133 const void *keydata) 134{ 135 const struct anonymized_entry *a, *b; 136 137 a = container_of(eptr, const struct anonymized_entry, hash); 138 if (keydata) { 139 const struct anonymized_entry_key *key = keydata; 140 int equal = !xstrncmpz(a->orig, key->orig, key->orig_len); 141 return !equal; 142 } 143 144 b = container_of(entry_or_key, const struct anonymized_entry, hash); 145 return strcmp(a->orig, b->orig); 146} 147 148static struct anonymized_entry *add_anonymized_entry(struct hashmap *map, 149 unsigned hash, 150 const char *orig, size_t len, 151 char *anon) 152{ 153 struct anonymized_entry *ret, *old; 154 155 if (!map->cmpfn) 156 hashmap_init(map, anonymized_entry_cmp, NULL, 0); 157 158 FLEX_ALLOC_MEM(ret, orig, orig, len); 159 hashmap_entry_init(&ret->hash, hash); 160 ret->anon = anon; 161 old = hashmap_put_entry(map, ret, hash); 162 163 if (old) { 164 free(old->anon); 165 free(old); 166 } 167 168 return ret; 169} 170 171/* 172 * Basically keep a cache of X->Y so that we can repeatedly replace 173 * the same anonymized string with another. The actual generation 174 * is farmed out to the generate function. 175 */ 176static const char *anonymize_str(struct hashmap *map, 177 char *(*generate)(void), 178 const char *orig, size_t len) 179{ 180 struct anonymized_entry_key key; 181 struct anonymized_entry *ret; 182 183 hashmap_entry_init(&key.hash, memhash(orig, len)); 184 key.orig = orig; 185 key.orig_len = len; 186 187 /* First check if it's a token the user configured manually... */ 188 ret = hashmap_get_entry(&anonymized_seeds, &key, hash, &key); 189 190 /* ...otherwise check if we've already seen it in this context... */ 191 if (!ret) 192 ret = hashmap_get_entry(map, &key, hash, &key); 193 194 /* ...and finally generate a new mapping if necessary */ 195 if (!ret) 196 ret = add_anonymized_entry(map, key.hash.hash, 197 orig, len, generate()); 198 199 return ret->anon; 200} 201 202/* 203 * We anonymize each component of a path individually, 204 * so that paths a/b and a/c will share a common root. 205 * The paths are cached via anonymize_mem so that repeated 206 * lookups for "a" will yield the same value. 207 */ 208static void anonymize_path(struct strbuf *out, const char *path, 209 struct hashmap *map, 210 char *(*generate)(void)) 211{ 212 while (*path) { 213 const char *end_of_component = strchrnul(path, '/'); 214 size_t len = end_of_component - path; 215 const char *c = anonymize_str(map, generate, path, len); 216 strbuf_addstr(out, c); 217 path = end_of_component; 218 if (*path) 219 strbuf_addch(out, *path++); 220 } 221} 222 223static inline void *mark_to_ptr(uint32_t mark) 224{ 225 return (void *)(uintptr_t)mark; 226} 227 228static inline uint32_t ptr_to_mark(void * mark) 229{ 230 return (uint32_t)(uintptr_t)mark; 231} 232 233static inline void mark_object(struct object *object, uint32_t mark) 234{ 235 add_decoration(&idnums, object, mark_to_ptr(mark)); 236} 237 238static inline void mark_next_object(struct object *object) 239{ 240 mark_object(object, ++last_idnum); 241} 242 243static int get_object_mark(struct object *object) 244{ 245 void *decoration = lookup_decoration(&idnums, object); 246 if (!decoration) 247 return 0; 248 return ptr_to_mark(decoration); 249} 250 251static struct commit *rewrite_commit(struct commit *p) 252{ 253 for (;;) { 254 if (p->parents && p->parents->next) 255 break; 256 if (p->object.flags & UNINTERESTING) 257 break; 258 if (!(p->object.flags & TREESAME)) 259 break; 260 if (!p->parents) 261 return NULL; 262 p = p->parents->item; 263 } 264 return p; 265} 266 267static void show_progress(void) 268{ 269 static int counter = 0; 270 if (!progress) 271 return; 272 if ((++counter % progress) == 0) 273 printf("progress %d objects\n", counter); 274} 275 276/* 277 * Ideally we would want some transformation of the blob data here 278 * that is unreversible, but would still be the same size and have 279 * the same data relationship to other blobs (so that we get the same 280 * delta and packing behavior as the original). But the first and last 281 * requirements there are probably mutually exclusive, so let's take 282 * the easy way out for now, and just generate arbitrary content. 283 * 284 * There's no need to cache this result with anonymize_mem, since 285 * we already handle blob content caching with marks. 286 */ 287static char *anonymize_blob(unsigned long *size) 288{ 289 static int counter; 290 struct strbuf out = STRBUF_INIT; 291 strbuf_addf(&out, "anonymous blob %d", counter++); 292 *size = out.len; 293 return strbuf_detach(&out, NULL); 294} 295 296static void export_blob(const struct object_id *oid) 297{ 298 unsigned long size; 299 enum object_type type; 300 char *buf; 301 struct object *object; 302 int eaten; 303 304 if (no_data) 305 return; 306 307 if (is_null_oid(oid)) 308 return; 309 310 object = lookup_object(the_repository, oid); 311 if (object && object->flags & SHOWN) 312 return; 313 314 if (anonymize) { 315 buf = anonymize_blob(&size); 316 object = (struct object *)lookup_blob(the_repository, oid); 317 eaten = 0; 318 } else { 319 buf = odb_read_object(the_repository->objects, oid, &type, &size); 320 if (!buf) 321 die("could not read blob %s", oid_to_hex(oid)); 322 if (check_object_signature(the_repository, oid, buf, size, 323 type) < 0) 324 die("oid mismatch in blob %s", oid_to_hex(oid)); 325 object = parse_object_buffer(the_repository, oid, type, 326 size, buf, &eaten); 327 } 328 329 if (!object) 330 die("Could not read blob %s", oid_to_hex(oid)); 331 332 mark_next_object(object); 333 334 printf("blob\nmark :%"PRIu32"\n", last_idnum); 335 if (show_original_ids) 336 printf("original-oid %s\n", oid_to_hex(oid)); 337 printf("data %"PRIuMAX"\n", (uintmax_t)size); 338 if (size && fwrite(buf, size, 1, stdout) != 1) 339 die_errno("could not write blob '%s'", oid_to_hex(oid)); 340 printf("\n"); 341 342 show_progress(); 343 344 object->flags |= SHOWN; 345 if (!eaten) 346 free(buf); 347} 348 349static int depth_first(const void *a_, const void *b_) 350{ 351 const struct diff_filepair *a = *((const struct diff_filepair **)a_); 352 const struct diff_filepair *b = *((const struct diff_filepair **)b_); 353 const char *name_a, *name_b; 354 int len_a, len_b, len; 355 int cmp; 356 357 name_a = a->one ? a->one->path : a->two->path; 358 name_b = b->one ? b->one->path : b->two->path; 359 360 len_a = strlen(name_a); 361 len_b = strlen(name_b); 362 len = (len_a < len_b) ? len_a : len_b; 363 364 /* strcmp will sort 'd' before 'd/e', we want 'd/e' before 'd' */ 365 cmp = memcmp(name_a, name_b, len); 366 if (cmp) 367 return cmp; 368 cmp = len_b - len_a; 369 if (cmp) 370 return cmp; 371 /* 372 * Move 'R'ename entries last so that all references of the file 373 * appear in the output before it is renamed (e.g., when a file 374 * was copied and renamed in the same commit). 375 */ 376 return (a->status == 'R') - (b->status == 'R'); 377} 378 379static void print_path_1(const char *path) 380{ 381 int need_quote = quote_c_style(path, NULL, NULL, 0); 382 if (need_quote) 383 quote_c_style(path, NULL, stdout, 0); 384 else if (strchr(path, ' ')) 385 printf("\"%s\"", path); 386 else 387 printf("%s", path); 388} 389 390static char *anonymize_path_component(void) 391{ 392 static int counter; 393 struct strbuf out = STRBUF_INIT; 394 strbuf_addf(&out, "path%d", counter++); 395 return strbuf_detach(&out, NULL); 396} 397 398static void print_path(const char *path) 399{ 400 if (!anonymize) 401 print_path_1(path); 402 else { 403 static struct hashmap paths; 404 static struct strbuf anon = STRBUF_INIT; 405 406 anonymize_path(&anon, path, &paths, anonymize_path_component); 407 print_path_1(anon.buf); 408 strbuf_reset(&anon); 409 } 410} 411 412static char *generate_fake_oid(void) 413{ 414 static uint32_t counter = 1; /* avoid null oid */ 415 const unsigned hashsz = the_hash_algo->rawsz; 416 struct object_id oid; 417 char *hex = xmallocz(GIT_MAX_HEXSZ); 418 419 oidclr(&oid, the_repository->hash_algo); 420 put_be32(oid.hash + hashsz - 4, counter++); 421 return oid_to_hex_r(hex, &oid); 422} 423 424static const char *anonymize_oid(const char *oid_hex) 425{ 426 static struct hashmap objs; 427 size_t len = strlen(oid_hex); 428 return anonymize_str(&objs, generate_fake_oid, oid_hex, len); 429} 430 431static void show_filemodify(struct diff_queue_struct *q, 432 struct diff_options *options UNUSED, void *data) 433{ 434 int i; 435 struct string_list *changed = data; 436 437 /* 438 * Handle files below a directory first, in case they are all deleted 439 * and the directory changes to a file or symlink. 440 */ 441 QSORT(q->queue, q->nr, depth_first); 442 443 for (i = 0; i < q->nr; i++) { 444 struct diff_filespec *ospec = q->queue[i]->one; 445 struct diff_filespec *spec = q->queue[i]->two; 446 447 switch (q->queue[i]->status) { 448 case DIFF_STATUS_DELETED: 449 printf("D "); 450 print_path(spec->path); 451 string_list_insert(changed, spec->path); 452 putchar('\n'); 453 break; 454 455 case DIFF_STATUS_COPIED: 456 case DIFF_STATUS_RENAMED: 457 /* 458 * If a change in the file corresponding to ospec->path 459 * has been observed, we cannot trust its contents 460 * because the diff is calculated based on the prior 461 * contents, not the current contents. So, declare a 462 * copy or rename only if there was no change observed. 463 */ 464 if (!string_list_has_string(changed, ospec->path)) { 465 printf("%c ", q->queue[i]->status); 466 print_path(ospec->path); 467 putchar(' '); 468 print_path(spec->path); 469 string_list_insert(changed, spec->path); 470 putchar('\n'); 471 472 if (oideq(&ospec->oid, &spec->oid) && 473 ospec->mode == spec->mode) 474 break; 475 } 476 /* fallthrough */ 477 478 case DIFF_STATUS_TYPE_CHANGED: 479 case DIFF_STATUS_MODIFIED: 480 case DIFF_STATUS_ADDED: 481 /* 482 * Links refer to objects in another repositories; 483 * output the SHA-1 verbatim. 484 */ 485 if (no_data || S_ISGITLINK(spec->mode)) 486 printf("M %06o %s ", spec->mode, 487 anonymize ? 488 anonymize_oid(oid_to_hex(&spec->oid)) : 489 oid_to_hex(&spec->oid)); 490 else { 491 struct object *object = lookup_object(the_repository, 492 &spec->oid); 493 printf("M %06o :%d ", spec->mode, 494 get_object_mark(object)); 495 } 496 print_path(spec->path); 497 string_list_insert(changed, spec->path); 498 putchar('\n'); 499 break; 500 501 default: 502 die("Unexpected comparison status '%c' for %s, %s", 503 q->queue[i]->status, 504 ospec->path ? ospec->path : "none", 505 spec->path ? spec->path : "none"); 506 } 507 } 508} 509 510static char *anonymize_ref_component(void) 511{ 512 static int counter; 513 struct strbuf out = STRBUF_INIT; 514 strbuf_addf(&out, "ref%d", counter++); 515 return strbuf_detach(&out, NULL); 516} 517 518static const char *anonymize_refname(const char *refname) 519{ 520 /* 521 * If any of these prefixes is found, we will leave it intact 522 * so that tags remain tags and so forth. 523 */ 524 static const char *prefixes[] = { 525 "refs/heads/", 526 "refs/tags/", 527 "refs/remotes/", 528 "refs/" 529 }; 530 static struct hashmap refs; 531 static struct strbuf anon = STRBUF_INIT; 532 int i; 533 534 strbuf_reset(&anon); 535 for (i = 0; i < ARRAY_SIZE(prefixes); i++) { 536 if (skip_prefix(refname, prefixes[i], &refname)) { 537 strbuf_addstr(&anon, prefixes[i]); 538 break; 539 } 540 } 541 542 anonymize_path(&anon, refname, &refs, anonymize_ref_component); 543 return anon.buf; 544} 545 546/* 547 * We do not even bother to cache commit messages, as they are unlikely 548 * to be repeated verbatim, and it is not that interesting when they are. 549 */ 550static char *anonymize_commit_message(void) 551{ 552 static int counter; 553 return xstrfmt("subject %d\n\nbody\n", counter++); 554} 555 556static char *anonymize_ident(void) 557{ 558 static int counter; 559 struct strbuf out = STRBUF_INIT; 560 strbuf_addf(&out, "User %d <user%d@example.com>", counter, counter); 561 counter++; 562 return strbuf_detach(&out, NULL); 563} 564 565/* 566 * Our strategy here is to anonymize the names and email addresses, 567 * but keep timestamps intact, as they influence things like traversal 568 * order (and by themselves should not be too revealing). 569 */ 570static void anonymize_ident_line(const char **beg, const char **end) 571{ 572 static struct hashmap idents; 573 static struct strbuf buffers[] = { STRBUF_INIT, STRBUF_INIT }; 574 static unsigned which_buffer; 575 576 struct strbuf *out; 577 struct ident_split split; 578 const char *end_of_header; 579 580 out = &buffers[which_buffer++]; 581 which_buffer %= ARRAY_SIZE(buffers); 582 strbuf_reset(out); 583 584 /* skip "committer", "author", "tagger", etc */ 585 end_of_header = strchr(*beg, ' '); 586 if (!end_of_header) 587 BUG("malformed line fed to anonymize_ident_line: %.*s", 588 (int)(*end - *beg), *beg); 589 end_of_header++; 590 strbuf_add(out, *beg, end_of_header - *beg); 591 592 if (!split_ident_line(&split, end_of_header, *end - end_of_header) && 593 split.date_begin) { 594 const char *ident; 595 size_t len; 596 597 len = split.mail_end - split.name_begin; 598 ident = anonymize_str(&idents, anonymize_ident, 599 split.name_begin, len); 600 strbuf_addstr(out, ident); 601 strbuf_addch(out, ' '); 602 strbuf_add(out, split.date_begin, split.tz_end - split.date_begin); 603 } else { 604 strbuf_addstr(out, "Malformed Ident <malformed@example.com> 0 -0000"); 605 } 606 607 *beg = out->buf; 608 *end = out->buf + out->len; 609} 610 611/* 612 * find_commit_multiline_header is similar to find_commit_header, 613 * except that it handles multi-line headers, rather than simply 614 * returning the first line of the header. 615 * 616 * The returned string has had the ' ' line continuation markers 617 * removed, and points to allocated memory that must be free()d (not 618 * to memory within 'msg'). 619 * 620 * If the header is found, then *end is set to point at the '\n' in 621 * msg that immediately follows the header value. 622 */ 623static const char *find_commit_multiline_header(const char *msg, 624 const char *key, 625 const char **end) 626{ 627 struct strbuf val = STRBUF_INIT; 628 const char *bol, *eol; 629 size_t len; 630 631 bol = find_commit_header(msg, key, &len); 632 if (!bol) 633 return NULL; 634 eol = bol + len; 635 strbuf_add(&val, bol, len); 636 637 while (eol[0] == '\n' && eol[1] == ' ') { 638 bol = eol + 2; 639 eol = strchrnul(bol, '\n'); 640 strbuf_addch(&val, '\n'); 641 strbuf_add(&val, bol, eol - bol); 642 } 643 644 *end = eol; 645 return strbuf_detach(&val, NULL); 646} 647 648static void print_signature(const char *signature, const char *object_hash) 649{ 650 if (!signature) 651 return; 652 653 printf("gpgsig %s %s\ndata %u\n%s\n", 654 object_hash, 655 get_signature_format(signature), 656 (unsigned)strlen(signature), 657 signature); 658} 659 660static const char *append_signatures_for_header(struct string_list *signatures, 661 const char *pos, 662 const char *header, 663 const char *object_hash) 664{ 665 const char *signature; 666 const char *start = pos; 667 const char *end = pos; 668 669 while ((signature = find_commit_multiline_header(start + 1, 670 header, 671 &end))) { 672 string_list_append(signatures, signature)->util = (void *)object_hash; 673 free((char *)signature); 674 start = end; 675 } 676 677 return end; 678} 679 680static void handle_commit(struct commit *commit, struct rev_info *rev, 681 struct string_list *paths_of_changed_objects) 682{ 683 int saved_output_format = rev->diffopt.output_format; 684 const char *commit_buffer, *commit_buffer_cursor; 685 const char *author, *author_end, *committer, *committer_end; 686 const char *encoding = NULL; 687 size_t encoding_len; 688 struct string_list signatures = STRING_LIST_INIT_DUP; 689 const char *message; 690 char *reencoded = NULL; 691 struct commit_list *p; 692 const char *refname; 693 int i; 694 695 rev->diffopt.output_format = DIFF_FORMAT_CALLBACK; 696 697 parse_commit_or_die(commit); 698 commit_buffer_cursor = commit_buffer = repo_get_commit_buffer(the_repository, commit, NULL); 699 700 author = strstr(commit_buffer_cursor, "\nauthor "); 701 if (!author) 702 die("could not find author in commit %s", 703 oid_to_hex(&commit->object.oid)); 704 author++; 705 commit_buffer_cursor = author_end = strchrnul(author, '\n'); 706 707 committer = strstr(commit_buffer_cursor, "\ncommitter "); 708 if (!committer) 709 die("could not find committer in commit %s", 710 oid_to_hex(&commit->object.oid)); 711 committer++; 712 commit_buffer_cursor = committer_end = strchrnul(committer, '\n'); 713 714 /* 715 * find_commit_header() and find_commit_multiline_header() get 716 * a `+ 1` because commit_buffer_cursor points at the trailing 717 * "\n" at the end of the previous line, but they want a 718 * pointer to the beginning of the next line. 719 */ 720 721 if (*commit_buffer_cursor == '\n') { 722 encoding = find_commit_header(commit_buffer_cursor + 1, "encoding", &encoding_len); 723 if (encoding) 724 commit_buffer_cursor = encoding + encoding_len; 725 } 726 727 if (*commit_buffer_cursor == '\n') { 728 const char *after_sha1 = append_signatures_for_header(&signatures, commit_buffer_cursor, 729 "gpgsig", "sha1"); 730 const char *after_sha256 = append_signatures_for_header(&signatures, commit_buffer_cursor, 731 "gpgsig-sha256", "sha256"); 732 commit_buffer_cursor = (after_sha1 > after_sha256) ? after_sha1 : after_sha256; 733 } 734 735 message = strstr(commit_buffer_cursor, "\n\n"); 736 if (message) 737 message += 2; 738 739 if (commit->parents && 740 (get_object_mark(&commit->parents->item->object) != 0 || 741 reference_excluded_commits) && 742 !full_tree) { 743 parse_commit_or_die(commit->parents->item); 744 diff_tree_oid(get_commit_tree_oid(commit->parents->item), 745 get_commit_tree_oid(commit), "", &rev->diffopt); 746 } 747 else 748 diff_root_tree_oid(get_commit_tree_oid(commit), 749 "", &rev->diffopt); 750 751 /* Export the referenced blobs, and remember the marks. */ 752 for (i = 0; i < diff_queued_diff.nr; i++) 753 if (!S_ISGITLINK(diff_queued_diff.queue[i]->two->mode)) 754 export_blob(&diff_queued_diff.queue[i]->two->oid); 755 756 refname = *revision_sources_at(&revision_sources, commit); 757 /* 758 * FIXME: string_list_remove() below for each ref is overall 759 * O(N^2). Compared to a history walk and diffing trees, this is 760 * just lost in the noise in practice. However, theoretically a 761 * repo may have enough refs for this to become slow. 762 */ 763 string_list_remove(&extra_refs, refname, 0); 764 if (anonymize) { 765 refname = anonymize_refname(refname); 766 anonymize_ident_line(&committer, &committer_end); 767 anonymize_ident_line(&author, &author_end); 768 } 769 770 mark_next_object(&commit->object); 771 if (anonymize) { 772 reencoded = anonymize_commit_message(); 773 } else if (encoding) { 774 char *buf; 775 switch (reencode_mode) { 776 case REENCODE_YES: 777 buf = xstrfmt("%.*s", (int)encoding_len, encoding); 778 reencoded = reencode_string(message, "UTF-8", buf); 779 free(buf); 780 break; 781 case REENCODE_NO: 782 break; 783 case REENCODE_ABORT: 784 die("Encountered commit-specific encoding %.*s in commit " 785 "%s; use --reencode=[yes|no] to handle it", 786 (int)encoding_len, encoding, 787 oid_to_hex(&commit->object.oid)); 788 } 789 } 790 if (!commit->parents) 791 printf("reset %s\n", refname); 792 printf("commit %s\nmark :%"PRIu32"\n", refname, last_idnum); 793 if (show_original_ids) 794 printf("original-oid %s\n", oid_to_hex(&commit->object.oid)); 795 printf("%.*s\n%.*s\n", 796 (int)(author_end - author), author, 797 (int)(committer_end - committer), committer); 798 if (signatures.nr) { 799 switch (signed_commit_mode) { 800 case SIGN_ABORT: 801 die("encountered signed commit %s; use " 802 "--signed-commits=<mode> to handle it", 803 oid_to_hex(&commit->object.oid)); 804 case SIGN_WARN_VERBATIM: 805 warning("exporting %"PRIuMAX" signature(s) for commit %s", 806 (uintmax_t)signatures.nr, oid_to_hex(&commit->object.oid)); 807 /* fallthru */ 808 case SIGN_VERBATIM: 809 for (size_t i = 0; i < signatures.nr; i++) { 810 struct string_list_item *item = &signatures.items[i]; 811 print_signature(item->string, item->util); 812 } 813 break; 814 case SIGN_WARN_STRIP: 815 warning("stripping signature(s) from commit %s", 816 oid_to_hex(&commit->object.oid)); 817 /* fallthru */ 818 case SIGN_STRIP: 819 break; 820 } 821 string_list_clear(&signatures, 0); 822 } 823 if (!reencoded && encoding) 824 printf("encoding %.*s\n", (int)encoding_len, encoding); 825 printf("data %u\n%s", 826 (unsigned)(reencoded 827 ? strlen(reencoded) : message 828 ? strlen(message) : 0), 829 reencoded ? reencoded : message ? message : ""); 830 free(reencoded); 831 repo_unuse_commit_buffer(the_repository, commit, commit_buffer); 832 833 for (i = 0, p = commit->parents; p; p = p->next) { 834 struct object *obj = &p->item->object; 835 int mark = get_object_mark(obj); 836 837 if (!mark && !reference_excluded_commits) 838 continue; 839 if (i == 0) 840 printf("from "); 841 else 842 printf("merge "); 843 if (mark) 844 printf(":%d\n", mark); 845 else 846 printf("%s\n", 847 anonymize ? 848 anonymize_oid(oid_to_hex(&obj->oid)) : 849 oid_to_hex(&obj->oid)); 850 i++; 851 } 852 853 if (full_tree) 854 printf("deleteall\n"); 855 log_tree_diff_flush(rev); 856 string_list_clear(paths_of_changed_objects, 0); 857 rev->diffopt.output_format = saved_output_format; 858 859 printf("\n"); 860 861 show_progress(); 862} 863 864static char *anonymize_tag(void) 865{ 866 static int counter; 867 struct strbuf out = STRBUF_INIT; 868 strbuf_addf(&out, "tag message %d", counter++); 869 return strbuf_detach(&out, NULL); 870} 871 872 873static void handle_tag(const char *name, struct tag *tag) 874{ 875 unsigned long size; 876 enum object_type type; 877 char *buf; 878 const char *tagger, *tagger_end, *message; 879 size_t message_size = 0; 880 struct object *tagged; 881 int tagged_mark; 882 struct commit *p; 883 884 /* Trees have no identifier in fast-export output, thus we have no way 885 * to output tags of trees, tags of tags of trees, etc. Simply omit 886 * such tags. 887 */ 888 tagged = tag->tagged; 889 while (tagged->type == OBJ_TAG) { 890 tagged = ((struct tag *)tagged)->tagged; 891 } 892 if (tagged->type == OBJ_TREE) { 893 warning("Omitting tag %s,\nsince tags of trees (or tags of tags of trees, etc.) are not supported.", 894 oid_to_hex(&tag->object.oid)); 895 return; 896 } 897 898 buf = odb_read_object(the_repository->objects, &tag->object.oid, 899 &type, &size); 900 if (!buf) 901 die("could not read tag %s", oid_to_hex(&tag->object.oid)); 902 message = memmem(buf, size, "\n\n", 2); 903 if (message) { 904 message += 2; 905 message_size = strlen(message); 906 } 907 tagger = memmem(buf, message ? message - buf : size, "\ntagger ", 8); 908 if (!tagger) { 909 if (fake_missing_tagger) 910 tagger = "tagger Unspecified Tagger " 911 "<unspecified-tagger> 0 +0000"; 912 else 913 tagger = ""; 914 tagger_end = tagger + strlen(tagger); 915 } else { 916 tagger++; 917 tagger_end = strchrnul(tagger, '\n'); 918 if (anonymize) 919 anonymize_ident_line(&tagger, &tagger_end); 920 } 921 922 if (anonymize) { 923 name = anonymize_refname(name); 924 if (message) { 925 static struct hashmap tags; 926 message = anonymize_str(&tags, anonymize_tag, 927 message, message_size); 928 message_size = strlen(message); 929 } 930 } 931 932 /* handle signed tags */ 933 if (message) { 934 size_t sig_offset = parse_signed_buffer(message, message_size); 935 if (sig_offset < message_size) 936 switch (signed_tag_mode) { 937 case SIGN_ABORT: 938 die("encountered signed tag %s; use " 939 "--signed-tags=<mode> to handle it", 940 oid_to_hex(&tag->object.oid)); 941 case SIGN_WARN_VERBATIM: 942 warning("exporting signed tag %s", 943 oid_to_hex(&tag->object.oid)); 944 /* fallthru */ 945 case SIGN_VERBATIM: 946 break; 947 case SIGN_WARN_STRIP: 948 warning("stripping signature from tag %s", 949 oid_to_hex(&tag->object.oid)); 950 /* fallthru */ 951 case SIGN_STRIP: 952 message_size = sig_offset; 953 break; 954 } 955 } 956 957 /* handle tag->tagged having been filtered out due to paths specified */ 958 tagged = tag->tagged; 959 tagged_mark = get_object_mark(tagged); 960 if (!tagged_mark) { 961 switch (tag_of_filtered_mode) { 962 case TAG_FILTERING_ABORT: 963 die("tag %s tags unexported object; use " 964 "--tag-of-filtered-object=<mode> to handle it", 965 oid_to_hex(&tag->object.oid)); 966 case DROP: 967 /* Ignore this tag altogether */ 968 free(buf); 969 return; 970 case REWRITE: 971 if (tagged->type == OBJ_TAG && !mark_tags) { 972 die(_("Error: Cannot export nested tags unless --mark-tags is specified.")); 973 } else if (tagged->type == OBJ_COMMIT) { 974 p = rewrite_commit((struct commit *)tagged); 975 if (!p) { 976 printf("reset %s\nfrom %s\n\n", 977 name, oid_to_hex(null_oid(the_hash_algo))); 978 free(buf); 979 return; 980 } 981 tagged_mark = get_object_mark(&p->object); 982 } else { 983 /* tagged->type is either OBJ_BLOB or OBJ_TAG */ 984 tagged_mark = get_object_mark(tagged); 985 } 986 } 987 } 988 989 if (tagged->type == OBJ_TAG) { 990 printf("reset %s\nfrom %s\n\n", 991 name, oid_to_hex(null_oid(the_hash_algo))); 992 } 993 skip_prefix(name, "refs/tags/", &name); 994 printf("tag %s\n", name); 995 if (mark_tags) { 996 mark_next_object(&tag->object); 997 printf("mark :%"PRIu32"\n", last_idnum); 998 } 999 if (tagged_mark) 1000 printf("from :%d\n", tagged_mark); 1001 else 1002 printf("from %s\n", oid_to_hex(&tagged->oid)); 1003 1004 if (show_original_ids) 1005 printf("original-oid %s\n", oid_to_hex(&tag->object.oid)); 1006 printf("%.*s%sdata %d\n%.*s\n", 1007 (int)(tagger_end - tagger), tagger, 1008 tagger == tagger_end ? "" : "\n", 1009 (int)message_size, (int)message_size, message ? message : ""); 1010 free(buf); 1011} 1012 1013static struct commit *get_commit(struct rev_cmdline_entry *e, const char *full_name) 1014{ 1015 switch (e->item->type) { 1016 case OBJ_COMMIT: 1017 return (struct commit *)e->item; 1018 case OBJ_TAG: { 1019 struct tag *tag = (struct tag *)e->item; 1020 1021 /* handle nested tags */ 1022 while (tag && tag->object.type == OBJ_TAG) { 1023 parse_object(the_repository, &tag->object.oid); 1024 string_list_append(&tag_refs, full_name)->util = tag; 1025 tag = (struct tag *)tag->tagged; 1026 } 1027 if (!tag) 1028 die("Tag %s points nowhere?", e->name); 1029 return (struct commit *)tag; 1030 } 1031 default: 1032 return NULL; 1033 } 1034} 1035 1036static void get_tags_and_duplicates(struct rev_cmdline_info *info) 1037{ 1038 int i; 1039 1040 for (i = 0; i < info->nr; i++) { 1041 struct rev_cmdline_entry *e = info->rev + i; 1042 struct object_id oid; 1043 struct commit *commit; 1044 char *full_name = NULL; 1045 1046 if (e->flags & UNINTERESTING) 1047 continue; 1048 1049 if (repo_dwim_ref(the_repository, e->name, strlen(e->name), 1050 &oid, &full_name, 0) != 1) { 1051 free(full_name); 1052 continue; 1053 } 1054 1055 if (refspecs.nr) { 1056 char *private; 1057 private = apply_refspecs(&refspecs, full_name); 1058 if (private) { 1059 free(full_name); 1060 full_name = private; 1061 } 1062 } 1063 1064 commit = get_commit(e, full_name); 1065 if (!commit) { 1066 warning("%s: Unexpected object of type %s, skipping.", 1067 e->name, 1068 type_name(e->item->type)); 1069 free(full_name); 1070 continue; 1071 } 1072 1073 switch (commit->object.type) { 1074 case OBJ_COMMIT: 1075 break; 1076 case OBJ_BLOB: 1077 export_blob(&commit->object.oid); 1078 free(full_name); 1079 continue; 1080 default: /* OBJ_TAG (nested tags) is already handled */ 1081 warning("Tag points to object of unexpected type %s, skipping.", 1082 type_name(commit->object.type)); 1083 free(full_name); 1084 continue; 1085 } 1086 1087 /* 1088 * Make sure this ref gets properly updated eventually, whether 1089 * through a commit or manually at the end. 1090 */ 1091 if (e->item->type != OBJ_TAG) 1092 string_list_append(&extra_refs, full_name)->util = commit; 1093 1094 if (!*revision_sources_at(&revision_sources, commit)) 1095 *revision_sources_at(&revision_sources, commit) = full_name; 1096 else 1097 free(full_name); 1098 } 1099 1100 string_list_sort(&extra_refs); 1101 string_list_remove_duplicates(&extra_refs, 0); 1102} 1103 1104static void handle_tags_and_duplicates(struct string_list *extras) 1105{ 1106 struct commit *commit; 1107 int i; 1108 1109 for (i = extras->nr - 1; i >= 0; i--) { 1110 const char *name = extras->items[i].string; 1111 struct object *object = extras->items[i].util; 1112 int mark; 1113 1114 switch (object->type) { 1115 case OBJ_TAG: 1116 handle_tag(name, (struct tag *)object); 1117 break; 1118 case OBJ_COMMIT: 1119 if (anonymize) 1120 name = anonymize_refname(name); 1121 /* create refs pointing to already seen commits */ 1122 commit = rewrite_commit((struct commit *)object); 1123 if (!commit) { 1124 /* 1125 * Neither this object nor any of its 1126 * ancestors touch any relevant paths, so 1127 * it has been filtered to nothing. Delete 1128 * it. 1129 */ 1130 printf("reset %s\nfrom %s\n\n", 1131 name, oid_to_hex(null_oid(the_hash_algo))); 1132 continue; 1133 } 1134 1135 mark = get_object_mark(&commit->object); 1136 if (!mark) { 1137 /* 1138 * Getting here means we have a commit which 1139 * was excluded by a negative refspec (e.g. 1140 * fast-export ^HEAD HEAD). If we are 1141 * referencing excluded commits, set the ref 1142 * to the exact commit. Otherwise, the user 1143 * wants the branch exported but every commit 1144 * in its history to be deleted, which basically 1145 * just means deletion of the ref. 1146 */ 1147 if (!reference_excluded_commits) { 1148 /* delete the ref */ 1149 printf("reset %s\nfrom %s\n\n", 1150 name, oid_to_hex(null_oid(the_hash_algo))); 1151 continue; 1152 } 1153 /* set ref to commit using oid, not mark */ 1154 printf("reset %s\nfrom %s\n\n", name, 1155 oid_to_hex(&commit->object.oid)); 1156 continue; 1157 } 1158 1159 printf("reset %s\nfrom :%d\n\n", name, mark 1160 ); 1161 show_progress(); 1162 break; 1163 } 1164 } 1165} 1166 1167static void export_marks(char *file) 1168{ 1169 unsigned int i; 1170 uint32_t mark; 1171 struct decoration_entry *deco = idnums.entries; 1172 FILE *f; 1173 int e = 0; 1174 1175 f = fopen_for_writing(file); 1176 if (!f) 1177 die_errno("Unable to open marks file %s for writing.", file); 1178 1179 for (i = 0; i < idnums.size; i++) { 1180 if (deco->base && deco->base->type == 1) { 1181 mark = ptr_to_mark(deco->decoration); 1182 if (fprintf(f, ":%"PRIu32" %s\n", mark, 1183 oid_to_hex(&deco->base->oid)) < 0) { 1184 e = 1; 1185 break; 1186 } 1187 } 1188 deco++; 1189 } 1190 1191 e |= ferror(f); 1192 e |= fclose(f); 1193 if (e) 1194 error("Unable to write marks file %s.", file); 1195} 1196 1197static void import_marks(char *input_file, int check_exists) 1198{ 1199 char line[512]; 1200 FILE *f; 1201 struct stat sb; 1202 1203 if (check_exists && stat(input_file, &sb)) 1204 return; 1205 1206 f = xfopen(input_file, "r"); 1207 while (fgets(line, sizeof(line), f)) { 1208 uint32_t mark; 1209 char *line_end, *mark_end; 1210 struct object_id oid; 1211 struct object *object; 1212 struct commit *commit; 1213 enum object_type type; 1214 1215 line_end = strchr(line, '\n'); 1216 if (line[0] != ':' || !line_end) 1217 die("corrupt mark line: %s", line); 1218 *line_end = '\0'; 1219 1220 mark = strtoumax(line + 1, &mark_end, 10); 1221 if (!mark || mark_end == line + 1 1222 || *mark_end != ' ' || get_oid_hex(mark_end + 1, &oid)) 1223 die("corrupt mark line: %s", line); 1224 1225 if (last_idnum < mark) 1226 last_idnum = mark; 1227 1228 type = odb_read_object_info(the_repository->objects, &oid, NULL); 1229 if (type < 0) 1230 die("object not found: %s", oid_to_hex(&oid)); 1231 1232 if (type != OBJ_COMMIT) 1233 /* only commits */ 1234 continue; 1235 1236 commit = lookup_commit(the_repository, &oid); 1237 if (!commit) 1238 die("not a commit? can't happen: %s", oid_to_hex(&oid)); 1239 1240 object = &commit->object; 1241 1242 if (object->flags & SHOWN) 1243 error("Object %s already has a mark", oid_to_hex(&oid)); 1244 1245 mark_object(object, mark); 1246 1247 object->flags |= SHOWN; 1248 } 1249 fclose(f); 1250} 1251 1252static void handle_deletes(void) 1253{ 1254 int i; 1255 for (i = 0; i < refspecs.nr; i++) { 1256 struct refspec_item *refspec = &refspecs.items[i]; 1257 if (*refspec->src) 1258 continue; 1259 1260 printf("reset %s\nfrom %s\n\n", 1261 refspec->dst, oid_to_hex(null_oid(the_hash_algo))); 1262 } 1263} 1264 1265static int parse_opt_anonymize_map(const struct option *opt, 1266 const char *arg, int unset) 1267{ 1268 struct hashmap *map = opt->value; 1269 const char *delim, *value; 1270 size_t keylen; 1271 1272 BUG_ON_OPT_NEG(unset); 1273 1274 delim = strchr(arg, ':'); 1275 if (delim) { 1276 keylen = delim - arg; 1277 value = delim + 1; 1278 } else { 1279 keylen = strlen(arg); 1280 value = arg; 1281 } 1282 1283 if (!keylen || !*value) 1284 return error(_("--anonymize-map token cannot be empty")); 1285 1286 add_anonymized_entry(map, memhash(arg, keylen), arg, keylen, 1287 xstrdup(value)); 1288 1289 return 0; 1290} 1291 1292int cmd_fast_export(int argc, 1293 const char **argv, 1294 const char *prefix, 1295 struct repository *repo UNUSED) 1296{ 1297 struct rev_info revs; 1298 struct commit *commit; 1299 char *export_filename = NULL, 1300 *import_filename = NULL, 1301 *import_filename_if_exists = NULL; 1302 uint32_t lastimportid; 1303 struct string_list refspecs_list = STRING_LIST_INIT_NODUP; 1304 struct string_list paths_of_changed_objects = STRING_LIST_INIT_DUP; 1305 struct option options[] = { 1306 OPT_INTEGER(0, "progress", &progress, 1307 N_("show progress after <n> objects")), 1308 OPT_CALLBACK(0, "signed-tags", &signed_tag_mode, N_("mode"), 1309 N_("select handling of signed tags"), 1310 parse_opt_sign_mode), 1311 OPT_CALLBACK(0, "signed-commits", &signed_commit_mode, N_("mode"), 1312 N_("select handling of signed commits"), 1313 parse_opt_sign_mode), 1314 OPT_CALLBACK(0, "tag-of-filtered-object", &tag_of_filtered_mode, N_("mode"), 1315 N_("select handling of tags that tag filtered objects"), 1316 parse_opt_tag_of_filtered_mode), 1317 OPT_CALLBACK(0, "reencode", &reencode_mode, N_("mode"), 1318 N_("select handling of commit messages in an alternate encoding"), 1319 parse_opt_reencode_mode), 1320 OPT_STRING(0, "export-marks", &export_filename, N_("file"), 1321 N_("dump marks to this file")), 1322 OPT_STRING(0, "import-marks", &import_filename, N_("file"), 1323 N_("import marks from this file")), 1324 OPT_STRING(0, "import-marks-if-exists", 1325 &import_filename_if_exists, 1326 N_("file"), 1327 N_("import marks from this file if it exists")), 1328 OPT_BOOL(0, "fake-missing-tagger", &fake_missing_tagger, 1329 N_("fake a tagger when tags lack one")), 1330 OPT_BOOL(0, "full-tree", &full_tree, 1331 N_("output full tree for each commit")), 1332 OPT_BOOL(0, "use-done-feature", &use_done_feature, 1333 N_("use the done feature to terminate the stream")), 1334 OPT_BOOL(0, "no-data", &no_data, N_("skip output of blob data")), 1335 OPT_STRING_LIST(0, "refspec", &refspecs_list, N_("refspec"), 1336 N_("apply refspec to exported refs")), 1337 OPT_BOOL(0, "anonymize", &anonymize, N_("anonymize output")), 1338 OPT_CALLBACK_F(0, "anonymize-map", &anonymized_seeds, N_("from:to"), 1339 N_("convert <from> to <to> in anonymized output"), 1340 PARSE_OPT_NONEG, parse_opt_anonymize_map), 1341 OPT_BOOL(0, "reference-excluded-parents", 1342 &reference_excluded_commits, N_("reference parents which are not in fast-export stream by object id")), 1343 OPT_BOOL(0, "show-original-ids", &show_original_ids, 1344 N_("show original object ids of blobs/commits")), 1345 OPT_BOOL(0, "mark-tags", &mark_tags, 1346 N_("label tags with mark ids")), 1347 1348 OPT_END() 1349 }; 1350 1351 if (argc == 1) 1352 usage_with_options (fast_export_usage, options); 1353 1354 /* we handle encodings */ 1355 repo_config(the_repository, git_default_config, NULL); 1356 1357 repo_init_revisions(the_repository, &revs, prefix); 1358 init_revision_sources(&revision_sources); 1359 revs.topo_order = 1; 1360 revs.sources = &revision_sources; 1361 revs.rewrite_parents = 1; 1362 argc = parse_options(argc, argv, prefix, options, fast_export_usage, 1363 PARSE_OPT_KEEP_ARGV0 | PARSE_OPT_KEEP_UNKNOWN_OPT); 1364 argc = setup_revisions(argc, argv, &revs, NULL); 1365 if (argc > 1) 1366 usage_with_options (fast_export_usage, options); 1367 1368 if (anonymized_seeds.cmpfn && !anonymize) 1369 die(_("the option '%s' requires '%s'"), "--anonymize-map", "--anonymize"); 1370 1371 if (refspecs_list.nr) { 1372 int i; 1373 1374 for (i = 0; i < refspecs_list.nr; i++) 1375 refspec_append(&refspecs, refspecs_list.items[i].string); 1376 1377 string_list_clear(&refspecs_list, 1); 1378 } 1379 1380 if (use_done_feature) 1381 printf("feature done\n"); 1382 1383 if (import_filename && import_filename_if_exists) 1384 die(_("options '%s' and '%s' cannot be used together"), "--import-marks", "--import-marks-if-exists"); 1385 if (import_filename) 1386 import_marks(import_filename, 0); 1387 else if (import_filename_if_exists) 1388 import_marks(import_filename_if_exists, 1); 1389 lastimportid = last_idnum; 1390 1391 if (import_filename && revs.prune_data.nr) 1392 full_tree = 1; 1393 1394 get_tags_and_duplicates(&revs.cmdline); 1395 1396 if (prepare_revision_walk(&revs)) 1397 die("revision walk setup failed"); 1398 1399 revs.reverse = 1; 1400 revs.diffopt.format_callback = show_filemodify; 1401 revs.diffopt.format_callback_data = &paths_of_changed_objects; 1402 revs.diffopt.flags.recursive = 1; 1403 1404 revs.diffopt.no_free = 1; 1405 while ((commit = get_revision(&revs))) 1406 handle_commit(commit, &revs, &paths_of_changed_objects); 1407 revs.diffopt.no_free = 0; 1408 1409 handle_tags_and_duplicates(&extra_refs); 1410 handle_tags_and_duplicates(&tag_refs); 1411 handle_deletes(); 1412 1413 if (export_filename && lastimportid != last_idnum) 1414 export_marks(export_filename); 1415 1416 if (use_done_feature) 1417 printf("done\n"); 1418 1419 refspec_clear(&refspecs); 1420 release_revisions(&revs); 1421 1422 return 0; 1423}