Git fork
1/*
2 * "git fast-export" builtin command
3 *
4 * Copyright (C) 2007 Johannes E. Schindelin
5 */
6
7#define USE_THE_REPOSITORY_VARIABLE
8#define DISABLE_SIGN_COMPARE_WARNINGS
9
10#include "builtin.h"
11#include "config.h"
12#include "environment.h"
13#include "gettext.h"
14#include "hex.h"
15#include "refs.h"
16#include "refspec.h"
17#include "object-file.h"
18#include "odb.h"
19#include "commit.h"
20#include "object.h"
21#include "tag.h"
22#include "diff.h"
23#include "diffcore.h"
24#include "log-tree.h"
25#include "revision.h"
26#include "decorate.h"
27#include "string-list.h"
28#include "utf8.h"
29#include "parse-options.h"
30#include "quote.h"
31#include "remote.h"
32#include "blob.h"
33#include "gpg-interface.h"
34
/* NULL-terminated list of usage strings for "git fast-export". */
static const char *const fast_export_usage[] = {
	N_("git fast-export [<rev-list-opts>]"),
	NULL
};
39
/* When non-zero, show_progress() prints a "progress" line every `progress` objects. */
static int progress;
/* What handle_tag() does when it meets a signed tag (default: abort). */
static enum sign_mode signed_tag_mode = SIGN_ABORT;
/* What handle_commit() does when it meets a signed commit (default: strip). */
static enum sign_mode signed_commit_mode = SIGN_STRIP;
/* What handle_tag() does when the tagged object was not exported (has no mark). */
static enum tag_of_filtered_mode { TAG_FILTERING_ABORT, DROP, REWRITE } tag_of_filtered_mode = TAG_FILTERING_ABORT;
/* What handle_commit() does with a commit that carries an "encoding" header. */
static enum reencode_mode { REENCODE_ABORT, REENCODE_YES, REENCODE_NO } reencode_mode = REENCODE_ABORT;
/* When set, handle_tag() substitutes a placeholder tagger line for tags lacking one. */
static int fake_missing_tagger;
/* NOTE(review): consumer is outside this chunk; presumably controls the "done" feature -- confirm. */
static int use_done_feature;
/* When set, blobs are not exported and file changes reference object ids instead of marks. */
static int no_data;
/* When set, handle_commit() diffs against the empty tree and emits "deleteall". */
static int full_tree;
/* When set, parents without a mark are referenced by object id rather than skipped. */
static int reference_excluded_commits;
/* When set, "original-oid" lines are emitted for blobs, commits and tags. */
static int show_original_ids;
/* When set, handle_tag() assigns and prints a mark for each exported tag. */
static int mark_tags;
/*
 * Refs recorded by get_tags_and_duplicates() that point at commits (util is
 * the commit); handle_commit() removes the ref it exports the commit under.
 */
static struct string_list extra_refs = STRING_LIST_INIT_DUP;
/* One entry per tag seen by get_commit(), keyed by ref name; util is the tag. */
static struct string_list tag_refs = STRING_LIST_INIT_DUP;
/* Refspecs applied to full ref names in get_tags_and_duplicates(). */
static struct refspec refspecs = REFSPEC_INIT_FETCH;
/* When set, paths, ref names, identities, messages and blob contents are anonymized. */
static int anonymize;
/* Manually configured token mappings; anonymize_str() consults these first. */
static struct hashmap anonymized_seeds;
/* Per-commit slab holding the ref name each commit is exported under. */
static struct revision_sources revision_sources;
58
59static int parse_opt_sign_mode(const struct option *opt,
60 const char *arg, int unset)
61{
62 enum sign_mode *val = opt->value;
63
64 if (unset)
65 return 0;
66
67 if (parse_sign_mode(arg, val))
68 return error("Unknown %s mode: %s", opt->long_name, arg);
69
70 return 0;
71}
72
73static int parse_opt_tag_of_filtered_mode(const struct option *opt,
74 const char *arg, int unset)
75{
76 enum tag_of_filtered_mode *val = opt->value;
77
78 if (unset || !strcmp(arg, "abort"))
79 *val = TAG_FILTERING_ABORT;
80 else if (!strcmp(arg, "drop"))
81 *val = DROP;
82 else if (!strcmp(arg, "rewrite"))
83 *val = REWRITE;
84 else
85 return error("Unknown tag-of-filtered mode: %s", arg);
86 return 0;
87}
88
89static int parse_opt_reencode_mode(const struct option *opt,
90 const char *arg, int unset)
91{
92 enum reencode_mode *val = opt->value;
93
94 if (unset) {
95 *val = REENCODE_ABORT;
96 return 0;
97 }
98
99 switch (git_parse_maybe_bool(arg)) {
100 case 0:
101 *val = REENCODE_NO;
102 break;
103 case 1:
104 *val = REENCODE_YES;
105 break;
106 default:
107 if (!strcasecmp(arg, "abort"))
108 *val = REENCODE_ABORT;
109 else
110 return error("Unknown reencoding mode: %s", arg);
111 }
112
113 return 0;
114}
115
/* Decoration table associating objects with the mark number assigned to them. */
static struct decoration idnums;
/* Most recently assigned mark; mark_next_object() increments it before use. */
static uint32_t last_idnum;
/* One cached "original token -> anonymized replacement" mapping. */
struct anonymized_entry {
	struct hashmap_entry hash;	/* hashmap linkage */
	char *anon;			/* replacement text; freed when the entry is replaced */
	const char orig[FLEX_ARRAY];	/* original token, compared with strcmp() */
};
123
/*
 * Lookup key for the anonymization maps.  Unlike a stored entry, the
 * original text is given as pointer plus length.
 */
struct anonymized_entry_key {
	struct hashmap_entry hash;	/* carries the hash of orig[0..orig_len) */
	const char *orig;		/* start of the token being looked up */
	size_t orig_len;		/* number of bytes of `orig` to compare */
};
129
130static int anonymized_entry_cmp(const void *cmp_data UNUSED,
131 const struct hashmap_entry *eptr,
132 const struct hashmap_entry *entry_or_key,
133 const void *keydata)
134{
135 const struct anonymized_entry *a, *b;
136
137 a = container_of(eptr, const struct anonymized_entry, hash);
138 if (keydata) {
139 const struct anonymized_entry_key *key = keydata;
140 int equal = !xstrncmpz(a->orig, key->orig, key->orig_len);
141 return !equal;
142 }
143
144 b = container_of(entry_or_key, const struct anonymized_entry, hash);
145 return strcmp(a->orig, b->orig);
146}
147
148static struct anonymized_entry *add_anonymized_entry(struct hashmap *map,
149 unsigned hash,
150 const char *orig, size_t len,
151 char *anon)
152{
153 struct anonymized_entry *ret, *old;
154
155 if (!map->cmpfn)
156 hashmap_init(map, anonymized_entry_cmp, NULL, 0);
157
158 FLEX_ALLOC_MEM(ret, orig, orig, len);
159 hashmap_entry_init(&ret->hash, hash);
160 ret->anon = anon;
161 old = hashmap_put_entry(map, ret, hash);
162
163 if (old) {
164 free(old->anon);
165 free(old);
166 }
167
168 return ret;
169}
170
171/*
172 * Basically keep a cache of X->Y so that we can repeatedly replace
173 * the same anonymized string with another. The actual generation
174 * is farmed out to the generate function.
175 */
176static const char *anonymize_str(struct hashmap *map,
177 char *(*generate)(void),
178 const char *orig, size_t len)
179{
180 struct anonymized_entry_key key;
181 struct anonymized_entry *ret;
182
183 hashmap_entry_init(&key.hash, memhash(orig, len));
184 key.orig = orig;
185 key.orig_len = len;
186
187 /* First check if it's a token the user configured manually... */
188 ret = hashmap_get_entry(&anonymized_seeds, &key, hash, &key);
189
190 /* ...otherwise check if we've already seen it in this context... */
191 if (!ret)
192 ret = hashmap_get_entry(map, &key, hash, &key);
193
194 /* ...and finally generate a new mapping if necessary */
195 if (!ret)
196 ret = add_anonymized_entry(map, key.hash.hash,
197 orig, len, generate());
198
199 return ret->anon;
200}
201
/*
 * Append an anonymized version of `path` to `out`.
 *
 * Each '/'-separated component is anonymized on its own, so that the
 * paths a/b and a/c still share a common root.  Components are cached
 * through anonymize_str(), so repeated lookups for "a" yield the same
 * value.  The separators themselves are copied through unchanged.
 */
static void anonymize_path(struct strbuf *out, const char *path,
			   struct hashmap *map,
			   char *(*generate)(void))
{
	const char *component = path;

	while (*component) {
		const char *sep = strchrnul(component, '/');

		strbuf_addstr(out, anonymize_str(map, generate, component,
						 sep - component));
		if (!*sep)
			break;
		strbuf_addch(out, '/');
		component = sep + 1;
	}
}
222
/* Encode a mark number as a pointer value so it can be stored as a decoration. */
static inline void *mark_to_ptr(uint32_t mark)
{
	uintptr_t widened = mark;

	return (void *)widened;
}
227
/* Decode a mark number previously encoded as a pointer value. */
static inline uint32_t ptr_to_mark(void * mark)
{
	uintptr_t raw = (uintptr_t)mark;

	return (uint32_t)raw;
}
232
233static inline void mark_object(struct object *object, uint32_t mark)
234{
235 add_decoration(&idnums, object, mark_to_ptr(mark));
236}
237
238static inline void mark_next_object(struct object *object)
239{
240 mark_object(object, ++last_idnum);
241}
242
243static int get_object_mark(struct object *object)
244{
245 void *decoration = lookup_decoration(&idnums, object);
246 if (!decoration)
247 return 0;
248 return ptr_to_mark(decoration);
249}
250
251static struct commit *rewrite_commit(struct commit *p)
252{
253 for (;;) {
254 if (p->parents && p->parents->next)
255 break;
256 if (p->object.flags & UNINTERESTING)
257 break;
258 if (!(p->object.flags & TREESAME))
259 break;
260 if (!p->parents)
261 return NULL;
262 p = p->parents->item;
263 }
264 return p;
265}
266
267static void show_progress(void)
268{
269 static int counter = 0;
270 if (!progress)
271 return;
272 if ((++counter % progress) == 0)
273 printf("progress %d objects\n", counter);
274}
275
276/*
277 * Ideally we would want some transformation of the blob data here
278 * that is unreversible, but would still be the same size and have
279 * the same data relationship to other blobs (so that we get the same
280 * delta and packing behavior as the original). But the first and last
281 * requirements there are probably mutually exclusive, so let's take
282 * the easy way out for now, and just generate arbitrary content.
283 *
284 * There's no need to cache this result with anonymize_mem, since
285 * we already handle blob content caching with marks.
286 */
287static char *anonymize_blob(unsigned long *size)
288{
289 static int counter;
290 struct strbuf out = STRBUF_INIT;
291 strbuf_addf(&out, "anonymous blob %d", counter++);
292 *size = out.len;
293 return strbuf_detach(&out, NULL);
294}
295
296static void export_blob(const struct object_id *oid)
297{
298 unsigned long size;
299 enum object_type type;
300 char *buf;
301 struct object *object;
302 int eaten;
303
304 if (no_data)
305 return;
306
307 if (is_null_oid(oid))
308 return;
309
310 object = lookup_object(the_repository, oid);
311 if (object && object->flags & SHOWN)
312 return;
313
314 if (anonymize) {
315 buf = anonymize_blob(&size);
316 object = (struct object *)lookup_blob(the_repository, oid);
317 eaten = 0;
318 } else {
319 buf = odb_read_object(the_repository->objects, oid, &type, &size);
320 if (!buf)
321 die("could not read blob %s", oid_to_hex(oid));
322 if (check_object_signature(the_repository, oid, buf, size,
323 type) < 0)
324 die("oid mismatch in blob %s", oid_to_hex(oid));
325 object = parse_object_buffer(the_repository, oid, type,
326 size, buf, &eaten);
327 }
328
329 if (!object)
330 die("Could not read blob %s", oid_to_hex(oid));
331
332 mark_next_object(object);
333
334 printf("blob\nmark :%"PRIu32"\n", last_idnum);
335 if (show_original_ids)
336 printf("original-oid %s\n", oid_to_hex(oid));
337 printf("data %"PRIuMAX"\n", (uintmax_t)size);
338 if (size && fwrite(buf, size, 1, stdout) != 1)
339 die_errno("could not write blob '%s'", oid_to_hex(oid));
340 printf("\n");
341
342 show_progress();
343
344 object->flags |= SHOWN;
345 if (!eaten)
346 free(buf);
347}
348
349static int depth_first(const void *a_, const void *b_)
350{
351 const struct diff_filepair *a = *((const struct diff_filepair **)a_);
352 const struct diff_filepair *b = *((const struct diff_filepair **)b_);
353 const char *name_a, *name_b;
354 int len_a, len_b, len;
355 int cmp;
356
357 name_a = a->one ? a->one->path : a->two->path;
358 name_b = b->one ? b->one->path : b->two->path;
359
360 len_a = strlen(name_a);
361 len_b = strlen(name_b);
362 len = (len_a < len_b) ? len_a : len_b;
363
364 /* strcmp will sort 'd' before 'd/e', we want 'd/e' before 'd' */
365 cmp = memcmp(name_a, name_b, len);
366 if (cmp)
367 return cmp;
368 cmp = len_b - len_a;
369 if (cmp)
370 return cmp;
371 /*
372 * Move 'R'ename entries last so that all references of the file
373 * appear in the output before it is renamed (e.g., when a file
374 * was copied and renamed in the same commit).
375 */
376 return (a->status == 'R') - (b->status == 'R');
377}
378
/*
 * Write `path` to stdout: C-style quoted when quote_c_style() reports
 * that quoting is needed, wrapped in plain double quotes when it only
 * contains a space, and verbatim otherwise.
 */
static void print_path_1(const char *path)
{
	if (quote_c_style(path, NULL, NULL, 0)) {
		quote_c_style(path, NULL, stdout, 0);
		return;
	}

	if (strchr(path, ' '))
		printf("\"%s\"", path);
	else
		printf("%s", path);
}
389
/* Generator for anonymized path components: "path0", "path1", ... (caller frees). */
static char *anonymize_path_component(void)
{
	static int counter;

	return xstrfmt("path%d", counter++);
}
397
398static void print_path(const char *path)
399{
400 if (!anonymize)
401 print_path_1(path);
402 else {
403 static struct hashmap paths;
404 static struct strbuf anon = STRBUF_INIT;
405
406 anonymize_path(&anon, path, &paths, anonymize_path_component);
407 print_path_1(anon.buf);
408 strbuf_reset(&anon);
409 }
410}
411
412static char *generate_fake_oid(void)
413{
414 static uint32_t counter = 1; /* avoid null oid */
415 const unsigned hashsz = the_hash_algo->rawsz;
416 struct object_id oid;
417 char *hex = xmallocz(GIT_MAX_HEXSZ);
418
419 oidclr(&oid, the_repository->hash_algo);
420 put_be32(oid.hash + hashsz - 4, counter++);
421 return oid_to_hex_r(hex, &oid);
422}
423
424static const char *anonymize_oid(const char *oid_hex)
425{
426 static struct hashmap objs;
427 size_t len = strlen(oid_hex);
428 return anonymize_str(&objs, generate_fake_oid, oid_hex, len);
429}
430
/*
 * Diff callback: print one file-change command ("D", "C"/"R" or "M")
 * for every queued file pair.
 *
 * `data` is a string_list of paths already emitted for the current
 * commit; every path written here is added to it.
 */
static void show_filemodify(struct diff_queue_struct *q,
			    struct diff_options *options UNUSED, void *data)
{
	int i;
	struct string_list *changed = data;

	/*
	 * Handle files below a directory first, in case they are all deleted
	 * and the directory changes to a file or symlink.
	 */
	QSORT(q->queue, q->nr, depth_first);

	for (i = 0; i < q->nr; i++) {
		struct diff_filespec *ospec = q->queue[i]->one;
		struct diff_filespec *spec = q->queue[i]->two;

		switch (q->queue[i]->status) {
		case DIFF_STATUS_DELETED:
			printf("D ");
			print_path(spec->path);
			string_list_insert(changed, spec->path);
			putchar('\n');
			break;

		case DIFF_STATUS_COPIED:
		case DIFF_STATUS_RENAMED:
			/*
			 * If a change in the file corresponding to ospec->path
			 * has been observed, we cannot trust its contents
			 * because the diff is calculated based on the prior
			 * contents, not the current contents.  So, declare a
			 * copy or rename only if there was no change observed.
			 */
			if (!string_list_has_string(changed, ospec->path)) {
				printf("%c ", q->queue[i]->status);
				print_path(ospec->path);
				putchar(' ');
				print_path(spec->path);
				string_list_insert(changed, spec->path);
				putchar('\n');

				/* identical content and mode: nothing more to emit */
				if (oideq(&ospec->oid, &spec->oid) &&
				    ospec->mode == spec->mode)
					break;
			}
			/* fallthrough */

		case DIFF_STATUS_TYPE_CHANGED:
		case DIFF_STATUS_MODIFIED:
		case DIFF_STATUS_ADDED:
			/*
			 * Links refer to objects in another repository;
			 * output the object id verbatim (anonymized when
			 * requested).  The same form is used when blob data
			 * is not exported.
			 */
			if (no_data || S_ISGITLINK(spec->mode))
				printf("M %06o %s ", spec->mode,
				       anonymize ?
				       anonymize_oid(oid_to_hex(&spec->oid)) :
				       oid_to_hex(&spec->oid));
			else {
				/* reference the blob by its mark */
				struct object *object = lookup_object(the_repository,
								      &spec->oid);
				printf("M %06o :%d ", spec->mode,
				       get_object_mark(object));
			}
			print_path(spec->path);
			string_list_insert(changed, spec->path);
			putchar('\n');
			break;

		default:
			die("Unexpected comparison status '%c' for %s, %s",
				q->queue[i]->status,
				ospec->path ? ospec->path : "none",
				spec->path ? spec->path : "none");
		}
	}
}
509
/* Generator for anonymized ref name components: "ref0", "ref1", ... (caller frees). */
static char *anonymize_ref_component(void)
{
	static int counter;

	return xstrfmt("ref%d", counter++);
}
517
518static const char *anonymize_refname(const char *refname)
519{
520 /*
521 * If any of these prefixes is found, we will leave it intact
522 * so that tags remain tags and so forth.
523 */
524 static const char *prefixes[] = {
525 "refs/heads/",
526 "refs/tags/",
527 "refs/remotes/",
528 "refs/"
529 };
530 static struct hashmap refs;
531 static struct strbuf anon = STRBUF_INIT;
532 int i;
533
534 strbuf_reset(&anon);
535 for (i = 0; i < ARRAY_SIZE(prefixes); i++) {
536 if (skip_prefix(refname, prefixes[i], &refname)) {
537 strbuf_addstr(&anon, prefixes[i]);
538 break;
539 }
540 }
541
542 anonymize_path(&anon, refname, &refs, anonymize_ref_component);
543 return anon.buf;
544}
545
546/*
547 * We do not even bother to cache commit messages, as they are unlikely
548 * to be repeated verbatim, and it is not that interesting when they are.
549 */
550static char *anonymize_commit_message(void)
551{
552 static int counter;
553 return xstrfmt("subject %d\n\nbody\n", counter++);
554}
555
/* Generator for anonymized identities: "User N <userN@example.com>" (caller frees). */
static char *anonymize_ident(void)
{
	static int counter;
	int id = counter++;

	return xstrfmt("User %d <user%d@example.com>", id, id);
}
564
565/*
566 * Our strategy here is to anonymize the names and email addresses,
567 * but keep timestamps intact, as they influence things like traversal
568 * order (and by themselves should not be too revealing).
569 */
570static void anonymize_ident_line(const char **beg, const char **end)
571{
572 static struct hashmap idents;
573 static struct strbuf buffers[] = { STRBUF_INIT, STRBUF_INIT };
574 static unsigned which_buffer;
575
576 struct strbuf *out;
577 struct ident_split split;
578 const char *end_of_header;
579
580 out = &buffers[which_buffer++];
581 which_buffer %= ARRAY_SIZE(buffers);
582 strbuf_reset(out);
583
584 /* skip "committer", "author", "tagger", etc */
585 end_of_header = strchr(*beg, ' ');
586 if (!end_of_header)
587 BUG("malformed line fed to anonymize_ident_line: %.*s",
588 (int)(*end - *beg), *beg);
589 end_of_header++;
590 strbuf_add(out, *beg, end_of_header - *beg);
591
592 if (!split_ident_line(&split, end_of_header, *end - end_of_header) &&
593 split.date_begin) {
594 const char *ident;
595 size_t len;
596
597 len = split.mail_end - split.name_begin;
598 ident = anonymize_str(&idents, anonymize_ident,
599 split.name_begin, len);
600 strbuf_addstr(out, ident);
601 strbuf_addch(out, ' ');
602 strbuf_add(out, split.date_begin, split.tz_end - split.date_begin);
603 } else {
604 strbuf_addstr(out, "Malformed Ident <malformed@example.com> 0 -0000");
605 }
606
607 *beg = out->buf;
608 *end = out->buf + out->len;
609}
610
611/*
612 * find_commit_multiline_header is similar to find_commit_header,
613 * except that it handles multi-line headers, rather than simply
614 * returning the first line of the header.
615 *
616 * The returned string has had the ' ' line continuation markers
617 * removed, and points to allocated memory that must be free()d (not
618 * to memory within 'msg').
619 *
620 * If the header is found, then *end is set to point at the '\n' in
621 * msg that immediately follows the header value.
622 */
623static const char *find_commit_multiline_header(const char *msg,
624 const char *key,
625 const char **end)
626{
627 struct strbuf val = STRBUF_INIT;
628 const char *bol, *eol;
629 size_t len;
630
631 bol = find_commit_header(msg, key, &len);
632 if (!bol)
633 return NULL;
634 eol = bol + len;
635 strbuf_add(&val, bol, len);
636
637 while (eol[0] == '\n' && eol[1] == ' ') {
638 bol = eol + 2;
639 eol = strchrnul(bol, '\n');
640 strbuf_addch(&val, '\n');
641 strbuf_add(&val, bol, eol - bol);
642 }
643
644 *end = eol;
645 return strbuf_detach(&val, NULL);
646}
647
/*
 * Emit a "gpgsig" command carrying `signature`, labelled with the hash
 * algorithm name `object_hash` and the format reported by
 * get_signature_format().  Does nothing when `signature` is NULL.
 *
 * The data length is printed at full width (as export_blob() does)
 * instead of being truncated to `unsigned`.
 */
static void print_signature(const char *signature, const char *object_hash)
{
	if (!signature)
		return;

	printf("gpgsig %s %s\ndata %"PRIuMAX"\n%s\n",
	       object_hash,
	       get_signature_format(signature),
	       (uintmax_t)strlen(signature),
	       signature);
}
659
660static const char *append_signatures_for_header(struct string_list *signatures,
661 const char *pos,
662 const char *header,
663 const char *object_hash)
664{
665 const char *signature;
666 const char *start = pos;
667 const char *end = pos;
668
669 while ((signature = find_commit_multiline_header(start + 1,
670 header,
671 &end))) {
672 string_list_append(signatures, signature)->util = (void *)object_hash;
673 free((char *)signature);
674 start = end;
675 }
676
677 return end;
678}
679
680static void handle_commit(struct commit *commit, struct rev_info *rev,
681 struct string_list *paths_of_changed_objects)
682{
683 int saved_output_format = rev->diffopt.output_format;
684 const char *commit_buffer, *commit_buffer_cursor;
685 const char *author, *author_end, *committer, *committer_end;
686 const char *encoding = NULL;
687 size_t encoding_len;
688 struct string_list signatures = STRING_LIST_INIT_DUP;
689 const char *message;
690 char *reencoded = NULL;
691 struct commit_list *p;
692 const char *refname;
693 int i;
694
695 rev->diffopt.output_format = DIFF_FORMAT_CALLBACK;
696
697 parse_commit_or_die(commit);
698 commit_buffer_cursor = commit_buffer = repo_get_commit_buffer(the_repository, commit, NULL);
699
700 author = strstr(commit_buffer_cursor, "\nauthor ");
701 if (!author)
702 die("could not find author in commit %s",
703 oid_to_hex(&commit->object.oid));
704 author++;
705 commit_buffer_cursor = author_end = strchrnul(author, '\n');
706
707 committer = strstr(commit_buffer_cursor, "\ncommitter ");
708 if (!committer)
709 die("could not find committer in commit %s",
710 oid_to_hex(&commit->object.oid));
711 committer++;
712 commit_buffer_cursor = committer_end = strchrnul(committer, '\n');
713
714 /*
715 * find_commit_header() and find_commit_multiline_header() get
716 * a `+ 1` because commit_buffer_cursor points at the trailing
717 * "\n" at the end of the previous line, but they want a
718 * pointer to the beginning of the next line.
719 */
720
721 if (*commit_buffer_cursor == '\n') {
722 encoding = find_commit_header(commit_buffer_cursor + 1, "encoding", &encoding_len);
723 if (encoding)
724 commit_buffer_cursor = encoding + encoding_len;
725 }
726
727 if (*commit_buffer_cursor == '\n') {
728 const char *after_sha1 = append_signatures_for_header(&signatures, commit_buffer_cursor,
729 "gpgsig", "sha1");
730 const char *after_sha256 = append_signatures_for_header(&signatures, commit_buffer_cursor,
731 "gpgsig-sha256", "sha256");
732 commit_buffer_cursor = (after_sha1 > after_sha256) ? after_sha1 : after_sha256;
733 }
734
735 message = strstr(commit_buffer_cursor, "\n\n");
736 if (message)
737 message += 2;
738
739 if (commit->parents &&
740 (get_object_mark(&commit->parents->item->object) != 0 ||
741 reference_excluded_commits) &&
742 !full_tree) {
743 parse_commit_or_die(commit->parents->item);
744 diff_tree_oid(get_commit_tree_oid(commit->parents->item),
745 get_commit_tree_oid(commit), "", &rev->diffopt);
746 }
747 else
748 diff_root_tree_oid(get_commit_tree_oid(commit),
749 "", &rev->diffopt);
750
751 /* Export the referenced blobs, and remember the marks. */
752 for (i = 0; i < diff_queued_diff.nr; i++)
753 if (!S_ISGITLINK(diff_queued_diff.queue[i]->two->mode))
754 export_blob(&diff_queued_diff.queue[i]->two->oid);
755
756 refname = *revision_sources_at(&revision_sources, commit);
757 /*
758 * FIXME: string_list_remove() below for each ref is overall
759 * O(N^2). Compared to a history walk and diffing trees, this is
760 * just lost in the noise in practice. However, theoretically a
761 * repo may have enough refs for this to become slow.
762 */
763 string_list_remove(&extra_refs, refname, 0);
764 if (anonymize) {
765 refname = anonymize_refname(refname);
766 anonymize_ident_line(&committer, &committer_end);
767 anonymize_ident_line(&author, &author_end);
768 }
769
770 mark_next_object(&commit->object);
771 if (anonymize) {
772 reencoded = anonymize_commit_message();
773 } else if (encoding) {
774 char *buf;
775 switch (reencode_mode) {
776 case REENCODE_YES:
777 buf = xstrfmt("%.*s", (int)encoding_len, encoding);
778 reencoded = reencode_string(message, "UTF-8", buf);
779 free(buf);
780 break;
781 case REENCODE_NO:
782 break;
783 case REENCODE_ABORT:
784 die("Encountered commit-specific encoding %.*s in commit "
785 "%s; use --reencode=[yes|no] to handle it",
786 (int)encoding_len, encoding,
787 oid_to_hex(&commit->object.oid));
788 }
789 }
790 if (!commit->parents)
791 printf("reset %s\n", refname);
792 printf("commit %s\nmark :%"PRIu32"\n", refname, last_idnum);
793 if (show_original_ids)
794 printf("original-oid %s\n", oid_to_hex(&commit->object.oid));
795 printf("%.*s\n%.*s\n",
796 (int)(author_end - author), author,
797 (int)(committer_end - committer), committer);
798 if (signatures.nr) {
799 switch (signed_commit_mode) {
800 case SIGN_ABORT:
801 die("encountered signed commit %s; use "
802 "--signed-commits=<mode> to handle it",
803 oid_to_hex(&commit->object.oid));
804 case SIGN_WARN_VERBATIM:
805 warning("exporting %"PRIuMAX" signature(s) for commit %s",
806 (uintmax_t)signatures.nr, oid_to_hex(&commit->object.oid));
807 /* fallthru */
808 case SIGN_VERBATIM:
809 for (size_t i = 0; i < signatures.nr; i++) {
810 struct string_list_item *item = &signatures.items[i];
811 print_signature(item->string, item->util);
812 }
813 break;
814 case SIGN_WARN_STRIP:
815 warning("stripping signature(s) from commit %s",
816 oid_to_hex(&commit->object.oid));
817 /* fallthru */
818 case SIGN_STRIP:
819 break;
820 }
821 string_list_clear(&signatures, 0);
822 }
823 if (!reencoded && encoding)
824 printf("encoding %.*s\n", (int)encoding_len, encoding);
825 printf("data %u\n%s",
826 (unsigned)(reencoded
827 ? strlen(reencoded) : message
828 ? strlen(message) : 0),
829 reencoded ? reencoded : message ? message : "");
830 free(reencoded);
831 repo_unuse_commit_buffer(the_repository, commit, commit_buffer);
832
833 for (i = 0, p = commit->parents; p; p = p->next) {
834 struct object *obj = &p->item->object;
835 int mark = get_object_mark(obj);
836
837 if (!mark && !reference_excluded_commits)
838 continue;
839 if (i == 0)
840 printf("from ");
841 else
842 printf("merge ");
843 if (mark)
844 printf(":%d\n", mark);
845 else
846 printf("%s\n",
847 anonymize ?
848 anonymize_oid(oid_to_hex(&obj->oid)) :
849 oid_to_hex(&obj->oid));
850 i++;
851 }
852
853 if (full_tree)
854 printf("deleteall\n");
855 log_tree_diff_flush(rev);
856 string_list_clear(paths_of_changed_objects, 0);
857 rev->diffopt.output_format = saved_output_format;
858
859 printf("\n");
860
861 show_progress();
862}
863
/* Generator for anonymized tag messages: "tag message N" (caller frees). */
static char *anonymize_tag(void)
{
	static int counter;

	return xstrfmt("tag message %d", counter++);
}
871
872
/*
 * Emit the fast-import "tag" command for the annotated tag `tag`,
 * exported under the ref name `name`.
 *
 * Tags that ultimately point at a tree are skipped with a warning.
 * Signed tags are handled according to signed_tag_mode, and tags whose
 * target was not exported according to tag_of_filtered_mode.
 */
static void handle_tag(const char *name, struct tag *tag)
{
	unsigned long size;
	enum object_type type;
	char *buf;
	const char *tagger, *tagger_end, *message;
	size_t message_size = 0;
	struct object *tagged;
	int tagged_mark;
	struct commit *p;

	/* Trees have no identifier in fast-export output, thus we have no way
	 * to output tags of trees, tags of tags of trees, etc.  Simply omit
	 * such tags.
	 */
	tagged = tag->tagged;
	while (tagged->type == OBJ_TAG) {
		tagged = ((struct tag *)tagged)->tagged;
	}
	if (tagged->type == OBJ_TREE) {
		warning("Omitting tag %s,\nsince tags of trees (or tags of tags of trees, etc.) are not supported.",
			oid_to_hex(&tag->object.oid));
		return;
	}

	buf = odb_read_object(the_repository->objects, &tag->object.oid,
			      &type, &size);
	if (!buf)
		die("could not read tag %s", oid_to_hex(&tag->object.oid));
	/* the message follows the first blank line */
	message = memmem(buf, size, "\n\n", 2);
	if (message) {
		message += 2;
		message_size = strlen(message);
	}
	/* look for the tagger only within the header part */
	tagger = memmem(buf, message ? message - buf : size, "\ntagger ", 8);
	if (!tagger) {
		if (fake_missing_tagger)
			tagger = "tagger Unspecified Tagger "
				"<unspecified-tagger> 0 +0000";
		else
			tagger = "";
		tagger_end = tagger + strlen(tagger);
	} else {
		tagger++;
		tagger_end = strchrnul(tagger, '\n');
		if (anonymize)
			anonymize_ident_line(&tagger, &tagger_end);
	}

	if (anonymize) {
		name = anonymize_refname(name);
		if (message) {
			static struct hashmap tags;
			message = anonymize_str(&tags, anonymize_tag,
						message, message_size);
			message_size = strlen(message);
		}
	}

	/* handle signed tags */
	if (message) {
		size_t sig_offset = parse_signed_buffer(message, message_size);
		if (sig_offset < message_size)
			switch (signed_tag_mode) {
			case SIGN_ABORT:
				die("encountered signed tag %s; use "
				    "--signed-tags=<mode> to handle it",
				    oid_to_hex(&tag->object.oid));
			case SIGN_WARN_VERBATIM:
				warning("exporting signed tag %s",
					oid_to_hex(&tag->object.oid));
				/* fallthru */
			case SIGN_VERBATIM:
				break;
			case SIGN_WARN_STRIP:
				warning("stripping signature from tag %s",
					oid_to_hex(&tag->object.oid));
				/* fallthru */
			case SIGN_STRIP:
				/* cut the message off where the signature begins */
				message_size = sig_offset;
				break;
			}
	}

	/* handle tag->tagged having been filtered out due to paths specified */
	tagged = tag->tagged;
	tagged_mark = get_object_mark(tagged);
	if (!tagged_mark) {
		switch (tag_of_filtered_mode) {
		case TAG_FILTERING_ABORT:
			die("tag %s tags unexported object; use "
			    "--tag-of-filtered-object=<mode> to handle it",
			    oid_to_hex(&tag->object.oid));
		case DROP:
			/* Ignore this tag altogether */
			free(buf);
			return;
		case REWRITE:
			if (tagged->type == OBJ_TAG && !mark_tags) {
				die(_("Error: Cannot export nested tags unless --mark-tags is specified."));
			} else if (tagged->type == OBJ_COMMIT) {
				p = rewrite_commit((struct commit *)tagged);
				if (!p) {
					/* no ancestor left to point at: delete the ref instead */
					printf("reset %s\nfrom %s\n\n",
					       name, oid_to_hex(null_oid(the_hash_algo)));
					free(buf);
					return;
				}
				tagged_mark = get_object_mark(&p->object);
			} else {
				/* tagged->type is either OBJ_BLOB or OBJ_TAG */
				tagged_mark = get_object_mark(tagged);
			}
		}
	}

	if (tagged->type == OBJ_TAG) {
		printf("reset %s\nfrom %s\n\n",
		       name, oid_to_hex(null_oid(the_hash_algo)));
	}
	/* the "tag" command takes the short tag name */
	skip_prefix(name, "refs/tags/", &name);
	printf("tag %s\n", name);
	if (mark_tags) {
		mark_next_object(&tag->object);
		printf("mark :%"PRIu32"\n", last_idnum);
	}
	if (tagged_mark)
		printf("from :%d\n", tagged_mark);
	else
		printf("from %s\n", oid_to_hex(&tagged->oid));

	if (show_original_ids)
		printf("original-oid %s\n", oid_to_hex(&tag->object.oid));
	/* the tagger line (if any) gets its own line before the data block */
	printf("%.*s%sdata %d\n%.*s\n",
	       (int)(tagger_end - tagger), tagger,
	       tagger == tagger_end ? "" : "\n",
	       (int)message_size, (int)message_size, message ? message : "");
	free(buf);
}
1012
1013static struct commit *get_commit(struct rev_cmdline_entry *e, const char *full_name)
1014{
1015 switch (e->item->type) {
1016 case OBJ_COMMIT:
1017 return (struct commit *)e->item;
1018 case OBJ_TAG: {
1019 struct tag *tag = (struct tag *)e->item;
1020
1021 /* handle nested tags */
1022 while (tag && tag->object.type == OBJ_TAG) {
1023 parse_object(the_repository, &tag->object.oid);
1024 string_list_append(&tag_refs, full_name)->util = tag;
1025 tag = (struct tag *)tag->tagged;
1026 }
1027 if (!tag)
1028 die("Tag %s points nowhere?", e->name);
1029 return (struct commit *)tag;
1030 }
1031 default:
1032 return NULL;
1033 }
1034}
1035
/*
 * Walk the revisions given on the command line and record, for each
 * interesting one that resolves to a ref:
 *   - tags, in tag_refs (done by get_commit());
 *   - refs pointing at commits, in extra_refs;
 *   - the ref name a commit is exported under, in revision_sources
 *     (the first ref seen for a commit wins).
 * Blobs reached through tags are exported right away.
 */
static void get_tags_and_duplicates(struct rev_cmdline_info *info)
{
	int i;

	for (i = 0; i < info->nr; i++) {
		struct rev_cmdline_entry *e = info->rev + i;
		struct object_id oid;
		struct commit *commit;
		char *full_name = NULL;

		if (e->flags & UNINTERESTING)
			continue;

		/* only names that resolve to exactly one ref are considered */
		if (repo_dwim_ref(the_repository, e->name, strlen(e->name),
				  &oid, &full_name, 0) != 1) {
			free(full_name);
			continue;
		}

		if (refspecs.nr) {
			char *private;
			private = apply_refspecs(&refspecs, full_name);
			if (private) {
				/* use the refspec-mapped name from here on */
				free(full_name);
				full_name = private;
			}
		}

		commit = get_commit(e, full_name);
		if (!commit) {
			warning("%s: Unexpected object of type %s, skipping.",
				e->name,
				type_name(e->item->type));
			free(full_name);
			continue;
		}

		/* get_commit() may return a non-commit at the end of a tag chain */
		switch (commit->object.type) {
		case OBJ_COMMIT:
			break;
		case OBJ_BLOB:
			export_blob(&commit->object.oid);
			free(full_name);
			continue;
		default: /* OBJ_TAG (nested tags) is already handled */
			warning("Tag points to object of unexpected type %s, skipping.",
				type_name(commit->object.type));
			free(full_name);
			continue;
		}

		/*
		 * Make sure this ref gets properly updated eventually, whether
		 * through a commit or manually at the end.
		 */
		if (e->item->type != OBJ_TAG)
			string_list_append(&extra_refs, full_name)->util = commit;

		/* the slab takes ownership of full_name only if the slot was empty */
		if (!*revision_sources_at(&revision_sources, commit))
			*revision_sources_at(&revision_sources, commit) = full_name;
		else
			free(full_name);
	}

	string_list_sort(&extra_refs);
	string_list_remove_duplicates(&extra_refs, 0);
}
1103
1104static void handle_tags_and_duplicates(struct string_list *extras)
1105{
1106 struct commit *commit;
1107 int i;
1108
1109 for (i = extras->nr - 1; i >= 0; i--) {
1110 const char *name = extras->items[i].string;
1111 struct object *object = extras->items[i].util;
1112 int mark;
1113
1114 switch (object->type) {
1115 case OBJ_TAG:
1116 handle_tag(name, (struct tag *)object);
1117 break;
1118 case OBJ_COMMIT:
1119 if (anonymize)
1120 name = anonymize_refname(name);
1121 /* create refs pointing to already seen commits */
1122 commit = rewrite_commit((struct commit *)object);
1123 if (!commit) {
1124 /*
1125 * Neither this object nor any of its
1126 * ancestors touch any relevant paths, so
1127 * it has been filtered to nothing. Delete
1128 * it.
1129 */
1130 printf("reset %s\nfrom %s\n\n",
1131 name, oid_to_hex(null_oid(the_hash_algo)));
1132 continue;
1133 }
1134
1135 mark = get_object_mark(&commit->object);
1136 if (!mark) {
1137 /*
1138 * Getting here means we have a commit which
1139 * was excluded by a negative refspec (e.g.
1140 * fast-export ^HEAD HEAD). If we are
1141 * referencing excluded commits, set the ref
1142 * to the exact commit. Otherwise, the user
1143 * wants the branch exported but every commit
1144 * in its history to be deleted, which basically
1145 * just means deletion of the ref.
1146 */
1147 if (!reference_excluded_commits) {
1148 /* delete the ref */
1149 printf("reset %s\nfrom %s\n\n",
1150 name, oid_to_hex(null_oid(the_hash_algo)));
1151 continue;
1152 }
1153 /* set ref to commit using oid, not mark */
1154 printf("reset %s\nfrom %s\n\n", name,
1155 oid_to_hex(&commit->object.oid));
1156 continue;
1157 }
1158
1159 printf("reset %s\nfrom :%d\n\n", name, mark
1160 );
1161 show_progress();
1162 break;
1163 }
1164 }
1165}
1166
1167static void export_marks(char *file)
1168{
1169 unsigned int i;
1170 uint32_t mark;
1171 struct decoration_entry *deco = idnums.entries;
1172 FILE *f;
1173 int e = 0;
1174
1175 f = fopen_for_writing(file);
1176 if (!f)
1177 die_errno("Unable to open marks file %s for writing.", file);
1178
1179 for (i = 0; i < idnums.size; i++) {
1180 if (deco->base && deco->base->type == 1) {
1181 mark = ptr_to_mark(deco->decoration);
1182 if (fprintf(f, ":%"PRIu32" %s\n", mark,
1183 oid_to_hex(&deco->base->oid)) < 0) {
1184 e = 1;
1185 break;
1186 }
1187 }
1188 deco++;
1189 }
1190
1191 e |= ferror(f);
1192 e |= fclose(f);
1193 if (e)
1194 error("Unable to write marks file %s.", file);
1195}
1196
1197static void import_marks(char *input_file, int check_exists)
1198{
1199 char line[512];
1200 FILE *f;
1201 struct stat sb;
1202
1203 if (check_exists && stat(input_file, &sb))
1204 return;
1205
1206 f = xfopen(input_file, "r");
1207 while (fgets(line, sizeof(line), f)) {
1208 uint32_t mark;
1209 char *line_end, *mark_end;
1210 struct object_id oid;
1211 struct object *object;
1212 struct commit *commit;
1213 enum object_type type;
1214
1215 line_end = strchr(line, '\n');
1216 if (line[0] != ':' || !line_end)
1217 die("corrupt mark line: %s", line);
1218 *line_end = '\0';
1219
1220 mark = strtoumax(line + 1, &mark_end, 10);
1221 if (!mark || mark_end == line + 1
1222 || *mark_end != ' ' || get_oid_hex(mark_end + 1, &oid))
1223 die("corrupt mark line: %s", line);
1224
1225 if (last_idnum < mark)
1226 last_idnum = mark;
1227
1228 type = odb_read_object_info(the_repository->objects, &oid, NULL);
1229 if (type < 0)
1230 die("object not found: %s", oid_to_hex(&oid));
1231
1232 if (type != OBJ_COMMIT)
1233 /* only commits */
1234 continue;
1235
1236 commit = lookup_commit(the_repository, &oid);
1237 if (!commit)
1238 die("not a commit? can't happen: %s", oid_to_hex(&oid));
1239
1240 object = &commit->object;
1241
1242 if (object->flags & SHOWN)
1243 error("Object %s already has a mark", oid_to_hex(&oid));
1244
1245 mark_object(object, mark);
1246
1247 object->flags |= SHOWN;
1248 }
1249 fclose(f);
1250}
1251
1252static void handle_deletes(void)
1253{
1254 int i;
1255 for (i = 0; i < refspecs.nr; i++) {
1256 struct refspec_item *refspec = &refspecs.items[i];
1257 if (*refspec->src)
1258 continue;
1259
1260 printf("reset %s\nfrom %s\n\n",
1261 refspec->dst, oid_to_hex(null_oid(the_hash_algo)));
1262 }
1263}
1264
1265static int parse_opt_anonymize_map(const struct option *opt,
1266 const char *arg, int unset)
1267{
1268 struct hashmap *map = opt->value;
1269 const char *delim, *value;
1270 size_t keylen;
1271
1272 BUG_ON_OPT_NEG(unset);
1273
1274 delim = strchr(arg, ':');
1275 if (delim) {
1276 keylen = delim - arg;
1277 value = delim + 1;
1278 } else {
1279 keylen = strlen(arg);
1280 value = arg;
1281 }
1282
1283 if (!keylen || !*value)
1284 return error(_("--anonymize-map token cannot be empty"));
1285
1286 add_anonymized_entry(map, memhash(arg, keylen), arg, keylen,
1287 xstrdup(value));
1288
1289 return 0;
1290}
1291
1292int cmd_fast_export(int argc,
1293 const char **argv,
1294 const char *prefix,
1295 struct repository *repo UNUSED)
1296{
1297 struct rev_info revs;
1298 struct commit *commit;
1299 char *export_filename = NULL,
1300 *import_filename = NULL,
1301 *import_filename_if_exists = NULL;
1302 uint32_t lastimportid;
1303 struct string_list refspecs_list = STRING_LIST_INIT_NODUP;
1304 struct string_list paths_of_changed_objects = STRING_LIST_INIT_DUP;
1305 struct option options[] = {
1306 OPT_INTEGER(0, "progress", &progress,
1307 N_("show progress after <n> objects")),
1308 OPT_CALLBACK(0, "signed-tags", &signed_tag_mode, N_("mode"),
1309 N_("select handling of signed tags"),
1310 parse_opt_sign_mode),
1311 OPT_CALLBACK(0, "signed-commits", &signed_commit_mode, N_("mode"),
1312 N_("select handling of signed commits"),
1313 parse_opt_sign_mode),
1314 OPT_CALLBACK(0, "tag-of-filtered-object", &tag_of_filtered_mode, N_("mode"),
1315 N_("select handling of tags that tag filtered objects"),
1316 parse_opt_tag_of_filtered_mode),
1317 OPT_CALLBACK(0, "reencode", &reencode_mode, N_("mode"),
1318 N_("select handling of commit messages in an alternate encoding"),
1319 parse_opt_reencode_mode),
1320 OPT_STRING(0, "export-marks", &export_filename, N_("file"),
1321 N_("dump marks to this file")),
1322 OPT_STRING(0, "import-marks", &import_filename, N_("file"),
1323 N_("import marks from this file")),
1324 OPT_STRING(0, "import-marks-if-exists",
1325 &import_filename_if_exists,
1326 N_("file"),
1327 N_("import marks from this file if it exists")),
1328 OPT_BOOL(0, "fake-missing-tagger", &fake_missing_tagger,
1329 N_("fake a tagger when tags lack one")),
1330 OPT_BOOL(0, "full-tree", &full_tree,
1331 N_("output full tree for each commit")),
1332 OPT_BOOL(0, "use-done-feature", &use_done_feature,
1333 N_("use the done feature to terminate the stream")),
1334 OPT_BOOL(0, "no-data", &no_data, N_("skip output of blob data")),
1335 OPT_STRING_LIST(0, "refspec", &refspecs_list, N_("refspec"),
1336 N_("apply refspec to exported refs")),
1337 OPT_BOOL(0, "anonymize", &anonymize, N_("anonymize output")),
1338 OPT_CALLBACK_F(0, "anonymize-map", &anonymized_seeds, N_("from:to"),
1339 N_("convert <from> to <to> in anonymized output"),
1340 PARSE_OPT_NONEG, parse_opt_anonymize_map),
1341 OPT_BOOL(0, "reference-excluded-parents",
1342 &reference_excluded_commits, N_("reference parents which are not in fast-export stream by object id")),
1343 OPT_BOOL(0, "show-original-ids", &show_original_ids,
1344 N_("show original object ids of blobs/commits")),
1345 OPT_BOOL(0, "mark-tags", &mark_tags,
1346 N_("label tags with mark ids")),
1347
1348 OPT_END()
1349 };
1350
1351 if (argc == 1)
1352 usage_with_options (fast_export_usage, options);
1353
1354 /* we handle encodings */
1355 repo_config(the_repository, git_default_config, NULL);
1356
1357 repo_init_revisions(the_repository, &revs, prefix);
1358 init_revision_sources(&revision_sources);
1359 revs.topo_order = 1;
1360 revs.sources = &revision_sources;
1361 revs.rewrite_parents = 1;
1362 argc = parse_options(argc, argv, prefix, options, fast_export_usage,
1363 PARSE_OPT_KEEP_ARGV0 | PARSE_OPT_KEEP_UNKNOWN_OPT);
1364 argc = setup_revisions(argc, argv, &revs, NULL);
1365 if (argc > 1)
1366 usage_with_options (fast_export_usage, options);
1367
1368 if (anonymized_seeds.cmpfn && !anonymize)
1369 die(_("the option '%s' requires '%s'"), "--anonymize-map", "--anonymize");
1370
1371 if (refspecs_list.nr) {
1372 int i;
1373
1374 for (i = 0; i < refspecs_list.nr; i++)
1375 refspec_append(&refspecs, refspecs_list.items[i].string);
1376
1377 string_list_clear(&refspecs_list, 1);
1378 }
1379
1380 if (use_done_feature)
1381 printf("feature done\n");
1382
1383 if (import_filename && import_filename_if_exists)
1384 die(_("options '%s' and '%s' cannot be used together"), "--import-marks", "--import-marks-if-exists");
1385 if (import_filename)
1386 import_marks(import_filename, 0);
1387 else if (import_filename_if_exists)
1388 import_marks(import_filename_if_exists, 1);
1389 lastimportid = last_idnum;
1390
1391 if (import_filename && revs.prune_data.nr)
1392 full_tree = 1;
1393
1394 get_tags_and_duplicates(&revs.cmdline);
1395
1396 if (prepare_revision_walk(&revs))
1397 die("revision walk setup failed");
1398
1399 revs.reverse = 1;
1400 revs.diffopt.format_callback = show_filemodify;
1401 revs.diffopt.format_callback_data = &paths_of_changed_objects;
1402 revs.diffopt.flags.recursive = 1;
1403
1404 revs.diffopt.no_free = 1;
1405 while ((commit = get_revision(&revs)))
1406 handle_commit(commit, &revs, &paths_of_changed_objects);
1407 revs.diffopt.no_free = 0;
1408
1409 handle_tags_and_duplicates(&extra_refs);
1410 handle_tags_and_duplicates(&tag_refs);
1411 handle_deletes();
1412
1413 if (export_filename && lastimportid != last_idnum)
1414 export_marks(export_filename);
1415
1416 if (use_done_feature)
1417 printf("done\n");
1418
1419 refspec_clear(&refspecs);
1420 release_revisions(&revs);
1421
1422 return 0;
1423}