Git fork
at reftables-rust 1319 lines 30 kB view raw
1#define DISABLE_SIGN_COMPARE_WARNINGS 2 3#include "git-compat-util.h" 4#include "config.h" 5#include "environment.h" 6#include "gettext.h" 7#include "hex-ll.h" 8#include "utf8.h" 9#include "strbuf.h" 10#include "mailinfo.h" 11 12static void cleanup_space(struct strbuf *sb) 13{ 14 size_t pos, cnt; 15 for (pos = 0; pos < sb->len; pos++) { 16 if (isspace(sb->buf[pos])) { 17 sb->buf[pos] = ' '; 18 for (cnt = 0; isspace(sb->buf[pos + cnt + 1]); cnt++); 19 strbuf_remove(sb, pos + 1, cnt); 20 } 21 } 22} 23 24static void get_sane_name(struct strbuf *out, struct strbuf *name, struct strbuf *email) 25{ 26 struct strbuf *src = name; 27 if (!name->len || 60 < name->len || strpbrk(name->buf, "@<>")) 28 src = email; 29 else if (name == out) 30 return; 31 strbuf_reset(out); 32 strbuf_addbuf(out, src); 33} 34 35static void parse_bogus_from(struct mailinfo *mi, const struct strbuf *line) 36{ 37 /* John Doe <johndoe> */ 38 39 char *bra, *ket; 40 /* This is fallback, so do not bother if we already have an 41 * e-mail address. 42 */ 43 if (mi->email.len) 44 return; 45 46 bra = strchr(line->buf, '<'); 47 if (!bra) 48 return; 49 ket = strchr(bra, '>'); 50 if (!ket) 51 return; 52 53 strbuf_reset(&mi->email); 54 strbuf_add(&mi->email, bra + 1, ket - bra - 1); 55 56 strbuf_reset(&mi->name); 57 strbuf_add(&mi->name, line->buf, bra - line->buf); 58 strbuf_trim(&mi->name); 59 get_sane_name(&mi->name, &mi->name, &mi->email); 60} 61 62static const char *unquote_comment(struct strbuf *outbuf, const char *in) 63{ 64 int take_next_literally = 0; 65 int depth = 1; 66 67 strbuf_addch(outbuf, '('); 68 69 while (*in) { 70 int c = *in++; 71 if (take_next_literally == 1) { 72 take_next_literally = 0; 73 } else { 74 switch (c) { 75 case '\\': 76 take_next_literally = 1; 77 continue; 78 case '(': 79 strbuf_addch(outbuf, '('); 80 depth++; 81 continue; 82 case ')': 83 strbuf_addch(outbuf, ')'); 84 if (!--depth) 85 return in; 86 continue; 87 } 88 } 89 90 strbuf_addch(outbuf, c); 91 } 92 93 return in; 94} 95 96static const char *unquote_quoted_string(struct strbuf *outbuf, const char *in) 97{ 98 int take_next_literally = 0; 99 100 while (*in) { 101 int c = *in++; 102 if (take_next_literally == 1) { 103 take_next_literally = 0; 104 } else { 105 switch (c) { 106 case '\\': 107 take_next_literally = 1; 108 continue; 109 case '"': 110 return in; 111 } 112 } 113 114 strbuf_addch(outbuf, c); 115 } 116 117 return in; 118} 119 120static void unquote_quoted_pair(struct strbuf *line) 121{ 122 struct strbuf outbuf; 123 const char *in = line->buf; 124 int c; 125 126 strbuf_init(&outbuf, line->len); 127 128 while ((c = *in++) != 0) { 129 switch (c) { 130 case '"': 131 in = unquote_quoted_string(&outbuf, in); 132 continue; 133 case '(': 134 in = unquote_comment(&outbuf, in); 135 continue; 136 } 137 138 strbuf_addch(&outbuf, c); 139 } 140 141 strbuf_swap(&outbuf, line); 142 strbuf_release(&outbuf); 143 144} 145 146static void handle_from(struct mailinfo *mi, const struct strbuf *from) 147{ 148 char *at; 149 size_t el; 150 struct strbuf f; 151 152 strbuf_init(&f, from->len); 153 strbuf_addbuf(&f, from); 154 155 unquote_quoted_pair(&f); 156 157 at = strchr(f.buf, '@'); 158 if (!at) { 159 parse_bogus_from(mi, from); 160 goto out; 161 } 162 163 /* 164 * If we already have one email, don't take any confusing lines 165 */ 166 if (mi->email.len && strchr(at + 1, '@')) 167 goto out; 168 169 /* Pick up the string around '@', possibly delimited with <> 170 * pair; that is the email part. 171 */ 172 while (at > f.buf) { 173 char c = at[-1]; 174 if (isspace(c)) 175 break; 176 if (c == '<') { 177 at[-1] = ' '; 178 break; 179 } 180 at--; 181 } 182 el = strcspn(at, " \n\t\r\v\f>"); 183 strbuf_reset(&mi->email); 184 strbuf_add(&mi->email, at, el); 185 strbuf_remove(&f, at - f.buf, el + (at[el] ? 1 : 0)); 186 187 /* The remainder is name. It could be 188 * 189 * - "John Doe <john.doe@xz>" (a), or 190 * - "john.doe@xz (John Doe)" (b), or 191 * - "John (zzz) Doe <john.doe@xz> (Comment)" (c) 192 * 193 * but we have removed the email part, so 194 * 195 * - remove extra spaces which could stay after email (case 'c'), and 196 * - trim from both ends, possibly removing the () pair at the end 197 * (cases 'a' and 'b'). 198 */ 199 cleanup_space(&f); 200 strbuf_trim(&f); 201 if (f.buf[0] == '(' && f.len && f.buf[f.len - 1] == ')') { 202 strbuf_remove(&f, 0, 1); 203 strbuf_setlen(&f, f.len - 1); 204 } 205 206 get_sane_name(&mi->name, &f, &mi->email); 207out: 208 strbuf_release(&f); 209} 210 211static void handle_header(struct strbuf **out, const struct strbuf *line) 212{ 213 if (!*out) { 214 *out = xmalloc(sizeof(struct strbuf)); 215 strbuf_init(*out, line->len); 216 } else 217 strbuf_reset(*out); 218 219 strbuf_addbuf(*out, line); 220} 221 222/* NOTE NOTE NOTE. We do not claim we do full MIME. We just attempt 223 * to have enough heuristics to grok MIME encoded patches often found 224 * on our mailing lists. For example, we do not even treat header lines 225 * case insensitively. 226 */ 227 228static int slurp_attr(const char *line, const char *name, struct strbuf *attr) 229{ 230 const char *ends, *ap = strcasestr(line, name); 231 size_t sz; 232 233 strbuf_setlen(attr, 0); 234 if (!ap) 235 return 0; 236 ap += strlen(name); 237 if (*ap == '"') { 238 ap++; 239 ends = "\""; 240 } 241 else 242 ends = "; \t"; 243 sz = strcspn(ap, ends); 244 strbuf_add(attr, ap, sz); 245 return 1; 246} 247 248static int has_attr_value(const char *line, const char *name, const char *value) 249{ 250 struct strbuf sb = STRBUF_INIT; 251 int rc = slurp_attr(line, name, &sb) && !strcasecmp(sb.buf, value); 252 strbuf_release(&sb); 253 return rc; 254} 255 256static void handle_content_type(struct mailinfo *mi, struct strbuf *line) 257{ 258 struct strbuf *boundary = xmalloc(sizeof(struct strbuf)); 259 strbuf_init(boundary, line->len); 260 261 mi->format_flowed = has_attr_value(line->buf, "format=", "flowed"); 262 mi->delsp = has_attr_value(line->buf, "delsp=", "yes"); 263 264 if (slurp_attr(line->buf, "boundary=", boundary)) { 265 strbuf_insertstr(boundary, 0, "--"); 266 if (++mi->content_top >= &mi->content[MAX_BOUNDARIES]) { 267 error("Too many boundaries to handle"); 268 mi->input_error = -1; 269 mi->content_top = &mi->content[MAX_BOUNDARIES] - 1; 270 strbuf_release(boundary); 271 free(boundary); 272 return; 273 } 274 *(mi->content_top) = boundary; 275 boundary = NULL; 276 } 277 slurp_attr(line->buf, "charset=", &mi->charset); 278 279 if (boundary) { 280 strbuf_release(boundary); 281 free(boundary); 282 } 283} 284 285static void handle_content_transfer_encoding(struct mailinfo *mi, 286 const struct strbuf *line) 287{ 288 if (strcasestr(line->buf, "base64")) 289 mi->transfer_encoding = TE_BASE64; 290 else if (strcasestr(line->buf, "quoted-printable")) 291 mi->transfer_encoding = TE_QP; 292 else 293 mi->transfer_encoding = TE_DONTCARE; 294} 295 296static int is_multipart_boundary(struct mailinfo *mi, const struct strbuf *line) 297{ 298 struct strbuf *content_top = *(mi->content_top); 299 300 return ((content_top->len <= line->len) && 301 !memcmp(line->buf, content_top->buf, content_top->len)); 302} 303 304static void cleanup_subject(struct mailinfo *mi, struct strbuf *subject) 305{ 306 size_t at = 0; 307 308 while (at < subject->len) { 309 char *pos; 310 size_t remove; 311 312 switch (subject->buf[at]) { 313 case 'r': case 'R': 314 if (subject->len <= at + 3) 315 break; 316 if ((subject->buf[at + 1] == 'e' || 317 subject->buf[at + 1] == 'E') && 318 subject->buf[at + 2] == ':') { 319 strbuf_remove(subject, at, 3); 320 continue; 321 } 322 at++; 323 break; 324 case ' ': case '\t': case ':': 325 strbuf_remove(subject, at, 1); 326 continue; 327 case '[': 328 pos = strchr(subject->buf + at, ']'); 329 if (!pos) 330 break; 331 remove = pos - (subject->buf + at) + 1; 332 if (!mi->keep_non_patch_brackets_in_subject || 333 (7 <= remove && 334 memmem(subject->buf + at, remove, "PATCH", 5))) 335 strbuf_remove(subject, at, remove); 336 else { 337 at += remove; 338 /* 339 * If the input had a space after the ], keep 340 * it. We don't bother with finding the end of 341 * the space, since we later normalize it 342 * anyway. 343 */ 344 if (isspace(subject->buf[at])) 345 at += 1; 346 } 347 continue; 348 } 349 break; 350 } 351 strbuf_trim(subject); 352} 353 354static const char * const header[] = { 355 "From", "Subject", "Date", 356}; 357 358static inline int skip_header(const struct strbuf *line, const char *hdr, 359 const char **outval) 360{ 361 const char *val; 362 if (!skip_iprefix(line->buf, hdr, &val) || 363 *val++ != ':') 364 return 0; 365 while (isspace(*val)) 366 val++; 367 *outval = val; 368 return 1; 369} 370 371static int is_format_patch_separator(const char *line, int len) 372{ 373 static const char SAMPLE[] = 374 "From e6807f3efca28b30decfecb1732a56c7db1137ee Mon Sep 17 00:00:00 2001\n"; 375 const char *cp; 376 377 if (len != strlen(SAMPLE)) 378 return 0; 379 if (!skip_prefix(line, "From ", &cp)) 380 return 0; 381 if (strspn(cp, "0123456789abcdef") != 40) 382 return 0; 383 cp += 40; 384 return !memcmp(SAMPLE + (cp - line), cp, strlen(SAMPLE) - (cp - line)); 385} 386 387static int decode_q_segment(struct strbuf *out, const struct strbuf *q_seg, 388 int rfc2047) 389{ 390 const char *in = q_seg->buf; 391 int c; 392 strbuf_grow(out, q_seg->len); 393 394 while ((c = *in++) != 0) { 395 if (c == '=') { 396 int ch, d = *in; 397 if (d == '\n' || !d) 398 break; /* drop trailing newline */ 399 ch = hex2chr(in); 400 if (ch >= 0) { 401 strbuf_addch(out, ch); 402 in += 2; 403 continue; 404 } 405 /* garbage -- fall through */ 406 } 407 if (rfc2047 && c == '_') /* rfc2047 4.2 (2) */ 408 c = 0x20; 409 strbuf_addch(out, c); 410 } 411 return 0; 412} 413 414static int decode_b_segment(struct strbuf *out, const struct strbuf *b_seg) 415{ 416 /* Decode in..ep, possibly in-place to ot */ 417 int c, pos = 0, acc = 0; 418 const char *in = b_seg->buf; 419 strbuf_grow(out, b_seg->len); 420 421 while ((c = *in++) != 0) { 422 if (c == '+') 423 c = 62; 424 else if (c == '/') 425 c = 63; 426 else if ('A' <= c && c <= 'Z') 427 c -= 'A'; 428 else if ('a' <= c && c <= 'z') 429 c -= 'a' - 26; 430 else if ('0' <= c && c <= '9') 431 c -= '0' - 52; 432 else 433 continue; /* garbage */ 434 switch (pos++) { 435 case 0: 436 acc = (c << 2); 437 break; 438 case 1: 439 strbuf_addch(out, (acc | (c >> 4))); 440 acc = (c & 15) << 4; 441 break; 442 case 2: 443 strbuf_addch(out, (acc | (c >> 2))); 444 acc = (c & 3) << 6; 445 break; 446 case 3: 447 strbuf_addch(out, (acc | c)); 448 acc = pos = 0; 449 break; 450 } 451 } 452 return 0; 453} 454 455static int convert_to_utf8(struct mailinfo *mi, 456 struct strbuf *line, const char *charset) 457{ 458 char *out; 459 size_t out_len; 460 461 if (!mi->metainfo_charset || !charset || !*charset) 462 return 0; 463 464 if (same_encoding(mi->metainfo_charset, charset)) 465 return 0; 466 out = reencode_string_len(line->buf, line->len, 467 mi->metainfo_charset, charset, &out_len); 468 if (!out) { 469 mi->input_error = -1; 470 return error("cannot convert from %s to %s", 471 charset, mi->metainfo_charset); 472 } 473 strbuf_attach(line, out, out_len, out_len); 474 return 0; 475} 476 477static void decode_header(struct mailinfo *mi, struct strbuf *it) 478{ 479 char *in, *ep, *cp; 480 struct strbuf outbuf = STRBUF_INIT, dec = STRBUF_INIT; 481 struct strbuf charset_q = STRBUF_INIT, piecebuf = STRBUF_INIT; 482 int found_error = 1; /* pessimism */ 483 484 in = it->buf; 485 while (in - it->buf <= it->len && (ep = strstr(in, "=?")) != NULL) { 486 int encoding; 487 strbuf_reset(&charset_q); 488 strbuf_reset(&piecebuf); 489 490 if (in != ep) { 491 /* 492 * We are about to process an encoded-word 493 * that begins at ep, but there is something 494 * before the encoded word. 495 */ 496 char *scan; 497 for (scan = in; scan < ep; scan++) 498 if (!isspace(*scan)) 499 break; 500 501 if (scan != ep || in == it->buf) { 502 /* 503 * We should not lose that "something", 504 * unless we have just processed an 505 * encoded-word, and there is only LWS 506 * before the one we are about to process. 507 */ 508 strbuf_add(&outbuf, in, ep - in); 509 } 510 } 511 /* E.g. 512 * ep : "=?iso-2022-jp?B?GyR...?= foo" 513 * ep : "=?ISO-8859-1?Q?Foo=FCbar?= baz" 514 */ 515 ep += 2; 516 517 if (ep - it->buf >= it->len || !(cp = strchr(ep, '?'))) 518 goto release_return; 519 520 if (cp + 3 - it->buf > it->len) 521 goto release_return; 522 strbuf_add(&charset_q, ep, cp - ep); 523 524 encoding = cp[1]; 525 if (!encoding || cp[2] != '?') 526 goto release_return; 527 ep = strstr(cp + 3, "?="); 528 if (!ep) 529 goto release_return; 530 strbuf_add(&piecebuf, cp + 3, ep - cp - 3); 531 switch (tolower(encoding)) { 532 default: 533 goto release_return; 534 case 'b': 535 if ((found_error = decode_b_segment(&dec, &piecebuf))) 536 goto release_return; 537 break; 538 case 'q': 539 if ((found_error = decode_q_segment(&dec, &piecebuf, 1))) 540 goto release_return; 541 break; 542 } 543 if (convert_to_utf8(mi, &dec, charset_q.buf)) 544 goto release_return; 545 546 strbuf_addbuf(&outbuf, &dec); 547 strbuf_release(&dec); 548 in = ep + 2; 549 } 550 strbuf_addstr(&outbuf, in); 551 strbuf_reset(it); 552 strbuf_addbuf(it, &outbuf); 553 found_error = 0; 554release_return: 555 strbuf_release(&outbuf); 556 strbuf_release(&charset_q); 557 strbuf_release(&piecebuf); 558 strbuf_release(&dec); 559 560 if (found_error) 561 mi->input_error = -1; 562} 563 564/* 565 * Returns true if "line" contains a header matching "hdr", in which case "val" 566 * will contain the value of the header with any RFC2047 B and Q encoding 567 * unwrapped, and optionally normalize the meta information to utf8. 568 */ 569static int parse_header(const struct strbuf *line, 570 const char *hdr, 571 struct mailinfo *mi, 572 struct strbuf *val) 573{ 574 const char *val_str; 575 576 if (!skip_header(line, hdr, &val_str)) 577 return 0; 578 strbuf_addstr(val, val_str); 579 decode_header(mi, val); 580 return 1; 581} 582 583static int check_header(struct mailinfo *mi, 584 const struct strbuf *line, 585 struct strbuf *hdr_data[], int overwrite) 586{ 587 int i, ret = 0; 588 struct strbuf sb = STRBUF_INIT; 589 590 /* search for the interesting parts */ 591 for (i = 0; i < ARRAY_SIZE(header); i++) { 592 if ((!hdr_data[i] || overwrite) && 593 parse_header(line, header[i], mi, &sb)) { 594 handle_header(&hdr_data[i], &sb); 595 ret = 1; 596 goto check_header_out; 597 } 598 } 599 600 /* Content stuff */ 601 if (parse_header(line, "Content-Type", mi, &sb)) { 602 handle_content_type(mi, &sb); 603 ret = 1; 604 goto check_header_out; 605 } 606 if (parse_header(line, "Content-Transfer-Encoding", mi, &sb)) { 607 handle_content_transfer_encoding(mi, &sb); 608 ret = 1; 609 goto check_header_out; 610 } 611 if (parse_header(line, "Message-ID", mi, &sb)) { 612 if (mi->add_message_id) 613 mi->message_id = strbuf_detach(&sb, NULL); 614 ret = 1; 615 goto check_header_out; 616 } 617 618check_header_out: 619 strbuf_release(&sb); 620 return ret; 621} 622 623/* 624 * Returns 1 if the given line or any line beginning with the given line is an 625 * in-body header (that is, check_header will succeed when passed 626 * mi->s_hdr_data). 627 */ 628static int is_inbody_header(const struct mailinfo *mi, 629 const struct strbuf *line) 630{ 631 int i; 632 const char *val; 633 for (i = 0; i < ARRAY_SIZE(header); i++) 634 if (!mi->s_hdr_data[i] && skip_header(line, header[i], &val)) 635 return 1; 636 return 0; 637} 638 639static void decode_transfer_encoding(struct mailinfo *mi, struct strbuf *line) 640{ 641 struct strbuf ret = STRBUF_INIT; 642 643 switch (mi->transfer_encoding) { 644 case TE_QP: 645 decode_q_segment(&ret, line, 0); 646 break; 647 case TE_BASE64: 648 decode_b_segment(&ret, line); 649 break; 650 case TE_DONTCARE: 651 default: 652 return; 653 } 654 strbuf_reset(line); 655 strbuf_addbuf(line, &ret); 656 strbuf_release(&ret); 657} 658 659static inline int patchbreak(const struct strbuf *line) 660{ 661 size_t i; 662 663 /* Beginning of a "diff -" header? */ 664 if (starts_with(line->buf, "diff -")) 665 return 1; 666 667 /* CVS "Index: " line? */ 668 if (starts_with(line->buf, "Index: ")) 669 return 1; 670 671 /* 672 * "--- <filename>" starts patches without headers 673 * "---<sp>*" is a manual separator 674 */ 675 if (line->len < 4) 676 return 0; 677 678 if (starts_with(line->buf, "---")) { 679 /* space followed by a filename? */ 680 if (line->buf[3] == ' ' && !isspace(line->buf[4])) 681 return 1; 682 /* Just whitespace? */ 683 for (i = 3; i < line->len; i++) { 684 unsigned char c = line->buf[i]; 685 if (c == '\n') 686 return 1; 687 if (!isspace(c)) 688 break; 689 } 690 return 0; 691 } 692 return 0; 693} 694 695static int is_scissors_line(const char *line) 696{ 697 const char *c; 698 int scissors = 0, gap = 0; 699 const char *first_nonblank = NULL, *last_nonblank = NULL; 700 int visible, perforation = 0, in_perforation = 0; 701 702 for (c = line; *c; c++) { 703 if (isspace(*c)) { 704 if (in_perforation) { 705 perforation++; 706 gap++; 707 } 708 continue; 709 } 710 last_nonblank = c; 711 if (!first_nonblank) 712 first_nonblank = c; 713 if (*c == '-') { 714 in_perforation = 1; 715 perforation++; 716 continue; 717 } 718 if (starts_with(c, ">8") || starts_with(c, "8<") || 719 starts_with(c, ">%") || starts_with(c, "%<")) { 720 in_perforation = 1; 721 perforation += 2; 722 scissors += 2; 723 c++; 724 continue; 725 } 726 in_perforation = 0; 727 } 728 729 /* 730 * The mark must be at least 8 bytes long (e.g. "-- >8 --"). 731 * Even though there can be arbitrary cruft on the same line 732 * (e.g. "cut here"), in order to avoid misidentification, the 733 * perforation must occupy more than a third of the visible 734 * width of the line, and dashes and scissors must occupy more 735 * than half of the perforation. 736 */ 737 738 if (first_nonblank && last_nonblank) 739 visible = last_nonblank - first_nonblank + 1; 740 else 741 visible = 0; 742 return (scissors && 8 <= visible && 743 visible < perforation * 3 && 744 gap * 2 < perforation); 745} 746 747static void flush_inbody_header_accum(struct mailinfo *mi) 748{ 749 if (!mi->inbody_header_accum.len) 750 return; 751 if (!check_header(mi, &mi->inbody_header_accum, mi->s_hdr_data, 0)) 752 BUG("inbody_header_accum, if not empty, must always contain a valid in-body header"); 753 strbuf_reset(&mi->inbody_header_accum); 754} 755 756static int check_inbody_header(struct mailinfo *mi, const struct strbuf *line) 757{ 758 if (mi->inbody_header_accum.len && 759 (line->buf[0] == ' ' || line->buf[0] == '\t')) { 760 if (mi->use_scissors && is_scissors_line(line->buf)) { 761 /* 762 * This is a scissors line; do not consider this line 763 * as a header continuation line. 764 */ 765 flush_inbody_header_accum(mi); 766 return 0; 767 } 768 strbuf_strip_suffix(&mi->inbody_header_accum, "\n"); 769 strbuf_addbuf(&mi->inbody_header_accum, line); 770 return 1; 771 } 772 773 flush_inbody_header_accum(mi); 774 775 if (starts_with(line->buf, ">From") && isspace(line->buf[5])) 776 return is_format_patch_separator(line->buf + 1, line->len - 1); 777 if (starts_with(line->buf, "[PATCH]") && isspace(line->buf[7])) { 778 int i; 779 for (i = 0; i < ARRAY_SIZE(header); i++) 780 if (!strcmp("Subject", header[i])) { 781 handle_header(&mi->s_hdr_data[i], line); 782 return 1; 783 } 784 return 0; 785 } 786 if (is_inbody_header(mi, line)) { 787 strbuf_addbuf(&mi->inbody_header_accum, line); 788 return 1; 789 } 790 return 0; 791} 792 793static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line) 794{ 795 assert(!mi->filter_stage); 796 797 if (mi->header_stage) { 798 if (!line->len || (line->len == 1 && line->buf[0] == '\n')) { 799 if (mi->inbody_header_accum.len) { 800 flush_inbody_header_accum(mi); 801 mi->header_stage = 0; 802 } 803 return 0; 804 } 805 } 806 807 if (mi->use_inbody_headers && mi->header_stage) { 808 mi->header_stage = check_inbody_header(mi, line); 809 if (mi->header_stage) 810 return 0; 811 } else 812 /* Only trim the first (blank) line of the commit message 813 * when ignoring in-body headers. 814 */ 815 mi->header_stage = 0; 816 817 /* normalize the log message to UTF-8. */ 818 if (convert_to_utf8(mi, line, mi->charset.buf)) 819 return 0; /* mi->input_error already set */ 820 821 if (mi->use_scissors && is_scissors_line(line->buf)) { 822 int i; 823 824 strbuf_setlen(&mi->log_message, 0); 825 mi->header_stage = 1; 826 827 /* 828 * We may have already read "secondary headers"; purge 829 * them to give ourselves a clean restart. 830 */ 831 for (i = 0; i < ARRAY_SIZE(header); i++) { 832 if (mi->s_hdr_data[i]) 833 strbuf_release(mi->s_hdr_data[i]); 834 FREE_AND_NULL(mi->s_hdr_data[i]); 835 } 836 return 0; 837 } 838 839 if (patchbreak(line)) { 840 if (mi->message_id) 841 strbuf_addf(&mi->log_message, 842 "Message-ID: %s\n", mi->message_id); 843 return 1; 844 } 845 846 strbuf_addbuf(&mi->log_message, line); 847 return 0; 848} 849 850static void handle_patch(struct mailinfo *mi, const struct strbuf *line) 851{ 852 fwrite(line->buf, 1, line->len, mi->patchfile); 853 mi->patch_lines++; 854} 855 856static void handle_filter(struct mailinfo *mi, struct strbuf *line) 857{ 858 switch (mi->filter_stage) { 859 case 0: 860 if (!handle_commit_msg(mi, line)) 861 break; 862 mi->filter_stage++; 863 /* fallthrough */ 864 case 1: 865 handle_patch(mi, line); 866 break; 867 } 868} 869 870static int is_rfc2822_header(const struct strbuf *line) 871{ 872 /* 873 * The section that defines the loosest possible 874 * field name is "3.6.8 Optional fields". 875 * 876 * optional-field = field-name ":" unstructured CRLF 877 * field-name = 1*ftext 878 * ftext = %d33-57 / %59-126 879 */ 880 int ch; 881 char *cp = line->buf; 882 883 /* Count mbox From headers as headers */ 884 if (starts_with(cp, "From ") || starts_with(cp, ">From ")) 885 return 1; 886 887 while ((ch = *cp++)) { 888 if (ch == ':') 889 return 1; 890 if ((33 <= ch && ch <= 57) || 891 (59 <= ch && ch <= 126)) 892 continue; 893 break; 894 } 895 return 0; 896} 897 898static int read_one_header_line(struct strbuf *line, FILE *in) 899{ 900 struct strbuf continuation = STRBUF_INIT; 901 902 /* Get the first part of the line. */ 903 if (strbuf_getline_lf(line, in)) 904 return 0; 905 906 /* 907 * Is it an empty line or not a valid rfc2822 header? 908 * If so, stop here, and return false ("not a header") 909 */ 910 strbuf_rtrim(line); 911 if (!line->len || !is_rfc2822_header(line)) { 912 /* Re-add the newline */ 913 strbuf_addch(line, '\n'); 914 return 0; 915 } 916 917 /* 918 * Now we need to eat all the continuation lines.. 919 * Yuck, 2822 header "folding" 920 */ 921 for (;;) { 922 int peek; 923 924 peek = fgetc(in); 925 if (peek == EOF) 926 break; 927 ungetc(peek, in); 928 if (peek != ' ' && peek != '\t') 929 break; 930 if (strbuf_getline_lf(&continuation, in)) 931 break; 932 continuation.buf[0] = ' '; 933 strbuf_rtrim(&continuation); 934 strbuf_addbuf(line, &continuation); 935 } 936 strbuf_release(&continuation); 937 938 return 1; 939} 940 941static int find_boundary(struct mailinfo *mi, struct strbuf *line) 942{ 943 while (!strbuf_getline_lf(line, mi->input)) { 944 if (*(mi->content_top) && is_multipart_boundary(mi, line)) 945 return 1; 946 } 947 return 0; 948} 949 950static int handle_boundary(struct mailinfo *mi, struct strbuf *line) 951{ 952 struct strbuf newline = STRBUF_INIT; 953 954 strbuf_addch(&newline, '\n'); 955again: 956 if (line->len >= (*(mi->content_top))->len + 2 && 957 !memcmp(line->buf + (*(mi->content_top))->len, "--", 2)) { 958 /* we hit an end boundary */ 959 /* pop the current boundary off the stack */ 960 strbuf_release(*(mi->content_top)); 961 FREE_AND_NULL(*(mi->content_top)); 962 963 /* technically won't happen as is_multipart_boundary() 964 will fail first. But just in case.. 965 */ 966 if (--mi->content_top < mi->content) { 967 error("Detected mismatched boundaries, can't recover"); 968 mi->input_error = -1; 969 mi->content_top = mi->content; 970 strbuf_release(&newline); 971 return 0; 972 } 973 handle_filter(mi, &newline); 974 strbuf_release(&newline); 975 if (mi->input_error) 976 return 0; 977 978 /* skip to the next boundary */ 979 if (!find_boundary(mi, line)) 980 return 0; 981 goto again; 982 } 983 984 /* set some defaults */ 985 mi->transfer_encoding = TE_DONTCARE; 986 strbuf_reset(&mi->charset); 987 988 /* slurp in this section's info */ 989 while (read_one_header_line(line, mi->input)) 990 check_header(mi, line, mi->p_hdr_data, 0); 991 992 strbuf_release(&newline); 993 /* replenish line */ 994 if (strbuf_getline_lf(line, mi->input)) 995 return 0; 996 strbuf_addch(line, '\n'); 997 return 1; 998} 999 1000static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line, 1001 struct strbuf *prev) 1002{ 1003 size_t len = line->len; 1004 const char *rest; 1005 1006 if (!mi->format_flowed) { 1007 if (len >= 2 && 1008 line->buf[len - 2] == '\r' && 1009 line->buf[len - 1] == '\n') { 1010 mi->have_quoted_cr = 1; 1011 if (mi->quoted_cr == quoted_cr_strip) { 1012 strbuf_setlen(line, len - 2); 1013 strbuf_addch(line, '\n'); 1014 len--; 1015 } 1016 } 1017 handle_filter(mi, line); 1018 return; 1019 } 1020 1021 if (line->buf[len - 1] == '\n') { 1022 len--; 1023 if (len && line->buf[len - 1] == '\r') 1024 len--; 1025 } 1026 1027 /* Keep signature separator as-is. */ 1028 if (skip_prefix(line->buf, "-- ", &rest) && rest - line->buf == len) { 1029 if (prev->len) { 1030 handle_filter(mi, prev); 1031 strbuf_reset(prev); 1032 } 1033 handle_filter(mi, line); 1034 return; 1035 } 1036 1037 /* Unstuff space-stuffed line. */ 1038 if (len && line->buf[0] == ' ') { 1039 strbuf_remove(line, 0, 1); 1040 len--; 1041 } 1042 1043 /* Save flowed line for later, but without the soft line break. */ 1044 if (len && line->buf[len - 1] == ' ') { 1045 strbuf_add(prev, line->buf, len - !!mi->delsp); 1046 return; 1047 } 1048 1049 /* Prepend any previous partial lines */ 1050 strbuf_insert(line, 0, prev->buf, prev->len); 1051 strbuf_reset(prev); 1052 1053 handle_filter(mi, line); 1054} 1055 1056static void summarize_quoted_cr(struct mailinfo *mi) 1057{ 1058 if (mi->have_quoted_cr && 1059 mi->quoted_cr == quoted_cr_warn) 1060 warning(_("quoted CRLF detected")); 1061} 1062 1063static void handle_body(struct mailinfo *mi, struct strbuf *line) 1064{ 1065 struct strbuf prev = STRBUF_INIT; 1066 1067 /* Skip up to the first boundary */ 1068 if (*(mi->content_top)) { 1069 if (!find_boundary(mi, line)) 1070 goto handle_body_out; 1071 } 1072 1073 do { 1074 /* process any boundary lines */ 1075 if (*(mi->content_top) && is_multipart_boundary(mi, line)) { 1076 /* flush any leftover */ 1077 if (prev.len) { 1078 handle_filter(mi, &prev); 1079 strbuf_reset(&prev); 1080 } 1081 summarize_quoted_cr(mi); 1082 mi->have_quoted_cr = 0; 1083 if (!handle_boundary(mi, line)) 1084 goto handle_body_out; 1085 } 1086 1087 /* Unwrap transfer encoding */ 1088 decode_transfer_encoding(mi, line); 1089 1090 switch (mi->transfer_encoding) { 1091 case TE_BASE64: 1092 case TE_QP: 1093 { 1094 struct strbuf **lines, **it, *sb; 1095 1096 /* Prepend any previous partial lines */ 1097 strbuf_insert(line, 0, prev.buf, prev.len); 1098 strbuf_reset(&prev); 1099 1100 /* 1101 * This is a decoded line that may contain 1102 * multiple new lines. Pass only one chunk 1103 * at a time to handle_filter() 1104 */ 1105 lines = strbuf_split(line, '\n'); 1106 for (it = lines; (sb = *it); it++) { 1107 if (!*(it + 1)) /* The last line */ 1108 if (sb->buf[sb->len - 1] != '\n') { 1109 /* Partial line, save it for later. */ 1110 strbuf_addbuf(&prev, sb); 1111 break; 1112 } 1113 handle_filter_flowed(mi, sb, &prev); 1114 } 1115 /* 1116 * The partial chunk is saved in "prev" and will be 1117 * appended by the next iteration of read_line_with_nul(). 1118 */ 1119 strbuf_list_free(lines); 1120 break; 1121 } 1122 default: 1123 handle_filter_flowed(mi, line, &prev); 1124 } 1125 1126 if (mi->input_error) 1127 break; 1128 } while (!strbuf_getwholeline(line, mi->input, '\n')); 1129 1130 if (prev.len) 1131 handle_filter(mi, &prev); 1132 summarize_quoted_cr(mi); 1133 1134 flush_inbody_header_accum(mi); 1135 1136handle_body_out: 1137 strbuf_release(&prev); 1138} 1139 1140static void output_header_lines(FILE *fout, const char *hdr, const struct strbuf *data) 1141{ 1142 const char *sp = data->buf; 1143 while (1) { 1144 char *ep = strchr(sp, '\n'); 1145 int len; 1146 if (!ep) 1147 len = strlen(sp); 1148 else 1149 len = ep - sp; 1150 fprintf(fout, "%s: %.*s\n", hdr, len, sp); 1151 if (!ep) 1152 break; 1153 sp = ep + 1; 1154 } 1155} 1156 1157static void handle_info(struct mailinfo *mi) 1158{ 1159 struct strbuf *hdr; 1160 int i; 1161 1162 for (i = 0; i < ARRAY_SIZE(header); i++) { 1163 /* only print inbody headers if we output a patch file */ 1164 if (mi->patch_lines && mi->s_hdr_data[i]) 1165 hdr = mi->s_hdr_data[i]; 1166 else if (mi->p_hdr_data[i]) 1167 hdr = mi->p_hdr_data[i]; 1168 else 1169 continue; 1170 1171 if (memchr(hdr->buf, '\0', hdr->len)) { 1172 error("a NUL byte in '%s' is not allowed.", header[i]); 1173 mi->input_error = -1; 1174 } 1175 1176 if (!strcmp(header[i], "Subject")) { 1177 if (!mi->keep_subject) { 1178 cleanup_subject(mi, hdr); 1179 cleanup_space(hdr); 1180 } 1181 output_header_lines(mi->output, "Subject", hdr); 1182 } else if (!strcmp(header[i], "From")) { 1183 cleanup_space(hdr); 1184 handle_from(mi, hdr); 1185 fprintf(mi->output, "Author: %s\n", mi->name.buf); 1186 fprintf(mi->output, "Email: %s\n", mi->email.buf); 1187 } else { 1188 cleanup_space(hdr); 1189 fprintf(mi->output, "%s: %s\n", header[i], hdr->buf); 1190 } 1191 } 1192 fprintf(mi->output, "\n"); 1193} 1194 1195int mailinfo(struct mailinfo *mi, const char *msg, const char *patch) 1196{ 1197 FILE *cmitmsg; 1198 int peek; 1199 struct strbuf line = STRBUF_INIT; 1200 1201 cmitmsg = fopen(msg, "w"); 1202 if (!cmitmsg) { 1203 perror(msg); 1204 return -1; 1205 } 1206 mi->patchfile = fopen(patch, "w"); 1207 if (!mi->patchfile) { 1208 perror(patch); 1209 fclose(cmitmsg); 1210 return -1; 1211 } 1212 1213 mi->p_hdr_data = xcalloc(ARRAY_SIZE(header), sizeof(*(mi->p_hdr_data))); 1214 mi->s_hdr_data = xcalloc(ARRAY_SIZE(header), sizeof(*(mi->s_hdr_data))); 1215 1216 do { 1217 peek = fgetc(mi->input); 1218 if (peek == EOF) { 1219 fclose(cmitmsg); 1220 return error("empty patch: '%s'", patch); 1221 } 1222 } while (isspace(peek)); 1223 ungetc(peek, mi->input); 1224 1225 /* process the email header */ 1226 while (read_one_header_line(&line, mi->input)) 1227 check_header(mi, &line, mi->p_hdr_data, 1); 1228 1229 handle_body(mi, &line); 1230 fwrite(mi->log_message.buf, 1, mi->log_message.len, cmitmsg); 1231 fclose(cmitmsg); 1232 fclose(mi->patchfile); 1233 1234 handle_info(mi); 1235 strbuf_release(&line); 1236 return mi->input_error; 1237} 1238 1239int mailinfo_parse_quoted_cr_action(const char *actionstr, int *action) 1240{ 1241 if (!strcmp(actionstr, "nowarn")) 1242 *action = quoted_cr_nowarn; 1243 else if (!strcmp(actionstr, "warn")) 1244 *action = quoted_cr_warn; 1245 else if (!strcmp(actionstr, "strip")) 1246 *action = quoted_cr_strip; 1247 else 1248 return -1; 1249 return 0; 1250} 1251 1252static int git_mailinfo_config(const char *var, const char *value, 1253 const struct config_context *ctx, void *mi_) 1254{ 1255 struct mailinfo *mi = mi_; 1256 1257 if (!starts_with(var, "mailinfo.")) 1258 return git_default_config(var, value, ctx, NULL); 1259 if (!strcmp(var, "mailinfo.scissors")) { 1260 mi->use_scissors = git_config_bool(var, value); 1261 return 0; 1262 } 1263 if (!strcmp(var, "mailinfo.quotedcr")) { 1264 if (!value) 1265 return config_error_nonbool(var); 1266 if (mailinfo_parse_quoted_cr_action(value, &mi->quoted_cr) != 0) 1267 return error(_("bad action '%s' for '%s'"), value, var); 1268 return 0; 1269 } 1270 /* perhaps others here */ 1271 return 0; 1272} 1273 1274void setup_mailinfo(struct repository *r, struct mailinfo *mi) 1275{ 1276 memset(mi, 0, sizeof(*mi)); 1277 strbuf_init(&mi->name, 0); 1278 strbuf_init(&mi->email, 0); 1279 strbuf_init(&mi->charset, 0); 1280 strbuf_init(&mi->log_message, 0); 1281 strbuf_init(&mi->inbody_header_accum, 0); 1282 mi->quoted_cr = quoted_cr_warn; 1283 mi->header_stage = 1; 1284 mi->use_inbody_headers = 1; 1285 mi->content_top = mi->content; 1286 repo_config(r, git_mailinfo_config, mi); 1287} 1288 1289void clear_mailinfo(struct mailinfo *mi) 1290{ 1291 strbuf_release(&mi->name); 1292 strbuf_release(&mi->email); 1293 strbuf_release(&mi->charset); 1294 strbuf_release(&mi->inbody_header_accum); 1295 free(mi->message_id); 1296 1297 for (size_t i = 0; i < ARRAY_SIZE(header); i++) { 1298 if (!mi->p_hdr_data[i]) 1299 continue; 1300 strbuf_release(mi->p_hdr_data[i]); 1301 free(mi->p_hdr_data[i]); 1302 } 1303 free(mi->p_hdr_data); 1304 1305 for (size_t i = 0; i < ARRAY_SIZE(header); i++) { 1306 if (!mi->s_hdr_data[i]) 1307 continue; 1308 strbuf_release(mi->s_hdr_data[i]); 1309 free(mi->s_hdr_data[i]); 1310 } 1311 free(mi->s_hdr_data); 1312 1313 while (mi->content < mi->content_top) { 1314 free(*(mi->content_top)); 1315 mi->content_top--; 1316 } 1317 1318 strbuf_release(&mi->log_message); 1319}