Git fork

mailinfo: support format=flowed

Add best-effort support for patches sent using format=flowed (RFC 3676).
Remove the single leading space added by space-stuffing ("unstuff"),
join lines across soft line breaks (indicated by space + newline), but
leave the signature separator (dash dash space newline) alone.

Warn in git am when encountering a format=flowed patch, because any
trailing spaces would most probably be lost, as the sending MUA is
encouraged to remove them when preparing the email.

Provide a test patch formatted by Mozilla Thunderbird 60 using its
default configuration. It reuses the contents of the file mailinfo.c
before and after this patch.

Signed-off-by: Rene Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

authored by

René Scharfe and committed by
Junio C Hamano
3aa4d81f 53f9a3e1

+2646 -2
+4
builtin/am.c
··· 1243 1243 fclose(mi.input); 1244 1244 fclose(mi.output); 1245 1245 1246 + if (mi.format_flowed) 1247 + warning(_("Patch sent with format=flowed; " 1248 + "space at the end of lines might be lost.")); 1249 + 1246 1250 /* Extract message and author information */ 1247 1251 fp = xfopen(am_path(state, "info"), "r"); 1248 1252 while (!strbuf_getline_lf(&sb, fp)) {
+62 -2
mailinfo.c
··· 237 237 return 1; 238 238 } 239 239 240 + static int has_attr_value(const char *line, const char *name, const char *value) 241 + { 242 + struct strbuf sb = STRBUF_INIT; 243 + int rc = slurp_attr(line, name, &sb) && !strcasecmp(sb.buf, value); 244 + strbuf_release(&sb); 245 + return rc; 246 + } 247 + 240 248 static void handle_content_type(struct mailinfo *mi, struct strbuf *line) 241 249 { 242 250 struct strbuf *boundary = xmalloc(sizeof(struct strbuf)); 243 251 strbuf_init(boundary, line->len); 252 + 253 + mi->format_flowed = has_attr_value(line->buf, "format=", "flowed"); 254 + mi->delsp = has_attr_value(line->buf, "delsp=", "yes"); 244 255 245 256 if (slurp_attr(line->buf, "boundary=", boundary)) { 246 257 strbuf_insert(boundary, 0, "--", 2); ··· 964 975 return 1; 965 976 } 966 977 978 + static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line, 979 + struct strbuf *prev) 980 + { 981 + size_t len = line->len; 982 + const char *rest; 983 + 984 + if (!mi->format_flowed) { 985 + handle_filter(mi, line); 986 + return; 987 + } 988 + 989 + if (line->buf[len - 1] == '\n') { 990 + len--; 991 + if (len && line->buf[len - 1] == '\r') 992 + len--; 993 + } 994 + 995 + /* Keep signature separator as-is. */ 996 + if (skip_prefix(line->buf, "-- ", &rest) && rest - line->buf == len) { 997 + if (prev->len) { 998 + handle_filter(mi, prev); 999 + strbuf_reset(prev); 1000 + } 1001 + handle_filter(mi, line); 1002 + return; 1003 + } 1004 + 1005 + /* Unstuff space-stuffed line. */ 1006 + if (len && line->buf[0] == ' ') { 1007 + strbuf_remove(line, 0, 1); 1008 + len--; 1009 + } 1010 + 1011 + /* Save flowed line for later, but without the soft line break. 
*/ 1012 + if (len && line->buf[len - 1] == ' ') { 1013 + strbuf_add(prev, line->buf, len - !!mi->delsp); 1014 + return; 1015 + } 1016 + 1017 + /* Prepend any previous partial lines */ 1018 + strbuf_insert(line, 0, prev->buf, prev->len); 1019 + strbuf_reset(prev); 1020 + 1021 + handle_filter(mi, line); 1022 + } 1023 + 967 1024 static void handle_body(struct mailinfo *mi, struct strbuf *line) 968 1025 { 969 1026 struct strbuf prev = STRBUF_INIT; ··· 1012 1069 strbuf_addbuf(&prev, sb); 1013 1070 break; 1014 1071 } 1015 - handle_filter(mi, sb); 1072 + handle_filter_flowed(mi, sb, &prev); 1016 1073 } 1017 1074 /* 1018 1075 * The partial chunk is saved in "prev" and will be ··· 1022 1079 break; 1023 1080 } 1024 1081 default: 1025 - handle_filter(mi, line); 1082 + handle_filter_flowed(mi, line, &prev); 1026 1083 } 1027 1084 1028 1085 if (mi->input_error) 1029 1086 break; 1030 1087 } while (!strbuf_getwholeline(line, mi->input, '\n')); 1088 + 1089 + if (prev.len) 1090 + handle_filter(mi, &prev); 1031 1091 1032 1092 flush_inbody_header_accum(mi); 1033 1093
+2
mailinfo.h
··· 20 20 struct strbuf *content[MAX_BOUNDARIES]; 21 21 struct strbuf **content_top; 22 22 struct strbuf charset; 23 + unsigned int format_flowed:1; 24 + unsigned int delsp:1; 23 25 char *message_id; 24 26 enum { 25 27 TE_DONTCARE, TE_QP, TE_BASE64
+19
t/t4256-am-format-flowed.sh
#!/bin/sh
#
# Apply a mailed patch that was sent with format=flowed and check that
# "git am" both warns about it and produces the expected file.

test_description='test format=flowed support of git am'

. ./test-lib.sh

# Directory holding the mail and the file contents before/after the patch.
fixture_dir="$TEST_DIRECTORY/t4256/1"

# Commit the pre-patch contents so that the mailed patch can apply.
test_expect_success 'setup' '
	cp "$fixture_dir/mailinfo.c.orig" mailinfo.c &&
	git add mailinfo.c &&
	git commit -m initial
'

# The warning must show up on stderr and the result must match the
# expected post-patch contents.
test_expect_success 'am with format=flowed' '
	git am <"$fixture_dir/patch" >stdout 2>stderr &&
	test_i18ngrep "warning: Patch sent with format=flowed" stderr &&
	test_cmp "$fixture_dir/mailinfo.c" mailinfo.c
'

test_done
+1245
t/t4256/1/mailinfo.c
··· 1 + #include "cache.h" 2 + #include "config.h" 3 + #include "utf8.h" 4 + #include "strbuf.h" 5 + #include "mailinfo.h" 6 + 7 + static void cleanup_space(struct strbuf *sb) 8 + { 9 + size_t pos, cnt; 10 + for (pos = 0; pos < sb->len; pos++) { 11 + if (isspace(sb->buf[pos])) { 12 + sb->buf[pos] = ' '; 13 + for (cnt = 0; isspace(sb->buf[pos + cnt + 1]); cnt++); 14 + strbuf_remove(sb, pos + 1, cnt); 15 + } 16 + } 17 + } 18 + 19 + static void get_sane_name(struct strbuf *out, struct strbuf *name, struct strbuf *email) 20 + { 21 + struct strbuf *src = name; 22 + if (name->len < 3 || 60 < name->len || strchr(name->buf, '@') || 23 + strchr(name->buf, '<') || strchr(name->buf, '>')) 24 + src = email; 25 + else if (name == out) 26 + return; 27 + strbuf_reset(out); 28 + strbuf_addbuf(out, src); 29 + } 30 + 31 + static void parse_bogus_from(struct mailinfo *mi, const struct strbuf *line) 32 + { 33 + /* John Doe <johndoe> */ 34 + 35 + char *bra, *ket; 36 + /* This is fallback, so do not bother if we already have an 37 + * e-mail address. 
38 + */ 39 + if (mi->email.len) 40 + return; 41 + 42 + bra = strchr(line->buf, '<'); 43 + if (!bra) 44 + return; 45 + ket = strchr(bra, '>'); 46 + if (!ket) 47 + return; 48 + 49 + strbuf_reset(&mi->email); 50 + strbuf_add(&mi->email, bra + 1, ket - bra - 1); 51 + 52 + strbuf_reset(&mi->name); 53 + strbuf_add(&mi->name, line->buf, bra - line->buf); 54 + strbuf_trim(&mi->name); 55 + get_sane_name(&mi->name, &mi->name, &mi->email); 56 + } 57 + 58 + static const char *unquote_comment(struct strbuf *outbuf, const char *in) 59 + { 60 + int c; 61 + int take_next_literally = 0; 62 + 63 + strbuf_addch(outbuf, '('); 64 + 65 + while ((c = *in++) != 0) { 66 + if (take_next_literally == 1) { 67 + take_next_literally = 0; 68 + } else { 69 + switch (c) { 70 + case '\\': 71 + take_next_literally = 1; 72 + continue; 73 + case '(': 74 + in = unquote_comment(outbuf, in); 75 + continue; 76 + case ')': 77 + strbuf_addch(outbuf, ')'); 78 + return in; 79 + } 80 + } 81 + 82 + strbuf_addch(outbuf, c); 83 + } 84 + 85 + return in; 86 + } 87 + 88 + static const char *unquote_quoted_string(struct strbuf *outbuf, const char *in) 89 + { 90 + int c; 91 + int take_next_literally = 0; 92 + 93 + while ((c = *in++) != 0) { 94 + if (take_next_literally == 1) { 95 + take_next_literally = 0; 96 + } else { 97 + switch (c) { 98 + case '\\': 99 + take_next_literally = 1; 100 + continue; 101 + case '"': 102 + return in; 103 + } 104 + } 105 + 106 + strbuf_addch(outbuf, c); 107 + } 108 + 109 + return in; 110 + } 111 + 112 + static void unquote_quoted_pair(struct strbuf *line) 113 + { 114 + struct strbuf outbuf; 115 + const char *in = line->buf; 116 + int c; 117 + 118 + strbuf_init(&outbuf, line->len); 119 + 120 + while ((c = *in++) != 0) { 121 + switch (c) { 122 + case '"': 123 + in = unquote_quoted_string(&outbuf, in); 124 + continue; 125 + case '(': 126 + in = unquote_comment(&outbuf, in); 127 + continue; 128 + } 129 + 130 + strbuf_addch(&outbuf, c); 131 + } 132 + 133 + strbuf_swap(&outbuf, line); 134 + 
strbuf_release(&outbuf); 135 + 136 + } 137 + 138 + static void handle_from(struct mailinfo *mi, const struct strbuf *from) 139 + { 140 + char *at; 141 + size_t el; 142 + struct strbuf f; 143 + 144 + strbuf_init(&f, from->len); 145 + strbuf_addbuf(&f, from); 146 + 147 + unquote_quoted_pair(&f); 148 + 149 + at = strchr(f.buf, '@'); 150 + if (!at) { 151 + parse_bogus_from(mi, from); 152 + goto out; 153 + } 154 + 155 + /* 156 + * If we already have one email, don't take any confusing lines 157 + */ 158 + if (mi->email.len && strchr(at + 1, '@')) 159 + goto out; 160 + 161 + /* Pick up the string around '@', possibly delimited with <> 162 + * pair; that is the email part. 163 + */ 164 + while (at > f.buf) { 165 + char c = at[-1]; 166 + if (isspace(c)) 167 + break; 168 + if (c == '<') { 169 + at[-1] = ' '; 170 + break; 171 + } 172 + at--; 173 + } 174 + el = strcspn(at, " \n\t\r\v\f>"); 175 + strbuf_reset(&mi->email); 176 + strbuf_add(&mi->email, at, el); 177 + strbuf_remove(&f, at - f.buf, el + (at[el] ? 1 : 0)); 178 + 179 + /* The remainder is name. It could be 180 + * 181 + * - "John Doe <john.doe@xz>" (a), or 182 + * - "john.doe@xz (John Doe)" (b), or 183 + * - "John (zzz) Doe <john.doe@xz> (Comment)" (c) 184 + * 185 + * but we have removed the email part, so 186 + * 187 + * - remove extra spaces which could stay after email (case 'c'), and 188 + * - trim from both ends, possibly removing the () pair at the end 189 + * (cases 'a' and 'b'). 
190 + */ 191 + cleanup_space(&f); 192 + strbuf_trim(&f); 193 + if (f.buf[0] == '(' && f.len && f.buf[f.len - 1] == ')') { 194 + strbuf_remove(&f, 0, 1); 195 + strbuf_setlen(&f, f.len - 1); 196 + } 197 + 198 + get_sane_name(&mi->name, &f, &mi->email); 199 + out: 200 + strbuf_release(&f); 201 + } 202 + 203 + static void handle_header(struct strbuf **out, const struct strbuf *line) 204 + { 205 + if (!*out) { 206 + *out = xmalloc(sizeof(struct strbuf)); 207 + strbuf_init(*out, line->len); 208 + } else 209 + strbuf_reset(*out); 210 + 211 + strbuf_addbuf(*out, line); 212 + } 213 + 214 + /* NOTE NOTE NOTE. We do not claim we do full MIME. We just attempt 215 + * to have enough heuristics to grok MIME encoded patches often found 216 + * on our mailing lists. For example, we do not even treat header lines 217 + * case insensitively. 218 + */ 219 + 220 + static int slurp_attr(const char *line, const char *name, struct strbuf *attr) 221 + { 222 + const char *ends, *ap = strcasestr(line, name); 223 + size_t sz; 224 + 225 + strbuf_setlen(attr, 0); 226 + if (!ap) 227 + return 0; 228 + ap += strlen(name); 229 + if (*ap == '"') { 230 + ap++; 231 + ends = "\""; 232 + } 233 + else 234 + ends = "; \t"; 235 + sz = strcspn(ap, ends); 236 + strbuf_add(attr, ap, sz); 237 + return 1; 238 + } 239 + 240 + static int has_attr_value(const char *line, const char *name, const char *value) 241 + { 242 + struct strbuf sb = STRBUF_INIT; 243 + int rc = slurp_attr(line, name, &sb) && !strcasecmp(sb.buf, value); 244 + strbuf_release(&sb); 245 + return rc; 246 + } 247 + 248 + static void handle_content_type(struct mailinfo *mi, struct strbuf *line) 249 + { 250 + struct strbuf *boundary = xmalloc(sizeof(struct strbuf)); 251 + strbuf_init(boundary, line->len); 252 + 253 + mi->format_flowed = has_attr_value(line->buf, "format=", "flowed"); 254 + mi->delsp = has_attr_value(line->buf, "delsp=", "yes"); 255 + 256 + if (slurp_attr(line->buf, "boundary=", boundary)) { 257 + strbuf_insert(boundary, 0, "--", 
2); 258 + if (++mi->content_top >= &mi->content[MAX_BOUNDARIES]) { 259 + error("Too many boundaries to handle"); 260 + mi->input_error = -1; 261 + mi->content_top = &mi->content[MAX_BOUNDARIES] - 1; 262 + return; 263 + } 264 + *(mi->content_top) = boundary; 265 + boundary = NULL; 266 + } 267 + slurp_attr(line->buf, "charset=", &mi->charset); 268 + 269 + if (boundary) { 270 + strbuf_release(boundary); 271 + free(boundary); 272 + } 273 + } 274 + 275 + static void handle_content_transfer_encoding(struct mailinfo *mi, 276 + const struct strbuf *line) 277 + { 278 + if (strcasestr(line->buf, "base64")) 279 + mi->transfer_encoding = TE_BASE64; 280 + else if (strcasestr(line->buf, "quoted-printable")) 281 + mi->transfer_encoding = TE_QP; 282 + else 283 + mi->transfer_encoding = TE_DONTCARE; 284 + } 285 + 286 + static int is_multipart_boundary(struct mailinfo *mi, const struct strbuf *line) 287 + { 288 + struct strbuf *content_top = *(mi->content_top); 289 + 290 + return ((content_top->len <= line->len) && 291 + !memcmp(line->buf, content_top->buf, content_top->len)); 292 + } 293 + 294 + static void cleanup_subject(struct mailinfo *mi, struct strbuf *subject) 295 + { 296 + size_t at = 0; 297 + 298 + while (at < subject->len) { 299 + char *pos; 300 + size_t remove; 301 + 302 + switch (subject->buf[at]) { 303 + case 'r': case 'R': 304 + if (subject->len <= at + 3) 305 + break; 306 + if ((subject->buf[at + 1] == 'e' || 307 + subject->buf[at + 1] == 'E') && 308 + subject->buf[at + 2] == ':') { 309 + strbuf_remove(subject, at, 3); 310 + continue; 311 + } 312 + at++; 313 + break; 314 + case ' ': case '\t': case ':': 315 + strbuf_remove(subject, at, 1); 316 + continue; 317 + case '[': 318 + pos = strchr(subject->buf + at, ']'); 319 + if (!pos) 320 + break; 321 + remove = pos - subject->buf + at + 1; 322 + if (!mi->keep_non_patch_brackets_in_subject || 323 + (7 <= remove && 324 + memmem(subject->buf + at, remove, "PATCH", 5))) 325 + strbuf_remove(subject, at, remove); 326 + else { 
327 + at += remove; 328 + /* 329 + * If the input had a space after the ], keep 330 + * it. We don't bother with finding the end of 331 + * the space, since we later normalize it 332 + * anyway. 333 + */ 334 + if (isspace(subject->buf[at])) 335 + at += 1; 336 + } 337 + continue; 338 + } 339 + break; 340 + } 341 + strbuf_trim(subject); 342 + } 343 + 344 + #define MAX_HDR_PARSED 10 345 + static const char *header[MAX_HDR_PARSED] = { 346 + "From","Subject","Date", 347 + }; 348 + 349 + static inline int cmp_header(const struct strbuf *line, const char *hdr) 350 + { 351 + int len = strlen(hdr); 352 + return !strncasecmp(line->buf, hdr, len) && line->len > len && 353 + line->buf[len] == ':' && isspace(line->buf[len + 1]); 354 + } 355 + 356 + static int is_format_patch_separator(const char *line, int len) 357 + { 358 + static const char SAMPLE[] = 359 + "From e6807f3efca28b30decfecb1732a56c7db1137ee Mon Sep 17 00:00:00 2001\n"; 360 + const char *cp; 361 + 362 + if (len != strlen(SAMPLE)) 363 + return 0; 364 + if (!skip_prefix(line, "From ", &cp)) 365 + return 0; 366 + if (strspn(cp, "0123456789abcdef") != 40) 367 + return 0; 368 + cp += 40; 369 + return !memcmp(SAMPLE + (cp - line), cp, strlen(SAMPLE) - (cp - line)); 370 + } 371 + 372 + static struct strbuf *decode_q_segment(const struct strbuf *q_seg, int rfc2047) 373 + { 374 + const char *in = q_seg->buf; 375 + int c; 376 + struct strbuf *out = xmalloc(sizeof(struct strbuf)); 377 + strbuf_init(out, q_seg->len); 378 + 379 + while ((c = *in++) != 0) { 380 + if (c == '=') { 381 + int ch, d = *in; 382 + if (d == '\n' || !d) 383 + break; /* drop trailing newline */ 384 + ch = hex2chr(in); 385 + if (ch >= 0) { 386 + strbuf_addch(out, ch); 387 + in += 2; 388 + continue; 389 + } 390 + /* garbage -- fall through */ 391 + } 392 + if (rfc2047 && c == '_') /* rfc2047 4.2 (2) */ 393 + c = 0x20; 394 + strbuf_addch(out, c); 395 + } 396 + return out; 397 + } 398 + 399 + static struct strbuf *decode_b_segment(const struct strbuf *b_seg) 
400 + { 401 + /* Decode in..ep, possibly in-place to ot */ 402 + int c, pos = 0, acc = 0; 403 + const char *in = b_seg->buf; 404 + struct strbuf *out = xmalloc(sizeof(struct strbuf)); 405 + strbuf_init(out, b_seg->len); 406 + 407 + while ((c = *in++) != 0) { 408 + if (c == '+') 409 + c = 62; 410 + else if (c == '/') 411 + c = 63; 412 + else if ('A' <= c && c <= 'Z') 413 + c -= 'A'; 414 + else if ('a' <= c && c <= 'z') 415 + c -= 'a' - 26; 416 + else if ('0' <= c && c <= '9') 417 + c -= '0' - 52; 418 + else 419 + continue; /* garbage */ 420 + switch (pos++) { 421 + case 0: 422 + acc = (c << 2); 423 + break; 424 + case 1: 425 + strbuf_addch(out, (acc | (c >> 4))); 426 + acc = (c & 15) << 4; 427 + break; 428 + case 2: 429 + strbuf_addch(out, (acc | (c >> 2))); 430 + acc = (c & 3) << 6; 431 + break; 432 + case 3: 433 + strbuf_addch(out, (acc | c)); 434 + acc = pos = 0; 435 + break; 436 + } 437 + } 438 + return out; 439 + } 440 + 441 + static int convert_to_utf8(struct mailinfo *mi, 442 + struct strbuf *line, const char *charset) 443 + { 444 + char *out; 445 + 446 + if (!mi->metainfo_charset || !charset || !*charset) 447 + return 0; 448 + 449 + if (same_encoding(mi->metainfo_charset, charset)) 450 + return 0; 451 + out = reencode_string(line->buf, mi->metainfo_charset, charset); 452 + if (!out) { 453 + mi->input_error = -1; 454 + return error("cannot convert from %s to %s", 455 + charset, mi->metainfo_charset); 456 + } 457 + strbuf_attach(line, out, strlen(out), strlen(out)); 458 + return 0; 459 + } 460 + 461 + static void decode_header(struct mailinfo *mi, struct strbuf *it) 462 + { 463 + char *in, *ep, *cp; 464 + struct strbuf outbuf = STRBUF_INIT, *dec; 465 + struct strbuf charset_q = STRBUF_INIT, piecebuf = STRBUF_INIT; 466 + int found_error = 1; /* pessimism */ 467 + 468 + in = it->buf; 469 + while (in - it->buf <= it->len && (ep = strstr(in, "=?")) != NULL) { 470 + int encoding; 471 + strbuf_reset(&charset_q); 472 + strbuf_reset(&piecebuf); 473 + 474 + if (in != 
ep) { 475 + /* 476 + * We are about to process an encoded-word 477 + * that begins at ep, but there is something 478 + * before the encoded word. 479 + */ 480 + char *scan; 481 + for (scan = in; scan < ep; scan++) 482 + if (!isspace(*scan)) 483 + break; 484 + 485 + if (scan != ep || in == it->buf) { 486 + /* 487 + * We should not lose that "something", 488 + * unless we have just processed an 489 + * encoded-word, and there is only LWS 490 + * before the one we are about to process. 491 + */ 492 + strbuf_add(&outbuf, in, ep - in); 493 + } 494 + } 495 + /* E.g. 496 + * ep : "=?iso-2022-jp?B?GyR...?= foo" 497 + * ep : "=?ISO-8859-1?Q?Foo=FCbar?= baz" 498 + */ 499 + ep += 2; 500 + 501 + if (ep - it->buf >= it->len || !(cp = strchr(ep, '?'))) 502 + goto release_return; 503 + 504 + if (cp + 3 - it->buf > it->len) 505 + goto release_return; 506 + strbuf_add(&charset_q, ep, cp - ep); 507 + 508 + encoding = cp[1]; 509 + if (!encoding || cp[2] != '?') 510 + goto release_return; 511 + ep = strstr(cp + 3, "?="); 512 + if (!ep) 513 + goto release_return; 514 + strbuf_add(&piecebuf, cp + 3, ep - cp - 3); 515 + switch (tolower(encoding)) { 516 + default: 517 + goto release_return; 518 + case 'b': 519 + dec = decode_b_segment(&piecebuf); 520 + break; 521 + case 'q': 522 + dec = decode_q_segment(&piecebuf, 1); 523 + break; 524 + } 525 + if (convert_to_utf8(mi, dec, charset_q.buf)) 526 + goto release_return; 527 + 528 + strbuf_addbuf(&outbuf, dec); 529 + strbuf_release(dec); 530 + free(dec); 531 + in = ep + 2; 532 + } 533 + strbuf_addstr(&outbuf, in); 534 + strbuf_reset(it); 535 + strbuf_addbuf(it, &outbuf); 536 + found_error = 0; 537 + release_return: 538 + strbuf_release(&outbuf); 539 + strbuf_release(&charset_q); 540 + strbuf_release(&piecebuf); 541 + 542 + if (found_error) 543 + mi->input_error = -1; 544 + } 545 + 546 + static int check_header(struct mailinfo *mi, 547 + const struct strbuf *line, 548 + struct strbuf *hdr_data[], int overwrite) 549 + { 550 + int i, ret = 0, len; 
551 + struct strbuf sb = STRBUF_INIT; 552 + 553 + /* search for the interesting parts */ 554 + for (i = 0; header[i]; i++) { 555 + int len = strlen(header[i]); 556 + if ((!hdr_data[i] || overwrite) && cmp_header(line, header[i])) { 557 + /* Unwrap inline B and Q encoding, and optionally 558 + * normalize the meta information to utf8. 559 + */ 560 + strbuf_add(&sb, line->buf + len + 2, line->len - len - 2); 561 + decode_header(mi, &sb); 562 + handle_header(&hdr_data[i], &sb); 563 + ret = 1; 564 + goto check_header_out; 565 + } 566 + } 567 + 568 + /* Content stuff */ 569 + if (cmp_header(line, "Content-Type")) { 570 + len = strlen("Content-Type: "); 571 + strbuf_add(&sb, line->buf + len, line->len - len); 572 + decode_header(mi, &sb); 573 + strbuf_insert(&sb, 0, "Content-Type: ", len); 574 + handle_content_type(mi, &sb); 575 + ret = 1; 576 + goto check_header_out; 577 + } 578 + if (cmp_header(line, "Content-Transfer-Encoding")) { 579 + len = strlen("Content-Transfer-Encoding: "); 580 + strbuf_add(&sb, line->buf + len, line->len - len); 581 + decode_header(mi, &sb); 582 + handle_content_transfer_encoding(mi, &sb); 583 + ret = 1; 584 + goto check_header_out; 585 + } 586 + if (cmp_header(line, "Message-Id")) { 587 + len = strlen("Message-Id: "); 588 + strbuf_add(&sb, line->buf + len, line->len - len); 589 + decode_header(mi, &sb); 590 + if (mi->add_message_id) 591 + mi->message_id = strbuf_detach(&sb, NULL); 592 + ret = 1; 593 + goto check_header_out; 594 + } 595 + 596 + check_header_out: 597 + strbuf_release(&sb); 598 + return ret; 599 + } 600 + 601 + /* 602 + * Returns 1 if the given line or any line beginning with the given line is an 603 + * in-body header (that is, check_header will succeed when passed 604 + * mi->s_hdr_data). 
605 + */ 606 + static int is_inbody_header(const struct mailinfo *mi, 607 + const struct strbuf *line) 608 + { 609 + int i; 610 + for (i = 0; header[i]; i++) 611 + if (!mi->s_hdr_data[i] && cmp_header(line, header[i])) 612 + return 1; 613 + return 0; 614 + } 615 + 616 + static void decode_transfer_encoding(struct mailinfo *mi, struct strbuf *line) 617 + { 618 + struct strbuf *ret; 619 + 620 + switch (mi->transfer_encoding) { 621 + case TE_QP: 622 + ret = decode_q_segment(line, 0); 623 + break; 624 + case TE_BASE64: 625 + ret = decode_b_segment(line); 626 + break; 627 + case TE_DONTCARE: 628 + default: 629 + return; 630 + } 631 + strbuf_reset(line); 632 + strbuf_addbuf(line, ret); 633 + strbuf_release(ret); 634 + free(ret); 635 + } 636 + 637 + static inline int patchbreak(const struct strbuf *line) 638 + { 639 + size_t i; 640 + 641 + /* Beginning of a "diff -" header? */ 642 + if (starts_with(line->buf, "diff -")) 643 + return 1; 644 + 645 + /* CVS "Index: " line? */ 646 + if (starts_with(line->buf, "Index: ")) 647 + return 1; 648 + 649 + /* 650 + * "--- <filename>" starts patches without headers 651 + * "---<sp>*" is a manual separator 652 + */ 653 + if (line->len < 4) 654 + return 0; 655 + 656 + if (starts_with(line->buf, "---")) { 657 + /* space followed by a filename? */ 658 + if (line->buf[3] == ' ' && !isspace(line->buf[4])) 659 + return 1; 660 + /* Just whitespace? 
*/ 661 + for (i = 3; i < line->len; i++) { 662 + unsigned char c = line->buf[i]; 663 + if (c == '\n') 664 + return 1; 665 + if (!isspace(c)) 666 + break; 667 + } 668 + return 0; 669 + } 670 + return 0; 671 + } 672 + 673 + static int is_scissors_line(const char *line) 674 + { 675 + const char *c; 676 + int scissors = 0, gap = 0; 677 + const char *first_nonblank = NULL, *last_nonblank = NULL; 678 + int visible, perforation = 0, in_perforation = 0; 679 + 680 + for (c = line; *c; c++) { 681 + if (isspace(*c)) { 682 + if (in_perforation) { 683 + perforation++; 684 + gap++; 685 + } 686 + continue; 687 + } 688 + last_nonblank = c; 689 + if (first_nonblank == NULL) 690 + first_nonblank = c; 691 + if (*c == '-') { 692 + in_perforation = 1; 693 + perforation++; 694 + continue; 695 + } 696 + if ((!memcmp(c, ">8", 2) || !memcmp(c, "8<", 2) || 697 + !memcmp(c, ">%", 2) || !memcmp(c, "%<", 2))) { 698 + in_perforation = 1; 699 + perforation += 2; 700 + scissors += 2; 701 + c++; 702 + continue; 703 + } 704 + in_perforation = 0; 705 + } 706 + 707 + /* 708 + * The mark must be at least 8 bytes long (e.g. "-- >8 --"). 709 + * Even though there can be arbitrary cruft on the same line 710 + * (e.g. "cut here"), in order to avoid misidentification, the 711 + * perforation must occupy more than a third of the visible 712 + * width of the line, and dashes and scissors must occupy more 713 + * than half of the perforation. 
714 + */ 715 + 716 + if (first_nonblank && last_nonblank) 717 + visible = last_nonblank - first_nonblank + 1; 718 + else 719 + visible = 0; 720 + return (scissors && 8 <= visible && 721 + visible < perforation * 3 && 722 + gap * 2 < perforation); 723 + } 724 + 725 + static void flush_inbody_header_accum(struct mailinfo *mi) 726 + { 727 + if (!mi->inbody_header_accum.len) 728 + return; 729 + if (!check_header(mi, &mi->inbody_header_accum, mi->s_hdr_data, 0)) 730 + BUG("inbody_header_accum, if not empty, must always contain a valid in-body header"); 731 + strbuf_reset(&mi->inbody_header_accum); 732 + } 733 + 734 + static int check_inbody_header(struct mailinfo *mi, const struct strbuf *line) 735 + { 736 + if (mi->inbody_header_accum.len && 737 + (line->buf[0] == ' ' || line->buf[0] == '\t')) { 738 + if (mi->use_scissors && is_scissors_line(line->buf)) { 739 + /* 740 + * This is a scissors line; do not consider this line 741 + * as a header continuation line. 742 + */ 743 + flush_inbody_header_accum(mi); 744 + return 0; 745 + } 746 + strbuf_strip_suffix(&mi->inbody_header_accum, "\n"); 747 + strbuf_addbuf(&mi->inbody_header_accum, line); 748 + return 1; 749 + } 750 + 751 + flush_inbody_header_accum(mi); 752 + 753 + if (starts_with(line->buf, ">From") && isspace(line->buf[5])) 754 + return is_format_patch_separator(line->buf + 1, line->len - 1); 755 + if (starts_with(line->buf, "[PATCH]") && isspace(line->buf[7])) { 756 + int i; 757 + for (i = 0; header[i]; i++) 758 + if (!strcmp("Subject", header[i])) { 759 + handle_header(&mi->s_hdr_data[i], line); 760 + return 1; 761 + } 762 + return 0; 763 + } 764 + if (is_inbody_header(mi, line)) { 765 + strbuf_addbuf(&mi->inbody_header_accum, line); 766 + return 1; 767 + } 768 + return 0; 769 + } 770 + 771 + static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line) 772 + { 773 + assert(!mi->filter_stage); 774 + 775 + if (mi->header_stage) { 776 + if (!line->len || (line->len == 1 && line->buf[0] == '\n')) { 777 + if 
(mi->inbody_header_accum.len) { 778 + flush_inbody_header_accum(mi); 779 + mi->header_stage = 0; 780 + } 781 + return 0; 782 + } 783 + } 784 + 785 + if (mi->use_inbody_headers && mi->header_stage) { 786 + mi->header_stage = check_inbody_header(mi, line); 787 + if (mi->header_stage) 788 + return 0; 789 + } else 790 + /* Only trim the first (blank) line of the commit message 791 + * when ignoring in-body headers. 792 + */ 793 + mi->header_stage = 0; 794 + 795 + /* normalize the log message to UTF-8. */ 796 + if (convert_to_utf8(mi, line, mi->charset.buf)) 797 + return 0; /* mi->input_error already set */ 798 + 799 + if (mi->use_scissors && is_scissors_line(line->buf)) { 800 + int i; 801 + 802 + strbuf_setlen(&mi->log_message, 0); 803 + mi->header_stage = 1; 804 + 805 + /* 806 + * We may have already read "secondary headers"; purge 807 + * them to give ourselves a clean restart. 808 + */ 809 + for (i = 0; header[i]; i++) { 810 + if (mi->s_hdr_data[i]) 811 + strbuf_release(mi->s_hdr_data[i]); 812 + mi->s_hdr_data[i] = NULL; 813 + } 814 + return 0; 815 + } 816 + 817 + if (patchbreak(line)) { 818 + if (mi->message_id) 819 + strbuf_addf(&mi->log_message, 820 + "Message-Id: %s\n", mi->message_id); 821 + return 1; 822 + } 823 + 824 + strbuf_addbuf(&mi->log_message, line); 825 + return 0; 826 + } 827 + 828 + static void handle_patch(struct mailinfo *mi, const struct strbuf *line) 829 + { 830 + fwrite(line->buf, 1, line->len, mi->patchfile); 831 + mi->patch_lines++; 832 + } 833 + 834 + static void handle_filter(struct mailinfo *mi, struct strbuf *line) 835 + { 836 + switch (mi->filter_stage) { 837 + case 0: 838 + if (!handle_commit_msg(mi, line)) 839 + break; 840 + mi->filter_stage++; 841 + /* fallthrough */ 842 + case 1: 843 + handle_patch(mi, line); 844 + break; 845 + } 846 + } 847 + 848 + static int is_rfc2822_header(const struct strbuf *line) 849 + { 850 + /* 851 + * The section that defines the loosest possible 852 + * field name is "3.6.8 Optional fields". 
853 + * 854 + * optional-field = field-name ":" unstructured CRLF 855 + * field-name = 1*ftext 856 + * ftext = %d33-57 / %59-126 857 + */ 858 + int ch; 859 + char *cp = line->buf; 860 + 861 + /* Count mbox From headers as headers */ 862 + if (starts_with(cp, "From ") || starts_with(cp, ">From ")) 863 + return 1; 864 + 865 + while ((ch = *cp++)) { 866 + if (ch == ':') 867 + return 1; 868 + if ((33 <= ch && ch <= 57) || 869 + (59 <= ch && ch <= 126)) 870 + continue; 871 + break; 872 + } 873 + return 0; 874 + } 875 + 876 + static int read_one_header_line(struct strbuf *line, FILE *in) 877 + { 878 + struct strbuf continuation = STRBUF_INIT; 879 + 880 + /* Get the first part of the line. */ 881 + if (strbuf_getline_lf(line, in)) 882 + return 0; 883 + 884 + /* 885 + * Is it an empty line or not a valid rfc2822 header? 886 + * If so, stop here, and return false ("not a header") 887 + */ 888 + strbuf_rtrim(line); 889 + if (!line->len || !is_rfc2822_header(line)) { 890 + /* Re-add the newline */ 891 + strbuf_addch(line, '\n'); 892 + return 0; 893 + } 894 + 895 + /* 896 + * Now we need to eat all the continuation lines.. 
897 + * Yuck, 2822 header "folding" 898 + */ 899 + for (;;) { 900 + int peek; 901 + 902 + peek = fgetc(in); 903 + if (peek == EOF) 904 + break; 905 + ungetc(peek, in); 906 + if (peek != ' ' && peek != '\t') 907 + break; 908 + if (strbuf_getline_lf(&continuation, in)) 909 + break; 910 + continuation.buf[0] = ' '; 911 + strbuf_rtrim(&continuation); 912 + strbuf_addbuf(line, &continuation); 913 + } 914 + strbuf_release(&continuation); 915 + 916 + return 1; 917 + } 918 + 919 + static int find_boundary(struct mailinfo *mi, struct strbuf *line) 920 + { 921 + while (!strbuf_getline_lf(line, mi->input)) { 922 + if (*(mi->content_top) && is_multipart_boundary(mi, line)) 923 + return 1; 924 + } 925 + return 0; 926 + } 927 + 928 + static int handle_boundary(struct mailinfo *mi, struct strbuf *line) 929 + { 930 + struct strbuf newline = STRBUF_INIT; 931 + 932 + strbuf_addch(&newline, '\n'); 933 + again: 934 + if (line->len >= (*(mi->content_top))->len + 2 && 935 + !memcmp(line->buf + (*(mi->content_top))->len, "--", 2)) { 936 + /* we hit an end boundary */ 937 + /* pop the current boundary off the stack */ 938 + strbuf_release(*(mi->content_top)); 939 + FREE_AND_NULL(*(mi->content_top)); 940 + 941 + /* technically won't happen as is_multipart_boundary() 942 + will fail first. But just in case.. 
943 + */ 944 + if (--mi->content_top < mi->content) { 945 + error("Detected mismatched boundaries, can't recover"); 946 + mi->input_error = -1; 947 + mi->content_top = mi->content; 948 + strbuf_release(&newline); 949 + return 0; 950 + } 951 + handle_filter(mi, &newline); 952 + strbuf_release(&newline); 953 + if (mi->input_error) 954 + return 0; 955 + 956 + /* skip to the next boundary */ 957 + if (!find_boundary(mi, line)) 958 + return 0; 959 + goto again; 960 + } 961 + 962 + /* set some defaults */ 963 + mi->transfer_encoding = TE_DONTCARE; 964 + strbuf_reset(&mi->charset); 965 + 966 + /* slurp in this section's info */ 967 + while (read_one_header_line(line, mi->input)) 968 + check_header(mi, line, mi->p_hdr_data, 0); 969 + 970 + strbuf_release(&newline); 971 + /* replenish line */ 972 + if (strbuf_getline_lf(line, mi->input)) 973 + return 0; 974 + strbuf_addch(line, '\n'); 975 + return 1; 976 + } 977 + 978 + static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line, 979 + struct strbuf *prev) 980 + { 981 + size_t len = line->len; 982 + const char *rest; 983 + 984 + if (!mi->format_flowed) { 985 + handle_filter(mi, line); 986 + return; 987 + } 988 + 989 + if (line->buf[len - 1] == '\n') { 990 + len--; 991 + if (len && line->buf[len - 1] == '\r') 992 + len--; 993 + } 994 + 995 + /* Keep signature separator as-is. */ 996 + if (skip_prefix(line->buf, "-- ", &rest) && rest - line->buf == len) { 997 + if (prev->len) { 998 + handle_filter(mi, prev); 999 + strbuf_reset(prev); 1000 + } 1001 + handle_filter(mi, line); 1002 + return; 1003 + } 1004 + 1005 + /* Unstuff space-stuffed line. */ 1006 + if (len && line->buf[0] == ' ') { 1007 + strbuf_remove(line, 0, 1); 1008 + len--; 1009 + } 1010 + 1011 + /* Save flowed line for later, but without the soft line break. 
*/ 1012 + if (len && line->buf[len - 1] == ' ') { 1013 + strbuf_add(prev, line->buf, len - !!mi->delsp); 1014 + return; 1015 + } 1016 + 1017 + /* Prepend any previous partial lines */ 1018 + strbuf_insert(line, 0, prev->buf, prev->len); 1019 + strbuf_reset(prev); 1020 + 1021 + handle_filter(mi, line); 1022 + } 1023 + 1024 + static void handle_body(struct mailinfo *mi, struct strbuf *line) 1025 + { 1026 + struct strbuf prev = STRBUF_INIT; 1027 + 1028 + /* Skip up to the first boundary */ 1029 + if (*(mi->content_top)) { 1030 + if (!find_boundary(mi, line)) 1031 + goto handle_body_out; 1032 + } 1033 + 1034 + do { 1035 + /* process any boundary lines */ 1036 + if (*(mi->content_top) && is_multipart_boundary(mi, line)) { 1037 + /* flush any leftover */ 1038 + if (prev.len) { 1039 + handle_filter(mi, &prev); 1040 + strbuf_reset(&prev); 1041 + } 1042 + if (!handle_boundary(mi, line)) 1043 + goto handle_body_out; 1044 + } 1045 + 1046 + /* Unwrap transfer encoding */ 1047 + decode_transfer_encoding(mi, line); 1048 + 1049 + switch (mi->transfer_encoding) { 1050 + case TE_BASE64: 1051 + case TE_QP: 1052 + { 1053 + struct strbuf **lines, **it, *sb; 1054 + 1055 + /* Prepend any previous partial lines */ 1056 + strbuf_insert(line, 0, prev.buf, prev.len); 1057 + strbuf_reset(&prev); 1058 + 1059 + /* 1060 + * This is a decoded line that may contain 1061 + * multiple new lines. Pass only one chunk 1062 + * at a time to handle_filter() 1063 + */ 1064 + lines = strbuf_split(line, '\n'); 1065 + for (it = lines; (sb = *it); it++) { 1066 + if (*(it + 1) == NULL) /* The last line */ 1067 + if (sb->buf[sb->len - 1] != '\n') { 1068 + /* Partial line, save it for later. */ 1069 + strbuf_addbuf(&prev, sb); 1070 + break; 1071 + } 1072 + handle_filter_flowed(mi, sb, &prev); 1073 + } 1074 + /* 1075 + * The partial chunk is saved in "prev" and will be 1076 + * appended by the next iteration of read_line_with_nul(). 
1077 + */ 1078 + strbuf_list_free(lines); 1079 + break; 1080 + } 1081 + default: 1082 + handle_filter_flowed(mi, line, &prev); 1083 + } 1084 + 1085 + if (mi->input_error) 1086 + break; 1087 + } while (!strbuf_getwholeline(line, mi->input, '\n')); 1088 + 1089 + if (prev.len) 1090 + handle_filter(mi, &prev); 1091 + 1092 + flush_inbody_header_accum(mi); 1093 + 1094 + handle_body_out: 1095 + strbuf_release(&prev); 1096 + } 1097 + 1098 + static void output_header_lines(FILE *fout, const char *hdr, const struct strbuf *data) 1099 + { 1100 + const char *sp = data->buf; 1101 + while (1) { 1102 + char *ep = strchr(sp, '\n'); 1103 + int len; 1104 + if (!ep) 1105 + len = strlen(sp); 1106 + else 1107 + len = ep - sp; 1108 + fprintf(fout, "%s: %.*s\n", hdr, len, sp); 1109 + if (!ep) 1110 + break; 1111 + sp = ep + 1; 1112 + } 1113 + } 1114 + 1115 + static void handle_info(struct mailinfo *mi) 1116 + { 1117 + struct strbuf *hdr; 1118 + int i; 1119 + 1120 + for (i = 0; header[i]; i++) { 1121 + /* only print inbody headers if we output a patch file */ 1122 + if (mi->patch_lines && mi->s_hdr_data[i]) 1123 + hdr = mi->s_hdr_data[i]; 1124 + else if (mi->p_hdr_data[i]) 1125 + hdr = mi->p_hdr_data[i]; 1126 + else 1127 + continue; 1128 + 1129 + if (!strcmp(header[i], "Subject")) { 1130 + if (!mi->keep_subject) { 1131 + cleanup_subject(mi, hdr); 1132 + cleanup_space(hdr); 1133 + } 1134 + output_header_lines(mi->output, "Subject", hdr); 1135 + } else if (!strcmp(header[i], "From")) { 1136 + cleanup_space(hdr); 1137 + handle_from(mi, hdr); 1138 + fprintf(mi->output, "Author: %s\n", mi->name.buf); 1139 + fprintf(mi->output, "Email: %s\n", mi->email.buf); 1140 + } else { 1141 + cleanup_space(hdr); 1142 + fprintf(mi->output, "%s: %s\n", header[i], hdr->buf); 1143 + } 1144 + } 1145 + fprintf(mi->output, "\n"); 1146 + } 1147 + 1148 + int mailinfo(struct mailinfo *mi, const char *msg, const char *patch) 1149 + { 1150 + FILE *cmitmsg; 1151 + int peek; 1152 + struct strbuf line = STRBUF_INIT; 1153 
+ 1154 + cmitmsg = fopen(msg, "w"); 1155 + if (!cmitmsg) { 1156 + perror(msg); 1157 + return -1; 1158 + } 1159 + mi->patchfile = fopen(patch, "w"); 1160 + if (!mi->patchfile) { 1161 + perror(patch); 1162 + fclose(cmitmsg); 1163 + return -1; 1164 + } 1165 + 1166 + mi->p_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(*(mi->p_hdr_data))); 1167 + mi->s_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(*(mi->s_hdr_data))); 1168 + 1169 + do { 1170 + peek = fgetc(mi->input); 1171 + if (peek == EOF) { 1172 + fclose(cmitmsg); 1173 + return error("empty patch: '%s'", patch); 1174 + } 1175 + } while (isspace(peek)); 1176 + ungetc(peek, mi->input); 1177 + 1178 + /* process the email header */ 1179 + while (read_one_header_line(&line, mi->input)) 1180 + check_header(mi, &line, mi->p_hdr_data, 1); 1181 + 1182 + handle_body(mi, &line); 1183 + fwrite(mi->log_message.buf, 1, mi->log_message.len, cmitmsg); 1184 + fclose(cmitmsg); 1185 + fclose(mi->patchfile); 1186 + 1187 + handle_info(mi); 1188 + strbuf_release(&line); 1189 + return mi->input_error; 1190 + } 1191 + 1192 + static int git_mailinfo_config(const char *var, const char *value, void *mi_) 1193 + { 1194 + struct mailinfo *mi = mi_; 1195 + 1196 + if (!starts_with(var, "mailinfo.")) 1197 + return git_default_config(var, value, NULL); 1198 + if (!strcmp(var, "mailinfo.scissors")) { 1199 + mi->use_scissors = git_config_bool(var, value); 1200 + return 0; 1201 + } 1202 + /* perhaps others here */ 1203 + return 0; 1204 + } 1205 + 1206 + void setup_mailinfo(struct mailinfo *mi) 1207 + { 1208 + memset(mi, 0, sizeof(*mi)); 1209 + strbuf_init(&mi->name, 0); 1210 + strbuf_init(&mi->email, 0); 1211 + strbuf_init(&mi->charset, 0); 1212 + strbuf_init(&mi->log_message, 0); 1213 + strbuf_init(&mi->inbody_header_accum, 0); 1214 + mi->header_stage = 1; 1215 + mi->use_inbody_headers = 1; 1216 + mi->content_top = mi->content; 1217 + git_config(git_mailinfo_config, mi); 1218 + } 1219 + 1220 + void clear_mailinfo(struct mailinfo *mi) 1221 + { 1222 + int i; 
1223 + 1224 + strbuf_release(&mi->name); 1225 + strbuf_release(&mi->email); 1226 + strbuf_release(&mi->charset); 1227 + strbuf_release(&mi->inbody_header_accum); 1228 + free(mi->message_id); 1229 + 1230 + if (mi->p_hdr_data) 1231 + for (i = 0; mi->p_hdr_data[i]; i++) 1232 + strbuf_release(mi->p_hdr_data[i]); 1233 + free(mi->p_hdr_data); 1234 + if (mi->s_hdr_data) 1235 + for (i = 0; mi->s_hdr_data[i]; i++) 1236 + strbuf_release(mi->s_hdr_data[i]); 1237 + free(mi->s_hdr_data); 1238 + 1239 + while (mi->content < mi->content_top) { 1240 + free(*(mi->content_top)); 1241 + mi->content_top--; 1242 + } 1243 + 1244 + strbuf_release(&mi->log_message); 1245 + }
+1185
t/t4256/1/mailinfo.c.orig
··· 1 + #include "cache.h" 2 + #include "config.h" 3 + #include "utf8.h" 4 + #include "strbuf.h" 5 + #include "mailinfo.h" 6 + 7 + static void cleanup_space(struct strbuf *sb) 8 + { 9 + size_t pos, cnt; 10 + for (pos = 0; pos < sb->len; pos++) { 11 + if (isspace(sb->buf[pos])) { 12 + sb->buf[pos] = ' '; 13 + for (cnt = 0; isspace(sb->buf[pos + cnt + 1]); cnt++); 14 + strbuf_remove(sb, pos + 1, cnt); 15 + } 16 + } 17 + } 18 + 19 + static void get_sane_name(struct strbuf *out, struct strbuf *name, struct strbuf *email) 20 + { 21 + struct strbuf *src = name; 22 + if (name->len < 3 || 60 < name->len || strchr(name->buf, '@') || 23 + strchr(name->buf, '<') || strchr(name->buf, '>')) 24 + src = email; 25 + else if (name == out) 26 + return; 27 + strbuf_reset(out); 28 + strbuf_addbuf(out, src); 29 + } 30 + 31 + static void parse_bogus_from(struct mailinfo *mi, const struct strbuf *line) 32 + { 33 + /* John Doe <johndoe> */ 34 + 35 + char *bra, *ket; 36 + /* This is fallback, so do not bother if we already have an 37 + * e-mail address. 
38 + */ 39 + if (mi->email.len) 40 + return; 41 + 42 + bra = strchr(line->buf, '<'); 43 + if (!bra) 44 + return; 45 + ket = strchr(bra, '>'); 46 + if (!ket) 47 + return; 48 + 49 + strbuf_reset(&mi->email); 50 + strbuf_add(&mi->email, bra + 1, ket - bra - 1); 51 + 52 + strbuf_reset(&mi->name); 53 + strbuf_add(&mi->name, line->buf, bra - line->buf); 54 + strbuf_trim(&mi->name); 55 + get_sane_name(&mi->name, &mi->name, &mi->email); 56 + } 57 + 58 + static const char *unquote_comment(struct strbuf *outbuf, const char *in) 59 + { 60 + int c; 61 + int take_next_literally = 0; 62 + 63 + strbuf_addch(outbuf, '('); 64 + 65 + while ((c = *in++) != 0) { 66 + if (take_next_literally == 1) { 67 + take_next_literally = 0; 68 + } else { 69 + switch (c) { 70 + case '\\': 71 + take_next_literally = 1; 72 + continue; 73 + case '(': 74 + in = unquote_comment(outbuf, in); 75 + continue; 76 + case ')': 77 + strbuf_addch(outbuf, ')'); 78 + return in; 79 + } 80 + } 81 + 82 + strbuf_addch(outbuf, c); 83 + } 84 + 85 + return in; 86 + } 87 + 88 + static const char *unquote_quoted_string(struct strbuf *outbuf, const char *in) 89 + { 90 + int c; 91 + int take_next_literally = 0; 92 + 93 + while ((c = *in++) != 0) { 94 + if (take_next_literally == 1) { 95 + take_next_literally = 0; 96 + } else { 97 + switch (c) { 98 + case '\\': 99 + take_next_literally = 1; 100 + continue; 101 + case '"': 102 + return in; 103 + } 104 + } 105 + 106 + strbuf_addch(outbuf, c); 107 + } 108 + 109 + return in; 110 + } 111 + 112 + static void unquote_quoted_pair(struct strbuf *line) 113 + { 114 + struct strbuf outbuf; 115 + const char *in = line->buf; 116 + int c; 117 + 118 + strbuf_init(&outbuf, line->len); 119 + 120 + while ((c = *in++) != 0) { 121 + switch (c) { 122 + case '"': 123 + in = unquote_quoted_string(&outbuf, in); 124 + continue; 125 + case '(': 126 + in = unquote_comment(&outbuf, in); 127 + continue; 128 + } 129 + 130 + strbuf_addch(&outbuf, c); 131 + } 132 + 133 + strbuf_swap(&outbuf, line); 134 + 
strbuf_release(&outbuf); 135 + 136 + } 137 + 138 + static void handle_from(struct mailinfo *mi, const struct strbuf *from) 139 + { 140 + char *at; 141 + size_t el; 142 + struct strbuf f; 143 + 144 + strbuf_init(&f, from->len); 145 + strbuf_addbuf(&f, from); 146 + 147 + unquote_quoted_pair(&f); 148 + 149 + at = strchr(f.buf, '@'); 150 + if (!at) { 151 + parse_bogus_from(mi, from); 152 + goto out; 153 + } 154 + 155 + /* 156 + * If we already have one email, don't take any confusing lines 157 + */ 158 + if (mi->email.len && strchr(at + 1, '@')) 159 + goto out; 160 + 161 + /* Pick up the string around '@', possibly delimited with <> 162 + * pair; that is the email part. 163 + */ 164 + while (at > f.buf) { 165 + char c = at[-1]; 166 + if (isspace(c)) 167 + break; 168 + if (c == '<') { 169 + at[-1] = ' '; 170 + break; 171 + } 172 + at--; 173 + } 174 + el = strcspn(at, " \n\t\r\v\f>"); 175 + strbuf_reset(&mi->email); 176 + strbuf_add(&mi->email, at, el); 177 + strbuf_remove(&f, at - f.buf, el + (at[el] ? 1 : 0)); 178 + 179 + /* The remainder is name. It could be 180 + * 181 + * - "John Doe <john.doe@xz>" (a), or 182 + * - "john.doe@xz (John Doe)" (b), or 183 + * - "John (zzz) Doe <john.doe@xz> (Comment)" (c) 184 + * 185 + * but we have removed the email part, so 186 + * 187 + * - remove extra spaces which could stay after email (case 'c'), and 188 + * - trim from both ends, possibly removing the () pair at the end 189 + * (cases 'a' and 'b'). 
190 + */ 191 + cleanup_space(&f); 192 + strbuf_trim(&f); 193 + if (f.buf[0] == '(' && f.len && f.buf[f.len - 1] == ')') { 194 + strbuf_remove(&f, 0, 1); 195 + strbuf_setlen(&f, f.len - 1); 196 + } 197 + 198 + get_sane_name(&mi->name, &f, &mi->email); 199 + out: 200 + strbuf_release(&f); 201 + } 202 + 203 + static void handle_header(struct strbuf **out, const struct strbuf *line) 204 + { 205 + if (!*out) { 206 + *out = xmalloc(sizeof(struct strbuf)); 207 + strbuf_init(*out, line->len); 208 + } else 209 + strbuf_reset(*out); 210 + 211 + strbuf_addbuf(*out, line); 212 + } 213 + 214 + /* NOTE NOTE NOTE. We do not claim we do full MIME. We just attempt 215 + * to have enough heuristics to grok MIME encoded patches often found 216 + * on our mailing lists. For example, we do not even treat header lines 217 + * case insensitively. 218 + */ 219 + 220 + static int slurp_attr(const char *line, const char *name, struct strbuf *attr) 221 + { 222 + const char *ends, *ap = strcasestr(line, name); 223 + size_t sz; 224 + 225 + strbuf_setlen(attr, 0); 226 + if (!ap) 227 + return 0; 228 + ap += strlen(name); 229 + if (*ap == '"') { 230 + ap++; 231 + ends = "\""; 232 + } 233 + else 234 + ends = "; \t"; 235 + sz = strcspn(ap, ends); 236 + strbuf_add(attr, ap, sz); 237 + return 1; 238 + } 239 + 240 + static void handle_content_type(struct mailinfo *mi, struct strbuf *line) 241 + { 242 + struct strbuf *boundary = xmalloc(sizeof(struct strbuf)); 243 + strbuf_init(boundary, line->len); 244 + 245 + if (slurp_attr(line->buf, "boundary=", boundary)) { 246 + strbuf_insert(boundary, 0, "--", 2); 247 + if (++mi->content_top >= &mi->content[MAX_BOUNDARIES]) { 248 + error("Too many boundaries to handle"); 249 + mi->input_error = -1; 250 + mi->content_top = &mi->content[MAX_BOUNDARIES] - 1; 251 + return; 252 + } 253 + *(mi->content_top) = boundary; 254 + boundary = NULL; 255 + } 256 + slurp_attr(line->buf, "charset=", &mi->charset); 257 + 258 + if (boundary) { 259 + strbuf_release(boundary); 260 + 
free(boundary); 261 + } 262 + } 263 + 264 + static void handle_content_transfer_encoding(struct mailinfo *mi, 265 + const struct strbuf *line) 266 + { 267 + if (strcasestr(line->buf, "base64")) 268 + mi->transfer_encoding = TE_BASE64; 269 + else if (strcasestr(line->buf, "quoted-printable")) 270 + mi->transfer_encoding = TE_QP; 271 + else 272 + mi->transfer_encoding = TE_DONTCARE; 273 + } 274 + 275 + static int is_multipart_boundary(struct mailinfo *mi, const struct strbuf *line) 276 + { 277 + struct strbuf *content_top = *(mi->content_top); 278 + 279 + return ((content_top->len <= line->len) && 280 + !memcmp(line->buf, content_top->buf, content_top->len)); 281 + } 282 + 283 + static void cleanup_subject(struct mailinfo *mi, struct strbuf *subject) 284 + { 285 + size_t at = 0; 286 + 287 + while (at < subject->len) { 288 + char *pos; 289 + size_t remove; 290 + 291 + switch (subject->buf[at]) { 292 + case 'r': case 'R': 293 + if (subject->len <= at + 3) 294 + break; 295 + if ((subject->buf[at + 1] == 'e' || 296 + subject->buf[at + 1] == 'E') && 297 + subject->buf[at + 2] == ':') { 298 + strbuf_remove(subject, at, 3); 299 + continue; 300 + } 301 + at++; 302 + break; 303 + case ' ': case '\t': case ':': 304 + strbuf_remove(subject, at, 1); 305 + continue; 306 + case '[': 307 + pos = strchr(subject->buf + at, ']'); 308 + if (!pos) 309 + break; 310 + remove = pos - subject->buf + at + 1; 311 + if (!mi->keep_non_patch_brackets_in_subject || 312 + (7 <= remove && 313 + memmem(subject->buf + at, remove, "PATCH", 5))) 314 + strbuf_remove(subject, at, remove); 315 + else { 316 + at += remove; 317 + /* 318 + * If the input had a space after the ], keep 319 + * it. We don't bother with finding the end of 320 + * the space, since we later normalize it 321 + * anyway. 
322 + */ 323 + if (isspace(subject->buf[at])) 324 + at += 1; 325 + } 326 + continue; 327 + } 328 + break; 329 + } 330 + strbuf_trim(subject); 331 + } 332 + 333 + #define MAX_HDR_PARSED 10 334 + static const char *header[MAX_HDR_PARSED] = { 335 + "From","Subject","Date", 336 + }; 337 + 338 + static inline int cmp_header(const struct strbuf *line, const char *hdr) 339 + { 340 + int len = strlen(hdr); 341 + return !strncasecmp(line->buf, hdr, len) && line->len > len && 342 + line->buf[len] == ':' && isspace(line->buf[len + 1]); 343 + } 344 + 345 + static int is_format_patch_separator(const char *line, int len) 346 + { 347 + static const char SAMPLE[] = 348 + "From e6807f3efca28b30decfecb1732a56c7db1137ee Mon Sep 17 00:00:00 2001\n"; 349 + const char *cp; 350 + 351 + if (len != strlen(SAMPLE)) 352 + return 0; 353 + if (!skip_prefix(line, "From ", &cp)) 354 + return 0; 355 + if (strspn(cp, "0123456789abcdef") != 40) 356 + return 0; 357 + cp += 40; 358 + return !memcmp(SAMPLE + (cp - line), cp, strlen(SAMPLE) - (cp - line)); 359 + } 360 + 361 + static struct strbuf *decode_q_segment(const struct strbuf *q_seg, int rfc2047) 362 + { 363 + const char *in = q_seg->buf; 364 + int c; 365 + struct strbuf *out = xmalloc(sizeof(struct strbuf)); 366 + strbuf_init(out, q_seg->len); 367 + 368 + while ((c = *in++) != 0) { 369 + if (c == '=') { 370 + int ch, d = *in; 371 + if (d == '\n' || !d) 372 + break; /* drop trailing newline */ 373 + ch = hex2chr(in); 374 + if (ch >= 0) { 375 + strbuf_addch(out, ch); 376 + in += 2; 377 + continue; 378 + } 379 + /* garbage -- fall through */ 380 + } 381 + if (rfc2047 && c == '_') /* rfc2047 4.2 (2) */ 382 + c = 0x20; 383 + strbuf_addch(out, c); 384 + } 385 + return out; 386 + } 387 + 388 + static struct strbuf *decode_b_segment(const struct strbuf *b_seg) 389 + { 390 + /* Decode in..ep, possibly in-place to ot */ 391 + int c, pos = 0, acc = 0; 392 + const char *in = b_seg->buf; 393 + struct strbuf *out = xmalloc(sizeof(struct strbuf)); 394 + 
strbuf_init(out, b_seg->len); 395 + 396 + while ((c = *in++) != 0) { 397 + if (c == '+') 398 + c = 62; 399 + else if (c == '/') 400 + c = 63; 401 + else if ('A' <= c && c <= 'Z') 402 + c -= 'A'; 403 + else if ('a' <= c && c <= 'z') 404 + c -= 'a' - 26; 405 + else if ('0' <= c && c <= '9') 406 + c -= '0' - 52; 407 + else 408 + continue; /* garbage */ 409 + switch (pos++) { 410 + case 0: 411 + acc = (c << 2); 412 + break; 413 + case 1: 414 + strbuf_addch(out, (acc | (c >> 4))); 415 + acc = (c & 15) << 4; 416 + break; 417 + case 2: 418 + strbuf_addch(out, (acc | (c >> 2))); 419 + acc = (c & 3) << 6; 420 + break; 421 + case 3: 422 + strbuf_addch(out, (acc | c)); 423 + acc = pos = 0; 424 + break; 425 + } 426 + } 427 + return out; 428 + } 429 + 430 + static int convert_to_utf8(struct mailinfo *mi, 431 + struct strbuf *line, const char *charset) 432 + { 433 + char *out; 434 + 435 + if (!mi->metainfo_charset || !charset || !*charset) 436 + return 0; 437 + 438 + if (same_encoding(mi->metainfo_charset, charset)) 439 + return 0; 440 + out = reencode_string(line->buf, mi->metainfo_charset, charset); 441 + if (!out) { 442 + mi->input_error = -1; 443 + return error("cannot convert from %s to %s", 444 + charset, mi->metainfo_charset); 445 + } 446 + strbuf_attach(line, out, strlen(out), strlen(out)); 447 + return 0; 448 + } 449 + 450 + static void decode_header(struct mailinfo *mi, struct strbuf *it) 451 + { 452 + char *in, *ep, *cp; 453 + struct strbuf outbuf = STRBUF_INIT, *dec; 454 + struct strbuf charset_q = STRBUF_INIT, piecebuf = STRBUF_INIT; 455 + int found_error = 1; /* pessimism */ 456 + 457 + in = it->buf; 458 + while (in - it->buf <= it->len && (ep = strstr(in, "=?")) != NULL) { 459 + int encoding; 460 + strbuf_reset(&charset_q); 461 + strbuf_reset(&piecebuf); 462 + 463 + if (in != ep) { 464 + /* 465 + * We are about to process an encoded-word 466 + * that begins at ep, but there is something 467 + * before the encoded word. 
468 + */ 469 + char *scan; 470 + for (scan = in; scan < ep; scan++) 471 + if (!isspace(*scan)) 472 + break; 473 + 474 + if (scan != ep || in == it->buf) { 475 + /* 476 + * We should not lose that "something", 477 + * unless we have just processed an 478 + * encoded-word, and there is only LWS 479 + * before the one we are about to process. 480 + */ 481 + strbuf_add(&outbuf, in, ep - in); 482 + } 483 + } 484 + /* E.g. 485 + * ep : "=?iso-2022-jp?B?GyR...?= foo" 486 + * ep : "=?ISO-8859-1?Q?Foo=FCbar?= baz" 487 + */ 488 + ep += 2; 489 + 490 + if (ep - it->buf >= it->len || !(cp = strchr(ep, '?'))) 491 + goto release_return; 492 + 493 + if (cp + 3 - it->buf > it->len) 494 + goto release_return; 495 + strbuf_add(&charset_q, ep, cp - ep); 496 + 497 + encoding = cp[1]; 498 + if (!encoding || cp[2] != '?') 499 + goto release_return; 500 + ep = strstr(cp + 3, "?="); 501 + if (!ep) 502 + goto release_return; 503 + strbuf_add(&piecebuf, cp + 3, ep - cp - 3); 504 + switch (tolower(encoding)) { 505 + default: 506 + goto release_return; 507 + case 'b': 508 + dec = decode_b_segment(&piecebuf); 509 + break; 510 + case 'q': 511 + dec = decode_q_segment(&piecebuf, 1); 512 + break; 513 + } 514 + if (convert_to_utf8(mi, dec, charset_q.buf)) 515 + goto release_return; 516 + 517 + strbuf_addbuf(&outbuf, dec); 518 + strbuf_release(dec); 519 + free(dec); 520 + in = ep + 2; 521 + } 522 + strbuf_addstr(&outbuf, in); 523 + strbuf_reset(it); 524 + strbuf_addbuf(it, &outbuf); 525 + found_error = 0; 526 + release_return: 527 + strbuf_release(&outbuf); 528 + strbuf_release(&charset_q); 529 + strbuf_release(&piecebuf); 530 + 531 + if (found_error) 532 + mi->input_error = -1; 533 + } 534 + 535 + static int check_header(struct mailinfo *mi, 536 + const struct strbuf *line, 537 + struct strbuf *hdr_data[], int overwrite) 538 + { 539 + int i, ret = 0, len; 540 + struct strbuf sb = STRBUF_INIT; 541 + 542 + /* search for the interesting parts */ 543 + for (i = 0; header[i]; i++) { 544 + int len = 
strlen(header[i]); 545 + if ((!hdr_data[i] || overwrite) && cmp_header(line, header[i])) { 546 + /* Unwrap inline B and Q encoding, and optionally 547 + * normalize the meta information to utf8. 548 + */ 549 + strbuf_add(&sb, line->buf + len + 2, line->len - len - 2); 550 + decode_header(mi, &sb); 551 + handle_header(&hdr_data[i], &sb); 552 + ret = 1; 553 + goto check_header_out; 554 + } 555 + } 556 + 557 + /* Content stuff */ 558 + if (cmp_header(line, "Content-Type")) { 559 + len = strlen("Content-Type: "); 560 + strbuf_add(&sb, line->buf + len, line->len - len); 561 + decode_header(mi, &sb); 562 + strbuf_insert(&sb, 0, "Content-Type: ", len); 563 + handle_content_type(mi, &sb); 564 + ret = 1; 565 + goto check_header_out; 566 + } 567 + if (cmp_header(line, "Content-Transfer-Encoding")) { 568 + len = strlen("Content-Transfer-Encoding: "); 569 + strbuf_add(&sb, line->buf + len, line->len - len); 570 + decode_header(mi, &sb); 571 + handle_content_transfer_encoding(mi, &sb); 572 + ret = 1; 573 + goto check_header_out; 574 + } 575 + if (cmp_header(line, "Message-Id")) { 576 + len = strlen("Message-Id: "); 577 + strbuf_add(&sb, line->buf + len, line->len - len); 578 + decode_header(mi, &sb); 579 + if (mi->add_message_id) 580 + mi->message_id = strbuf_detach(&sb, NULL); 581 + ret = 1; 582 + goto check_header_out; 583 + } 584 + 585 + check_header_out: 586 + strbuf_release(&sb); 587 + return ret; 588 + } 589 + 590 + /* 591 + * Returns 1 if the given line or any line beginning with the given line is an 592 + * in-body header (that is, check_header will succeed when passed 593 + * mi->s_hdr_data). 
594 + */ 595 + static int is_inbody_header(const struct mailinfo *mi, 596 + const struct strbuf *line) 597 + { 598 + int i; 599 + for (i = 0; header[i]; i++) 600 + if (!mi->s_hdr_data[i] && cmp_header(line, header[i])) 601 + return 1; 602 + return 0; 603 + } 604 + 605 + static void decode_transfer_encoding(struct mailinfo *mi, struct strbuf *line) 606 + { 607 + struct strbuf *ret; 608 + 609 + switch (mi->transfer_encoding) { 610 + case TE_QP: 611 + ret = decode_q_segment(line, 0); 612 + break; 613 + case TE_BASE64: 614 + ret = decode_b_segment(line); 615 + break; 616 + case TE_DONTCARE: 617 + default: 618 + return; 619 + } 620 + strbuf_reset(line); 621 + strbuf_addbuf(line, ret); 622 + strbuf_release(ret); 623 + free(ret); 624 + } 625 + 626 + static inline int patchbreak(const struct strbuf *line) 627 + { 628 + size_t i; 629 + 630 + /* Beginning of a "diff -" header? */ 631 + if (starts_with(line->buf, "diff -")) 632 + return 1; 633 + 634 + /* CVS "Index: " line? */ 635 + if (starts_with(line->buf, "Index: ")) 636 + return 1; 637 + 638 + /* 639 + * "--- <filename>" starts patches without headers 640 + * "---<sp>*" is a manual separator 641 + */ 642 + if (line->len < 4) 643 + return 0; 644 + 645 + if (starts_with(line->buf, "---")) { 646 + /* space followed by a filename? */ 647 + if (line->buf[3] == ' ' && !isspace(line->buf[4])) 648 + return 1; 649 + /* Just whitespace? 
*/ 650 + for (i = 3; i < line->len; i++) { 651 + unsigned char c = line->buf[i]; 652 + if (c == '\n') 653 + return 1; 654 + if (!isspace(c)) 655 + break; 656 + } 657 + return 0; 658 + } 659 + return 0; 660 + } 661 + 662 + static int is_scissors_line(const char *line) 663 + { 664 + const char *c; 665 + int scissors = 0, gap = 0; 666 + const char *first_nonblank = NULL, *last_nonblank = NULL; 667 + int visible, perforation = 0, in_perforation = 0; 668 + 669 + for (c = line; *c; c++) { 670 + if (isspace(*c)) { 671 + if (in_perforation) { 672 + perforation++; 673 + gap++; 674 + } 675 + continue; 676 + } 677 + last_nonblank = c; 678 + if (first_nonblank == NULL) 679 + first_nonblank = c; 680 + if (*c == '-') { 681 + in_perforation = 1; 682 + perforation++; 683 + continue; 684 + } 685 + if ((!memcmp(c, ">8", 2) || !memcmp(c, "8<", 2) || 686 + !memcmp(c, ">%", 2) || !memcmp(c, "%<", 2))) { 687 + in_perforation = 1; 688 + perforation += 2; 689 + scissors += 2; 690 + c++; 691 + continue; 692 + } 693 + in_perforation = 0; 694 + } 695 + 696 + /* 697 + * The mark must be at least 8 bytes long (e.g. "-- >8 --"). 698 + * Even though there can be arbitrary cruft on the same line 699 + * (e.g. "cut here"), in order to avoid misidentification, the 700 + * perforation must occupy more than a third of the visible 701 + * width of the line, and dashes and scissors must occupy more 702 + * than half of the perforation. 
703 + */ 704 + 705 + if (first_nonblank && last_nonblank) 706 + visible = last_nonblank - first_nonblank + 1; 707 + else 708 + visible = 0; 709 + return (scissors && 8 <= visible && 710 + visible < perforation * 3 && 711 + gap * 2 < perforation); 712 + } 713 + 714 + static void flush_inbody_header_accum(struct mailinfo *mi) 715 + { 716 + if (!mi->inbody_header_accum.len) 717 + return; 718 + if (!check_header(mi, &mi->inbody_header_accum, mi->s_hdr_data, 0)) 719 + BUG("inbody_header_accum, if not empty, must always contain a valid in-body header"); 720 + strbuf_reset(&mi->inbody_header_accum); 721 + } 722 + 723 + static int check_inbody_header(struct mailinfo *mi, const struct strbuf *line) 724 + { 725 + if (mi->inbody_header_accum.len && 726 + (line->buf[0] == ' ' || line->buf[0] == '\t')) { 727 + if (mi->use_scissors && is_scissors_line(line->buf)) { 728 + /* 729 + * This is a scissors line; do not consider this line 730 + * as a header continuation line. 731 + */ 732 + flush_inbody_header_accum(mi); 733 + return 0; 734 + } 735 + strbuf_strip_suffix(&mi->inbody_header_accum, "\n"); 736 + strbuf_addbuf(&mi->inbody_header_accum, line); 737 + return 1; 738 + } 739 + 740 + flush_inbody_header_accum(mi); 741 + 742 + if (starts_with(line->buf, ">From") && isspace(line->buf[5])) 743 + return is_format_patch_separator(line->buf + 1, line->len - 1); 744 + if (starts_with(line->buf, "[PATCH]") && isspace(line->buf[7])) { 745 + int i; 746 + for (i = 0; header[i]; i++) 747 + if (!strcmp("Subject", header[i])) { 748 + handle_header(&mi->s_hdr_data[i], line); 749 + return 1; 750 + } 751 + return 0; 752 + } 753 + if (is_inbody_header(mi, line)) { 754 + strbuf_addbuf(&mi->inbody_header_accum, line); 755 + return 1; 756 + } 757 + return 0; 758 + } 759 + 760 + static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line) 761 + { 762 + assert(!mi->filter_stage); 763 + 764 + if (mi->header_stage) { 765 + if (!line->len || (line->len == 1 && line->buf[0] == '\n')) { 766 + if 
(mi->inbody_header_accum.len) { 767 + flush_inbody_header_accum(mi); 768 + mi->header_stage = 0; 769 + } 770 + return 0; 771 + } 772 + } 773 + 774 + if (mi->use_inbody_headers && mi->header_stage) { 775 + mi->header_stage = check_inbody_header(mi, line); 776 + if (mi->header_stage) 777 + return 0; 778 + } else 779 + /* Only trim the first (blank) line of the commit message 780 + * when ignoring in-body headers. 781 + */ 782 + mi->header_stage = 0; 783 + 784 + /* normalize the log message to UTF-8. */ 785 + if (convert_to_utf8(mi, line, mi->charset.buf)) 786 + return 0; /* mi->input_error already set */ 787 + 788 + if (mi->use_scissors && is_scissors_line(line->buf)) { 789 + int i; 790 + 791 + strbuf_setlen(&mi->log_message, 0); 792 + mi->header_stage = 1; 793 + 794 + /* 795 + * We may have already read "secondary headers"; purge 796 + * them to give ourselves a clean restart. 797 + */ 798 + for (i = 0; header[i]; i++) { 799 + if (mi->s_hdr_data[i]) 800 + strbuf_release(mi->s_hdr_data[i]); 801 + mi->s_hdr_data[i] = NULL; 802 + } 803 + return 0; 804 + } 805 + 806 + if (patchbreak(line)) { 807 + if (mi->message_id) 808 + strbuf_addf(&mi->log_message, 809 + "Message-Id: %s\n", mi->message_id); 810 + return 1; 811 + } 812 + 813 + strbuf_addbuf(&mi->log_message, line); 814 + return 0; 815 + } 816 + 817 + static void handle_patch(struct mailinfo *mi, const struct strbuf *line) 818 + { 819 + fwrite(line->buf, 1, line->len, mi->patchfile); 820 + mi->patch_lines++; 821 + } 822 + 823 + static void handle_filter(struct mailinfo *mi, struct strbuf *line) 824 + { 825 + switch (mi->filter_stage) { 826 + case 0: 827 + if (!handle_commit_msg(mi, line)) 828 + break; 829 + mi->filter_stage++; 830 + /* fallthrough */ 831 + case 1: 832 + handle_patch(mi, line); 833 + break; 834 + } 835 + } 836 + 837 + static int is_rfc2822_header(const struct strbuf *line) 838 + { 839 + /* 840 + * The section that defines the loosest possible 841 + * field name is "3.6.8 Optional fields". 
842 + * 843 + * optional-field = field-name ":" unstructured CRLF 844 + * field-name = 1*ftext 845 + * ftext = %d33-57 / %59-126 846 + */ 847 + int ch; 848 + char *cp = line->buf; 849 + 850 + /* Count mbox From headers as headers */ 851 + if (starts_with(cp, "From ") || starts_with(cp, ">From ")) 852 + return 1; 853 + 854 + while ((ch = *cp++)) { 855 + if (ch == ':') 856 + return 1; 857 + if ((33 <= ch && ch <= 57) || 858 + (59 <= ch && ch <= 126)) 859 + continue; 860 + break; 861 + } 862 + return 0; 863 + } 864 + 865 + static int read_one_header_line(struct strbuf *line, FILE *in) 866 + { 867 + struct strbuf continuation = STRBUF_INIT; 868 + 869 + /* Get the first part of the line. */ 870 + if (strbuf_getline_lf(line, in)) 871 + return 0; 872 + 873 + /* 874 + * Is it an empty line or not a valid rfc2822 header? 875 + * If so, stop here, and return false ("not a header") 876 + */ 877 + strbuf_rtrim(line); 878 + if (!line->len || !is_rfc2822_header(line)) { 879 + /* Re-add the newline */ 880 + strbuf_addch(line, '\n'); 881 + return 0; 882 + } 883 + 884 + /* 885 + * Now we need to eat all the continuation lines.. 
886 + * Yuck, 2822 header "folding" 887 + */ 888 + for (;;) { 889 + int peek; 890 + 891 + peek = fgetc(in); 892 + if (peek == EOF) 893 + break; 894 + ungetc(peek, in); 895 + if (peek != ' ' && peek != '\t') 896 + break; 897 + if (strbuf_getline_lf(&continuation, in)) 898 + break; 899 + continuation.buf[0] = ' '; 900 + strbuf_rtrim(&continuation); 901 + strbuf_addbuf(line, &continuation); 902 + } 903 + strbuf_release(&continuation); 904 + 905 + return 1; 906 + } 907 + 908 + static int find_boundary(struct mailinfo *mi, struct strbuf *line) 909 + { 910 + while (!strbuf_getline_lf(line, mi->input)) { 911 + if (*(mi->content_top) && is_multipart_boundary(mi, line)) 912 + return 1; 913 + } 914 + return 0; 915 + } 916 + 917 + static int handle_boundary(struct mailinfo *mi, struct strbuf *line) 918 + { 919 + struct strbuf newline = STRBUF_INIT; 920 + 921 + strbuf_addch(&newline, '\n'); 922 + again: 923 + if (line->len >= (*(mi->content_top))->len + 2 && 924 + !memcmp(line->buf + (*(mi->content_top))->len, "--", 2)) { 925 + /* we hit an end boundary */ 926 + /* pop the current boundary off the stack */ 927 + strbuf_release(*(mi->content_top)); 928 + FREE_AND_NULL(*(mi->content_top)); 929 + 930 + /* technically won't happen as is_multipart_boundary() 931 + will fail first. But just in case.. 
932 + */ 933 + if (--mi->content_top < mi->content) { 934 + error("Detected mismatched boundaries, can't recover"); 935 + mi->input_error = -1; 936 + mi->content_top = mi->content; 937 + strbuf_release(&newline); 938 + return 0; 939 + } 940 + handle_filter(mi, &newline); 941 + strbuf_release(&newline); 942 + if (mi->input_error) 943 + return 0; 944 + 945 + /* skip to the next boundary */ 946 + if (!find_boundary(mi, line)) 947 + return 0; 948 + goto again; 949 + } 950 + 951 + /* set some defaults */ 952 + mi->transfer_encoding = TE_DONTCARE; 953 + strbuf_reset(&mi->charset); 954 + 955 + /* slurp in this section's info */ 956 + while (read_one_header_line(line, mi->input)) 957 + check_header(mi, line, mi->p_hdr_data, 0); 958 + 959 + strbuf_release(&newline); 960 + /* replenish line */ 961 + if (strbuf_getline_lf(line, mi->input)) 962 + return 0; 963 + strbuf_addch(line, '\n'); 964 + return 1; 965 + } 966 + 967 + static void handle_body(struct mailinfo *mi, struct strbuf *line) 968 + { 969 + struct strbuf prev = STRBUF_INIT; 970 + 971 + /* Skip up to the first boundary */ 972 + if (*(mi->content_top)) { 973 + if (!find_boundary(mi, line)) 974 + goto handle_body_out; 975 + } 976 + 977 + do { 978 + /* process any boundary lines */ 979 + if (*(mi->content_top) && is_multipart_boundary(mi, line)) { 980 + /* flush any leftover */ 981 + if (prev.len) { 982 + handle_filter(mi, &prev); 983 + strbuf_reset(&prev); 984 + } 985 + if (!handle_boundary(mi, line)) 986 + goto handle_body_out; 987 + } 988 + 989 + /* Unwrap transfer encoding */ 990 + decode_transfer_encoding(mi, line); 991 + 992 + switch (mi->transfer_encoding) { 993 + case TE_BASE64: 994 + case TE_QP: 995 + { 996 + struct strbuf **lines, **it, *sb; 997 + 998 + /* Prepend any previous partial lines */ 999 + strbuf_insert(line, 0, prev.buf, prev.len); 1000 + strbuf_reset(&prev); 1001 + 1002 + /* 1003 + * This is a decoded line that may contain 1004 + * multiple new lines. 
Pass only one chunk 1005 + * at a time to handle_filter() 1006 + */ 1007 + lines = strbuf_split(line, '\n'); 1008 + for (it = lines; (sb = *it); it++) { 1009 + if (*(it + 1) == NULL) /* The last line */ 1010 + if (sb->buf[sb->len - 1] != '\n') { 1011 + /* Partial line, save it for later. */ 1012 + strbuf_addbuf(&prev, sb); 1013 + break; 1014 + } 1015 + handle_filter(mi, sb); 1016 + } 1017 + /* 1018 + * The partial chunk is saved in "prev" and will be 1019 + * appended by the next iteration of read_line_with_nul(). 1020 + */ 1021 + strbuf_list_free(lines); 1022 + break; 1023 + } 1024 + default: 1025 + handle_filter(mi, line); 1026 + } 1027 + 1028 + if (mi->input_error) 1029 + break; 1030 + } while (!strbuf_getwholeline(line, mi->input, '\n')); 1031 + 1032 + flush_inbody_header_accum(mi); 1033 + 1034 + handle_body_out: 1035 + strbuf_release(&prev); 1036 + } 1037 + 1038 + static void output_header_lines(FILE *fout, const char *hdr, const struct strbuf *data) 1039 + { 1040 + const char *sp = data->buf; 1041 + while (1) { 1042 + char *ep = strchr(sp, '\n'); 1043 + int len; 1044 + if (!ep) 1045 + len = strlen(sp); 1046 + else 1047 + len = ep - sp; 1048 + fprintf(fout, "%s: %.*s\n", hdr, len, sp); 1049 + if (!ep) 1050 + break; 1051 + sp = ep + 1; 1052 + } 1053 + } 1054 + 1055 + static void handle_info(struct mailinfo *mi) 1056 + { 1057 + struct strbuf *hdr; 1058 + int i; 1059 + 1060 + for (i = 0; header[i]; i++) { 1061 + /* only print inbody headers if we output a patch file */ 1062 + if (mi->patch_lines && mi->s_hdr_data[i]) 1063 + hdr = mi->s_hdr_data[i]; 1064 + else if (mi->p_hdr_data[i]) 1065 + hdr = mi->p_hdr_data[i]; 1066 + else 1067 + continue; 1068 + 1069 + if (!strcmp(header[i], "Subject")) { 1070 + if (!mi->keep_subject) { 1071 + cleanup_subject(mi, hdr); 1072 + cleanup_space(hdr); 1073 + } 1074 + output_header_lines(mi->output, "Subject", hdr); 1075 + } else if (!strcmp(header[i], "From")) { 1076 + cleanup_space(hdr); 1077 + handle_from(mi, hdr); 1078 + 
fprintf(mi->output, "Author: %s\n", mi->name.buf); 1079 + fprintf(mi->output, "Email: %s\n", mi->email.buf); 1080 + } else { 1081 + cleanup_space(hdr); 1082 + fprintf(mi->output, "%s: %s\n", header[i], hdr->buf); 1083 + } 1084 + } 1085 + fprintf(mi->output, "\n"); 1086 + } 1087 + 1088 + int mailinfo(struct mailinfo *mi, const char *msg, const char *patch) 1089 + { 1090 + FILE *cmitmsg; 1091 + int peek; 1092 + struct strbuf line = STRBUF_INIT; 1093 + 1094 + cmitmsg = fopen(msg, "w"); 1095 + if (!cmitmsg) { 1096 + perror(msg); 1097 + return -1; 1098 + } 1099 + mi->patchfile = fopen(patch, "w"); 1100 + if (!mi->patchfile) { 1101 + perror(patch); 1102 + fclose(cmitmsg); 1103 + return -1; 1104 + } 1105 + 1106 + mi->p_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(*(mi->p_hdr_data))); 1107 + mi->s_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(*(mi->s_hdr_data))); 1108 + 1109 + do { 1110 + peek = fgetc(mi->input); 1111 + if (peek == EOF) { 1112 + fclose(cmitmsg); 1113 + return error("empty patch: '%s'", patch); 1114 + } 1115 + } while (isspace(peek)); 1116 + ungetc(peek, mi->input); 1117 + 1118 + /* process the email header */ 1119 + while (read_one_header_line(&line, mi->input)) 1120 + check_header(mi, &line, mi->p_hdr_data, 1); 1121 + 1122 + handle_body(mi, &line); 1123 + fwrite(mi->log_message.buf, 1, mi->log_message.len, cmitmsg); 1124 + fclose(cmitmsg); 1125 + fclose(mi->patchfile); 1126 + 1127 + handle_info(mi); 1128 + strbuf_release(&line); 1129 + return mi->input_error; 1130 + } 1131 + 1132 + static int git_mailinfo_config(const char *var, const char *value, void *mi_) 1133 + { 1134 + struct mailinfo *mi = mi_; 1135 + 1136 + if (!starts_with(var, "mailinfo.")) 1137 + return git_default_config(var, value, NULL); 1138 + if (!strcmp(var, "mailinfo.scissors")) { 1139 + mi->use_scissors = git_config_bool(var, value); 1140 + return 0; 1141 + } 1142 + /* perhaps others here */ 1143 + return 0; 1144 + } 1145 + 1146 + void setup_mailinfo(struct mailinfo *mi) 1147 + { 1148 + 
memset(mi, 0, sizeof(*mi)); 1149 + strbuf_init(&mi->name, 0); 1150 + strbuf_init(&mi->email, 0); 1151 + strbuf_init(&mi->charset, 0); 1152 + strbuf_init(&mi->log_message, 0); 1153 + strbuf_init(&mi->inbody_header_accum, 0); 1154 + mi->header_stage = 1; 1155 + mi->use_inbody_headers = 1; 1156 + mi->content_top = mi->content; 1157 + git_config(git_mailinfo_config, mi); 1158 + } 1159 + 1160 + void clear_mailinfo(struct mailinfo *mi) 1161 + { 1162 + int i; 1163 + 1164 + strbuf_release(&mi->name); 1165 + strbuf_release(&mi->email); 1166 + strbuf_release(&mi->charset); 1167 + strbuf_release(&mi->inbody_header_accum); 1168 + free(mi->message_id); 1169 + 1170 + if (mi->p_hdr_data) 1171 + for (i = 0; mi->p_hdr_data[i]; i++) 1172 + strbuf_release(mi->p_hdr_data[i]); 1173 + free(mi->p_hdr_data); 1174 + if (mi->s_hdr_data) 1175 + for (i = 0; mi->s_hdr_data[i]; i++) 1176 + strbuf_release(mi->s_hdr_data[i]); 1177 + free(mi->s_hdr_data); 1178 + 1179 + while (mi->content < mi->content_top) { 1180 + free(*(mi->content_top)); 1181 + mi->content_top--; 1182 + } 1183 + 1184 + strbuf_release(&mi->log_message); 1185 + }
+129
t/t4256/1/patch
··· 1 + From: A <author@example.com> 2 + Subject: [PATCH] mailinfo: support format=flowed 3 + Message-ID: <aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa@example.com> 4 + Date: Sat, 25 Aug 2018 22:04:50 +0200 5 + User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:60.0) Gecko/20100101 6 + Thunderbird/60.0 7 + MIME-Version: 1.0 8 + Content-Type: text/plain; charset=utf-8; format=flowed 9 + Content-Language: en-US 10 + Content-Transfer-Encoding: 7bit 11 + 12 + --- 13 + mailinfo.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 14 + 1 file changed, 62 insertions(+), 2 deletions(-) 15 + 16 + diff --git a/mailinfo.c b/mailinfo.c 17 + index 3281a37d51..b395adbdf2 100644 18 + --- a/mailinfo.c 19 + +++ b/mailinfo.c 20 + @@ -237,11 +237,22 @@ static int slurp_attr(const char *line, const char 21 + *name, struct strbuf *attr) 22 + return 1; 23 + } 24 + 25 + +static int has_attr_value(const char *line, const char *name, const 26 + char *value) 27 + +{ 28 + + struct strbuf sb = STRBUF_INIT; 29 + + int rc = slurp_attr(line, name, &sb) && !strcasecmp(sb.buf, value); 30 + + strbuf_release(&sb); 31 + + return rc; 32 + +} 33 + + 34 + static void handle_content_type(struct mailinfo *mi, struct strbuf *line) 35 + { 36 + struct strbuf *boundary = xmalloc(sizeof(struct strbuf)); 37 + strbuf_init(boundary, line->len); 38 + 39 + + mi->format_flowed = has_attr_value(line->buf, "format=", "flowed"); 40 + + mi->delsp = has_attr_value(line->buf, "delsp=", "yes"); 41 + + 42 + if (slurp_attr(line->buf, "boundary=", boundary)) { 43 + strbuf_insert(boundary, 0, "--", 2); 44 + if (++mi->content_top >= &mi->content[MAX_BOUNDARIES]) { 45 + @@ -964,6 +975,52 @@ static int handle_boundary(struct mailinfo *mi, 46 + struct strbuf *line) 47 + return 1; 48 + } 49 + 50 + +static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line, 51 + + struct strbuf *prev) 52 + +{ 53 + + size_t len = line->len; 54 + + const char *rest; 55 + + 56 + + if (!mi->format_flowed) { 57 + + handle_filter(mi, 
line); 58 + + return; 59 + + } 60 + + 61 + + if (line->buf[len - 1] == '\n') { 62 + + len--; 63 + + if (len && line->buf[len - 1] == '\r') 64 + + len--; 65 + + } 66 + + 67 + + /* Keep signature separator as-is. */ 68 + + if (skip_prefix(line->buf, "-- ", &rest) && rest - line->buf == len) { 69 + + if (prev->len) { 70 + + handle_filter(mi, prev); 71 + + strbuf_reset(prev); 72 + + } 73 + + handle_filter(mi, line); 74 + + return; 75 + + } 76 + + 77 + + /* Unstuff space-stuffed line. */ 78 + + if (len && line->buf[0] == ' ') { 79 + + strbuf_remove(line, 0, 1); 80 + + len--; 81 + + } 82 + + 83 + + /* Save flowed line for later, but without the soft line break. */ 84 + + if (len && line->buf[len - 1] == ' ') { 85 + + strbuf_add(prev, line->buf, len - !!mi->delsp); 86 + + return; 87 + + } 88 + + 89 + + /* Prepend any previous partial lines */ 90 + + strbuf_insert(line, 0, prev->buf, prev->len); 91 + + strbuf_reset(prev); 92 + + 93 + + handle_filter(mi, line); 94 + +} 95 + + 96 + static void handle_body(struct mailinfo *mi, struct strbuf *line) 97 + { 98 + struct strbuf prev = STRBUF_INIT; 99 + @@ -1012,7 +1069,7 @@ static void handle_body(struct mailinfo *mi, 100 + struct strbuf *line) 101 + strbuf_addbuf(&prev, sb); 102 + break; 103 + } 104 + - handle_filter(mi, sb); 105 + + handle_filter_flowed(mi, sb, &prev); 106 + } 107 + /* 108 + * The partial chunk is saved in "prev" and will be 109 + @@ -1022,13 +1079,16 @@ static void handle_body(struct mailinfo *mi, 110 + struct strbuf *line) 111 + break; 112 + } 113 + default: 114 + - handle_filter(mi, line); 115 + + handle_filter_flowed(mi, line, &prev); 116 + } 117 + 118 + if (mi->input_error) 119 + break; 120 + } while (!strbuf_getwholeline(line, mi->input, '\n')); 121 + 122 + + if (prev.len) 123 + + handle_filter(mi, &prev); 124 + + 125 + flush_inbody_header_accum(mi); 126 + 127 + handle_body_out: 128 + -- 129 + 2.18.0