mailinfo.c at reftables-rust · freshlybakedca.ke/git

freshlybakedca.ke / git
fork atom
Git fork
fork atom
git / mailinfo.c
at reftables-rust 1319 lines 30 kB view raw
wrap content
Patrick Steinhardt config: move Git config parsing into "environment.c" 7mo ago
08b77586
   1#define DISABLE_SIGN_COMPARE_WARNINGS
   2
   3#include "git-compat-util.h"
   4#include "config.h"
   5#include "environment.h"
   6#include "gettext.h"
   7#include "hex-ll.h"
   8#include "utf8.h"
   9#include "strbuf.h"
  10#include "mailinfo.h"
  11
  12static void cleanup_space(struct strbuf *sb)
  13{
  14	size_t pos, cnt;
  15	for (pos = 0; pos < sb->len; pos++) {
  16		if (isspace(sb->buf[pos])) {
  17			sb->buf[pos] = ' ';
  18			for (cnt = 0; isspace(sb->buf[pos + cnt + 1]); cnt++);
  19			strbuf_remove(sb, pos + 1, cnt);
  20		}
  21	}
  22}
  23
  24static void get_sane_name(struct strbuf *out, struct strbuf *name, struct strbuf *email)
  25{
  26	struct strbuf *src = name;
  27	if (!name->len || 60 < name->len || strpbrk(name->buf, "@<>"))
  28		src = email;
  29	else if (name == out)
  30		return;
  31	strbuf_reset(out);
  32	strbuf_addbuf(out, src);
  33}
  34
  35static void parse_bogus_from(struct mailinfo *mi, const struct strbuf *line)
  36{
  37	/* John Doe <johndoe> */
  38
  39	char *bra, *ket;
  40	/* This is fallback, so do not bother if we already have an
  41	 * e-mail address.
  42	 */
  43	if (mi->email.len)
  44		return;
  45
  46	bra = strchr(line->buf, '<');
  47	if (!bra)
  48		return;
  49	ket = strchr(bra, '>');
  50	if (!ket)
  51		return;
  52
  53	strbuf_reset(&mi->email);
  54	strbuf_add(&mi->email, bra + 1, ket - bra - 1);
  55
  56	strbuf_reset(&mi->name);
  57	strbuf_add(&mi->name, line->buf, bra - line->buf);
  58	strbuf_trim(&mi->name);
  59	get_sane_name(&mi->name, &mi->name, &mi->email);
  60}
  61
  62static const char *unquote_comment(struct strbuf *outbuf, const char *in)
  63{
  64	int take_next_literally = 0;
  65	int depth = 1;
  66
  67	strbuf_addch(outbuf, '(');
  68
  69	while (*in) {
  70		int c = *in++;
  71		if (take_next_literally == 1) {
  72			take_next_literally = 0;
  73		} else {
  74			switch (c) {
  75			case '\\':
  76				take_next_literally = 1;
  77				continue;
  78			case '(':
  79				strbuf_addch(outbuf, '(');
  80				depth++;
  81				continue;
  82			case ')':
  83				strbuf_addch(outbuf, ')');
  84				if (!--depth)
  85					return in;
  86				continue;
  87			}
  88		}
  89
  90		strbuf_addch(outbuf, c);
  91	}
  92
  93	return in;
  94}
  95
  96static const char *unquote_quoted_string(struct strbuf *outbuf, const char *in)
  97{
  98	int take_next_literally = 0;
  99
 100	while (*in) {
 101		int c = *in++;
 102		if (take_next_literally == 1) {
 103			take_next_literally = 0;
 104		} else {
 105			switch (c) {
 106			case '\\':
 107				take_next_literally = 1;
 108				continue;
 109			case '"':
 110				return in;
 111			}
 112		}
 113
 114		strbuf_addch(outbuf, c);
 115	}
 116
 117	return in;
 118}
 119
 120static void unquote_quoted_pair(struct strbuf *line)
 121{
 122	struct strbuf outbuf;
 123	const char *in = line->buf;
 124	int c;
 125
 126	strbuf_init(&outbuf, line->len);
 127
 128	while ((c = *in++) != 0) {
 129		switch (c) {
 130		case '"':
 131			in = unquote_quoted_string(&outbuf, in);
 132			continue;
 133		case '(':
 134			in = unquote_comment(&outbuf, in);
 135			continue;
 136		}
 137
 138		strbuf_addch(&outbuf, c);
 139	}
 140
 141	strbuf_swap(&outbuf, line);
 142	strbuf_release(&outbuf);
 143
 144}
 145
 146static void handle_from(struct mailinfo *mi, const struct strbuf *from)
 147{
 148	char *at;
 149	size_t el;
 150	struct strbuf f;
 151
 152	strbuf_init(&f, from->len);
 153	strbuf_addbuf(&f, from);
 154
 155	unquote_quoted_pair(&f);
 156
 157	at = strchr(f.buf, '@');
 158	if (!at) {
 159		parse_bogus_from(mi, from);
 160		goto out;
 161	}
 162
 163	/*
 164	 * If we already have one email, don't take any confusing lines
 165	 */
 166	if (mi->email.len && strchr(at + 1, '@'))
 167		goto out;
 168
 169	/* Pick up the string around '@', possibly delimited with <>
 170	 * pair; that is the email part.
 171	 */
 172	while (at > f.buf) {
 173		char c = at[-1];
 174		if (isspace(c))
 175			break;
 176		if (c == '<') {
 177			at[-1] = ' ';
 178			break;
 179		}
 180		at--;
 181	}
 182	el = strcspn(at, " \n\t\r\v\f>");
 183	strbuf_reset(&mi->email);
 184	strbuf_add(&mi->email, at, el);
 185	strbuf_remove(&f, at - f.buf, el + (at[el] ? 1 : 0));
 186
 187	/* The remainder is name.  It could be
 188	 *
 189	 * - "John Doe <john.doe@xz>"			(a), or
 190	 * - "john.doe@xz (John Doe)"			(b), or
 191	 * - "John (zzz) Doe <john.doe@xz> (Comment)"	(c)
 192	 *
 193	 * but we have removed the email part, so
 194	 *
 195	 * - remove extra spaces which could stay after email (case 'c'), and
 196	 * - trim from both ends, possibly removing the () pair at the end
 197	 *   (cases 'a' and 'b').
 198	 */
 199	cleanup_space(&f);
 200	strbuf_trim(&f);
 201	if (f.buf[0] == '(' && f.len && f.buf[f.len - 1] == ')') {
 202		strbuf_remove(&f, 0, 1);
 203		strbuf_setlen(&f, f.len - 1);
 204	}
 205
 206	get_sane_name(&mi->name, &f, &mi->email);
 207out:
 208	strbuf_release(&f);
 209}
 210
 211static void handle_header(struct strbuf **out, const struct strbuf *line)
 212{
 213	if (!*out) {
 214		*out = xmalloc(sizeof(struct strbuf));
 215		strbuf_init(*out, line->len);
 216	} else
 217		strbuf_reset(*out);
 218
 219	strbuf_addbuf(*out, line);
 220}
 221
 222/* NOTE NOTE NOTE.  We do not claim we do full MIME.  We just attempt
 223 * to have enough heuristics to grok MIME encoded patches often found
 224 * on our mailing lists.  For example, we do not even treat header lines
 225 * case insensitively.
 226 */
 227
 228static int slurp_attr(const char *line, const char *name, struct strbuf *attr)
 229{
 230	const char *ends, *ap = strcasestr(line, name);
 231	size_t sz;
 232
 233	strbuf_setlen(attr, 0);
 234	if (!ap)
 235		return 0;
 236	ap += strlen(name);
 237	if (*ap == '"') {
 238		ap++;
 239		ends = "\"";
 240	}
 241	else
 242		ends = "; \t";
 243	sz = strcspn(ap, ends);
 244	strbuf_add(attr, ap, sz);
 245	return 1;
 246}
 247
 248static int has_attr_value(const char *line, const char *name, const char *value)
 249{
 250	struct strbuf sb = STRBUF_INIT;
 251	int rc = slurp_attr(line, name, &sb) && !strcasecmp(sb.buf, value);
 252	strbuf_release(&sb);
 253	return rc;
 254}
 255
 256static void handle_content_type(struct mailinfo *mi, struct strbuf *line)
 257{
 258	struct strbuf *boundary = xmalloc(sizeof(struct strbuf));
 259	strbuf_init(boundary, line->len);
 260
 261	mi->format_flowed = has_attr_value(line->buf, "format=", "flowed");
 262	mi->delsp = has_attr_value(line->buf, "delsp=", "yes");
 263
 264	if (slurp_attr(line->buf, "boundary=", boundary)) {
 265		strbuf_insertstr(boundary, 0, "--");
 266		if (++mi->content_top >= &mi->content[MAX_BOUNDARIES]) {
 267			error("Too many boundaries to handle");
 268			mi->input_error = -1;
 269			mi->content_top = &mi->content[MAX_BOUNDARIES] - 1;
 270			strbuf_release(boundary);
 271			free(boundary);
 272			return;
 273		}
 274		*(mi->content_top) = boundary;
 275		boundary = NULL;
 276	}
 277	slurp_attr(line->buf, "charset=", &mi->charset);
 278
 279	if (boundary) {
 280		strbuf_release(boundary);
 281		free(boundary);
 282	}
 283}
 284
 285static void handle_content_transfer_encoding(struct mailinfo *mi,
 286					     const struct strbuf *line)
 287{
 288	if (strcasestr(line->buf, "base64"))
 289		mi->transfer_encoding = TE_BASE64;
 290	else if (strcasestr(line->buf, "quoted-printable"))
 291		mi->transfer_encoding = TE_QP;
 292	else
 293		mi->transfer_encoding = TE_DONTCARE;
 294}
 295
 296static int is_multipart_boundary(struct mailinfo *mi, const struct strbuf *line)
 297{
 298	struct strbuf *content_top = *(mi->content_top);
 299
 300	return ((content_top->len <= line->len) &&
 301		!memcmp(line->buf, content_top->buf, content_top->len));
 302}
 303
 304static void cleanup_subject(struct mailinfo *mi, struct strbuf *subject)
 305{
 306	size_t at = 0;
 307
 308	while (at < subject->len) {
 309		char *pos;
 310		size_t remove;
 311
 312		switch (subject->buf[at]) {
 313		case 'r': case 'R':
 314			if (subject->len <= at + 3)
 315				break;
 316			if ((subject->buf[at + 1] == 'e' ||
 317			     subject->buf[at + 1] == 'E') &&
 318			    subject->buf[at + 2] == ':') {
 319				strbuf_remove(subject, at, 3);
 320				continue;
 321			}
 322			at++;
 323			break;
 324		case ' ': case '\t': case ':':
 325			strbuf_remove(subject, at, 1);
 326			continue;
 327		case '[':
 328			pos = strchr(subject->buf + at, ']');
 329			if (!pos)
 330				break;
 331			remove = pos - (subject->buf + at) + 1;
 332			if (!mi->keep_non_patch_brackets_in_subject ||
 333			    (7 <= remove &&
 334			     memmem(subject->buf + at, remove, "PATCH", 5)))
 335				strbuf_remove(subject, at, remove);
 336			else {
 337				at += remove;
 338				/*
 339				 * If the input had a space after the ], keep
 340				 * it.  We don't bother with finding the end of
 341				 * the space, since we later normalize it
 342				 * anyway.
 343				 */
 344				if (isspace(subject->buf[at]))
 345					at += 1;
 346			}
 347			continue;
 348		}
 349		break;
 350	}
 351	strbuf_trim(subject);
 352}
 353
 354static const char * const header[] = {
 355	"From", "Subject", "Date",
 356};
 357
 358static inline int skip_header(const struct strbuf *line, const char *hdr,
 359			      const char **outval)
 360{
 361	const char *val;
 362	if (!skip_iprefix(line->buf, hdr, &val) ||
 363	    *val++ != ':')
 364		return 0;
 365	while (isspace(*val))
 366		val++;
 367	*outval = val;
 368	return 1;
 369}
 370
 371static int is_format_patch_separator(const char *line, int len)
 372{
 373	static const char SAMPLE[] =
 374		"From e6807f3efca28b30decfecb1732a56c7db1137ee Mon Sep 17 00:00:00 2001\n";
 375	const char *cp;
 376
 377	if (len != strlen(SAMPLE))
 378		return 0;
 379	if (!skip_prefix(line, "From ", &cp))
 380		return 0;
 381	if (strspn(cp, "0123456789abcdef") != 40)
 382		return 0;
 383	cp += 40;
 384	return !memcmp(SAMPLE + (cp - line), cp, strlen(SAMPLE) - (cp - line));
 385}
 386
 387static int decode_q_segment(struct strbuf *out, const struct strbuf *q_seg,
 388			    int rfc2047)
 389{
 390	const char *in = q_seg->buf;
 391	int c;
 392	strbuf_grow(out, q_seg->len);
 393
 394	while ((c = *in++) != 0) {
 395		if (c == '=') {
 396			int ch, d = *in;
 397			if (d == '\n' || !d)
 398				break; /* drop trailing newline */
 399			ch = hex2chr(in);
 400			if (ch >= 0) {
 401				strbuf_addch(out, ch);
 402				in += 2;
 403				continue;
 404			}
 405			/* garbage -- fall through */
 406		}
 407		if (rfc2047 && c == '_') /* rfc2047 4.2 (2) */
 408			c = 0x20;
 409		strbuf_addch(out, c);
 410	}
 411	return 0;
 412}
 413
 414static int decode_b_segment(struct strbuf *out, const struct strbuf *b_seg)
 415{
 416	/* Decode in..ep, possibly in-place to ot */
 417	int c, pos = 0, acc = 0;
 418	const char *in = b_seg->buf;
 419	strbuf_grow(out, b_seg->len);
 420
 421	while ((c = *in++) != 0) {
 422		if (c == '+')
 423			c = 62;
 424		else if (c == '/')
 425			c = 63;
 426		else if ('A' <= c && c <= 'Z')
 427			c -= 'A';
 428		else if ('a' <= c && c <= 'z')
 429			c -= 'a' - 26;
 430		else if ('0' <= c && c <= '9')
 431			c -= '0' - 52;
 432		else
 433			continue; /* garbage */
 434		switch (pos++) {
 435		case 0:
 436			acc = (c << 2);
 437			break;
 438		case 1:
 439			strbuf_addch(out, (acc | (c >> 4)));
 440			acc = (c & 15) << 4;
 441			break;
 442		case 2:
 443			strbuf_addch(out, (acc | (c >> 2)));
 444			acc = (c & 3) << 6;
 445			break;
 446		case 3:
 447			strbuf_addch(out, (acc | c));
 448			acc = pos = 0;
 449			break;
 450		}
 451	}
 452	return 0;
 453}
 454
 455static int convert_to_utf8(struct mailinfo *mi,
 456			   struct strbuf *line, const char *charset)
 457{
 458	char *out;
 459	size_t out_len;
 460
 461	if (!mi->metainfo_charset || !charset || !*charset)
 462		return 0;
 463
 464	if (same_encoding(mi->metainfo_charset, charset))
 465		return 0;
 466	out = reencode_string_len(line->buf, line->len,
 467				  mi->metainfo_charset, charset, &out_len);
 468	if (!out) {
 469		mi->input_error = -1;
 470		return error("cannot convert from %s to %s",
 471			     charset, mi->metainfo_charset);
 472	}
 473	strbuf_attach(line, out, out_len, out_len);
 474	return 0;
 475}
 476
 477static void decode_header(struct mailinfo *mi, struct strbuf *it)
 478{
 479	char *in, *ep, *cp;
 480	struct strbuf outbuf = STRBUF_INIT, dec = STRBUF_INIT;
 481	struct strbuf charset_q = STRBUF_INIT, piecebuf = STRBUF_INIT;
 482	int found_error = 1; /* pessimism */
 483
 484	in = it->buf;
 485	while (in - it->buf <= it->len && (ep = strstr(in, "=?")) != NULL) {
 486		int encoding;
 487		strbuf_reset(&charset_q);
 488		strbuf_reset(&piecebuf);
 489
 490		if (in != ep) {
 491			/*
 492			 * We are about to process an encoded-word
 493			 * that begins at ep, but there is something
 494			 * before the encoded word.
 495			 */
 496			char *scan;
 497			for (scan = in; scan < ep; scan++)
 498				if (!isspace(*scan))
 499					break;
 500
 501			if (scan != ep || in == it->buf) {
 502				/*
 503				 * We should not lose that "something",
 504				 * unless we have just processed an
 505				 * encoded-word, and there is only LWS
 506				 * before the one we are about to process.
 507				 */
 508				strbuf_add(&outbuf, in, ep - in);
 509			}
 510		}
 511		/* E.g.
 512		 * ep : "=?iso-2022-jp?B?GyR...?= foo"
 513		 * ep : "=?ISO-8859-1?Q?Foo=FCbar?= baz"
 514		 */
 515		ep += 2;
 516
 517		if (ep - it->buf >= it->len || !(cp = strchr(ep, '?')))
 518			goto release_return;
 519
 520		if (cp + 3 - it->buf > it->len)
 521			goto release_return;
 522		strbuf_add(&charset_q, ep, cp - ep);
 523
 524		encoding = cp[1];
 525		if (!encoding || cp[2] != '?')
 526			goto release_return;
 527		ep = strstr(cp + 3, "?=");
 528		if (!ep)
 529			goto release_return;
 530		strbuf_add(&piecebuf, cp + 3, ep - cp - 3);
 531		switch (tolower(encoding)) {
 532		default:
 533			goto release_return;
 534		case 'b':
 535			if ((found_error = decode_b_segment(&dec, &piecebuf)))
 536				goto release_return;
 537			break;
 538		case 'q':
 539			if ((found_error = decode_q_segment(&dec, &piecebuf, 1)))
 540				goto release_return;
 541			break;
 542		}
 543		if (convert_to_utf8(mi, &dec, charset_q.buf))
 544			goto release_return;
 545
 546		strbuf_addbuf(&outbuf, &dec);
 547		strbuf_release(&dec);
 548		in = ep + 2;
 549	}
 550	strbuf_addstr(&outbuf, in);
 551	strbuf_reset(it);
 552	strbuf_addbuf(it, &outbuf);
 553	found_error = 0;
 554release_return:
 555	strbuf_release(&outbuf);
 556	strbuf_release(&charset_q);
 557	strbuf_release(&piecebuf);
 558	strbuf_release(&dec);
 559
 560	if (found_error)
 561		mi->input_error = -1;
 562}
 563
 564/*
 565 * Returns true if "line" contains a header matching "hdr", in which case "val"
 566 * will contain the value of the header with any RFC2047 B and Q encoding
 567 * unwrapped, and optionally normalize the meta information to utf8.
 568 */
 569static int parse_header(const struct strbuf *line,
 570			const char *hdr,
 571			struct mailinfo *mi,
 572			struct strbuf *val)
 573{
 574	const char *val_str;
 575
 576	if (!skip_header(line, hdr, &val_str))
 577		return 0;
 578	strbuf_addstr(val, val_str);
 579	decode_header(mi, val);
 580	return 1;
 581}
 582
 583static int check_header(struct mailinfo *mi,
 584			const struct strbuf *line,
 585			struct strbuf *hdr_data[], int overwrite)
 586{
 587	int i, ret = 0;
 588	struct strbuf sb = STRBUF_INIT;
 589
 590	/* search for the interesting parts */
 591	for (i = 0; i < ARRAY_SIZE(header); i++) {
 592		if ((!hdr_data[i] || overwrite) &&
 593		    parse_header(line, header[i], mi, &sb)) {
 594			handle_header(&hdr_data[i], &sb);
 595			ret = 1;
 596			goto check_header_out;
 597		}
 598	}
 599
 600	/* Content stuff */
 601	if (parse_header(line, "Content-Type", mi, &sb)) {
 602		handle_content_type(mi, &sb);
 603		ret = 1;
 604		goto check_header_out;
 605	}
 606	if (parse_header(line, "Content-Transfer-Encoding", mi, &sb)) {
 607		handle_content_transfer_encoding(mi, &sb);
 608		ret = 1;
 609		goto check_header_out;
 610	}
 611	if (parse_header(line, "Message-ID", mi, &sb)) {
 612		if (mi->add_message_id)
 613			mi->message_id = strbuf_detach(&sb, NULL);
 614		ret = 1;
 615		goto check_header_out;
 616	}
 617
 618check_header_out:
 619	strbuf_release(&sb);
 620	return ret;
 621}
 622
 623/*
 624 * Returns 1 if the given line or any line beginning with the given line is an
 625 * in-body header (that is, check_header will succeed when passed
 626 * mi->s_hdr_data).
 627 */
 628static int is_inbody_header(const struct mailinfo *mi,
 629			    const struct strbuf *line)
 630{
 631	int i;
 632	const char *val;
 633	for (i = 0; i < ARRAY_SIZE(header); i++)
 634		if (!mi->s_hdr_data[i] && skip_header(line, header[i], &val))
 635			return 1;
 636	return 0;
 637}
 638
 639static void decode_transfer_encoding(struct mailinfo *mi, struct strbuf *line)
 640{
 641	struct strbuf ret = STRBUF_INIT;
 642
 643	switch (mi->transfer_encoding) {
 644	case TE_QP:
 645		decode_q_segment(&ret, line, 0);
 646		break;
 647	case TE_BASE64:
 648		decode_b_segment(&ret, line);
 649		break;
 650	case TE_DONTCARE:
 651	default:
 652		return;
 653	}
 654	strbuf_reset(line);
 655	strbuf_addbuf(line, &ret);
 656	strbuf_release(&ret);
 657}
 658
 659static inline int patchbreak(const struct strbuf *line)
 660{
 661	size_t i;
 662
 663	/* Beginning of a "diff -" header? */
 664	if (starts_with(line->buf, "diff -"))
 665		return 1;
 666
 667	/* CVS "Index: " line? */
 668	if (starts_with(line->buf, "Index: "))
 669		return 1;
 670
 671	/*
 672	 * "--- <filename>" starts patches without headers
 673	 * "---<sp>*" is a manual separator
 674	 */
 675	if (line->len < 4)
 676		return 0;
 677
 678	if (starts_with(line->buf, "---")) {
 679		/* space followed by a filename? */
 680		if (line->buf[3] == ' ' && !isspace(line->buf[4]))
 681			return 1;
 682		/* Just whitespace? */
 683		for (i = 3; i < line->len; i++) {
 684			unsigned char c = line->buf[i];
 685			if (c == '\n')
 686				return 1;
 687			if (!isspace(c))
 688				break;
 689		}
 690		return 0;
 691	}
 692	return 0;
 693}
 694
 695static int is_scissors_line(const char *line)
 696{
 697	const char *c;
 698	int scissors = 0, gap = 0;
 699	const char *first_nonblank = NULL, *last_nonblank = NULL;
 700	int visible, perforation = 0, in_perforation = 0;
 701
 702	for (c = line; *c; c++) {
 703		if (isspace(*c)) {
 704			if (in_perforation) {
 705				perforation++;
 706				gap++;
 707			}
 708			continue;
 709		}
 710		last_nonblank = c;
 711		if (!first_nonblank)
 712			first_nonblank = c;
 713		if (*c == '-') {
 714			in_perforation = 1;
 715			perforation++;
 716			continue;
 717		}
 718		if (starts_with(c, ">8") || starts_with(c, "8<") ||
 719		    starts_with(c, ">%") || starts_with(c, "%<")) {
 720			in_perforation = 1;
 721			perforation += 2;
 722			scissors += 2;
 723			c++;
 724			continue;
 725		}
 726		in_perforation = 0;
 727	}
 728
 729	/*
 730	 * The mark must be at least 8 bytes long (e.g. "-- >8 --").
 731	 * Even though there can be arbitrary cruft on the same line
 732	 * (e.g. "cut here"), in order to avoid misidentification, the
 733	 * perforation must occupy more than a third of the visible
 734	 * width of the line, and dashes and scissors must occupy more
 735	 * than half of the perforation.
 736	 */
 737
 738	if (first_nonblank && last_nonblank)
 739		visible = last_nonblank - first_nonblank + 1;
 740	else
 741		visible = 0;
 742	return (scissors && 8 <= visible &&
 743		visible < perforation * 3 &&
 744		gap * 2 < perforation);
 745}
 746
 747static void flush_inbody_header_accum(struct mailinfo *mi)
 748{
 749	if (!mi->inbody_header_accum.len)
 750		return;
 751	if (!check_header(mi, &mi->inbody_header_accum, mi->s_hdr_data, 0))
 752		BUG("inbody_header_accum, if not empty, must always contain a valid in-body header");
 753	strbuf_reset(&mi->inbody_header_accum);
 754}
 755
 756static int check_inbody_header(struct mailinfo *mi, const struct strbuf *line)
 757{
 758	if (mi->inbody_header_accum.len &&
 759	    (line->buf[0] == ' ' || line->buf[0] == '\t')) {
 760		if (mi->use_scissors && is_scissors_line(line->buf)) {
 761			/*
 762			 * This is a scissors line; do not consider this line
 763			 * as a header continuation line.
 764			 */
 765			flush_inbody_header_accum(mi);
 766			return 0;
 767		}
 768		strbuf_strip_suffix(&mi->inbody_header_accum, "\n");
 769		strbuf_addbuf(&mi->inbody_header_accum, line);
 770		return 1;
 771	}
 772
 773	flush_inbody_header_accum(mi);
 774
 775	if (starts_with(line->buf, ">From") && isspace(line->buf[5]))
 776		return is_format_patch_separator(line->buf + 1, line->len - 1);
 777	if (starts_with(line->buf, "[PATCH]") && isspace(line->buf[7])) {
 778		int i;
 779		for (i = 0; i < ARRAY_SIZE(header); i++)
 780			if (!strcmp("Subject", header[i])) {
 781				handle_header(&mi->s_hdr_data[i], line);
 782				return 1;
 783			}
 784		return 0;
 785	}
 786	if (is_inbody_header(mi, line)) {
 787		strbuf_addbuf(&mi->inbody_header_accum, line);
 788		return 1;
 789	}
 790	return 0;
 791}
 792
 793static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line)
 794{
 795	assert(!mi->filter_stage);
 796
 797	if (mi->header_stage) {
 798		if (!line->len || (line->len == 1 && line->buf[0] == '\n')) {
 799			if (mi->inbody_header_accum.len) {
 800				flush_inbody_header_accum(mi);
 801				mi->header_stage = 0;
 802			}
 803			return 0;
 804		}
 805	}
 806
 807	if (mi->use_inbody_headers && mi->header_stage) {
 808		mi->header_stage = check_inbody_header(mi, line);
 809		if (mi->header_stage)
 810			return 0;
 811	} else
 812		/* Only trim the first (blank) line of the commit message
 813		 * when ignoring in-body headers.
 814		 */
 815		mi->header_stage = 0;
 816
 817	/* normalize the log message to UTF-8. */
 818	if (convert_to_utf8(mi, line, mi->charset.buf))
 819		return 0; /* mi->input_error already set */
 820
 821	if (mi->use_scissors && is_scissors_line(line->buf)) {
 822		int i;
 823
 824		strbuf_setlen(&mi->log_message, 0);
 825		mi->header_stage = 1;
 826
 827		/*
 828		 * We may have already read "secondary headers"; purge
 829		 * them to give ourselves a clean restart.
 830		 */
 831		for (i = 0; i < ARRAY_SIZE(header); i++) {
 832			if (mi->s_hdr_data[i])
 833				strbuf_release(mi->s_hdr_data[i]);
 834			FREE_AND_NULL(mi->s_hdr_data[i]);
 835		}
 836		return 0;
 837	}
 838
 839	if (patchbreak(line)) {
 840		if (mi->message_id)
 841			strbuf_addf(&mi->log_message,
 842				    "Message-ID: %s\n", mi->message_id);
 843		return 1;
 844	}
 845
 846	strbuf_addbuf(&mi->log_message, line);
 847	return 0;
 848}
 849
 850static void handle_patch(struct mailinfo *mi, const struct strbuf *line)
 851{
 852	fwrite(line->buf, 1, line->len, mi->patchfile);
 853	mi->patch_lines++;
 854}
 855
 856static void handle_filter(struct mailinfo *mi, struct strbuf *line)
 857{
 858	switch (mi->filter_stage) {
 859	case 0:
 860		if (!handle_commit_msg(mi, line))
 861			break;
 862		mi->filter_stage++;
 863		/* fallthrough */
 864	case 1:
 865		handle_patch(mi, line);
 866		break;
 867	}
 868}
 869
 870static int is_rfc2822_header(const struct strbuf *line)
 871{
 872	/*
 873	 * The section that defines the loosest possible
 874	 * field name is "3.6.8 Optional fields".
 875	 *
 876	 * optional-field = field-name ":" unstructured CRLF
 877	 * field-name = 1*ftext
 878	 * ftext = %d33-57 / %59-126
 879	 */
 880	int ch;
 881	char *cp = line->buf;
 882
 883	/* Count mbox From headers as headers */
 884	if (starts_with(cp, "From ") || starts_with(cp, ">From "))
 885		return 1;
 886
 887	while ((ch = *cp++)) {
 888		if (ch == ':')
 889			return 1;
 890		if ((33 <= ch && ch <= 57) ||
 891		    (59 <= ch && ch <= 126))
 892			continue;
 893		break;
 894	}
 895	return 0;
 896}
 897
 898static int read_one_header_line(struct strbuf *line, FILE *in)
 899{
 900	struct strbuf continuation = STRBUF_INIT;
 901
 902	/* Get the first part of the line. */
 903	if (strbuf_getline_lf(line, in))
 904		return 0;
 905
 906	/*
 907	 * Is it an empty line or not a valid rfc2822 header?
 908	 * If so, stop here, and return false ("not a header")
 909	 */
 910	strbuf_rtrim(line);
 911	if (!line->len || !is_rfc2822_header(line)) {
 912		/* Re-add the newline */
 913		strbuf_addch(line, '\n');
 914		return 0;
 915	}
 916
 917	/*
 918	 * Now we need to eat all the continuation lines..
 919	 * Yuck, 2822 header "folding"
 920	 */
 921	for (;;) {
 922		int peek;
 923
 924		peek = fgetc(in);
 925		if (peek == EOF)
 926			break;
 927		ungetc(peek, in);
 928		if (peek != ' ' && peek != '\t')
 929			break;
 930		if (strbuf_getline_lf(&continuation, in))
 931			break;
 932		continuation.buf[0] = ' ';
 933		strbuf_rtrim(&continuation);
 934		strbuf_addbuf(line, &continuation);
 935	}
 936	strbuf_release(&continuation);
 937
 938	return 1;
 939}
 940
 941static int find_boundary(struct mailinfo *mi, struct strbuf *line)
 942{
 943	while (!strbuf_getline_lf(line, mi->input)) {
 944		if (*(mi->content_top) && is_multipart_boundary(mi, line))
 945			return 1;
 946	}
 947	return 0;
 948}
 949
 950static int handle_boundary(struct mailinfo *mi, struct strbuf *line)
 951{
 952	struct strbuf newline = STRBUF_INIT;
 953
 954	strbuf_addch(&newline, '\n');
 955again:
 956	if (line->len >= (*(mi->content_top))->len + 2 &&
 957	    !memcmp(line->buf + (*(mi->content_top))->len, "--", 2)) {
 958		/* we hit an end boundary */
 959		/* pop the current boundary off the stack */
 960		strbuf_release(*(mi->content_top));
 961		FREE_AND_NULL(*(mi->content_top));
 962
 963		/* technically won't happen as is_multipart_boundary()
 964		   will fail first.  But just in case..
 965		 */
 966		if (--mi->content_top < mi->content) {
 967			error("Detected mismatched boundaries, can't recover");
 968			mi->input_error = -1;
 969			mi->content_top = mi->content;
 970			strbuf_release(&newline);
 971			return 0;
 972		}
 973		handle_filter(mi, &newline);
 974		strbuf_release(&newline);
 975		if (mi->input_error)
 976			return 0;
 977
 978		/* skip to the next boundary */
 979		if (!find_boundary(mi, line))
 980			return 0;
 981		goto again;
 982	}
 983
 984	/* set some defaults */
 985	mi->transfer_encoding = TE_DONTCARE;
 986	strbuf_reset(&mi->charset);
 987
 988	/* slurp in this section's info */
 989	while (read_one_header_line(line, mi->input))
 990		check_header(mi, line, mi->p_hdr_data, 0);
 991
 992	strbuf_release(&newline);
 993	/* replenish line */
 994	if (strbuf_getline_lf(line, mi->input))
 995		return 0;
 996	strbuf_addch(line, '\n');
 997	return 1;
 998}
 999
1000static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
1001				 struct strbuf *prev)
1002{
1003	size_t len = line->len;
1004	const char *rest;
1005
1006	if (!mi->format_flowed) {
1007		if (len >= 2 &&
1008		    line->buf[len - 2] == '\r' &&
1009		    line->buf[len - 1] == '\n') {
1010			mi->have_quoted_cr = 1;
1011			if (mi->quoted_cr == quoted_cr_strip) {
1012				strbuf_setlen(line, len - 2);
1013				strbuf_addch(line, '\n');
1014				len--;
1015			}
1016		}
1017		handle_filter(mi, line);
1018		return;
1019	}
1020
1021	if (line->buf[len - 1] == '\n') {
1022		len--;
1023		if (len && line->buf[len - 1] == '\r')
1024			len--;
1025	}
1026
1027	/* Keep signature separator as-is. */
1028	if (skip_prefix(line->buf, "-- ", &rest) && rest - line->buf == len) {
1029		if (prev->len) {
1030			handle_filter(mi, prev);
1031			strbuf_reset(prev);
1032		}
1033		handle_filter(mi, line);
1034		return;
1035	}
1036
1037	/* Unstuff space-stuffed line. */
1038	if (len && line->buf[0] == ' ') {
1039		strbuf_remove(line, 0, 1);
1040		len--;
1041	}
1042
1043	/* Save flowed line for later, but without the soft line break. */
1044	if (len && line->buf[len - 1] == ' ') {
1045		strbuf_add(prev, line->buf, len - !!mi->delsp);
1046		return;
1047	}
1048
1049	/* Prepend any previous partial lines */
1050	strbuf_insert(line, 0, prev->buf, prev->len);
1051	strbuf_reset(prev);
1052
1053	handle_filter(mi, line);
1054}
1055
1056static void summarize_quoted_cr(struct mailinfo *mi)
1057{
1058	if (mi->have_quoted_cr &&
1059	    mi->quoted_cr == quoted_cr_warn)
1060		warning(_("quoted CRLF detected"));
1061}
1062
1063static void handle_body(struct mailinfo *mi, struct strbuf *line)
1064{
1065	struct strbuf prev = STRBUF_INIT;
1066
1067	/* Skip up to the first boundary */
1068	if (*(mi->content_top)) {
1069		if (!find_boundary(mi, line))
1070			goto handle_body_out;
1071	}
1072
1073	do {
1074		/* process any boundary lines */
1075		if (*(mi->content_top) && is_multipart_boundary(mi, line)) {
1076			/* flush any leftover */
1077			if (prev.len) {
1078				handle_filter(mi, &prev);
1079				strbuf_reset(&prev);
1080			}
1081			summarize_quoted_cr(mi);
1082			mi->have_quoted_cr = 0;
1083			if (!handle_boundary(mi, line))
1084				goto handle_body_out;
1085		}
1086
1087		/* Unwrap transfer encoding */
1088		decode_transfer_encoding(mi, line);
1089
1090		switch (mi->transfer_encoding) {
1091		case TE_BASE64:
1092		case TE_QP:
1093		{
1094			struct strbuf **lines, **it, *sb;
1095
1096			/* Prepend any previous partial lines */
1097			strbuf_insert(line, 0, prev.buf, prev.len);
1098			strbuf_reset(&prev);
1099
1100			/*
1101			 * This is a decoded line that may contain
1102			 * multiple new lines.  Pass only one chunk
1103			 * at a time to handle_filter()
1104			 */
1105			lines = strbuf_split(line, '\n');
1106			for (it = lines; (sb = *it); it++) {
1107				if (!*(it + 1)) /* The last line */
1108					if (sb->buf[sb->len - 1] != '\n') {
1109						/* Partial line, save it for later. */
1110						strbuf_addbuf(&prev, sb);
1111						break;
1112					}
1113				handle_filter_flowed(mi, sb, &prev);
1114			}
1115			/*
1116			 * The partial chunk is saved in "prev" and will be
1117			 * appended by the next iteration of read_line_with_nul().
1118			 */
1119			strbuf_list_free(lines);
1120			break;
1121		}
1122		default:
1123			handle_filter_flowed(mi, line, &prev);
1124		}
1125
1126		if (mi->input_error)
1127			break;
1128	} while (!strbuf_getwholeline(line, mi->input, '\n'));
1129
1130	if (prev.len)
1131		handle_filter(mi, &prev);
1132	summarize_quoted_cr(mi);
1133
1134	flush_inbody_header_accum(mi);
1135
1136handle_body_out:
1137	strbuf_release(&prev);
1138}
1139
1140static void output_header_lines(FILE *fout, const char *hdr, const struct strbuf *data)
1141{
1142	const char *sp = data->buf;
1143	while (1) {
1144		char *ep = strchr(sp, '\n');
1145		int len;
1146		if (!ep)
1147			len = strlen(sp);
1148		else
1149			len = ep - sp;
1150		fprintf(fout, "%s: %.*s\n", hdr, len, sp);
1151		if (!ep)
1152			break;
1153		sp = ep + 1;
1154	}
1155}
1156
1157static void handle_info(struct mailinfo *mi)
1158{
1159	struct strbuf *hdr;
1160	int i;
1161
1162	for (i = 0; i < ARRAY_SIZE(header); i++) {
1163		/* only print inbody headers if we output a patch file */
1164		if (mi->patch_lines && mi->s_hdr_data[i])
1165			hdr = mi->s_hdr_data[i];
1166		else if (mi->p_hdr_data[i])
1167			hdr = mi->p_hdr_data[i];
1168		else
1169			continue;
1170
1171		if (memchr(hdr->buf, '\0', hdr->len)) {
1172			error("a NUL byte in '%s' is not allowed.", header[i]);
1173			mi->input_error = -1;
1174		}
1175
1176		if (!strcmp(header[i], "Subject")) {
1177			if (!mi->keep_subject) {
1178				cleanup_subject(mi, hdr);
1179				cleanup_space(hdr);
1180			}
1181			output_header_lines(mi->output, "Subject", hdr);
1182		} else if (!strcmp(header[i], "From")) {
1183			cleanup_space(hdr);
1184			handle_from(mi, hdr);
1185			fprintf(mi->output, "Author: %s\n", mi->name.buf);
1186			fprintf(mi->output, "Email: %s\n", mi->email.buf);
1187		} else {
1188			cleanup_space(hdr);
1189			fprintf(mi->output, "%s: %s\n", header[i], hdr->buf);
1190		}
1191	}
1192	fprintf(mi->output, "\n");
1193}
1194
1195int mailinfo(struct mailinfo *mi, const char *msg, const char *patch)
1196{
1197	FILE *cmitmsg;
1198	int peek;
1199	struct strbuf line = STRBUF_INIT;
1200
1201	cmitmsg = fopen(msg, "w");
1202	if (!cmitmsg) {
1203		perror(msg);
1204		return -1;
1205	}
1206	mi->patchfile = fopen(patch, "w");
1207	if (!mi->patchfile) {
1208		perror(patch);
1209		fclose(cmitmsg);
1210		return -1;
1211	}
1212
1213	mi->p_hdr_data = xcalloc(ARRAY_SIZE(header), sizeof(*(mi->p_hdr_data)));
1214	mi->s_hdr_data = xcalloc(ARRAY_SIZE(header), sizeof(*(mi->s_hdr_data)));
1215
1216	do {
1217		peek = fgetc(mi->input);
1218		if (peek == EOF) {
1219			fclose(cmitmsg);
1220			return error("empty patch: '%s'", patch);
1221		}
1222	} while (isspace(peek));
1223	ungetc(peek, mi->input);
1224
1225	/* process the email header */
1226	while (read_one_header_line(&line, mi->input))
1227		check_header(mi, &line, mi->p_hdr_data, 1);
1228
1229	handle_body(mi, &line);
1230	fwrite(mi->log_message.buf, 1, mi->log_message.len, cmitmsg);
1231	fclose(cmitmsg);
1232	fclose(mi->patchfile);
1233
1234	handle_info(mi);
1235	strbuf_release(&line);
1236	return mi->input_error;
1237}
1238
1239int mailinfo_parse_quoted_cr_action(const char *actionstr, int *action)
1240{
1241	if (!strcmp(actionstr, "nowarn"))
1242		*action = quoted_cr_nowarn;
1243	else if (!strcmp(actionstr, "warn"))
1244		*action = quoted_cr_warn;
1245	else if (!strcmp(actionstr, "strip"))
1246		*action = quoted_cr_strip;
1247	else
1248		return -1;
1249	return 0;
1250}
1251
1252static int git_mailinfo_config(const char *var, const char *value,
1253			       const struct config_context *ctx, void *mi_)
1254{
1255	struct mailinfo *mi = mi_;
1256
1257	if (!starts_with(var, "mailinfo."))
1258		return git_default_config(var, value, ctx, NULL);
1259	if (!strcmp(var, "mailinfo.scissors")) {
1260		mi->use_scissors = git_config_bool(var, value);
1261		return 0;
1262	}
1263	if (!strcmp(var, "mailinfo.quotedcr")) {
1264		if (!value)
1265			return config_error_nonbool(var);
1266		if (mailinfo_parse_quoted_cr_action(value, &mi->quoted_cr) != 0)
1267			return error(_("bad action '%s' for '%s'"), value, var);
1268		return 0;
1269	}
1270	/* perhaps others here */
1271	return 0;
1272}
1273
1274void setup_mailinfo(struct repository *r, struct mailinfo *mi)
1275{
1276	memset(mi, 0, sizeof(*mi));
1277	strbuf_init(&mi->name, 0);
1278	strbuf_init(&mi->email, 0);
1279	strbuf_init(&mi->charset, 0);
1280	strbuf_init(&mi->log_message, 0);
1281	strbuf_init(&mi->inbody_header_accum, 0);
1282	mi->quoted_cr = quoted_cr_warn;
1283	mi->header_stage = 1;
1284	mi->use_inbody_headers = 1;
1285	mi->content_top = mi->content;
1286	repo_config(r, git_mailinfo_config, mi);
1287}
1288
1289void clear_mailinfo(struct mailinfo *mi)
1290{
1291	strbuf_release(&mi->name);
1292	strbuf_release(&mi->email);
1293	strbuf_release(&mi->charset);
1294	strbuf_release(&mi->inbody_header_accum);
1295	free(mi->message_id);
1296
1297	for (size_t i = 0; i < ARRAY_SIZE(header); i++) {
1298		if (!mi->p_hdr_data[i])
1299			continue;
1300		strbuf_release(mi->p_hdr_data[i]);
1301		free(mi->p_hdr_data[i]);
1302	}
1303	free(mi->p_hdr_data);
1304
1305	for (size_t i = 0; i < ARRAY_SIZE(header); i++) {
1306		if (!mi->s_hdr_data[i])
1307			continue;
1308		strbuf_release(mi->s_hdr_data[i]);
1309		free(mi->s_hdr_data[i]);
1310	}
1311	free(mi->s_hdr_data);
1312
1313	while (mi->content < mi->content_top) {
1314		free(*(mi->content_top));
1315		mi->content_top--;
1316	}
1317
1318	strbuf_release(&mi->log_message);
1319}