Git fork

UTF-8: introduce i18n.logoutputencoding.

It is plausible for somebody to want to view the commit log in a
different encoding from i18n.commitencoding -- the project's
policy may be UTF-8 and the user may be using a commit message
hook to run iconv to conform to that policy (and either leave
i18n.commitencoding unset so that it defaults to UTF-8, or have it
explicitly set to UTF-8). Even then, Latin-1 may be more convenient for
the usual pager and the terminal the user uses.

The new variable i18n.logoutputencoding is used in preference to
i18n.commitencoding to decide which encoding to recode the log
output into when git-log and friends format the commit log message.

Signed-off-by: Junio C Hamano <junkio@cox.net>

+160 -23
+4
Documentation/config.txt
··· 248 248 browser (and possibly at other places in the future or in other 249 249 porcelains). See e.g. gitlink:git-mailinfo[1]. Defaults to 'utf-8'. 250 250 251 + i18n.logOutputEncoding:: 252 + Character encoding the commit messages are converted to when 253 + running `git-log` and friends. 254 + 251 255 log.showroot:: 252 256 If true, the initial commit will be shown as a big creation event. 253 257 This is equivalent to a diff against an empty tree.
+3 -1
builtin-commit-tree.c
··· 118 118 parents++; 119 119 } 120 120 121 - encoding_is_utf8 = !strcmp(git_commit_encoding, "utf-8"); 121 + /* Not having i18n.commitencoding is the same as having utf-8 */ 122 + encoding_is_utf8 = (!git_commit_encoding || 123 + !strcmp(git_commit_encoding, "utf-8")); 122 124 123 125 init_buffer(&buffer, &size); 124 126 add_buffer(&buffer, &size, "tree %s\n", sha1_to_hex(tree_sha1));
+2 -5
builtin-log.c
··· 33 33 const char *arg = argv[i]; 34 34 if (!strncmp(arg, "--encoding=", 11)) { 35 35 arg += 11; 36 - if (MAX_ENCODING_LENGTH <= strlen(arg)) 37 - die(" Value of output encoding '%s' too long", 38 - arg); 39 36 if (strcmp(arg, "none")) 40 - strcpy(git_commit_encoding, arg); 37 + git_log_output_encoding = strdup(arg); 41 38 else 42 - git_commit_encoding[0] = 0; 39 + git_log_output_encoding = ""; 43 40 } 44 41 else 45 42 die("unrecognized argument: %s", arg);
+2 -1
builtin-mailinfo.c
··· 806 806 if (!strcmp(argv[1], "-k")) 807 807 keep_subject = 1; 808 808 else if (!strcmp(argv[1], "-u")) 809 - metainfo_charset = git_commit_encoding; 809 + metainfo_charset = (git_commit_encoding 810 + ? git_commit_encoding : "utf-8"); 810 811 else if (!strncmp(argv[1], "--encoding=", 11)) 811 812 metainfo_charset = argv[1] + 11; 812 813 else
+2 -2
cache.h
··· 416 416 extern char git_default_email[MAX_GITNAME]; 417 417 extern char git_default_name[MAX_GITNAME]; 418 418 419 - #define MAX_ENCODING_LENGTH 64 420 - extern char git_commit_encoding[MAX_ENCODING_LENGTH]; 419 + extern char *git_commit_encoding; 420 + extern char *git_log_output_encoding; 421 421 422 422 extern int copy_fd(int ifd, int ofd); 423 423 extern void write_or_die(int fd, const void *buf, size_t count);
+15 -12
commit.c
··· 592 592 593 593 static char *logmsg_reencode(const struct commit *commit) 594 594 { 595 - char *encoding = get_header(commit, "encoding"); 595 + char *encoding; 596 596 char *out; 597 + char *output_encoding = (git_log_output_encoding 598 + ? git_log_output_encoding 599 + : git_commit_encoding); 597 600 598 - if (!encoding || !strcmp(encoding, git_commit_encoding)) 601 + if (!output_encoding) 599 602 return NULL; 600 - out = reencode_string(commit->buffer, git_commit_encoding, encoding); 603 + encoding = get_header(commit, "encoding"); 604 + if (!encoding || !strcmp(encoding, output_encoding)) { 605 + free(encoding); 606 + return NULL; 607 + } 608 + out = reencode_string(commit->buffer, output_encoding, encoding); 601 609 free(encoding); 602 610 if (!out) 603 611 return NULL; ··· 618 626 int parents_shown = 0; 619 627 const char *msg = commit->buffer; 620 628 int plain_non_ascii = 0; 621 - char *reencoded = NULL; 629 + char *reencoded = logmsg_reencode(commit); 622 630 623 - if (*git_commit_encoding) { 624 - reencoded = logmsg_reencode(commit); 625 - if (reencoded) { 626 - msg = reencoded; 627 - len = strlen(msg); 628 - } 629 - } 631 + if (reencoded) 632 + msg = reencoded; 630 633 631 634 if (fmt == CMIT_FMT_ONELINE || fmt == CMIT_FMT_EMAIL) 632 635 indent = 0; ··· 643 646 for (in_body = i = 0; (ch = msg[i]) && i < len; i++) { 644 647 if (!in_body) { 645 648 /* author could be non 7-bit ASCII but 646 - * the log may so; skip over the 649 + * the log may be so; skip over the 647 650 * header part first. 648 651 */ 649 652 if (ch == '\n' &&
+7 -1
config.c
··· 309 309 } 310 310 311 311 if (!strcmp(var, "i18n.commitencoding")) { 312 - strlcpy(git_commit_encoding, value, sizeof(git_commit_encoding)); 312 + git_commit_encoding = strdup(value); 313 313 return 0; 314 314 } 315 + 316 + if (!strcmp(var, "i18n.logoutputencoding")) { 317 + git_log_output_encoding = strdup(value); 318 + return 0; 319 + } 320 + 315 321 316 322 if (!strcmp(var, "pager.color") || !strcmp(var, "color.pager")) { 317 323 pager_use_color = git_config_bool(var,value);
+1
contrib/completion/git-completion.bash
··· 711 711 core.compression 712 712 core.legacyHeaders 713 713 i18n.commitEncoding 714 + i18n.logOutputEncoding 714 715 diff.color 715 716 color.diff 716 717 diff.renameLimit
+2 -1
environment.c
··· 18 18 int log_all_ref_updates; 19 19 int warn_ambiguous_refs = 1; 20 20 int repository_format_version; 21 - char git_commit_encoding[MAX_ENCODING_LENGTH] = "utf-8"; 21 + char *git_commit_encoding; 22 + char *git_log_output_encoding; 22 23 int shared_repository = PERM_UMASK; 23 24 const char *apply_default_whitespace; 24 25 int zlib_compression_level = Z_DEFAULT_COMPRESSION;
+104
t/t3900-i18n-commit.sh
··· 1 + #!/bin/sh 2 + # 3 + # Copyright (c) 2006 Junio C Hamano 4 + # 5 + 6 + test_description='commit and log output encodings' 7 + 8 + . ./test-lib.sh 9 + 10 + compare_with () { 11 + git-show -s "$1" | sed -e '1,/^$/d' -e 's/^ //' -e '$d' >current && 12 + diff -u current "$2" 13 + } 14 + 15 + test_expect_success setup ' 16 + : >F && 17 + git-add F && 18 + T=$(git-write-tree) && 19 + C=$(git-commit-tree $T <../t3900/1-UTF-8.txt) && 20 + git-update-ref HEAD $C && 21 + git-tag C0 22 + ' 23 + 24 + test_expect_success 'no encoding header for base case' ' 25 + E=$(git-cat-file commit C0 | sed -ne "s/^encoding //p") && 26 + test z = "z$E" 27 + ' 28 + 29 + for H in ISO-8859-1 EUCJP ISO2022JP 30 + do 31 + test_expect_success "$H setup" ' 32 + git-repo-config i18n.commitencoding $H && 33 + git-checkout -b $H C0 && 34 + echo $H >F && 35 + git-commit -a -F ../t3900/$H.txt 36 + ' 37 + done 38 + 39 + for H in ISO-8859-1 EUCJP ISO2022JP 40 + do 41 + test_expect_success "check encoding header for $H" ' 42 + E=$(git-cat-file commit '$H' | sed -ne "s/^encoding //p") && 43 + test "z$E" = "z'$H'" 44 + ' 45 + done 46 + 47 + test_expect_success 'repo-config to remove customization' ' 48 + git-repo-config --unset-all i18n.commitencoding && 49 + if Z=$(git-repo-config --get-all i18n.commitencoding) 50 + then 51 + echo Oops, should have failed. 52 + false 53 + else 54 + test z = "z$Z" 55 + fi && 56 + git-repo-config i18n.commitencoding utf-8 57 + ' 58 + 59 + test_expect_success 'ISO-8859-1 should be shown in UTF-8 now' ' 60 + compare_with ISO-8859-1 ../t3900/1-UTF-8.txt 61 + ' 62 + 63 + for H in EUCJP ISO2022JP 64 + do 65 + test_expect_success "$H should be shown in UTF-8 now" ' 66 + compare_with '$H' ../t3900/2-UTF-8.txt 67 + ' 68 + done 69 + 70 + test_expect_success 'repo-config to add customization' ' 71 + git-repo-config --unset-all i18n.commitencoding && 72 + if Z=$(git-repo-config --get-all i18n.commitencoding) 73 + then 74 + echo Oops, should have failed. 
75 + false 76 + else 77 + test z = "z$Z" 78 + fi 79 + ' 80 + 81 + for H in ISO-8859-1 EUCJP ISO2022JP 82 + do 83 + test_expect_success "$H should be shown in itself now" ' 84 + git-repo-config i18n.commitencoding '$H' && 85 + compare_with '$H' ../t3900/'$H'.txt 86 + ' 87 + done 88 + 89 + test_expect_success 'repo-config to tweak customization' ' 90 + git-repo-config i18n.logoutputencoding utf-8 91 + ' 92 + 93 + test_expect_success 'ISO-8859-1 should be shown in UTF-8 now' ' 94 + compare_with ISO-8859-1 ../t3900/1-UTF-8.txt 95 + ' 96 + 97 + for H in EUCJP ISO2022JP 98 + do 99 + test_expect_success "$H should be shown in UTF-8 now" ' 100 + compare_with '$H' ../t3900/2-UTF-8.txt 101 + ' 102 + done 103 + 104 + test_done
+3
t/t3900/1-UTF-8.txt
··· 1 + ÄËÑÏÖ 2 + 3 + Ábçdèfg
+4
t/t3900/2-UTF-8.txt
··· 1 + はれひほふ 2 + 3 + しているのが、いるので。 4 + 濱浜ほれぷりぽれまびぐりろへ。
+4
t/t3900/EUCJP.txt
··· 1 + �Ϥ��Ҥۤ� 2 + 3 + ���Ƥ����Τ��������Τǡ� 4 + ���ͤۤ��פ��ݤ��ޤӤ������ء�
+3
t/t3900/ISO-8859-1.txt
··· 1 + ����� 2 + 3 + �b�d�fg
+4
t/t3900/ISO2022JP.txt
··· 1 + $B$O$l$R$[$U(B 2 + 3 + $B$7$F$$$k$N$,!"$$$k$N$G!#(B 4 + $B_@IM$[$l$W$j$]$l$^$S$0$j$m$X!#(B