Git fork

Merge branch 'jk/url-decode'

* jk/url-decode:
decode file:// and ssh:// URLs
make url-related functions reusable

+153 -106
+1
Makefile
··· 627 627 LIB_OBJS += tree.o 628 628 LIB_OBJS += tree-walk.o 629 629 LIB_OBJS += unpack-trees.o 630 + LIB_OBJS += url.o 630 631 LIB_OBJS += usage.o 631 632 LIB_OBJS += userdiff.o 632 633 LIB_OBJS += utf8.o
+7 -1
connect.c
··· 5 5 #include "refs.h" 6 6 #include "run-command.h" 7 7 #include "remote.h" 8 + #include "url.h" 8 9 9 10 static char *server_capabilities; 10 11 ··· 450 451 struct child_process *git_connect(int fd[2], const char *url_orig, 451 452 const char *prog, int flags) 452 453 { 453 - char *url = xstrdup(url_orig); 454 + char *url; 454 455 char *host, *path; 455 456 char *end; 456 457 int c; ··· 465 466 * what happened to our children. 466 467 */ 467 468 signal(SIGCHLD, SIG_DFL); 469 + 470 + if (is_url(url_orig)) 471 + url = url_decode(url_orig); 472 + else 473 + url = xstrdup(url_orig); 468 474 469 475 host = strstr(url, "://"); 470 476 if (host) {
+3 -56
http-backend.c
··· 6 6 #include "exec_cmd.h" 7 7 #include "run-command.h" 8 8 #include "string-list.h" 9 + #include "url.h" 9 10 10 11 static const char content_type[] = "Content-Type"; 11 12 static const char content_length[] = "Content-Length"; ··· 25 26 { "receive-pack", "receivepack", -1 }, 26 27 }; 27 28 28 - static int decode_char(const char *q) 29 - { 30 - int i; 31 - unsigned char val = 0; 32 - for (i = 0; i < 2; i++) { 33 - unsigned char c = *q++; 34 - val <<= 4; 35 - if (c >= '0' && c <= '9') 36 - val += c - '0'; 37 - else if (c >= 'a' && c <= 'f') 38 - val += c - 'a' + 10; 39 - else if (c >= 'A' && c <= 'F') 40 - val += c - 'A' + 10; 41 - else 42 - return -1; 43 - } 44 - return val; 45 - } 46 - 47 - static char *decode_parameter(const char **query, int is_name) 48 - { 49 - const char *q = *query; 50 - struct strbuf out; 51 - 52 - strbuf_init(&out, 16); 53 - do { 54 - unsigned char c = *q; 55 - 56 - if (!c) 57 - break; 58 - if (c == '&' || (is_name && c == '=')) { 59 - q++; 60 - break; 61 - } 62 - 63 - if (c == '%') { 64 - int val = decode_char(q + 1); 65 - if (0 <= val) { 66 - strbuf_addch(&out, val); 67 - q += 3; 68 - continue; 69 - } 70 - } 71 - 72 - if (c == '+') 73 - strbuf_addch(&out, ' '); 74 - else 75 - strbuf_addch(&out, c); 76 - q++; 77 - } while (1); 78 - *query = q; 79 - return strbuf_detach(&out, NULL); 80 - } 81 - 82 29 static struct string_list *get_parameters(void) 83 30 { 84 31 if (!query_params) { ··· 86 33 87 34 query_params = xcalloc(1, sizeof(*query_params)); 88 35 while (query && *query) { 89 - char *name = decode_parameter(&query, 1); 90 - char *value = decode_parameter(&query, 0); 36 + char *name = url_decode_parameter_name(&query); 37 + char *value = url_decode_parameter_value(&query); 91 38 struct string_list_item *i; 92 39 93 40 i = string_list_lookup(name, query_params);
+12
t/t5601-clone.sh
··· 176 176 ) 177 177 ' 178 178 179 + test_expect_success 'respect url-encoding of file://' ' 180 + git init x+y && 181 + test_must_fail git clone "file://$PWD/x+y" xy-url && 182 + git clone "file://$PWD/x%2By" xy-url 183 + ' 184 + 185 + test_expect_success 'do not respect url-encoding of non-url path' ' 186 + git init x+y && 187 + test_must_fail git clone x%2By xy-regular && 188 + git clone x+y xy-regular 189 + ' 190 + 179 191 test_done
+2 -49
transport.c
··· 9 9 #include "dir.h" 10 10 #include "refs.h" 11 11 #include "branch.h" 12 + #include "url.h" 12 13 13 14 /* rsync support */ 14 15 ··· 871 872 return S_ISREG(buf.st_mode); 872 873 } 873 874 874 - static int isurlschemechar(int first_flag, int ch) 875 - { 876 - /* 877 - * The set of valid URL schemes, as per STD66 (RFC3986) is 878 - * '[A-Za-z][A-Za-z0-9+.-]*'. But use sightly looser check 879 - * of '[A-Za-z0-9][A-Za-z0-9+.-]*' because earlier version 880 - * of check used '[A-Za-z0-9]+' so not to break any remote 881 - * helpers. 882 - */ 883 - int alphanumeric, special; 884 - alphanumeric = ch > 0 && isalnum(ch); 885 - special = ch == '+' || ch == '-' || ch == '.'; 886 - return alphanumeric || (!first_flag && special); 887 - } 888 - 889 - static int is_url(const char *url) 890 - { 891 - const char *url2, *first_slash; 892 - 893 - if (!url) 894 - return 0; 895 - url2 = url; 896 - first_slash = strchr(url, '/'); 897 - 898 - /* Input with no slash at all or slash first can't be URL. */ 899 - if (!first_slash || first_slash == url) 900 - return 0; 901 - /* Character before must be : and next must be /. */ 902 - if (first_slash[-1] != ':' || first_slash[1] != '/') 903 - return 0; 904 - /* There must be something before the :// */ 905 - if (first_slash == url + 1) 906 - return 0; 907 - /* 908 - * Check all characters up to first slash - 1. Only alphanum 909 - * is allowed. 910 - */ 911 - url2 = url; 912 - while (url2 < first_slash - 1) { 913 - if (!isurlschemechar(url2 == url, (unsigned char)*url2)) 914 - return 0; 915 - url2++; 916 - } 917 - 918 - /* Valid enough. */ 919 - return 1; 920 - } 921 - 922 875 static int external_specification_len(const char *url) 923 876 { 924 877 return strchr(url, ':') - url; ··· 946 899 if (url) { 947 900 const char *p = url; 948 901 949 - while (isurlschemechar(p == url, *p)) 902 + while (is_urlschemechar(p == url, *p)) 950 903 p++; 951 904 if (!prefixcmp(p, "::")) 952 905 helper = xstrndup(url, p - url);
+118
url.c
··· 1 + #include "cache.h" 2 + 3 + int is_urlschemechar(int first_flag, int ch) 4 + { 5 + /* 6 + * The set of valid URL schemes, as per STD66 (RFC3986) is 7 + * '[A-Za-z][A-Za-z0-9+.-]*'. But use sightly looser check 8 + * of '[A-Za-z0-9][A-Za-z0-9+.-]*' because earlier version 9 + * of check used '[A-Za-z0-9]+' so not to break any remote 10 + * helpers. 11 + */ 12 + int alphanumeric, special; 13 + alphanumeric = ch > 0 && isalnum(ch); 14 + special = ch == '+' || ch == '-' || ch == '.'; 15 + return alphanumeric || (!first_flag && special); 16 + } 17 + 18 + int is_url(const char *url) 19 + { 20 + const char *url2, *first_slash; 21 + 22 + if (!url) 23 + return 0; 24 + url2 = url; 25 + first_slash = strchr(url, '/'); 26 + 27 + /* Input with no slash at all or slash first can't be URL. */ 28 + if (!first_slash || first_slash == url) 29 + return 0; 30 + /* Character before must be : and next must be /. */ 31 + if (first_slash[-1] != ':' || first_slash[1] != '/') 32 + return 0; 33 + /* There must be something before the :// */ 34 + if (first_slash == url + 1) 35 + return 0; 36 + /* 37 + * Check all characters up to first slash - 1. Only alphanum 38 + * is allowed. 39 + */ 40 + url2 = url; 41 + while (url2 < first_slash - 1) { 42 + if (!is_urlschemechar(url2 == url, (unsigned char)*url2)) 43 + return 0; 44 + url2++; 45 + } 46 + 47 + /* Valid enough. 
*/ 48 + return 1; 49 + } 50 + 51 + static int url_decode_char(const char *q) 52 + { 53 + int i; 54 + unsigned char val = 0; 55 + for (i = 0; i < 2; i++) { 56 + unsigned char c = *q++; 57 + val <<= 4; 58 + if (c >= '0' && c <= '9') 59 + val += c - '0'; 60 + else if (c >= 'a' && c <= 'f') 61 + val += c - 'a' + 10; 62 + else if (c >= 'A' && c <= 'F') 63 + val += c - 'A' + 10; 64 + else 65 + return -1; 66 + } 67 + return val; 68 + } 69 + 70 + static char *url_decode_internal(const char **query, const char *stop_at) 71 + { 72 + const char *q = *query; 73 + struct strbuf out; 74 + 75 + strbuf_init(&out, 16); 76 + do { 77 + unsigned char c = *q; 78 + 79 + if (!c) 80 + break; 81 + if (stop_at && strchr(stop_at, c)) { 82 + q++; 83 + break; 84 + } 85 + 86 + if (c == '%') { 87 + int val = url_decode_char(q + 1); 88 + if (0 <= val) { 89 + strbuf_addch(&out, val); 90 + q += 3; 91 + continue; 92 + } 93 + } 94 + 95 + if (c == '+') 96 + strbuf_addch(&out, ' '); 97 + else 98 + strbuf_addch(&out, c); 99 + q++; 100 + } while (1); 101 + *query = q; 102 + return strbuf_detach(&out, NULL); 103 + } 104 + 105 + char *url_decode(const char *url) 106 + { 107 + return url_decode_internal(&url, NULL); 108 + } 109 + 110 + char *url_decode_parameter_name(const char **query) 111 + { 112 + return url_decode_internal(query, "&="); 113 + } 114 + 115 + char *url_decode_parameter_value(const char **query) 116 + { 117 + return url_decode_internal(query, "&"); 118 + }
+10
url.h
#ifndef URL_H
#define URL_H

/*
 * Non-zero if `ch` is acceptable in a URL scheme; `first_flag` is
 * non-zero when `ch` is the scheme's first character.
 */
int is_urlschemechar(int first_flag, int ch);

/* Non-zero if `url` looks like "<scheme>://...". */
int is_url(const char *url);

/* Return a newly allocated, percent-decoded copy of `url`. */
char *url_decode(const char *url);

/*
 * Decode one query-string parameter name (terminated by '&' or '=')
 * or value (terminated by '&') starting at *query, and advance *query
 * past the consumed input.  The result is newly allocated.
 */
char *url_decode_parameter_name(const char **query);
char *url_decode_parameter_value(const char **query);

#endif /* URL_H */