Git fork
at reftables-rust 826 lines 21 kB view raw
1#define DISABLE_SIGN_COMPARE_WARNINGS 2 3#include "git-compat-util.h" 4#include "strbuf.h" 5#include "utf8.h" 6 7/* This code is originally from https://www.cl.cam.ac.uk/~mgk25/ucs/ */ 8 9static const char utf16_be_bom[] = {'\xFE', '\xFF'}; 10static const char utf16_le_bom[] = {'\xFF', '\xFE'}; 11static const char utf32_be_bom[] = {'\0', '\0', '\xFE', '\xFF'}; 12static const char utf32_le_bom[] = {'\xFF', '\xFE', '\0', '\0'}; 13 14struct interval { 15 ucs_char_t first; 16 ucs_char_t last; 17}; 18 19size_t display_mode_esc_sequence_len(const char *s) 20{ 21 const char *p = s; 22 if (*p++ != '\033') 23 return 0; 24 if (*p++ != '[') 25 return 0; 26 while (isdigit(*p) || *p == ';') 27 p++; 28 if (*p++ != 'm') 29 return 0; 30 return p - s; 31} 32 33/* auxiliary function for binary search in interval table */ 34static int bisearch(ucs_char_t ucs, const struct interval *table, int max) 35{ 36 int min = 0; 37 int mid; 38 39 if (ucs < table[0].first || ucs > table[max].last) 40 return 0; 41 while (max >= min) { 42 mid = min + (max - min) / 2; 43 if (ucs > table[mid].last) 44 min = mid + 1; 45 else if (ucs < table[mid].first) 46 max = mid - 1; 47 else 48 return 1; 49 } 50 51 return 0; 52} 53 54/* The following two functions define the column width of an ISO 10646 55 * character as follows: 56 * 57 * - The null character (U+0000) has a column width of 0. 58 * 59 * - Other C0/C1 control characters and DEL will lead to a return 60 * value of -1. 61 * 62 * - Non-spacing and enclosing combining characters (general 63 * category code Mn or Me in the Unicode database) have a 64 * column width of 0. 65 * 66 * - SOFT HYPHEN (U+00AD) has a column width of 1. 67 * 68 * - Other format characters (general category code Cf in the Unicode 69 * database) and ZERO WIDTH SPACE (U+200B) have a column width of 0. 70 * 71 * - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF) 72 * have a column width of 0. 73 * 74 * - Spacing characters in the East Asian Wide (W) or East Asian 75 * Full-width (F) category as defined in Unicode Technical 76 * Report #11 have a column width of 2. 77 * 78 * - All remaining characters (including all printable 79 * ISO 8859-1 and WGL4 characters, Unicode control characters, 80 * etc.) have a column width of 1. 81 * 82 * This implementation assumes that ucs_char_t characters are encoded 83 * in ISO 10646. 84 */ 85 86static int git_wcwidth(ucs_char_t ch) 87{ 88 /* 89 * Sorted list of non-overlapping intervals of non-spacing characters, 90 */ 91#include "unicode-width.h" 92 93 /* test for 8-bit control characters */ 94 if (ch == 0) 95 return 0; 96 if (ch < 32 || (ch >= 0x7f && ch < 0xa0)) 97 return -1; 98 99 /* binary search in table of non-spacing characters */ 100 if (bisearch(ch, zero_width, ARRAY_SIZE(zero_width) - 1)) 101 return 0; 102 103 /* binary search in table of double width characters */ 104 if (bisearch(ch, double_width, ARRAY_SIZE(double_width) - 1)) 105 return 2; 106 107 return 1; 108} 109 110/* 111 * Pick one ucs character starting from the location *start points at, 112 * and return it, while updating the *start pointer to point at the 113 * end of that character. When remainder_p is not NULL, the location 114 * holds the number of bytes remaining in the string that we are allowed 115 * to pick from. Otherwise we are allowed to pick up to the NUL that 116 * would eventually appear in the string. *remainder_p is also reduced 117 * by the number of bytes we have consumed. 118 * 119 * If the string was not a valid UTF-8, *start pointer is set to NULL 120 * and the return value is undefined. 121 */ 122static ucs_char_t pick_one_utf8_char(const char **start, size_t *remainder_p) 123{ 124 unsigned char *s = (unsigned char *)*start; 125 ucs_char_t ch; 126 size_t remainder, incr; 127 128 /* 129 * A caller that assumes NUL terminated text can choose 130 * not to bother with the remainder length. We will 131 * stop at the first NUL. 132 */ 133 remainder = (remainder_p ? *remainder_p : 999); 134 135 if (remainder < 1) { 136 goto invalid; 137 } else if (*s < 0x80) { 138 /* 0xxxxxxx */ 139 ch = *s; 140 incr = 1; 141 } else if ((s[0] & 0xe0) == 0xc0) { 142 /* 110XXXXx 10xxxxxx */ 143 if (remainder < 2 || 144 (s[1] & 0xc0) != 0x80 || 145 (s[0] & 0xfe) == 0xc0) 146 goto invalid; 147 ch = ((s[0] & 0x1f) << 6) | (s[1] & 0x3f); 148 incr = 2; 149 } else if ((s[0] & 0xf0) == 0xe0) { 150 /* 1110XXXX 10Xxxxxx 10xxxxxx */ 151 if (remainder < 3 || 152 (s[1] & 0xc0) != 0x80 || 153 (s[2] & 0xc0) != 0x80 || 154 /* overlong? */ 155 (s[0] == 0xe0 && (s[1] & 0xe0) == 0x80) || 156 /* surrogate? */ 157 (s[0] == 0xed && (s[1] & 0xe0) == 0xa0) || 158 /* U+FFFE or U+FFFF? */ 159 (s[0] == 0xef && s[1] == 0xbf && 160 (s[2] & 0xfe) == 0xbe)) 161 goto invalid; 162 ch = ((s[0] & 0x0f) << 12) | 163 ((s[1] & 0x3f) << 6) | (s[2] & 0x3f); 164 incr = 3; 165 } else if ((s[0] & 0xf8) == 0xf0) { 166 /* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */ 167 if (remainder < 4 || 168 (s[1] & 0xc0) != 0x80 || 169 (s[2] & 0xc0) != 0x80 || 170 (s[3] & 0xc0) != 0x80 || 171 /* overlong? */ 172 (s[0] == 0xf0 && (s[1] & 0xf0) == 0x80) || 173 /* > U+10FFFF? */ 174 (s[0] == 0xf4 && s[1] > 0x8f) || s[0] > 0xf4) 175 goto invalid; 176 ch = ((s[0] & 0x07) << 18) | ((s[1] & 0x3f) << 12) | 177 ((s[2] & 0x3f) << 6) | (s[3] & 0x3f); 178 incr = 4; 179 } else { 180invalid: 181 *start = NULL; 182 return 0; 183 } 184 185 *start += incr; 186 if (remainder_p) 187 *remainder_p = remainder - incr; 188 return ch; 189} 190 191/* 192 * This function returns the number of columns occupied by the character 193 * pointed to by the variable start. The pointer is updated to point at 194 * the next character. When remainder_p is not NULL, it points at the 195 * location that stores the number of remaining bytes we can use to pick 196 * a character (see pick_one_utf8_char() above). 197 */ 198int utf8_width(const char **start, size_t *remainder_p) 199{ 200 ucs_char_t ch = pick_one_utf8_char(start, remainder_p); 201 if (!*start) 202 return 0; 203 return git_wcwidth(ch); 204} 205 206/* 207 * Returns the total number of columns required by a null-terminated 208 * string, assuming that the string is utf8. Returns strlen() instead 209 * if the string does not look like a valid utf8 string. 210 */ 211int utf8_strnwidth(const char *string, size_t len, int skip_ansi) 212{ 213 const char *orig = string; 214 size_t width = 0; 215 216 while (string && string < orig + len) { 217 int glyph_width; 218 size_t skip; 219 220 while (skip_ansi && 221 (skip = display_mode_esc_sequence_len(string)) != 0) 222 string += skip; 223 224 glyph_width = utf8_width(&string, NULL); 225 if (glyph_width > 0) 226 width += glyph_width; 227 } 228 229 /* 230 * TODO: fix the interface of this function and `utf8_strwidth()` to 231 * return `size_t` instead of `int`. 232 */ 233 return cast_size_t_to_int(string ? width : len); 234} 235 236int utf8_strwidth(const char *string) 237{ 238 return utf8_strnwidth(string, strlen(string), 0); 239} 240 241int is_utf8(const char *text) 242{ 243 while (*text) { 244 if (*text == '\n' || *text == '\t' || *text == '\r') { 245 text++; 246 continue; 247 } 248 utf8_width(&text, NULL); 249 if (!text) 250 return 0; 251 } 252 return 1; 253} 254 255static void strbuf_add_indented_text(struct strbuf *buf, const char *text, 256 int indent, int indent2) 257{ 258 if (indent < 0) 259 indent = 0; 260 while (*text) { 261 const char *eol = strchrnul(text, '\n'); 262 if (*eol == '\n') 263 eol++; 264 strbuf_addchars(buf, ' ', indent); 265 strbuf_add(buf, text, eol - text); 266 text = eol; 267 indent = indent2; 268 } 269} 270 271/* 272 * Wrap the text, if necessary. The variable indent is the indent for the 273 * first line, indent2 is the indent for all other lines. 274 * If indent is negative, assume that already -indent columns have been 275 * consumed (and no extra indent is necessary for the first line). 276 */ 277void strbuf_add_wrapped_text(struct strbuf *buf, 278 const char *text, int indent1, int indent2, int width) 279{ 280 int indent, w, assume_utf8 = 1; 281 const char *bol, *space, *start = text; 282 size_t orig_len = buf->len; 283 284 if (width <= 0) { 285 strbuf_add_indented_text(buf, text, indent1, indent2); 286 return; 287 } 288 289retry: 290 bol = text; 291 w = indent = indent1; 292 space = NULL; 293 if (indent < 0) { 294 w = -indent; 295 space = text; 296 } 297 298 for (;;) { 299 char c; 300 size_t skip; 301 302 while ((skip = display_mode_esc_sequence_len(text))) 303 text += skip; 304 305 c = *text; 306 if (!c || isspace(c)) { 307 if (w <= width || !space) { 308 const char *start = bol; 309 if (!c && text == start) 310 return; 311 if (space) 312 start = space; 313 else 314 strbuf_addchars(buf, ' ', indent); 315 strbuf_add(buf, start, text - start); 316 if (!c) 317 return; 318 space = text; 319 if (c == '\t') 320 w |= 0x07; 321 else if (c == '\n') { 322 space++; 323 if (*space == '\n') { 324 strbuf_addch(buf, '\n'); 325 goto new_line; 326 } 327 else if (!isalnum(*space)) 328 goto new_line; 329 else 330 strbuf_addch(buf, ' '); 331 } 332 w++; 333 text++; 334 } 335 else { 336new_line: 337 strbuf_addch(buf, '\n'); 338 text = bol = space + isspace(*space); 339 space = NULL; 340 w = indent = indent2; 341 } 342 continue; 343 } 344 if (assume_utf8) { 345 w += utf8_width(&text, NULL); 346 if (!text) { 347 assume_utf8 = 0; 348 text = start; 349 strbuf_setlen(buf, orig_len); 350 goto retry; 351 } 352 } else { 353 w++; 354 text++; 355 } 356 } 357} 358 359void strbuf_add_wrapped_bytes(struct strbuf *buf, const char *data, int len, 360 int indent, int indent2, int width) 361{ 362 char *tmp = xstrndup(data, len); 363 strbuf_add_wrapped_text(buf, tmp, indent, indent2, width); 364 free(tmp); 365} 366 367void strbuf_utf8_replace(struct strbuf *sb_src, int pos, int width, 368 const char *subst) 369{ 370 const char *src = sb_src->buf, *end = sb_src->buf + sb_src->len; 371 struct strbuf dst; 372 int w = 0; 373 374 strbuf_init(&dst, sb_src->len); 375 376 while (src < end) { 377 const char *old; 378 int glyph_width; 379 size_t n; 380 381 while ((n = display_mode_esc_sequence_len(src))) { 382 strbuf_add(&dst, src, n); 383 src += n; 384 } 385 386 if (src >= end) 387 break; 388 389 old = src; 390 glyph_width = utf8_width((const char**)&src, NULL); 391 if (!src) /* broken utf-8, do nothing */ 392 goto out; 393 394 /* 395 * In case we see a control character we copy it into the 396 * buffer, but don't add it to the width. 397 */ 398 if (glyph_width < 0) 399 glyph_width = 0; 400 401 if (glyph_width && w >= pos && w < pos + width) { 402 if (subst) { 403 strbuf_addstr(&dst, subst); 404 subst = NULL; 405 } 406 } else { 407 strbuf_add(&dst, old, src - old); 408 } 409 410 w += glyph_width; 411 } 412 413 strbuf_swap(sb_src, &dst); 414out: 415 strbuf_release(&dst); 416} 417 418/* 419 * Returns true (1) if the src encoding name matches the dst encoding 420 * name directly or one of its alternative names. E.g. UTF-16BE is the 421 * same as UTF16BE. 422 */ 423static int same_utf_encoding(const char *src, const char *dst) 424{ 425 if (skip_iprefix(src, "utf", &src) && skip_iprefix(dst, "utf", &dst)) { 426 skip_prefix(src, "-", &src); 427 skip_prefix(dst, "-", &dst); 428 return !strcasecmp(src, dst); 429 } 430 return 0; 431} 432 433int is_encoding_utf8(const char *name) 434{ 435 if (!name) 436 return 1; 437 if (same_utf_encoding("utf-8", name)) 438 return 1; 439 return 0; 440} 441 442int same_encoding(const char *src, const char *dst) 443{ 444 static const char utf8[] = "UTF-8"; 445 446 if (!src) 447 src = utf8; 448 if (!dst) 449 dst = utf8; 450 if (same_utf_encoding(src, dst)) 451 return 1; 452 return !strcasecmp(src, dst); 453} 454 455/* 456 * Wrapper for fprintf and returns the total number of columns required 457 * for the printed string, assuming that the string is utf8. 458 */ 459int utf8_fprintf(FILE *stream, const char *format, ...) 460{ 461 struct strbuf buf = STRBUF_INIT; 462 va_list arg; 463 int columns; 464 465 va_start(arg, format); 466 strbuf_vaddf(&buf, format, arg); 467 va_end(arg); 468 469 columns = fputs(buf.buf, stream); 470 if (0 <= columns) /* keep the error from the I/O */ 471 columns = utf8_strwidth(buf.buf); 472 strbuf_release(&buf); 473 return columns; 474} 475 476/* 477 * Given a buffer and its encoding, return it re-encoded 478 * with iconv. If the conversion fails, returns NULL. 479 */ 480#ifndef NO_ICONV 481#if defined(OLD_ICONV) || (defined(__sun__) && !defined(_XPG6)) 482 typedef const char * iconv_ibp; 483#else 484 typedef char * iconv_ibp; 485#endif 486char *reencode_string_iconv(const char *in, size_t insz, iconv_t conv, 487 size_t bom_len, size_t *outsz_p) 488{ 489 size_t outsz, outalloc; 490 char *out, *outpos; 491 iconv_ibp cp; 492 493 outsz = insz; 494 outalloc = st_add(outsz, 1 + bom_len); /* for terminating NUL */ 495 out = xmalloc(outalloc); 496 outpos = out + bom_len; 497 cp = (iconv_ibp)in; 498 499 while (1) { 500 size_t cnt = iconv(conv, &cp, &insz, &outpos, &outsz); 501 502 if (cnt == (size_t) -1) { 503 size_t sofar; 504 if (errno != E2BIG) { 505 free(out); 506 return NULL; 507 } 508 /* insz has remaining number of bytes. 509 * since we started outsz the same as insz, 510 * it is likely that insz is not enough for 511 * converting the rest. 512 */ 513 sofar = outpos - out; 514 outalloc = st_add3(sofar, st_mult(insz, 2), 32); 515 out = xrealloc(out, outalloc); 516 outpos = out + sofar; 517 outsz = outalloc - sofar - 1; 518 } 519 else { 520 *outpos = '\0'; 521 if (outsz_p) 522 *outsz_p = outpos - out; 523 break; 524 } 525 } 526 return out; 527} 528 529static const char *fallback_encoding(const char *name) 530{ 531 /* 532 * Some platforms do not have the variously spelled variants of 533 * UTF-8, so let's fall back to trying the most official 534 * spelling. We do so only as a fallback in case the platform 535 * does understand the user's spelling, but not our official 536 * one. 537 */ 538 if (is_encoding_utf8(name)) 539 return "UTF-8"; 540 541 /* 542 * Even though latin-1 is still seen in e-mail 543 * headers, some platforms only install ISO-8859-1. 544 */ 545 if (!strcasecmp(name, "latin-1")) 546 return "ISO-8859-1"; 547 548 return name; 549} 550 551char *reencode_string_len(const char *in, size_t insz, 552 const char *out_encoding, const char *in_encoding, 553 size_t *outsz) 554{ 555 iconv_t conv; 556 char *out; 557 const char *bom_str = NULL; 558 size_t bom_len = 0; 559 560 if (!in_encoding) 561 return NULL; 562 563 /* UTF-16LE-BOM is the same as UTF-16 for reading */ 564 if (same_utf_encoding("UTF-16LE-BOM", in_encoding)) 565 in_encoding = "UTF-16"; 566 567 /* 568 * For writing, UTF-16 iconv typically creates "UTF-16BE-BOM" 569 * Some users under Windows want the little endian version 570 * 571 * We handle UTF-16 and UTF-32 ourselves only if the platform does not 572 * provide a BOM (which we require), since we want to match the behavior 573 * of the system tools and libc as much as possible. 574 */ 575 if (same_utf_encoding("UTF-16LE-BOM", out_encoding)) { 576 bom_str = utf16_le_bom; 577 bom_len = sizeof(utf16_le_bom); 578 out_encoding = "UTF-16LE"; 579 } else if (same_utf_encoding("UTF-16BE-BOM", out_encoding)) { 580 bom_str = utf16_be_bom; 581 bom_len = sizeof(utf16_be_bom); 582 out_encoding = "UTF-16BE"; 583#ifdef ICONV_OMITS_BOM 584 } else if (same_utf_encoding("UTF-16", out_encoding)) { 585 bom_str = utf16_be_bom; 586 bom_len = sizeof(utf16_be_bom); 587 out_encoding = "UTF-16BE"; 588 } else if (same_utf_encoding("UTF-32", out_encoding)) { 589 bom_str = utf32_be_bom; 590 bom_len = sizeof(utf32_be_bom); 591 out_encoding = "UTF-32BE"; 592#endif 593 } 594 595 conv = iconv_open(out_encoding, in_encoding); 596 if (conv == (iconv_t) -1) { 597 in_encoding = fallback_encoding(in_encoding); 598 out_encoding = fallback_encoding(out_encoding); 599 600 conv = iconv_open(out_encoding, in_encoding); 601 if (conv == (iconv_t) -1) 602 return NULL; 603 } 604 out = reencode_string_iconv(in, insz, conv, bom_len, outsz); 605 iconv_close(conv); 606 if (out && bom_str && bom_len) 607 memcpy(out, bom_str, bom_len); 608 return out; 609} 610#endif 611 612static int has_bom_prefix(const char *data, size_t len, 613 const char *bom, size_t bom_len) 614{ 615 return data && bom && (len >= bom_len) && !memcmp(data, bom, bom_len); 616} 617 618int has_prohibited_utf_bom(const char *enc, const char *data, size_t len) 619{ 620 return ( 621 (same_utf_encoding("UTF-16BE", enc) || 622 same_utf_encoding("UTF-16LE", enc)) && 623 (has_bom_prefix(data, len, utf16_be_bom, sizeof(utf16_be_bom)) || 624 has_bom_prefix(data, len, utf16_le_bom, sizeof(utf16_le_bom))) 625 ) || ( 626 (same_utf_encoding("UTF-32BE", enc) || 627 same_utf_encoding("UTF-32LE", enc)) && 628 (has_bom_prefix(data, len, utf32_be_bom, sizeof(utf32_be_bom)) || 629 has_bom_prefix(data, len, utf32_le_bom, sizeof(utf32_le_bom))) 630 ); 631} 632 633int is_missing_required_utf_bom(const char *enc, const char *data, size_t len) 634{ 635 return ( 636 (same_utf_encoding(enc, "UTF-16")) && 637 !(has_bom_prefix(data, len, utf16_be_bom, sizeof(utf16_be_bom)) || 638 has_bom_prefix(data, len, utf16_le_bom, sizeof(utf16_le_bom))) 639 ) || ( 640 (same_utf_encoding(enc, "UTF-32")) && 641 !(has_bom_prefix(data, len, utf32_be_bom, sizeof(utf32_be_bom)) || 642 has_bom_prefix(data, len, utf32_le_bom, sizeof(utf32_le_bom))) 643 ); 644} 645 646/* 647 * Returns first character length in bytes for multi-byte `text` according to 648 * `encoding`. 649 * 650 * - The `text` pointer is updated to point at the next character. 651 * - When `remainder_p` is not NULL, on entry `*remainder_p` is how much bytes 652 * we can consume from text, and on exit `*remainder_p` is reduced by returned 653 * character length. Otherwise `text` is treated as limited by NUL. 654 */ 655int mbs_chrlen(const char **text, size_t *remainder_p, const char *encoding) 656{ 657 int chrlen; 658 const char *p = *text; 659 size_t r = (remainder_p ? *remainder_p : SIZE_MAX); 660 661 if (r < 1) 662 return 0; 663 664 if (is_encoding_utf8(encoding)) { 665 pick_one_utf8_char(&p, &r); 666 667 chrlen = p ? (p - *text) 668 : 1 /* not valid UTF-8 -> raw byte sequence */; 669 } 670 else { 671 /* 672 * TODO use iconv to decode one char and obtain its chrlen 673 * for now, let's treat encodings != UTF-8 as one-byte 674 */ 675 chrlen = 1; 676 } 677 678 *text += chrlen; 679 if (remainder_p) 680 *remainder_p -= chrlen; 681 682 return chrlen; 683} 684 685/* 686 * Pick the next char from the stream, ignoring codepoints an HFS+ would. 687 * Note that this is _not_ complete by any means. It's just enough 688 * to make is_hfs_dotgit() work, and should not be used otherwise. 689 */ 690static ucs_char_t next_hfs_char(const char **in) 691{ 692 while (1) { 693 ucs_char_t out = pick_one_utf8_char(in, NULL); 694 /* 695 * check for malformed utf8. Technically this 696 * gets converted to a percent-sequence, but 697 * returning 0 is good enough for is_hfs_dotgit 698 * to realize it cannot be .git 699 */ 700 if (!*in) 701 return 0; 702 703 /* these code points are ignored completely */ 704 switch (out) { 705 case 0x200c: /* ZERO WIDTH NON-JOINER */ 706 case 0x200d: /* ZERO WIDTH JOINER */ 707 case 0x200e: /* LEFT-TO-RIGHT MARK */ 708 case 0x200f: /* RIGHT-TO-LEFT MARK */ 709 case 0x202a: /* LEFT-TO-RIGHT EMBEDDING */ 710 case 0x202b: /* RIGHT-TO-LEFT EMBEDDING */ 711 case 0x202c: /* POP DIRECTIONAL FORMATTING */ 712 case 0x202d: /* LEFT-TO-RIGHT OVERRIDE */ 713 case 0x202e: /* RIGHT-TO-LEFT OVERRIDE */ 714 case 0x206a: /* INHIBIT SYMMETRIC SWAPPING */ 715 case 0x206b: /* ACTIVATE SYMMETRIC SWAPPING */ 716 case 0x206c: /* INHIBIT ARABIC FORM SHAPING */ 717 case 0x206d: /* ACTIVATE ARABIC FORM SHAPING */ 718 case 0x206e: /* NATIONAL DIGIT SHAPES */ 719 case 0x206f: /* NOMINAL DIGIT SHAPES */ 720 case 0xfeff: /* ZERO WIDTH NO-BREAK SPACE */ 721 continue; 722 } 723 724 return out; 725 } 726} 727 728static int is_hfs_dot_generic(const char *path, 729 const char *needle, size_t needle_len) 730{ 731 ucs_char_t c; 732 733 c = next_hfs_char(&path); 734 if (c != '.') 735 return 0; 736 737 /* 738 * there's a great deal of other case-folding that occurs 739 * in HFS+, but this is enough to catch our fairly vanilla 740 * hard-coded needles. 741 */ 742 for (; needle_len > 0; needle++, needle_len--) { 743 c = next_hfs_char(&path); 744 745 /* 746 * We know our needles contain only ASCII, so we clamp here to 747 * make the results of tolower() sane. 748 */ 749 if (c > 127) 750 return 0; 751 if (tolower(c) != *needle) 752 return 0; 753 } 754 755 c = next_hfs_char(&path); 756 if (c && !is_dir_sep(c)) 757 return 0; 758 759 return 1; 760} 761 762/* 763 * Inline wrapper to make sure the compiler resolves strlen() on literals at 764 * compile time. 765 */ 766static inline int is_hfs_dot_str(const char *path, const char *needle) 767{ 768 return is_hfs_dot_generic(path, needle, strlen(needle)); 769} 770 771int is_hfs_dotgit(const char *path) 772{ 773 return is_hfs_dot_str(path, "git"); 774} 775 776int is_hfs_dotgitmodules(const char *path) 777{ 778 return is_hfs_dot_str(path, "gitmodules"); 779} 780 781int is_hfs_dotgitignore(const char *path) 782{ 783 return is_hfs_dot_str(path, "gitignore"); 784} 785 786int is_hfs_dotgitattributes(const char *path) 787{ 788 return is_hfs_dot_str(path, "gitattributes"); 789} 790 791int is_hfs_dotmailmap(const char *path) 792{ 793 return is_hfs_dot_str(path, "mailmap"); 794} 795 796const char utf8_bom[] = "\357\273\277"; 797 798int skip_utf8_bom(char **text, size_t len) 799{ 800 if (len < strlen(utf8_bom) || 801 memcmp(*text, utf8_bom, strlen(utf8_bom))) 802 return 0; 803 *text += strlen(utf8_bom); 804 return 1; 805} 806 807void strbuf_utf8_align(struct strbuf *buf, align_type position, unsigned int width, 808 const char *s) 809{ 810 size_t slen = strlen(s); 811 int display_len = utf8_strnwidth(s, slen, 0); 812 int utf8_compensation = slen - display_len; 813 814 if (display_len >= width) { 815 strbuf_addstr(buf, s); 816 return; 817 } 818 819 if (position == ALIGN_LEFT) 820 strbuf_addf(buf, "%-*s", width + utf8_compensation, s); 821 else if (position == ALIGN_MIDDLE) { 822 int left = (width - display_len) / 2; 823 strbuf_addf(buf, "%*s%-*s", left, "", width - left + utf8_compensation, s); 824 } else if (position == ALIGN_RIGHT) 825 strbuf_addf(buf, "%*s", width + utf8_compensation, s); 826}