utf8.h at reftables-rust · freshlybakedca.ke/git

freshlybakedca.ke / git
fork atom
Git fork
fork atom
git / utf8.h
at reftables-rust 110 lines 3.7 kB view raw
wrap content
Mike Hommey utf8.h: squelch unused-parameter warnings with NO_ICONV 1y ago
e03b2a21
  1#ifndef GIT_UTF8_H
  2#define GIT_UTF8_H
  3
  4struct strbuf;
  5
  6typedef unsigned int ucs_char_t;  /* assuming 32bit int */
  7
  8size_t display_mode_esc_sequence_len(const char *s);
  9int utf8_width(const char **start, size_t *remainder_p);
 10int utf8_strnwidth(const char *string, size_t len, int skip_ansi);
 11int utf8_strwidth(const char *string);
 12int is_utf8(const char *text);
 13int is_encoding_utf8(const char *name);
 14int same_encoding(const char *, const char *);
 15__attribute__((format (printf, 2, 3)))
 16int utf8_fprintf(FILE *, const char *, ...);
 17
 18extern const char utf8_bom[];
 19int skip_utf8_bom(char **, size_t);
 20
 21void strbuf_add_wrapped_text(struct strbuf *buf,
 22		const char *text, int indent, int indent2, int width);
 23void strbuf_add_wrapped_bytes(struct strbuf *buf, const char *data, int len,
 24			     int indent, int indent2, int width);
 25void strbuf_utf8_replace(struct strbuf *sb, int pos, int width,
 26			 const char *subst);
 27
 28#ifndef NO_ICONV
 29char *reencode_string_iconv(const char *in, size_t insz,
 30			    iconv_t conv, size_t bom_len, size_t *outsz);
 31char *reencode_string_len(const char *in, size_t insz,
 32			  const char *out_encoding,
 33			  const char *in_encoding,
 34			  size_t *outsz);
 35#else
 36static inline char *reencode_string_len(const char *a UNUSED, size_t b UNUSED,
 37					const char *c UNUSED,
 38					const char *d UNUSED, size_t *e)
 39{ if (e) *e = 0; return NULL; }
 40#endif
 41
 42static inline char *reencode_string(const char *in,
 43				    const char *out_encoding,
 44				    const char *in_encoding)
 45{
 46	return reencode_string_len(in, strlen(in),
 47				   out_encoding, in_encoding,
 48				   NULL);
 49}
 50
 51int mbs_chrlen(const char **text, size_t *remainder_p, const char *encoding);
 52
 53/*
 54 * Returns true if the path would match ".git" after HFS case-folding.
 55 * The path should be NUL-terminated, but we will match variants of both ".git\0"
 56 * and ".git/..." (but _not_ ".../.git"). This makes it suitable for both fsck
 57 * and verify_path().
 58 *
 59 * Likewise, the is_hfs_dotgitfoo() variants look for ".gitfoo".
 60 */
 61int is_hfs_dotgit(const char *path);
 62int is_hfs_dotgitmodules(const char *path);
 63int is_hfs_dotgitignore(const char *path);
 64int is_hfs_dotgitattributes(const char *path);
 65int is_hfs_dotmailmap(const char *path);
 66
 67typedef enum {
 68	ALIGN_LEFT,
 69	ALIGN_MIDDLE,
 70	ALIGN_RIGHT
 71} align_type;
 72
 73/*
 74 * Align the string given and store it into a strbuf as per the
 75 * 'position' and 'width'. If the given string length is larger than
 76 * 'width', then the input string is not truncated and no
 77 * alignment is done.
 78 */
 79void strbuf_utf8_align(struct strbuf *buf, align_type position, unsigned int width,
 80		       const char *s);
 81
 82/*
 83 * If a data stream is declared as UTF-16BE or UTF-16LE, then a UTF-16
 84 * BOM must not be used [1]. The same applies for the UTF-32 equivalents.
 85 * The function returns true if this rule is violated.
 86 *
 87 * [1] https://unicode.org/faq/utf_bom.html#bom10
 88 */
 89int has_prohibited_utf_bom(const char *enc, const char *data, size_t len);
 90
 91/*
 92 * If the endianness is not defined in the encoding name, then we
 93 * require a BOM. The function returns true if a required BOM is missing.
 94 *
 95 * The Unicode standard instructs to assume big-endian if there is no
 96 * BOM for UTF-16/32 [1][2]. However, the W3C/WHATWG encoding standard
 97 * used in HTML5 recommends to assume little-endian to "deal with
 98 * deployed content" [3].
 99 *
100 * Therefore, strictly requiring a BOM seems to be the safest option for
101 * content in Git.
102 *
103 * [1] https://unicode.org/faq/utf_bom.html#gen6
104 * [2] https://www.unicode.org/versions/Unicode10.0.0/ch03.pdf
105 *     Section 3.10, D98, page 132
106 * [3] https://encoding.spec.whatwg.org/#utf-16le
107 */
108int is_missing_required_utf_bom(const char *enc, const char *data, size_t len);
109
110#endif