this repo has no description
1use ecow::EcoString;
2use itertools::Itertools;
3
4use crate::ast::Endianness;
5
6/// Converts any escape sequences from the given string to their correct
7/// bytewise UTF-8 representation and returns the resulting string.
8pub fn convert_string_escape_chars(str: &EcoString) -> EcoString {
9 let mut filtered_str = EcoString::new();
10 let mut str_iter = str.chars().peekable();
11 loop {
12 match str_iter.next() {
13 Some('\\') => match str_iter.next() {
14 // Check for Unicode escape sequence, e.g. \u{00012FF}
15 Some('u') => {
16 if str_iter.peek() != Some(&'{') {
17 // Invalid Unicode escape sequence
18 filtered_str.push('u');
19 continue;
20 }
21
22 // Consume the left brace after peeking
23 let _ = str_iter.next();
24
25 let codepoint_str = str_iter
26 .peeking_take_while(char::is_ascii_hexdigit)
27 .collect::<String>();
28
29 if codepoint_str.is_empty() || str_iter.peek() != Some(&'}') {
30 // Invalid Unicode escape sequence
31 filtered_str.push_str("u{");
32 filtered_str.push_str(&codepoint_str);
33 continue;
34 }
35
36 let codepoint = u32::from_str_radix(&codepoint_str, 16)
37 .ok()
38 .and_then(char::from_u32);
39
40 if let Some(codepoint) = codepoint {
41 // Consume the right brace after peeking
42 let _ = str_iter.next();
43
44 // Consider this codepoint's length instead of
45 // that of the Unicode escape sequence itself
46 filtered_str.push(codepoint);
47 } else {
48 // Invalid Unicode escape sequence
49 // (codepoint value not in base 16 or too large)
50 filtered_str.push_str("u{");
51 filtered_str.push_str(&codepoint_str);
52 }
53 }
54 Some('n') => filtered_str.push('\n'),
55 Some('r') => filtered_str.push('\r'),
56 Some('f') => filtered_str.push('\u{C}'),
57 Some('t') => filtered_str.push('\t'),
58 Some('"') => filtered_str.push('\"'),
59 Some('\\') => filtered_str.push('\\'),
60 Some(c) => filtered_str.push(c),
61 None => break,
62 },
63 Some(c) => filtered_str.push(c),
64 None => break,
65 }
66 }
67 filtered_str
68}
69
70pub fn to_snake_case(string: &str) -> EcoString {
71 let mut snake_case = EcoString::with_capacity(string.len());
72 let mut is_word_boundary = true;
73
74 for char in string.chars() {
75 match char {
76 '_' | ' ' => {
77 is_word_boundary = true;
78 continue;
79 }
80 _ if char.is_uppercase() => {
81 is_word_boundary = true;
82 }
83 _ => {}
84 }
85
86 if is_word_boundary {
87 // We don't want to push an underscore at the start of the string,
88 // even if it starts with a capital letter or other delimiter.
89 if !snake_case.is_empty() {
90 snake_case.push('_');
91 }
92 is_word_boundary = false;
93 }
94 snake_case.push(char.to_ascii_lowercase());
95 }
96
97 snake_case
98}
99
100pub fn to_upper_camel_case(string: &str) -> EcoString {
101 let mut pascal_case = EcoString::with_capacity(string.len());
102 let mut chars = string.chars();
103
104 while let Some(char) = chars.next() {
105 if char == '_' {
106 let Some(next) = chars.next() else { break };
107 pascal_case.push(next.to_ascii_uppercase());
108 } else {
109 pascal_case.push(char);
110 }
111 }
112
113 pascal_case
114}
115
116/// Converts a string into its UTF-16 representation in bytes
117pub fn string_to_utf16_bytes(string: &str, endianness: Endianness) -> Vec<u8> {
118 let mut bytes = Vec::with_capacity(string.len() * 2);
119
120 let mut character_buffer = [0, 0];
121 for character in string.chars() {
122 let segments = character.encode_utf16(&mut character_buffer);
123
124 for segment in segments {
125 let segment_bytes = match endianness {
126 Endianness::Big => segment.to_be_bytes(),
127 Endianness::Little => segment.to_le_bytes(),
128 };
129
130 bytes.push(segment_bytes[0]);
131 bytes.push(segment_bytes[1]);
132 }
133 }
134
135 bytes
136}
137
138/// Converts a string into its UTF-32 representation in bytes
139pub fn string_to_utf32_bytes(string: &str, endianness: Endianness) -> Vec<u8> {
140 let mut bytes = Vec::with_capacity(string.len() * 4);
141
142 for character in string.chars() {
143 let character_bytes = match endianness {
144 Endianness::Big => (character as u32).to_be_bytes(),
145 Endianness::Little => (character as u32).to_le_bytes(),
146 };
147 bytes.extend(character_bytes);
148 }
149
150 bytes
151}
152
/// Gets the number of UTF-16 code units it would take to encode a given
/// string.
///
/// Characters in the Basic Multilingual Plane count as one code unit; all
/// other characters are encoded as a surrogate pair and count as two. The
/// result is therefore a count of 16-bit units, not of code points.
pub fn length_utf16(string: &str) -> usize {
    string.chars().map(char::len_utf16).sum()
}
163
/// Gets the number of UTF-32 code units needed to encode a string, which is
/// the number of characters (`char` values) it contains.
pub fn length_utf32(string: &str) -> usize {
    let characters = string.chars();
    characters.count()
}