this repo has no description
at wasm 167 lines 5.6 kB view raw
1use ecow::EcoString; 2use itertools::Itertools; 3 4use crate::ast::Endianness; 5 6/// Converts any escape sequences from the given string to their correct 7/// bytewise UTF-8 representation and returns the resulting string. 8pub fn convert_string_escape_chars(str: &EcoString) -> EcoString { 9 let mut filtered_str = EcoString::new(); 10 let mut str_iter = str.chars().peekable(); 11 loop { 12 match str_iter.next() { 13 Some('\\') => match str_iter.next() { 14 // Check for Unicode escape sequence, e.g. \u{00012FF} 15 Some('u') => { 16 if str_iter.peek() != Some(&'{') { 17 // Invalid Unicode escape sequence 18 filtered_str.push('u'); 19 continue; 20 } 21 22 // Consume the left brace after peeking 23 let _ = str_iter.next(); 24 25 let codepoint_str = str_iter 26 .peeking_take_while(char::is_ascii_hexdigit) 27 .collect::<String>(); 28 29 if codepoint_str.is_empty() || str_iter.peek() != Some(&'}') { 30 // Invalid Unicode escape sequence 31 filtered_str.push_str("u{"); 32 filtered_str.push_str(&codepoint_str); 33 continue; 34 } 35 36 let codepoint = u32::from_str_radix(&codepoint_str, 16) 37 .ok() 38 .and_then(char::from_u32); 39 40 if let Some(codepoint) = codepoint { 41 // Consume the right brace after peeking 42 let _ = str_iter.next(); 43 44 // Consider this codepoint's length instead of 45 // that of the Unicode escape sequence itself 46 filtered_str.push(codepoint); 47 } else { 48 // Invalid Unicode escape sequence 49 // (codepoint value not in base 16 or too large) 50 filtered_str.push_str("u{"); 51 filtered_str.push_str(&codepoint_str); 52 } 53 } 54 Some('n') => filtered_str.push('\n'), 55 Some('r') => filtered_str.push('\r'), 56 Some('f') => filtered_str.push('\u{C}'), 57 Some('t') => filtered_str.push('\t'), 58 Some('"') => filtered_str.push('\"'), 59 Some('\\') => filtered_str.push('\\'), 60 Some(c) => filtered_str.push(c), 61 None => break, 62 }, 63 Some(c) => filtered_str.push(c), 64 None => break, 65 } 66 } 67 filtered_str 68} 69 70pub fn to_snake_case(string: &str) -> EcoString { 71 let mut snake_case = EcoString::with_capacity(string.len()); 72 let mut is_word_boundary = true; 73 74 for char in string.chars() { 75 match char { 76 '_' | ' ' => { 77 is_word_boundary = true; 78 continue; 79 } 80 _ if char.is_uppercase() => { 81 is_word_boundary = true; 82 } 83 _ => {} 84 } 85 86 if is_word_boundary { 87 // We don't want to push an underscore at the start of the string, 88 // even if it starts with a capital letter or other delimiter. 89 if !snake_case.is_empty() { 90 snake_case.push('_'); 91 } 92 is_word_boundary = false; 93 } 94 snake_case.push(char.to_ascii_lowercase()); 95 } 96 97 snake_case 98} 99 100pub fn to_upper_camel_case(string: &str) -> EcoString { 101 let mut pascal_case = EcoString::with_capacity(string.len()); 102 let mut chars = string.chars(); 103 104 while let Some(char) = chars.next() { 105 if char == '_' { 106 let Some(next) = chars.next() else { break }; 107 pascal_case.push(next.to_ascii_uppercase()); 108 } else { 109 pascal_case.push(char); 110 } 111 } 112 113 pascal_case 114} 115 116/// Converts a string into its UTF-16 representation in bytes 117pub fn string_to_utf16_bytes(string: &str, endianness: Endianness) -> Vec<u8> { 118 let mut bytes = Vec::with_capacity(string.len() * 2); 119 120 let mut character_buffer = [0, 0]; 121 for character in string.chars() { 122 let segments = character.encode_utf16(&mut character_buffer); 123 124 for segment in segments { 125 let segment_bytes = match endianness { 126 Endianness::Big => segment.to_be_bytes(), 127 Endianness::Little => segment.to_le_bytes(), 128 }; 129 130 bytes.push(segment_bytes[0]); 131 bytes.push(segment_bytes[1]); 132 } 133 } 134 135 bytes 136} 137 138/// Converts a string into its UTF-32 representation in bytes 139pub fn string_to_utf32_bytes(string: &str, endianness: Endianness) -> Vec<u8> { 140 let mut bytes = Vec::with_capacity(string.len() * 4); 141 142 for character in string.chars() { 143 let character_bytes = match endianness { 144 Endianness::Big => (character as u32).to_be_bytes(), 145 Endianness::Little => (character as u32).to_le_bytes(), 146 }; 147 bytes.extend(character_bytes); 148 } 149 150 bytes 151} 152 153/// Gets the number of UTF-16 codepoints it would take to encode a given string. 154pub fn length_utf16(string: &str) -> usize { 155 let mut length = 0; 156 157 for char in string.chars() { 158 length += char.len_utf16() 159 } 160 161 length 162} 163 164/// Gets the number of UTF-32 codepoints in a string 165pub fn length_utf32(string: &str) -> usize { 166 string.chars().count() 167}