web engine - experimental web browser
at x25519 339 lines 11 kB view raw
//! Minimal JSON parser for reading html5lib test fixtures.
//!
//! Supports the subset of JSON used by html5lib-tests: objects, arrays,
//! strings (with escape sequences including `\uXXXX`), numbers, booleans,
//! and null.
//!
//! The parser is deliberately lenient in two places: an unknown escape such
//! as `\q` is kept literally (backslash included), and a `\uXXXX` escape that
//! is an unpaired UTF-16 surrogate decodes to U+FFFD, because a Rust `String`
//! cannot hold a lone surrogate.

/// Character substituted for `\uXXXX` escapes that do not form a valid scalar
/// value (unpaired surrogates).
const REPLACEMENT: char = '\u{FFFD}';

/// A parsed JSON value.
///
/// Objects are stored as a list of `(key, value)` pairs in source order;
/// duplicate keys are kept.
#[derive(Debug, Clone, PartialEq)]
pub enum JsonValue {
    Null,
    Bool(bool),
    Number(f64),
    Str(String),
    Array(Vec<JsonValue>),
    Object(Vec<(String, JsonValue)>),
}

impl JsonValue {
    /// Returns the string contents if this value is a `Str`.
    pub fn as_str(&self) -> Option<&str> {
        match self {
            JsonValue::Str(s) => Some(s),
            _ => None,
        }
    }

    /// Returns the elements if this value is an `Array`.
    pub fn as_array(&self) -> Option<&[JsonValue]> {
        match self {
            JsonValue::Array(a) => Some(a),
            _ => None,
        }
    }

    /// Returns the `(key, value)` pairs if this value is an `Object`.
    pub fn as_object(&self) -> Option<&[(String, JsonValue)]> {
        match self {
            JsonValue::Object(o) => Some(o),
            _ => None,
        }
    }

    /// Returns the boolean if this value is a `Bool`.
    pub fn as_bool(&self) -> Option<bool> {
        match self {
            JsonValue::Bool(b) => Some(*b),
            _ => None,
        }
    }

    /// Look up a key in a JSON object.
    ///
    /// Returns the value of the first pair whose key equals `key`, or `None`
    /// if there is no such pair or this value is not an object.
    pub fn get(&self, key: &str) -> Option<&JsonValue> {
        match self {
            JsonValue::Object(pairs) => pairs.iter().find(|(k, _)| k == key).map(|(_, v)| v),
            _ => None,
        }
    }
}

/// Byte-oriented recursive-descent parser over a borrowed input string.
struct Parser<'a> {
    /// The input as bytes; it originates from a `&str`.
    bytes: &'a [u8],
    /// Offset of the next unread byte; never exceeds `bytes.len()`.
    pos: usize,
}

impl<'a> Parser<'a> {
    fn new(input: &'a str) -> Self {
        Self {
            bytes: input.as_bytes(),
            pos: 0,
        }
    }

    /// Skips spaces, tabs, line feeds and carriage returns.
    fn skip_ws(&mut self) {
        while matches!(self.peek(), Some(b' ') | Some(b'\t') | Some(b'\n') | Some(b'\r')) {
            self.pos += 1;
        }
    }

    /// Skips a (possibly empty) run of ASCII digits.
    fn skip_digits(&mut self) {
        while matches!(self.peek(), Some(b) if b.is_ascii_digit()) {
            self.pos += 1;
        }
    }

    /// Returns the next byte without consuming it.
    fn peek(&self) -> Option<u8> {
        self.bytes.get(self.pos).copied()
    }

    /// Consumes and returns the next byte, or `None` at end of input.
    fn advance(&mut self) -> Option<u8> {
        let b = self.peek()?;
        self.pos += 1;
        Some(b)
    }

    /// Consumes the byte `ch`, or fails with a message naming the offending
    /// byte and its offset. The cursor only moves on a match, so the reported
    /// position is that of the unexpected byte itself.
    fn expect(&mut self, ch: u8) -> Result<(), String> {
        match self.peek() {
            Some(b) if b == ch => {
                self.pos += 1;
                Ok(())
            }
            Some(b) => Err(format!(
                "expected '{}' at pos {}, got '{}'",
                ch as char, self.pos, b as char
            )),
            None => Err(format!(
                "expected '{}' at pos {}, got EOF",
                ch as char, self.pos
            )),
        }
    }

    /// Parses any JSON value, skipping leading whitespace. Dispatches on the
    /// first significant byte.
    fn parse_value(&mut self) -> Result<JsonValue, String> {
        self.skip_ws();
        match self.peek() {
            Some(b'"') => self.parse_string().map(JsonValue::Str),
            Some(b'{') => self.parse_object(),
            Some(b'[') => self.parse_array(),
            Some(b't') => self.parse_literal("true", JsonValue::Bool(true)),
            Some(b'f') => self.parse_literal("false", JsonValue::Bool(false)),
            Some(b'n') => self.parse_literal("null", JsonValue::Null),
            Some(b'-') | Some(b'0'..=b'9') => self.parse_number(),
            Some(b) => Err(format!(
                "unexpected byte '{}' at pos {}",
                b as char, self.pos
            )),
            None => Err("unexpected EOF".into()),
        }
    }

    /// Parses a double-quoted string and returns its decoded contents.
    fn parse_string(&mut self) -> Result<String, String> {
        self.expect(b'"')?;
        let mut out = String::new();
        loop {
            // Copy everything up to the next quote or backslash in one slice.
            // Both delimiters are ASCII and so never occur inside a multi-byte
            // UTF-8 sequence; the run therefore ends on a character boundary,
            // and each input byte is validated once rather than once per
            // character.
            let run_start = self.pos;
            while matches!(self.peek(), Some(b) if b != b'"' && b != b'\\') {
                self.pos += 1;
            }
            if self.pos > run_start {
                let run = std::str::from_utf8(&self.bytes[run_start..self.pos])
                    .map_err(|e| format!("invalid UTF-8: {}", e))?;
                out.push_str(run);
            }

            match self.advance() {
                Some(b'"') => return Ok(out),
                // The scan above stops only at a quote or a backslash.
                Some(_) => self.parse_escape(&mut out)?,
                None => return Err("unexpected EOF in string".into()),
            }
        }
    }

    /// Decodes one escape sequence (the leading backslash has already been
    /// consumed) and appends the result to `out`.
    fn parse_escape(&mut self, out: &mut String) -> Result<(), String> {
        match self.advance() {
            Some(b'"') => out.push('"'),
            Some(b'\\') => out.push('\\'),
            Some(b'/') => out.push('/'),
            Some(b'n') => out.push('\n'),
            Some(b'r') => out.push('\r'),
            Some(b't') => out.push('\t'),
            Some(b'b') => out.push('\u{0008}'),
            Some(b'f') => out.push('\u{000C}'),
            Some(b'u') => {
                let ch = self.parse_unicode_escape()?;
                out.push(ch);
            }
            Some(_) => {
                // Unknown escape: keep the backslash and un-read the byte so
                // the caller copies the following character verbatim. This
                // also handles a non-ASCII character after the backslash,
                // which must not be split at its lead byte.
                out.push('\\');
                self.pos -= 1;
            }
            None => return Err("unexpected EOF in string escape".into()),
        }
        Ok(())
    }

    /// Decodes the code point of a `\u` escape whose `\u` prefix has already
    /// been consumed, combining a UTF-16 surrogate pair when one follows.
    ///
    /// An unpaired surrogate yields U+FFFD. Nothing after an unpaired high
    /// surrogate is consumed, so the text that follows it is parsed normally.
    fn parse_unicode_escape(&mut self) -> Result<char, String> {
        let unit = self.parse_hex4()?;
        if (0xD800..=0xDBFF).contains(&unit) {
            // High surrogate: only commit to a second escape if it really is
            // a low surrogate; otherwise rewind and let it be parsed on its own.
            if self.bytes[self.pos..].starts_with(b"\\u") {
                let rewind = self.pos;
                self.pos += 2;
                let low = self.parse_hex4()?;
                if (0xDC00..=0xDFFF).contains(&low) {
                    let combined = 0x10000
                        + ((u32::from(unit) - 0xD800) << 10)
                        + (u32::from(low) - 0xDC00);
                    return Ok(char::from_u32(combined).unwrap_or(REPLACEMENT));
                }
                self.pos = rewind;
            }
            return Ok(REPLACEMENT);
        }
        // `from_u32` rejects only surrogates here, i.e. a lone low surrogate.
        Ok(char::from_u32(u32::from(unit)).unwrap_or(REPLACEMENT))
    }

    /// Reads exactly four hexadecimal digits (either case) as a `u16`.
    fn parse_hex4(&mut self) -> Result<u16, String> {
        let mut val: u16 = 0;
        for _ in 0..4 {
            let b = self.advance().ok_or("unexpected EOF in \\u escape")?;
            let digit = match b {
                b'0'..=b'9' => b - b'0',
                b'a'..=b'f' => b - b'a' + 10,
                b'A'..=b'F' => b - b'A' + 10,
                _ => return Err(format!("invalid hex digit '{}'", b as char)),
            };
            val = val * 16 + u16::from(digit);
        }
        Ok(val)
    }

    /// Parses a number: optional `-`, digits, optional `.digits`, optional
    /// exponent. The scanned text is handed to `f64`'s parser, which makes
    /// the final decision on validity.
    fn parse_number(&mut self) -> Result<JsonValue, String> {
        let start = self.pos;
        if self.peek() == Some(b'-') {
            self.pos += 1;
        }
        self.skip_digits();
        if self.peek() == Some(b'.') {
            self.pos += 1;
            self.skip_digits();
        }
        if matches!(self.peek(), Some(b'e') | Some(b'E')) {
            self.pos += 1;
            if matches!(self.peek(), Some(b'+') | Some(b'-')) {
                self.pos += 1;
            }
            self.skip_digits();
        }
        let text = std::str::from_utf8(&self.bytes[start..self.pos])
            .map_err(|e| format!("invalid UTF-8 in number: {}", e))?;
        let n = text
            .parse::<f64>()
            .map_err(|e| format!("invalid number '{}': {}", text, e))?;
        Ok(JsonValue::Number(n))
    }

    /// Parses an object: `{}` or `{ "key": value, ... }`.
    fn parse_object(&mut self) -> Result<JsonValue, String> {
        self.expect(b'{')?;
        self.skip_ws();
        let mut pairs = Vec::new();
        if self.peek() == Some(b'}') {
            self.pos += 1;
            return Ok(JsonValue::Object(pairs));
        }
        loop {
            self.skip_ws();
            let key = self.parse_string()?;
            self.skip_ws();
            self.expect(b':')?;
            let val = self.parse_value()?;
            pairs.push((key, val));
            self.skip_ws();
            match self.peek() {
                Some(b',') => {
                    self.pos += 1;
                }
                Some(b'}') => {
                    self.pos += 1;
                    return Ok(JsonValue::Object(pairs));
                }
                _ => return Err(format!("expected ',' or '}}' at pos {}", self.pos)),
            }
        }
    }

    /// Parses an array: `[]` or `[ value, ... ]`.
    fn parse_array(&mut self) -> Result<JsonValue, String> {
        self.expect(b'[')?;
        self.skip_ws();
        let mut elems = Vec::new();
        if self.peek() == Some(b']') {
            self.pos += 1;
            return Ok(JsonValue::Array(elems));
        }
        loop {
            let val = self.parse_value()?;
            elems.push(val);
            self.skip_ws();
            match self.peek() {
                Some(b',') => {
                    self.pos += 1;
                }
                Some(b']') => {
                    self.pos += 1;
                    return Ok(JsonValue::Array(elems));
                }
                _ => return Err(format!("expected ',' or ']' at pos {}", self.pos)),
            }
        }
    }

    /// Consumes the exact keyword `expected` and returns `value`.
    fn parse_literal(&mut self, expected: &str, value: JsonValue) -> Result<JsonValue, String> {
        if self.bytes[self.pos..].starts_with(expected.as_bytes()) {
            self.pos += expected.len();
            Ok(value)
        } else {
            Err(format!("expected literal '{}'", expected))
        }
    }
}

/// Parse a JSON string into a `JsonValue`.
///
/// # Errors
///
/// Returns a human-readable message if the input is not a single JSON value
/// optionally surrounded by whitespace (including when anything else follows
/// the value).
pub fn parse(input: &str) -> Result<JsonValue, String> {
    let mut parser = Parser::new(input);
    let val = parser.parse_value()?;
    parser.skip_ws();
    if parser.pos != parser.bytes.len() {
        return Err(format!("trailing data at pos {}", parser.pos));
    }
    Ok(val)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parse_simple_object() {
        let val = parse(r#"{"a": 1, "b": "hello"}"#).unwrap();
        assert_eq!(val.get("a"), Some(&JsonValue::Number(1.0)));
        assert_eq!(val.get("b"), Some(&JsonValue::Str("hello".into())));
    }

    #[test]
    fn parse_array() {
        let val = parse(r#"[1, "two", true, null]"#).unwrap();
        let arr = val.as_array().unwrap();
        assert_eq!(arr.len(), 4);
        assert_eq!(arr[2], JsonValue::Bool(true));
        assert_eq!(arr[3], JsonValue::Null);
    }

    #[test]
    fn parse_nested() {
        let val = parse(r#"{"tests": [{"desc": "a"}]}"#).unwrap();
        let tests = val.get("tests").unwrap().as_array().unwrap();
        assert_eq!(tests.len(), 1);
    }

    #[test]
    fn parse_string_escapes() {
        let val = parse(r#""hello\nworld""#).unwrap();
        assert_eq!(val.as_str().unwrap(), "hello\nworld");
    }

    #[test]
    fn parse_unicode_escape() {
        let val = parse(r#""\u0041""#).unwrap();
        assert_eq!(val.as_str().unwrap(), "A");
    }

    #[test]
    fn parse_surrogate_pair() {
        let val = parse(r#""\uD83D\uDE00""#).unwrap();
        assert_eq!(val.as_str().unwrap(), "\u{1F600}");
    }

    #[test]
    fn parse_lone_surrogate_keeps_following_text() {
        let val = parse(r#""\uD800abc""#).unwrap();
        assert_eq!(val.as_str().unwrap(), "\u{FFFD}abc");
        let val = parse(r#""\uD800""#).unwrap();
        assert_eq!(val.as_str().unwrap(), "\u{FFFD}");
    }

    #[test]
    fn parse_multibyte_text() {
        let val = parse("\"h\u{e9}llo \u{1F600}\"").unwrap();
        assert_eq!(val.as_str().unwrap(), "h\u{e9}llo \u{1F600}");
    }
}