web engine - experimental web browser
1//! Minimal JSON parser for reading html5lib test fixtures.
2//!
3//! Supports the subset of JSON used by html5lib-tests: objects, arrays,
4//! strings (with escape sequences including `\uXXXX`), numbers, booleans,
5//! and null.
6
/// A single node of a parsed JSON document.
///
/// Objects are represented as a list of `(key, value)` pairs rather than a
/// map, so the order of the pairs and any duplicate keys are retained.
#[derive(Debug, Clone, PartialEq)]
pub enum JsonValue {
    /// The `null` literal.
    Null,
    /// The `true` or `false` literal.
    Bool(bool),
    /// A number, stored as `f64`.
    Number(f64),
    /// A string.
    Str(String),
    /// A sequence of values.
    Array(Vec<JsonValue>),
    /// A sequence of key/value pairs.
    Object(Vec<(String, JsonValue)>),
}

impl JsonValue {
    /// Returns the string contents if this is a `Str`, otherwise `None`.
    pub fn as_str(&self) -> Option<&str> {
        if let JsonValue::Str(text) = self {
            Some(text.as_str())
        } else {
            None
        }
    }

    /// Returns the elements if this is an `Array`, otherwise `None`.
    pub fn as_array(&self) -> Option<&[JsonValue]> {
        if let JsonValue::Array(items) = self {
            Some(items.as_slice())
        } else {
            None
        }
    }

    /// Returns the key/value pairs if this is an `Object`, otherwise `None`.
    pub fn as_object(&self) -> Option<&[(String, JsonValue)]> {
        if let JsonValue::Object(members) = self {
            Some(members.as_slice())
        } else {
            None
        }
    }

    /// Returns the boolean if this is a `Bool`, otherwise `None`.
    pub fn as_bool(&self) -> Option<bool> {
        if let JsonValue::Bool(flag) = self {
            Some(*flag)
        } else {
            None
        }
    }

    /// Look up a key in a JSON object.
    ///
    /// Returns the value of the first pair whose key equals `key`, or `None`
    /// when there is no such pair or when this value is not an object.
    pub fn get(&self, key: &str) -> Option<&JsonValue> {
        let members = self.as_object()?;
        for (name, value) in members {
            if name.as_str() == key {
                return Some(value);
            }
        }
        None
    }
}
54
55struct Parser<'a> {
56 bytes: &'a [u8],
57 pos: usize,
58}
59
60impl<'a> Parser<'a> {
61 fn new(input: &'a str) -> Self {
62 Self {
63 bytes: input.as_bytes(),
64 pos: 0,
65 }
66 }
67
68 fn skip_ws(&mut self) {
69 while self.pos < self.bytes.len() {
70 match self.bytes[self.pos] {
71 b' ' | b'\t' | b'\n' | b'\r' => self.pos += 1,
72 _ => break,
73 }
74 }
75 }
76
77 fn peek(&self) -> Option<u8> {
78 self.bytes.get(self.pos).copied()
79 }
80
81 fn advance(&mut self) -> Option<u8> {
82 let b = self.bytes.get(self.pos).copied()?;
83 self.pos += 1;
84 Some(b)
85 }
86
87 fn expect(&mut self, ch: u8) -> Result<(), String> {
88 match self.advance() {
89 Some(b) if b == ch => Ok(()),
90 Some(b) => Err(format!(
91 "expected '{}' at pos {}, got '{}'",
92 ch as char, self.pos, b as char
93 )),
94 None => Err(format!(
95 "expected '{}' at pos {}, got EOF",
96 ch as char, self.pos
97 )),
98 }
99 }
100
101 fn parse_value(&mut self) -> Result<JsonValue, String> {
102 self.skip_ws();
103 match self.peek() {
104 Some(b'"') => self.parse_string().map(JsonValue::Str),
105 Some(b'{') => self.parse_object(),
106 Some(b'[') => self.parse_array(),
107 Some(b't') => self.parse_literal("true", JsonValue::Bool(true)),
108 Some(b'f') => self.parse_literal("false", JsonValue::Bool(false)),
109 Some(b'n') => self.parse_literal("null", JsonValue::Null),
110 Some(b'-') | Some(b'0'..=b'9') => self.parse_number(),
111 Some(b) => Err(format!(
112 "unexpected byte '{}' at pos {}",
113 b as char, self.pos
114 )),
115 None => Err("unexpected EOF".into()),
116 }
117 }
118
119 fn parse_string(&mut self) -> Result<String, String> {
120 self.expect(b'"')?;
121 let mut s = String::new();
122 loop {
123 match self.advance() {
124 Some(b'"') => return Ok(s),
125 Some(b'\\') => match self.advance() {
126 Some(b'"') => s.push('"'),
127 Some(b'\\') => s.push('\\'),
128 Some(b'/') => s.push('/'),
129 Some(b'n') => s.push('\n'),
130 Some(b'r') => s.push('\r'),
131 Some(b't') => s.push('\t'),
132 Some(b'b') => s.push('\u{0008}'),
133 Some(b'f') => s.push('\u{000C}'),
134 Some(b'u') => {
135 let cp = self.parse_hex4()?;
136 // Handle surrogate pairs.
137 if (0xD800..=0xDBFF).contains(&cp) {
138 // High surrogate — expect \uXXXX low surrogate.
139 if self.advance() == Some(b'\\') && self.advance() == Some(b'u') {
140 let lo = self.parse_hex4()?;
141 if (0xDC00..=0xDFFF).contains(&lo) {
142 let combined = 0x10000
143 + ((cp as u32 - 0xD800) << 10)
144 + (lo as u32 - 0xDC00);
145 if let Some(ch) = char::from_u32(combined) {
146 s.push(ch);
147 }
148 }
149 }
150 } else if let Some(ch) = char::from_u32(cp as u32) {
151 s.push(ch);
152 }
153 }
154 Some(b) => {
155 s.push('\\');
156 s.push(b as char);
157 }
158 None => return Err("unexpected EOF in string escape".into()),
159 },
160 Some(_) => {
161 // We need to handle multi-byte UTF-8 properly.
162 // Since we're working on bytes, back up and grab the char.
163 self.pos -= 1;
164 let rest = std::str::from_utf8(&self.bytes[self.pos..])
165 .map_err(|e| format!("invalid UTF-8: {}", e))?;
166 let ch = rest.chars().next().unwrap();
167 self.pos += ch.len_utf8();
168 s.push(ch);
169 }
170 None => return Err("unexpected EOF in string".into()),
171 }
172 }
173 }
174
175 fn parse_hex4(&mut self) -> Result<u16, String> {
176 let mut val: u16 = 0;
177 for _ in 0..4 {
178 let b = self.advance().ok_or("unexpected EOF in \\u escape")?;
179 let digit = match b {
180 b'0'..=b'9' => b - b'0',
181 b'a'..=b'f' => b - b'a' + 10,
182 b'A'..=b'F' => b - b'A' + 10,
183 _ => return Err(format!("invalid hex digit '{}'", b as char)),
184 };
185 val = val * 16 + digit as u16;
186 }
187 Ok(val)
188 }
189
190 fn parse_number(&mut self) -> Result<JsonValue, String> {
191 let start = self.pos;
192 if self.peek() == Some(b'-') {
193 self.pos += 1;
194 }
195 while self.pos < self.bytes.len() && self.bytes[self.pos].is_ascii_digit() {
196 self.pos += 1;
197 }
198 if self.pos < self.bytes.len() && self.bytes[self.pos] == b'.' {
199 self.pos += 1;
200 while self.pos < self.bytes.len() && self.bytes[self.pos].is_ascii_digit() {
201 self.pos += 1;
202 }
203 }
204 if self.pos < self.bytes.len()
205 && (self.bytes[self.pos] == b'e' || self.bytes[self.pos] == b'E')
206 {
207 self.pos += 1;
208 if self.pos < self.bytes.len()
209 && (self.bytes[self.pos] == b'+' || self.bytes[self.pos] == b'-')
210 {
211 self.pos += 1;
212 }
213 while self.pos < self.bytes.len() && self.bytes[self.pos].is_ascii_digit() {
214 self.pos += 1;
215 }
216 }
217 let s = std::str::from_utf8(&self.bytes[start..self.pos])
218 .map_err(|e| format!("invalid UTF-8 in number: {}", e))?;
219 let n: f64 = s
220 .parse()
221 .map_err(|e| format!("invalid number '{}': {}", s, e))?;
222 Ok(JsonValue::Number(n))
223 }
224
225 fn parse_object(&mut self) -> Result<JsonValue, String> {
226 self.expect(b'{')?;
227 self.skip_ws();
228 let mut pairs = Vec::new();
229 if self.peek() == Some(b'}') {
230 self.pos += 1;
231 return Ok(JsonValue::Object(pairs));
232 }
233 loop {
234 self.skip_ws();
235 let key = self.parse_string()?;
236 self.skip_ws();
237 self.expect(b':')?;
238 let val = self.parse_value()?;
239 pairs.push((key, val));
240 self.skip_ws();
241 match self.peek() {
242 Some(b',') => {
243 self.pos += 1;
244 }
245 Some(b'}') => {
246 self.pos += 1;
247 return Ok(JsonValue::Object(pairs));
248 }
249 _ => return Err(format!("expected ',' or '}}' at pos {}", self.pos)),
250 }
251 }
252 }
253
254 fn parse_array(&mut self) -> Result<JsonValue, String> {
255 self.expect(b'[')?;
256 self.skip_ws();
257 let mut elems = Vec::new();
258 if self.peek() == Some(b']') {
259 self.pos += 1;
260 return Ok(JsonValue::Array(elems));
261 }
262 loop {
263 let val = self.parse_value()?;
264 elems.push(val);
265 self.skip_ws();
266 match self.peek() {
267 Some(b',') => {
268 self.pos += 1;
269 }
270 Some(b']') => {
271 self.pos += 1;
272 return Ok(JsonValue::Array(elems));
273 }
274 _ => return Err(format!("expected ',' or ']' at pos {}", self.pos)),
275 }
276 }
277 }
278
279 fn parse_literal(&mut self, expected: &str, value: JsonValue) -> Result<JsonValue, String> {
280 for b in expected.bytes() {
281 match self.advance() {
282 Some(got) if got == b => {}
283 _ => return Err(format!("expected literal '{}'", expected)),
284 }
285 }
286 Ok(value)
287 }
288}
289
290/// Parse a JSON string into a `JsonValue`.
291pub fn parse(input: &str) -> Result<JsonValue, String> {
292 let mut parser = Parser::new(input);
293 let val = parser.parse_value()?;
294 parser.skip_ws();
295 if parser.pos != parser.bytes.len() {
296 return Err(format!("trailing data at pos {}", parser.pos));
297 }
298 Ok(val)
299}
300
#[cfg(test)]
mod tests {
    use super::*;

    /// Members of a flat object can be looked up by key.
    #[test]
    fn parse_simple_object() {
        let doc = parse(r#"{"a": 1, "b": "hello"}"#).unwrap();
        let expected_a = JsonValue::Number(1.0);
        let expected_b = JsonValue::Str("hello".into());
        assert_eq!(doc.get("a"), Some(&expected_a));
        assert_eq!(doc.get("b"), Some(&expected_b));
    }

    /// A mixed array keeps all four elements, with the literals decoded.
    #[test]
    fn parse_array() {
        let doc = parse(r#"[1, "two", true, null]"#).unwrap();
        let items = doc.as_array().unwrap();
        assert_eq!(items.len(), 4);
        assert_eq!(items[2], JsonValue::Bool(true));
        assert_eq!(items[3], JsonValue::Null);
    }

    /// An array nested inside an object is reachable through `get`.
    #[test]
    fn parse_nested() {
        let doc = parse(r#"{"tests": [{"desc": "a"}]}"#).unwrap();
        let cases = doc.get("tests").unwrap().as_array().unwrap();
        assert_eq!(cases.len(), 1);
    }

    /// The `\n` escape decodes to a line feed.
    #[test]
    fn parse_string_escapes() {
        let doc = parse(r#""hello\nworld""#).unwrap();
        let text = doc.as_str().unwrap();
        assert_eq!(text, "hello\nworld");
    }

    /// A `\uXXXX` escape decodes to the corresponding character.
    #[test]
    fn parse_unicode_escape() {
        let doc = parse(r#""\u0041""#).unwrap();
        let text = doc.as_str().unwrap();
        assert_eq!(text, "A");
    }
}
339}