···11//! HTML5 tokenizer and tree builder.
/// A token emitted by the HTML tokenizer.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// `<!DOCTYPE name public_id system_id>`
    Doctype {
        /// Doctype name (e.g. `html`); `None` when absent.
        name: Option<String>,
        /// PUBLIC identifier, if one was given.
        public_id: Option<String>,
        /// SYSTEM identifier, if one was given.
        system_id: Option<String>,
        /// Whether the doctype forces quirks mode.
        force_quirks: bool,
    },
    /// `<tag attr="val">`
    StartTag {
        /// Tag name.
        name: String,
        /// Attribute name/value pairs.
        attributes: Vec<(String, String)>,
        /// Whether the tag ended with `/>`.
        self_closing: bool,
    },
    /// `</tag>`
    EndTag { name: String },
    /// Character data (may be coalesced).
    Character(String),
    /// `<!-- comment -->`
    Comment(String),
    /// End of file.
    Eof,
}
2828+2929+/// Tokenize an HTML input string into a sequence of tokens.
3030+///
3131+/// This is a stub that returns an empty `Vec`. The real implementation
3232+/// will be a spec-compliant HTML5 tokenizer state machine.
3333+pub fn tokenize(_input: &str) -> Vec<Token> {
3434+ Vec::new()
3535+}
+263
crates/html/tests/html5lib_tokenizer.rs
···11+//! html5lib tokenizer test harness.
22+//!
33+//! Reads JSON test files from `tests/html5lib-tests/tokenizer/` and runs each
44+//! test case against our HTML tokenizer. Reports pass/fail/skip counts.
55+//!
66+//! Run with: `cargo test -p we-html --test html5lib_tokenizer`
77+88+mod json;
99+1010+use json::JsonValue;
1111+use we_html::Token;
/// Workspace root relative to the crate directory.
///
/// Resolved at compile time from `CARGO_MANIFEST_DIR`, i.e.
/// `crates/html/../../`, so it works regardless of the test's
/// working directory.
const WORKSPACE_ROOT: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/../../");
1515+1616+/// Convert a JSON output token (array) into our `Token` type for comparison.
1717+fn json_to_token(val: &JsonValue) -> Option<Token> {
1818+ let arr = val.as_array()?;
1919+ let kind = arr.first()?.as_str()?;
2020+ match kind {
2121+ "DOCTYPE" => {
2222+ let name = arr.get(1).and_then(|v| v.as_str()).map(String::from);
2323+ let public_id = match arr.get(2) {
2424+ Some(JsonValue::Null) => None,
2525+ Some(v) => v.as_str().map(String::from),
2626+ None => None,
2727+ };
2828+ let system_id = match arr.get(3) {
2929+ Some(JsonValue::Null) => None,
3030+ Some(v) => v.as_str().map(String::from),
3131+ None => None,
3232+ };
3333+ let correctness = arr.get(4).and_then(|v| v.as_bool()).unwrap_or(true);
3434+ Some(Token::Doctype {
3535+ name,
3636+ public_id,
3737+ system_id,
3838+ force_quirks: !correctness,
3939+ })
4040+ }
4141+ "StartTag" => {
4242+ let name = arr.get(1)?.as_str()?.to_string();
4343+ let mut attributes = Vec::new();
4444+ if let Some(attrs_obj) = arr.get(2).and_then(|v| v.as_object()) {
4545+ for (k, v) in attrs_obj {
4646+ let val_str = v.as_str().unwrap_or("").to_string();
4747+ attributes.push((k.clone(), val_str));
4848+ }
4949+ }
5050+ let self_closing = arr.get(3).and_then(|v| v.as_bool()).unwrap_or(false);
5151+ Some(Token::StartTag {
5252+ name,
5353+ attributes,
5454+ self_closing,
5555+ })
5656+ }
5757+ "EndTag" => {
5858+ let name = arr.get(1)?.as_str()?.to_string();
5959+ Some(Token::EndTag { name })
6060+ }
6161+ "Character" => {
6262+ let data = arr.get(1)?.as_str()?.to_string();
6363+ Some(Token::Character(data))
6464+ }
6565+ "Comment" => {
6666+ let data = arr.get(1)?.as_str()?.to_string();
6767+ Some(Token::Comment(data))
6868+ }
6969+ _ => None,
7070+ }
7171+}
/// Decode the "double escaping" used by the html5lib test format.
///
/// When a test sets `doubleEscaped`, its strings contain literal
/// `\uXXXX` sequences that must be turned into the characters they
/// name. Anything that does not form a decodable escape — bad hex,
/// truncated input, or a code point with no `char` (e.g. a lone
/// surrogate) — is passed through verbatim.
fn unescape_double_escaped(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut iter = s.chars();
    while let Some(c) = iter.next() {
        if c != '\\' {
            out.push(c);
            continue;
        }
        match iter.next() {
            Some('u') => {
                let hex: String = iter.by_ref().take(4).collect();
                let decoded = if hex.len() == 4 {
                    u32::from_str_radix(&hex, 16).ok().and_then(char::from_u32)
                } else {
                    None
                };
                match decoded {
                    Some(ch) => out.push(ch),
                    None => {
                        // Not decodable: emit the escape verbatim.
                        out.push_str("\\u");
                        out.push_str(&hex);
                    }
                }
            }
            // `\` followed by anything else is kept literally.
            Some(other) => {
                out.push('\\');
                out.push(other);
            }
            // A trailing lone backslash is kept as-is.
            None => out.push('\\'),
        }
    }
    out
}
110110+111111+/// Run a single test case and return whether it passed.
112112+fn run_test_case(test: &JsonValue, double_escaped: bool) -> bool {
113113+ let input = match test.get("input").and_then(|v| v.as_str()) {
114114+ Some(s) => {
115115+ if double_escaped {
116116+ unescape_double_escaped(s)
117117+ } else {
118118+ s.to_string()
119119+ }
120120+ }
121121+ None => return false,
122122+ };
123123+124124+ let expected_output = match test.get("output").and_then(|v| v.as_array()) {
125125+ Some(arr) => arr,
126126+ None => return false,
127127+ };
128128+129129+ // Convert expected output tokens.
130130+ let expected_tokens: Vec<Token> = expected_output
131131+ .iter()
132132+ .filter_map(|tok_json| {
133133+ let mut tok = json_to_token(tok_json)?;
134134+ if double_escaped {
135135+ match &mut tok {
136136+ Token::Character(ref mut s) => *s = unescape_double_escaped(s),
137137+ Token::Comment(ref mut s) => *s = unescape_double_escaped(s),
138138+ _ => {}
139139+ }
140140+ }
141141+ Some(tok)
142142+ })
143143+ .collect();
144144+145145+ // Run our tokenizer.
146146+ let actual_tokens = we_html::tokenize(&input);
147147+148148+ actual_tokens == expected_tokens
149149+}
150150+151151+/// Load and run all test cases from a single html5lib tokenizer test file.
152152+fn run_test_file(path: &std::path::Path) -> (usize, usize, usize) {
153153+ let content = match std::fs::read_to_string(path) {
154154+ Ok(c) => c,
155155+ Err(e) => {
156156+ eprintln!(" failed to read {}: {}", path.display(), e);
157157+ return (0, 0, 1);
158158+ }
159159+ };
160160+161161+ let root = match json::parse(&content) {
162162+ Ok(v) => v,
163163+ Err(e) => {
164164+ eprintln!(" failed to parse {}: {}", path.display(), e);
165165+ return (0, 0, 1);
166166+ }
167167+ };
168168+169169+ let tests = match root.get("tests").and_then(|v| v.as_array()) {
170170+ Some(t) => t,
171171+ None => {
172172+ eprintln!(" no 'tests' array in {}", path.display());
173173+ return (0, 0, 1);
174174+ }
175175+ };
176176+177177+ let mut pass = 0;
178178+ let mut fail = 0;
179179+ let mut skip = 0;
180180+181181+ for test in tests {
182182+ let desc = test
183183+ .get("description")
184184+ .and_then(|v| v.as_str())
185185+ .unwrap_or("<no description>");
186186+187187+ let double_escaped = test
188188+ .get("doubleEscaped")
189189+ .and_then(|v| v.as_bool())
190190+ .unwrap_or(false);
191191+192192+ // If the test specifies initialStates, we run once per state.
193193+ // For now we only support the default "Data state" so skip others.
194194+ if let Some(states) = test.get("initialStates").and_then(|v| v.as_array()) {
195195+ let has_data_state = states.iter().any(|s| s.as_str() == Some("Data state"));
196196+ if !has_data_state {
197197+ skip += 1;
198198+ continue;
199199+ }
200200+ }
201201+202202+ if run_test_case(test, double_escaped) {
203203+ pass += 1;
204204+ } else {
205205+ fail += 1;
206206+ // Only print first few failures to avoid noise.
207207+ if fail <= 5 {
208208+ eprintln!(" FAIL: {}", desc);
209209+ }
210210+ }
211211+ }
212212+213213+ (pass, fail, skip)
214214+}
/// Harness entry point: run every `.test` file under
/// `tests/html5lib-tests/tokenizer` and print per-file plus total
/// pass/fail/skip counts.
#[test]
fn html5lib_tokenizer_tests() {
    let test_dir = std::path::PathBuf::from(WORKSPACE_ROOT).join("tests/html5lib-tests/tokenizer");

    if !test_dir.exists() {
        eprintln!(
            "html5lib-tests submodule not checked out at {}",
            test_dir.display()
        );
        eprintln!("Run: git submodule update --init tests/html5lib-tests");
        // Don't fail the test — the submodule might not be initialized.
        return;
    }

    // Gather the `.test` files, sorted for stable output ordering.
    let mut paths: Vec<std::path::PathBuf> = std::fs::read_dir(&test_dir)
        .expect("failed to read tokenizer test dir")
        .filter_map(|e| e.ok())
        .map(|e| e.path())
        .filter(|p| p.extension().map_or(false, |ext| ext == "test"))
        .collect();
    paths.sort();

    let (mut total_pass, mut total_fail, mut total_skip) = (0, 0, 0);

    for path in &paths {
        let name = path.file_name().unwrap().to_string_lossy();
        let (pass, fail, skip) = run_test_file(path);
        eprintln!("{}: {} pass, {} fail, {} skip", name, pass, fail, skip);
        total_pass += pass;
        total_fail += fail;
        total_skip += skip;
    }

    eprintln!();
    eprintln!(
        "html5lib tokenizer totals: {} pass, {} fail, {} skip ({} total)",
        total_pass,
        total_fail,
        total_skip,
        total_pass + total_fail + total_skip
    );

    // The test "passes" as a harness — it reports results but doesn't fail
    // the test suite until we have an implementation to measure against.
    // This lets CI always run and report progress.
}
+339
crates/html/tests/json.rs
···11+//! Minimal JSON parser for reading html5lib test fixtures.
22+//!
33+//! Supports the subset of JSON used by html5lib-tests: objects, arrays,
44+//! strings (with escape sequences including `\uXXXX`), numbers, booleans,
55+//! and null.
/// A parsed JSON value.
#[derive(Debug, Clone, PartialEq)]
pub enum JsonValue {
    /// JSON `null`.
    Null,
    /// JSON `true` / `false`.
    Bool(bool),
    /// Any JSON number, stored as `f64`.
    Number(f64),
    /// A JSON string with escape sequences already decoded.
    Str(String),
    /// A JSON array.
    Array(Vec<JsonValue>),
    /// A JSON object, stored as a list of key/value pairs so that
    /// insertion order is preserved.
    Object(Vec<(String, JsonValue)>),
}
1616+1717+impl JsonValue {
1818+ pub fn as_str(&self) -> Option<&str> {
1919+ match self {
2020+ JsonValue::Str(s) => Some(s),
2121+ _ => None,
2222+ }
2323+ }
2424+2525+ pub fn as_array(&self) -> Option<&[JsonValue]> {
2626+ match self {
2727+ JsonValue::Array(a) => Some(a),
2828+ _ => None,
2929+ }
3030+ }
3131+3232+ pub fn as_object(&self) -> Option<&[(String, JsonValue)]> {
3333+ match self {
3434+ JsonValue::Object(o) => Some(o),
3535+ _ => None,
3636+ }
3737+ }
3838+3939+ pub fn as_bool(&self) -> Option<bool> {
4040+ match self {
4141+ JsonValue::Bool(b) => Some(*b),
4242+ _ => None,
4343+ }
4444+ }
4545+4646+ /// Look up a key in a JSON object.
4747+ pub fn get(&self, key: &str) -> Option<&JsonValue> {
4848+ match self {
4949+ JsonValue::Object(pairs) => pairs.iter().find(|(k, _)| k == key).map(|(_, v)| v),
5050+ _ => None,
5151+ }
5252+ }
5353+}
/// Byte-oriented cursor over the JSON input.
struct Parser<'a> {
    /// The full input as UTF-8 bytes.
    bytes: &'a [u8],
    /// Current read offset into `bytes`.
    pos: usize,
}
5959+6060+impl<'a> Parser<'a> {
6161+ fn new(input: &'a str) -> Self {
6262+ Self {
6363+ bytes: input.as_bytes(),
6464+ pos: 0,
6565+ }
6666+ }
6767+6868+ fn skip_ws(&mut self) {
6969+ while self.pos < self.bytes.len() {
7070+ match self.bytes[self.pos] {
7171+ b' ' | b'\t' | b'\n' | b'\r' => self.pos += 1,
7272+ _ => break,
7373+ }
7474+ }
7575+ }
7676+7777+ fn peek(&self) -> Option<u8> {
7878+ self.bytes.get(self.pos).copied()
7979+ }
8080+8181+ fn advance(&mut self) -> Option<u8> {
8282+ let b = self.bytes.get(self.pos).copied()?;
8383+ self.pos += 1;
8484+ Some(b)
8585+ }
8686+8787+ fn expect(&mut self, ch: u8) -> Result<(), String> {
8888+ match self.advance() {
8989+ Some(b) if b == ch => Ok(()),
9090+ Some(b) => Err(format!(
9191+ "expected '{}' at pos {}, got '{}'",
9292+ ch as char, self.pos, b as char
9393+ )),
9494+ None => Err(format!(
9595+ "expected '{}' at pos {}, got EOF",
9696+ ch as char, self.pos
9797+ )),
9898+ }
9999+ }
100100+101101+ fn parse_value(&mut self) -> Result<JsonValue, String> {
102102+ self.skip_ws();
103103+ match self.peek() {
104104+ Some(b'"') => self.parse_string().map(JsonValue::Str),
105105+ Some(b'{') => self.parse_object(),
106106+ Some(b'[') => self.parse_array(),
107107+ Some(b't') => self.parse_literal("true", JsonValue::Bool(true)),
108108+ Some(b'f') => self.parse_literal("false", JsonValue::Bool(false)),
109109+ Some(b'n') => self.parse_literal("null", JsonValue::Null),
110110+ Some(b'-') | Some(b'0'..=b'9') => self.parse_number(),
111111+ Some(b) => Err(format!(
112112+ "unexpected byte '{}' at pos {}",
113113+ b as char, self.pos
114114+ )),
115115+ None => Err("unexpected EOF".into()),
116116+ }
117117+ }
118118+119119+ fn parse_string(&mut self) -> Result<String, String> {
120120+ self.expect(b'"')?;
121121+ let mut s = String::new();
122122+ loop {
123123+ match self.advance() {
124124+ Some(b'"') => return Ok(s),
125125+ Some(b'\\') => match self.advance() {
126126+ Some(b'"') => s.push('"'),
127127+ Some(b'\\') => s.push('\\'),
128128+ Some(b'/') => s.push('/'),
129129+ Some(b'n') => s.push('\n'),
130130+ Some(b'r') => s.push('\r'),
131131+ Some(b't') => s.push('\t'),
132132+ Some(b'b') => s.push('\u{0008}'),
133133+ Some(b'f') => s.push('\u{000C}'),
134134+ Some(b'u') => {
135135+ let cp = self.parse_hex4()?;
136136+ // Handle surrogate pairs.
137137+ if (0xD800..=0xDBFF).contains(&cp) {
138138+ // High surrogate — expect \uXXXX low surrogate.
139139+ if self.advance() == Some(b'\\') && self.advance() == Some(b'u') {
140140+ let lo = self.parse_hex4()?;
141141+ if (0xDC00..=0xDFFF).contains(&lo) {
142142+ let combined = 0x10000
143143+ + ((cp as u32 - 0xD800) << 10)
144144+ + (lo as u32 - 0xDC00);
145145+ if let Some(ch) = char::from_u32(combined) {
146146+ s.push(ch);
147147+ }
148148+ }
149149+ }
150150+ } else if let Some(ch) = char::from_u32(cp as u32) {
151151+ s.push(ch);
152152+ }
153153+ }
154154+ Some(b) => {
155155+ s.push('\\');
156156+ s.push(b as char);
157157+ }
158158+ None => return Err("unexpected EOF in string escape".into()),
159159+ },
160160+ Some(_) => {
161161+ // We need to handle multi-byte UTF-8 properly.
162162+ // Since we're working on bytes, back up and grab the char.
163163+ self.pos -= 1;
164164+ let rest = std::str::from_utf8(&self.bytes[self.pos..])
165165+ .map_err(|e| format!("invalid UTF-8: {}", e))?;
166166+ let ch = rest.chars().next().unwrap();
167167+ self.pos += ch.len_utf8();
168168+ s.push(ch);
169169+ }
170170+ None => return Err("unexpected EOF in string".into()),
171171+ }
172172+ }
173173+ }
174174+175175+ fn parse_hex4(&mut self) -> Result<u16, String> {
176176+ let mut val: u16 = 0;
177177+ for _ in 0..4 {
178178+ let b = self.advance().ok_or("unexpected EOF in \\u escape")?;
179179+ let digit = match b {
180180+ b'0'..=b'9' => b - b'0',
181181+ b'a'..=b'f' => b - b'a' + 10,
182182+ b'A'..=b'F' => b - b'A' + 10,
183183+ _ => return Err(format!("invalid hex digit '{}'", b as char)),
184184+ };
185185+ val = val * 16 + digit as u16;
186186+ }
187187+ Ok(val)
188188+ }
189189+190190+ fn parse_number(&mut self) -> Result<JsonValue, String> {
191191+ let start = self.pos;
192192+ if self.peek() == Some(b'-') {
193193+ self.pos += 1;
194194+ }
195195+ while self.pos < self.bytes.len() && self.bytes[self.pos].is_ascii_digit() {
196196+ self.pos += 1;
197197+ }
198198+ if self.pos < self.bytes.len() && self.bytes[self.pos] == b'.' {
199199+ self.pos += 1;
200200+ while self.pos < self.bytes.len() && self.bytes[self.pos].is_ascii_digit() {
201201+ self.pos += 1;
202202+ }
203203+ }
204204+ if self.pos < self.bytes.len()
205205+ && (self.bytes[self.pos] == b'e' || self.bytes[self.pos] == b'E')
206206+ {
207207+ self.pos += 1;
208208+ if self.pos < self.bytes.len()
209209+ && (self.bytes[self.pos] == b'+' || self.bytes[self.pos] == b'-')
210210+ {
211211+ self.pos += 1;
212212+ }
213213+ while self.pos < self.bytes.len() && self.bytes[self.pos].is_ascii_digit() {
214214+ self.pos += 1;
215215+ }
216216+ }
217217+ let s = std::str::from_utf8(&self.bytes[start..self.pos])
218218+ .map_err(|e| format!("invalid UTF-8 in number: {}", e))?;
219219+ let n: f64 = s
220220+ .parse()
221221+ .map_err(|e| format!("invalid number '{}': {}", s, e))?;
222222+ Ok(JsonValue::Number(n))
223223+ }
224224+225225+ fn parse_object(&mut self) -> Result<JsonValue, String> {
226226+ self.expect(b'{')?;
227227+ self.skip_ws();
228228+ let mut pairs = Vec::new();
229229+ if self.peek() == Some(b'}') {
230230+ self.pos += 1;
231231+ return Ok(JsonValue::Object(pairs));
232232+ }
233233+ loop {
234234+ self.skip_ws();
235235+ let key = self.parse_string()?;
236236+ self.skip_ws();
237237+ self.expect(b':')?;
238238+ let val = self.parse_value()?;
239239+ pairs.push((key, val));
240240+ self.skip_ws();
241241+ match self.peek() {
242242+ Some(b',') => {
243243+ self.pos += 1;
244244+ }
245245+ Some(b'}') => {
246246+ self.pos += 1;
247247+ return Ok(JsonValue::Object(pairs));
248248+ }
249249+ _ => return Err(format!("expected ',' or '}}' at pos {}", self.pos)),
250250+ }
251251+ }
252252+ }
253253+254254+ fn parse_array(&mut self) -> Result<JsonValue, String> {
255255+ self.expect(b'[')?;
256256+ self.skip_ws();
257257+ let mut elems = Vec::new();
258258+ if self.peek() == Some(b']') {
259259+ self.pos += 1;
260260+ return Ok(JsonValue::Array(elems));
261261+ }
262262+ loop {
263263+ let val = self.parse_value()?;
264264+ elems.push(val);
265265+ self.skip_ws();
266266+ match self.peek() {
267267+ Some(b',') => {
268268+ self.pos += 1;
269269+ }
270270+ Some(b']') => {
271271+ self.pos += 1;
272272+ return Ok(JsonValue::Array(elems));
273273+ }
274274+ _ => return Err(format!("expected ',' or ']' at pos {}", self.pos)),
275275+ }
276276+ }
277277+ }
278278+279279+ fn parse_literal(&mut self, expected: &str, value: JsonValue) -> Result<JsonValue, String> {
280280+ for b in expected.bytes() {
281281+ match self.advance() {
282282+ Some(got) if got == b => {}
283283+ _ => return Err(format!("expected literal '{}'", expected)),
284284+ }
285285+ }
286286+ Ok(value)
287287+ }
288288+}
289289+290290+/// Parse a JSON string into a `JsonValue`.
291291+pub fn parse(input: &str) -> Result<JsonValue, String> {
292292+ let mut parser = Parser::new(input);
293293+ let val = parser.parse_value()?;
294294+ parser.skip_ws();
295295+ if parser.pos != parser.bytes.len() {
296296+ return Err(format!("trailing data at pos {}", parser.pos));
297297+ }
298298+ Ok(val)
299299+}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parse_simple_object() {
        let parsed = parse(r#"{"a": 1, "b": "hello"}"#).unwrap();
        assert_eq!(parsed.get("a"), Some(&JsonValue::Number(1.0)));
        assert_eq!(parsed.get("b"), Some(&JsonValue::Str("hello".into())));
    }

    #[test]
    fn parse_array() {
        let parsed = parse(r#"[1, "two", true, null]"#).unwrap();
        let items = parsed.as_array().unwrap();
        assert_eq!(items.len(), 4);
        assert_eq!(items[2], JsonValue::Bool(true));
        assert_eq!(items[3], JsonValue::Null);
    }

    #[test]
    fn parse_nested() {
        let parsed = parse(r#"{"tests": [{"desc": "a"}]}"#).unwrap();
        assert_eq!(parsed.get("tests").unwrap().as_array().unwrap().len(), 1);
    }

    #[test]
    fn parse_string_escapes() {
        assert_eq!(
            parse(r#""hello\nworld""#).unwrap().as_str().unwrap(),
            "hello\nworld"
        );
    }

    #[test]
    fn parse_unicode_escape() {
        assert_eq!(parse(r#""\u0041""#).unwrap().as_str().unwrap(), "A");
    }
}
+31
crates/js/src/lib.rs
···11//! JavaScript engine — lexer, parser, bytecode, register VM, GC, JIT (AArch64).
22+33+use std::fmt;
/// An error produced by the JavaScript engine.
///
/// Human-readable rendering is provided by the `Display` impl in this
/// module.
#[derive(Debug)]
pub enum JsError {
    /// The engine does not yet support this feature or syntax.
    NotImplemented,
    /// A parse/syntax error in the source.
    SyntaxError(String),
    /// A runtime error during execution.
    RuntimeError(String),
}
1515+1616+impl fmt::Display for JsError {
1717+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1818+ match self {
1919+ JsError::NotImplemented => write!(f, "not implemented"),
2020+ JsError::SyntaxError(msg) => write!(f, "SyntaxError: {}", msg),
2121+ JsError::RuntimeError(msg) => write!(f, "RuntimeError: {}", msg),
2222+ }
2323+ }
2424+}
/// Evaluate a JavaScript source string and return the completion value.
///
/// This is a stub that always returns `NotImplemented`. The real
/// implementation will lex, parse, compile to bytecode, and execute.
///
/// # Errors
///
/// Currently always returns [`JsError::NotImplemented`].
pub fn evaluate(_source: &str) -> Result<(), JsError> {
    Err(JsError::NotImplemented)
}
+304
crates/js/tests/test262.rs
···11+//! Test262 test harness.
22+//!
33+//! Walks the Test262 test suite and runs each test case against our JavaScript
44+//! engine. Reports pass/fail/skip counts.
55+//!
66+//! Run with: `cargo test -p we-js --test test262`
/// Workspace root relative to the crate directory.
///
/// Resolved at compile time from `CARGO_MANIFEST_DIR`, i.e.
/// `crates/js/../../`, so it works regardless of the test's working
/// directory.
const WORKSPACE_ROOT: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/../../");
/// Metadata extracted from a Test262 test file's YAML frontmatter.
///
/// Note: only some fields are consumed by the harness so far
/// (`negative_phase_parse` in `run_test`; `is_async`/`is_module` in
/// `should_skip`) — the rest are collected for future use.
struct TestMeta {
    /// If true, the test expects a parse/early error.
    negative_phase_parse: bool,
    /// If true, the test expects a runtime error. (Collected but not
    /// yet used to check the error's phase.)
    negative_phase_runtime: bool,
    /// The expected error type for negative tests (e.g. "SyntaxError").
    /// (Collected but not yet matched against the actual error.)
    negative_type: Option<String>,
    /// If true, this is an async test.
    is_async: bool,
    /// If true, this test should be run as a module.
    is_module: bool,
    /// If true, skip the harness preamble.
    is_raw: bool,
    /// Required features.
    features: Vec<String>,
    /// Required harness includes.
    includes: Vec<String>,
}
impl TestMeta {
    /// Whether the harness should skip this test entirely.
    fn should_skip(&self) -> bool {
        // Skip async tests and module tests for now.
        self.is_async || self.is_module
    }
}
3737+3838+/// Parse the YAML-ish frontmatter from a Test262 test file.
3939+///
4040+/// The frontmatter is between `/*---` and `---*/`.
4141+fn parse_frontmatter(source: &str) -> TestMeta {
4242+ let mut meta = TestMeta {
4343+ negative_phase_parse: false,
4444+ negative_phase_runtime: false,
4545+ negative_type: None,
4646+ is_async: false,
4747+ is_module: false,
4848+ is_raw: false,
4949+ features: Vec::new(),
5050+ includes: Vec::new(),
5151+ };
5252+5353+ let start = match source.find("/*---") {
5454+ Some(i) => i + 5,
5555+ None => return meta,
5656+ };
5757+ let end = match source[start..].find("---*/") {
5858+ Some(i) => start + i,
5959+ None => return meta,
6060+ };
6161+ let yaml = &source[start..end];
6262+6363+ // Very simple line-by-line YAML extraction.
6464+ let mut in_negative = false;
6565+ let mut in_features = false;
6666+ let mut in_includes = false;
6767+ let mut in_flags = false;
6868+6969+ for line in yaml.lines() {
7070+ let trimmed = line.trim();
7171+7272+ // Detect top-level keys (not indented or with specific indent).
7373+ if !trimmed.is_empty() && !trimmed.starts_with('-') && !line.starts_with(' ') {
7474+ in_negative = false;
7575+ in_features = false;
7676+ in_includes = false;
7777+ in_flags = false;
7878+ }
7979+8080+ if trimmed.starts_with("negative:") {
8181+ in_negative = true;
8282+ continue;
8383+ }
8484+ if trimmed.starts_with("features:") {
8585+ in_features = true;
8686+ // Check for inline list: features: [a, b]
8787+ if let Some(rest) = trimmed.strip_prefix("features:") {
8888+ let rest = rest.trim();
8989+ if rest.starts_with('[') && rest.ends_with(']') {
9090+ let inner = &rest[1..rest.len() - 1];
9191+ for item in inner.split(',') {
9292+ let item = item.trim();
9393+ if !item.is_empty() {
9494+ meta.features.push(item.to_string());
9595+ }
9696+ }
9797+ in_features = false;
9898+ }
9999+ }
100100+ continue;
101101+ }
102102+ if trimmed.starts_with("includes:") {
103103+ in_includes = true;
104104+ if let Some(rest) = trimmed.strip_prefix("includes:") {
105105+ let rest = rest.trim();
106106+ if rest.starts_with('[') && rest.ends_with(']') {
107107+ let inner = &rest[1..rest.len() - 1];
108108+ for item in inner.split(',') {
109109+ let item = item.trim();
110110+ if !item.is_empty() {
111111+ meta.includes.push(item.to_string());
112112+ }
113113+ }
114114+ in_includes = false;
115115+ }
116116+ }
117117+ continue;
118118+ }
119119+ if trimmed.starts_with("flags:") {
120120+ in_flags = true;
121121+ if let Some(rest) = trimmed.strip_prefix("flags:") {
122122+ let rest = rest.trim();
123123+ if rest.starts_with('[') && rest.ends_with(']') {
124124+ let inner = &rest[1..rest.len() - 1];
125125+ for item in inner.split(',') {
126126+ let flag = item.trim();
127127+ match flag {
128128+ "async" => meta.is_async = true,
129129+ "module" => meta.is_module = true,
130130+ "raw" => meta.is_raw = true,
131131+ _ => {}
132132+ }
133133+ }
134134+ in_flags = false;
135135+ }
136136+ }
137137+ continue;
138138+ }
139139+140140+ // Handle list items under current key.
141141+ if let Some(item) = trimmed.strip_prefix("- ") {
142142+ if in_features {
143143+ meta.features.push(item.to_string());
144144+ } else if in_includes {
145145+ meta.includes.push(item.to_string());
146146+ } else if in_flags {
147147+ match item {
148148+ "async" => meta.is_async = true,
149149+ "module" => meta.is_module = true,
150150+ "raw" => meta.is_raw = true,
151151+ _ => {}
152152+ }
153153+ }
154154+ continue;
155155+ }
156156+157157+ // Handle sub-keys under negative.
158158+ if in_negative {
159159+ if let Some(rest) = trimmed.strip_prefix("phase:") {
160160+ let phase = rest.trim();
161161+ match phase {
162162+ "parse" | "early" => meta.negative_phase_parse = true,
163163+ "runtime" | "resolution" => meta.negative_phase_runtime = true,
164164+ _ => {}
165165+ }
166166+ }
167167+ if let Some(rest) = trimmed.strip_prefix("type:") {
168168+ meta.negative_type = Some(rest.trim().to_string());
169169+ }
170170+ }
171171+ }
172172+173173+ meta
174174+}
/// Recursively collect all `.js` test files under `dir`, depth-first
/// with entries sorted by file name at each level.
///
/// `_FIXTURE` files (test helpers, not tests themselves) and
/// non-`.js` files are excluded; unreadable directories are silently
/// skipped.
fn collect_test_files(dir: &std::path::Path, files: &mut Vec<std::path::PathBuf>) {
    let entries = match std::fs::read_dir(dir) {
        Ok(iter) => iter,
        Err(_) => return,
    };
    let mut children: Vec<_> = entries.filter_map(Result::ok).collect();
    children.sort_by_key(|c| c.file_name());

    for child in children {
        let path = child.path();
        if path.is_dir() {
            collect_test_files(&path, files);
            continue;
        }
        if !path.extension().map_or(false, |ext| ext == "js") {
            continue;
        }
        let is_fixture = path
            .file_name()
            .unwrap()
            .to_string_lossy()
            .contains("_FIXTURE");
        if !is_fixture {
            files.push(path);
        }
    }
}
198198+199199+/// Run a single Test262 test file. Returns (pass, fail, skip).
200200+fn run_test(path: &std::path::Path) -> (usize, usize, usize) {
201201+ let source = match std::fs::read_to_string(path) {
202202+ Ok(s) => s,
203203+ Err(_) => return (0, 0, 1),
204204+ };
205205+206206+ let meta = parse_frontmatter(&source);
207207+208208+ if meta.should_skip() {
209209+ return (0, 0, 1);
210210+ }
211211+212212+ // For negative parse tests, if our evaluate returns an error, that's a pass.
213213+ // For positive tests, evaluate should succeed (return Ok).
214214+ let result = we_js::evaluate(&source);
215215+216216+ if meta.negative_phase_parse {
217217+ // We expect a parse error. If our engine returns any error, count as pass.
218218+ match result {
219219+ Err(_) => (1, 0, 0),
220220+ Ok(()) => (0, 1, 0),
221221+ }
222222+ } else {
223223+ // We expect success.
224224+ match result {
225225+ Ok(()) => (1, 0, 0),
226226+ Err(_) => (0, 1, 0),
227227+ }
228228+ }
229229+}
/// Harness entry point: walk `tests/test262/test/language`, run every
/// test file, and print per-group plus total pass/fail/skip counts.
///
/// Reports progress without failing the suite so CI can always run it,
/// even before the engine is implemented.
#[test]
fn test262_language_tests() {
    let test_dir = std::path::PathBuf::from(WORKSPACE_ROOT).join("tests/test262/test/language");

    if !test_dir.exists() {
        eprintln!(
            "test262 submodule not checked out at {}",
            test_dir.display()
        );
        eprintln!("Run: git submodule update --init tests/test262");
        return;
    }

    let mut files = Vec::new();
    collect_test_files(&test_dir, &mut files);

    let (mut total_pass, mut total_fail, mut total_skip) = (0, 0, 0);

    // Results are reported per top-level subdirectory (e.g.
    // "expressions", "literals"); files arrive sorted, so each group is
    // a contiguous run.
    let mut group_name = String::new();
    let (mut group_pass, mut group_fail, mut group_skip) = (0, 0, 0);

    for path in &files {
        let rel = path.strip_prefix(&test_dir).unwrap_or(path);
        let group = rel
            .components()
            .next()
            .map(|c| c.as_os_str().to_string_lossy().to_string())
            .unwrap_or_default();

        if group != group_name {
            // Flush the previous group before starting a new one.
            if !group_name.is_empty() {
                eprintln!(
                    " {}: {} pass, {} fail, {} skip",
                    group_name, group_pass, group_fail, group_skip
                );
            }
            group_name = group;
            group_pass = 0;
            group_fail = 0;
            group_skip = 0;
        }

        let (p, f, s) = run_test(path);
        group_pass += p;
        group_fail += f;
        group_skip += s;
        total_pass += p;
        total_fail += f;
        total_skip += s;
    }

    // Flush the final group.
    if !group_name.is_empty() {
        eprintln!(
            " {}: {} pass, {} fail, {} skip",
            group_name, group_pass, group_fail, group_skip
        );
    }

    eprintln!();
    eprintln!(
        "Test262 language totals: {} pass, {} fail, {} skip ({} total)",
        total_pass,
        total_fail,
        total_skip,
        total_pass + total_fail + total_skip
    );
}