···11//! HTML5 tokenizer and tree builder.
/// A token emitted by the HTML tokenizer.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// `<!DOCTYPE name public_id system_id>`
    Doctype {
        /// Doctype name (e.g. `html`); `None` when absent.
        name: Option<String>,
        /// PUBLIC identifier, if one was given.
        public_id: Option<String>,
        /// SYSTEM identifier, if one was given.
        system_id: Option<String>,
        /// Whether the doctype forces quirks mode.
        force_quirks: bool,
    },
    /// `<tag attr="val">`
    StartTag {
        /// Tag name.
        name: String,
        /// Attribute name/value pairs.
        attributes: Vec<(String, String)>,
        /// Whether the tag ended with `/>`.
        self_closing: bool,
    },
    /// `</tag>`
    EndTag { name: String },
    /// Character data (may be coalesced).
    Character(String),
    /// `<!-- comment -->`
    Comment(String),
    /// End of file.
    Eof,
}
2828+2929+/// Tokenize an HTML input string into a sequence of tokens.
3030+///
3131+/// This is a stub that returns an empty `Vec`. The real implementation
3232+/// will be a spec-compliant HTML5 tokenizer state machine.
3333+pub fn tokenize(_input: &str) -> Vec<Token> {
3434+ Vec::new()
3535+}
+263
crates/html/tests/html5lib_tokenizer.rs
···11+//! html5lib tokenizer test harness.
22+//!
33+//! Reads JSON test files from `tests/html5lib-tests/tokenizer/` and runs each
44+//! test case against our HTML tokenizer. Reports pass/fail/skip counts.
55+//!
66+//! Run with: `cargo test -p we-html --test html5lib_tokenizer`
77+88+mod json;
99+1010+use json::JsonValue;
1111+use we_html::Token;
/// Workspace root relative to the crate directory.
///
/// Resolved at compile time from `CARGO_MANIFEST_DIR`, i.e.
/// `crates/html/../../`, so it works regardless of the test's
/// working directory.
const WORKSPACE_ROOT: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/../../");
1515+1616+/// Convert a JSON output token (array) into our `Token` type for comparison.
1717+fn json_to_token(val: &JsonValue) -> Option<Token> {
1818+ let arr = val.as_array()?;
1919+ let kind = arr.first()?.as_str()?;
2020+ match kind {
2121+ "DOCTYPE" => {
2222+ let name = arr.get(1).and_then(|v| v.as_str()).map(String::from);
2323+ let public_id = match arr.get(2) {
2424+ Some(JsonValue::Null) => None,
2525+ Some(v) => v.as_str().map(String::from),
2626+ None => None,
2727+ };
2828+ let system_id = match arr.get(3) {
2929+ Some(JsonValue::Null) => None,
3030+ Some(v) => v.as_str().map(String::from),
3131+ None => None,
3232+ };
3333+ let correctness = arr.get(4).and_then(|v| v.as_bool()).unwrap_or(true);
3434+ Some(Token::Doctype {
3535+ name,
3636+ public_id,
3737+ system_id,
3838+ force_quirks: !correctness,
3939+ })
4040+ }
4141+ "StartTag" => {
4242+ let name = arr.get(1)?.as_str()?.to_string();
4343+ let mut attributes = Vec::new();
4444+ if let Some(attrs_obj) = arr.get(2).and_then(|v| v.as_object()) {
4545+ for (k, v) in attrs_obj {
4646+ let val_str = v.as_str().unwrap_or("").to_string();
4747+ attributes.push((k.clone(), val_str));
4848+ }
4949+ }
5050+ let self_closing = arr.get(3).and_then(|v| v.as_bool()).unwrap_or(false);
5151+ Some(Token::StartTag {
5252+ name,
5353+ attributes,
5454+ self_closing,
5555+ })
5656+ }
5757+ "EndTag" => {
5858+ let name = arr.get(1)?.as_str()?.to_string();
5959+ Some(Token::EndTag { name })
6060+ }
6161+ "Character" => {
6262+ let data = arr.get(1)?.as_str()?.to_string();
6363+ Some(Token::Character(data))
6464+ }
6565+ "Comment" => {
6666+ let data = arr.get(1)?.as_str()?.to_string();
6767+ Some(Token::Comment(data))
6868+ }
6969+ _ => None,
7070+ }
7171+}
/// Decode the "double escaping" used by the html5lib test format.
///
/// When a test sets `doubleEscaped`, its strings contain literal
/// `\uXXXX` sequences that must be turned into the characters they
/// name. Anything that does not form a decodable escape — bad hex,
/// truncated input, or a code point with no `char` (e.g. a lone
/// surrogate) — is passed through verbatim.
fn unescape_double_escaped(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut iter = s.chars();
    while let Some(c) = iter.next() {
        if c != '\\' {
            out.push(c);
            continue;
        }
        match iter.next() {
            Some('u') => {
                let hex: String = iter.by_ref().take(4).collect();
                let decoded = if hex.len() == 4 {
                    u32::from_str_radix(&hex, 16).ok().and_then(char::from_u32)
                } else {
                    None
                };
                match decoded {
                    Some(ch) => out.push(ch),
                    None => {
                        // Not decodable: emit the escape verbatim.
                        out.push_str("\\u");
                        out.push_str(&hex);
                    }
                }
            }
            // `\` followed by anything else is kept literally.
            Some(other) => {
                out.push('\\');
                out.push(other);
            }
            // A trailing lone backslash is kept as-is.
            None => out.push('\\'),
        }
    }
    out
}
110110+111111+/// Run a single test case and return whether it passed.
112112+fn run_test_case(test: &JsonValue, double_escaped: bool) -> bool {
113113+ let input = match test.get("input").and_then(|v| v.as_str()) {
114114+ Some(s) => {
115115+ if double_escaped {
116116+ unescape_double_escaped(s)
117117+ } else {
118118+ s.to_string()
119119+ }
120120+ }
121121+ None => return false,
122122+ };
123123+124124+ let expected_output = match test.get("output").and_then(|v| v.as_array()) {
125125+ Some(arr) => arr,
126126+ None => return false,
127127+ };
128128+129129+ // Convert expected output tokens.
130130+ let expected_tokens: Vec<Token> = expected_output
131131+ .iter()
132132+ .filter_map(|tok_json| {
133133+ let mut tok = json_to_token(tok_json)?;
134134+ if double_escaped {
135135+ match &mut tok {
136136+ Token::Character(ref mut s) => *s = unescape_double_escaped(s),
137137+ Token::Comment(ref mut s) => *s = unescape_double_escaped(s),
138138+ _ => {}
139139+ }
140140+ }
141141+ Some(tok)
142142+ })
143143+ .collect();
144144+145145+ // Run our tokenizer.
146146+ let actual_tokens = we_html::tokenize(&input);
147147+148148+ actual_tokens == expected_tokens
149149+}
150150+151151+/// Load and run all test cases from a single html5lib tokenizer test file.
152152+fn run_test_file(path: &std::path::Path) -> (usize, usize, usize) {
153153+ let content = match std::fs::read_to_string(path) {
154154+ Ok(c) => c,
155155+ Err(e) => {
156156+ eprintln!(" failed to read {}: {}", path.display(), e);
157157+ return (0, 0, 1);
158158+ }
159159+ };
160160+161161+ let root = match json::parse(&content) {
162162+ Ok(v) => v,
163163+ Err(e) => {
164164+ eprintln!(" failed to parse {}: {}", path.display(), e);
165165+ return (0, 0, 1);
166166+ }
167167+ };
168168+169169+ let tests = match root.get("tests").and_then(|v| v.as_array()) {
170170+ Some(t) => t,
171171+ None => {
172172+ eprintln!(" no 'tests' array in {}", path.display());
173173+ return (0, 0, 1);
174174+ }
175175+ };
176176+177177+ let mut pass = 0;
178178+ let mut fail = 0;
179179+ let mut skip = 0;
180180+181181+ for test in tests {
182182+ let desc = test
183183+ .get("description")
184184+ .and_then(|v| v.as_str())
185185+ .unwrap_or("<no description>");
186186+187187+ let double_escaped = test
188188+ .get("doubleEscaped")
189189+ .and_then(|v| v.as_bool())
190190+ .unwrap_or(false);
191191+192192+ // If the test specifies initialStates, we run once per state.
193193+ // For now we only support the default "Data state" so skip others.
194194+ if let Some(states) = test.get("initialStates").and_then(|v| v.as_array()) {
195195+ let has_data_state = states.iter().any(|s| s.as_str() == Some("Data state"));
196196+ if !has_data_state {
197197+ skip += 1;
198198+ continue;
199199+ }
200200+ }
201201+202202+ if run_test_case(test, double_escaped) {
203203+ pass += 1;
204204+ } else {
205205+ fail += 1;
206206+ // Only print first few failures to avoid noise.
207207+ if fail <= 5 {
208208+ eprintln!(" FAIL: {}", desc);
209209+ }
210210+ }
211211+ }
212212+213213+ (pass, fail, skip)
214214+}
/// Harness entry point: run every `.test` file under
/// `tests/html5lib-tests/tokenizer` and print per-file plus total
/// pass/fail/skip counts.
#[test]
fn html5lib_tokenizer_tests() {
    let test_dir = std::path::PathBuf::from(WORKSPACE_ROOT).join("tests/html5lib-tests/tokenizer");

    if !test_dir.exists() {
        eprintln!(
            "html5lib-tests submodule not checked out at {}",
            test_dir.display()
        );
        eprintln!("Run: git submodule update --init tests/html5lib-tests");
        // Don't fail the test — the submodule might not be initialized.
        return;
    }

    // Gather the `.test` files, sorted for stable output ordering.
    let mut paths: Vec<std::path::PathBuf> = std::fs::read_dir(&test_dir)
        .expect("failed to read tokenizer test dir")
        .filter_map(|e| e.ok())
        .map(|e| e.path())
        .filter(|p| p.extension().map_or(false, |ext| ext == "test"))
        .collect();
    paths.sort();

    let (mut total_pass, mut total_fail, mut total_skip) = (0, 0, 0);

    for path in &paths {
        let name = path.file_name().unwrap().to_string_lossy();
        let (pass, fail, skip) = run_test_file(path);
        eprintln!("{}: {} pass, {} fail, {} skip", name, pass, fail, skip);
        total_pass += pass;
        total_fail += fail;
        total_skip += skip;
    }

    eprintln!();
    eprintln!(
        "html5lib tokenizer totals: {} pass, {} fail, {} skip ({} total)",
        total_pass,
        total_fail,
        total_skip,
        total_pass + total_fail + total_skip
    );

    // The test "passes" as a harness — it reports results but doesn't fail
    // the test suite until we have an implementation to measure against.
    // This lets CI always run and report progress.
}
+339
crates/html/tests/json.rs
···11+//! Minimal JSON parser for reading html5lib test fixtures.
22+//!
33+//! Supports the subset of JSON used by html5lib-tests: objects, arrays,
44+//! strings (with escape sequences including `\uXXXX`), numbers, booleans,
55+//! and null.
/// A parsed JSON value.
#[derive(Debug, Clone, PartialEq)]
pub enum JsonValue {
    /// JSON `null`.
    Null,
    /// JSON `true` / `false`.
    Bool(bool),
    /// Any JSON number, stored as `f64`.
    Number(f64),
    /// A JSON string with escape sequences already decoded.
    Str(String),
    /// A JSON array.
    Array(Vec<JsonValue>),
    /// A JSON object, stored as a list of key/value pairs so that
    /// insertion order is preserved.
    Object(Vec<(String, JsonValue)>),
}
1616+1717+impl JsonValue {
1818+ pub fn as_str(&self) -> Option<&str> {
1919+ match self {
2020+ JsonValue::Str(s) => Some(s),
2121+ _ => None,
2222+ }
2323+ }
2424+2525+ pub fn as_array(&self) -> Option<&[JsonValue]> {
2626+ match self {
2727+ JsonValue::Array(a) => Some(a),
2828+ _ => None,
2929+ }
3030+ }
3131+3232+ pub fn as_object(&self) -> Option<&[(String, JsonValue)]> {
3333+ match self {
3434+ JsonValue::Object(o) => Some(o),
3535+ _ => None,
3636+ }
3737+ }
3838+3939+ pub fn as_bool(&self) -> Option<bool> {
4040+ match self {
4141+ JsonValue::Bool(b) => Some(*b),
4242+ _ => None,
4343+ }
4444+ }
4545+4646+ /// Look up a key in a JSON object.
4747+ pub fn get(&self, key: &str) -> Option<&JsonValue> {
4848+ match self {
4949+ JsonValue::Object(pairs) => pairs.iter().find(|(k, _)| k == key).map(|(_, v)| v),
5050+ _ => None,
5151+ }
5252+ }
5353+}
/// Byte-oriented cursor over the JSON input.
struct Parser<'a> {
    /// The full input as UTF-8 bytes.
    bytes: &'a [u8],
    /// Current read offset into `bytes`.
    pos: usize,
}
5959+6060+impl<'a> Parser<'a> {
6161+ fn new(input: &'a str) -> Self {
6262+ Self {
6363+ bytes: input.as_bytes(),
6464+ pos: 0,
6565+ }
6666+ }
6767+6868+ fn skip_ws(&mut self) {
6969+ while self.pos < self.bytes.len() {
7070+ match self.bytes[self.pos] {
7171+ b' ' | b'\t' | b'\n' | b'\r' => self.pos += 1,
7272+ _ => break,
7373+ }
7474+ }
7575+ }
7676+7777+ fn peek(&self) -> Option<u8> {
7878+ self.bytes.get(self.pos).copied()
7979+ }
8080+8181+ fn advance(&mut self) -> Option<u8> {
8282+ let b = self.bytes.get(self.pos).copied()?;
8383+ self.pos += 1;
8484+ Some(b)
8585+ }
8686+8787+ fn expect(&mut self, ch: u8) -> Result<(), String> {
8888+ match self.advance() {
8989+ Some(b) if b == ch => Ok(()),
9090+ Some(b) => Err(format!(
9191+ "expected '{}' at pos {}, got '{}'",
9292+ ch as char, self.pos, b as char
9393+ )),
9494+ None => Err(format!(
9595+ "expected '{}' at pos {}, got EOF",
9696+ ch as char, self.pos
9797+ )),
9898+ }
9999+ }
100100+101101+ fn parse_value(&mut self) -> Result<JsonValue, String> {
102102+ self.skip_ws();
103103+ match self.peek() {
104104+ Some(b'"') => self.parse_string().map(JsonValue::Str),
105105+ Some(b'{') => self.parse_object(),
106106+ Some(b'[') => self.parse_array(),
107107+ Some(b't') => self.parse_literal("true", JsonValue::Bool(true)),
108108+ Some(b'f') => self.parse_literal("false", JsonValue::Bool(false)),
109109+ Some(b'n') => self.parse_literal("null", JsonValue::Null),
110110+ Some(b'-') | Some(b'0'..=b'9') => self.parse_number(),
111111+ Some(b) => Err(format!(
112112+ "unexpected byte '{}' at pos {}",
113113+ b as char, self.pos
114114+ )),
115115+ None => Err("unexpected EOF".into()),
116116+ }
117117+ }
118118+119119+ fn parse_string(&mut self) -> Result<String, String> {
120120+ self.expect(b'"')?;
121121+ let mut s = String::new();
122122+ loop {
123123+ match self.advance() {
124124+ Some(b'"') => return Ok(s),
125125+ Some(b'\\') => match self.advance() {
126126+ Some(b'"') => s.push('"'),
127127+ Some(b'\\') => s.push('\\'),
128128+ Some(b'/') => s.push('/'),
129129+ Some(b'n') => s.push('\n'),
130130+ Some(b'r') => s.push('\r'),
131131+ Some(b't') => s.push('\t'),
132132+ Some(b'b') => s.push('\u{0008}'),
133133+ Some(b'f') => s.push('\u{000C}'),
134134+ Some(b'u') => {
135135+ let cp = self.parse_hex4()?;
136136+ // Handle surrogate pairs.
137137+ if (0xD800..=0xDBFF).contains(&cp) {
138138+ // High surrogate — expect \uXXXX low surrogate.
139139+ if self.advance() == Some(b'\\') && self.advance() == Some(b'u') {
140140+ let lo = self.parse_hex4()?;
141141+ if (0xDC00..=0xDFFF).contains(&lo) {
142142+ let combined = 0x10000
143143+ + ((cp as u32 - 0xD800) << 10)
144144+ + (lo as u32 - 0xDC00);
145145+ if let Some(ch) = char::from_u32(combined) {
146146+ s.push(ch);
147147+ }
148148+ }
149149+ }
150150+ } else if let Some(ch) = char::from_u32(cp as u32) {
151151+ s.push(ch);
152152+ }
153153+ }
154154+ Some(b) => {
155155+ s.push('\\');
156156+ s.push(b as char);
157157+ }
158158+ None => return Err("unexpected EOF in string escape".into()),
159159+ },
160160+ Some(_) => {
161161+ // We need to handle multi-byte UTF-8 properly.
162162+ // Since we're working on bytes, back up and grab the char.
163163+ self.pos -= 1;
164164+ let rest = std::str::from_utf8(&self.bytes[self.pos..])
165165+ .map_err(|e| format!("invalid UTF-8: {}", e))?;
166166+ let ch = rest.chars().next().unwrap();
167167+ self.pos += ch.len_utf8();
168168+ s.push(ch);
169169+ }
170170+ None => return Err("unexpected EOF in string".into()),
171171+ }
172172+ }
173173+ }
174174+175175+ fn parse_hex4(&mut self) -> Result<u16, String> {
176176+ let mut val: u16 = 0;
177177+ for _ in 0..4 {
178178+ let b = self.advance().ok_or("unexpected EOF in \\u escape")?;
179179+ let digit = match b {
180180+ b'0'..=b'9' => b - b'0',
181181+ b'a'..=b'f' => b - b'a' + 10,
182182+ b'A'..=b'F' => b - b'A' + 10,
183183+ _ => return Err(format!("invalid hex digit '{}'", b as char)),
184184+ };
185185+ val = val * 16 + digit as u16;
186186+ }
187187+ Ok(val)
188188+ }
189189+190190+ fn parse_number(&mut self) -> Result<JsonValue, String> {
191191+ let start = self.pos;
192192+ if self.peek() == Some(b'-') {
193193+ self.pos += 1;
194194+ }
195195+ while self.pos < self.bytes.len() && self.bytes[self.pos].is_ascii_digit() {
196196+ self.pos += 1;
197197+ }
198198+ if self.pos < self.bytes.len() && self.bytes[self.pos] == b'.' {
199199+ self.pos += 1;
200200+ while self.pos < self.bytes.len() && self.bytes[self.pos].is_ascii_digit() {
201201+ self.pos += 1;
202202+ }
203203+ }
204204+ if self.pos < self.bytes.len()
205205+ && (self.bytes[self.pos] == b'e' || self.bytes[self.pos] == b'E')
206206+ {
207207+ self.pos += 1;
208208+ if self.pos < self.bytes.len()
209209+ && (self.bytes[self.pos] == b'+' || self.bytes[self.pos] == b'-')
210210+ {
211211+ self.pos += 1;
212212+ }
213213+ while self.pos < self.bytes.len() && self.bytes[self.pos].is_ascii_digit() {
214214+ self.pos += 1;
215215+ }
216216+ }
217217+ let s = std::str::from_utf8(&self.bytes[start..self.pos])
218218+ .map_err(|e| format!("invalid UTF-8 in number: {}", e))?;
219219+ let n: f64 = s
220220+ .parse()
221221+ .map_err(|e| format!("invalid number '{}': {}", s, e))?;
222222+ Ok(JsonValue::Number(n))
223223+ }
224224+225225+ fn parse_object(&mut self) -> Result<JsonValue, String> {
226226+ self.expect(b'{')?;
227227+ self.skip_ws();
228228+ let mut pairs = Vec::new();
229229+ if self.peek() == Some(b'}') {
230230+ self.pos += 1;
231231+ return Ok(JsonValue::Object(pairs));
232232+ }
233233+ loop {
234234+ self.skip_ws();
235235+ let key = self.parse_string()?;
236236+ self.skip_ws();
237237+ self.expect(b':')?;
238238+ let val = self.parse_value()?;
239239+ pairs.push((key, val));
240240+ self.skip_ws();
241241+ match self.peek() {
242242+ Some(b',') => {
243243+ self.pos += 1;
244244+ }
245245+ Some(b'}') => {
246246+ self.pos += 1;
247247+ return Ok(JsonValue::Object(pairs));
248248+ }
249249+ _ => return Err(format!("expected ',' or '}}' at pos {}", self.pos)),
250250+ }
251251+ }
252252+ }
253253+254254+ fn parse_array(&mut self) -> Result<JsonValue, String> {
255255+ self.expect(b'[')?;
256256+ self.skip_ws();
257257+ let mut elems = Vec::new();
258258+ if self.peek() == Some(b']') {
259259+ self.pos += 1;
260260+ return Ok(JsonValue::Array(elems));
261261+ }
262262+ loop {
263263+ let val = self.parse_value()?;
264264+ elems.push(val);
265265+ self.skip_ws();
266266+ match self.peek() {
267267+ Some(b',') => {
268268+ self.pos += 1;
269269+ }
270270+ Some(b']') => {
271271+ self.pos += 1;
272272+ return Ok(JsonValue::Array(elems));
273273+ }
274274+ _ => return Err(format!("expected ',' or ']' at pos {}", self.pos)),
275275+ }
276276+ }
277277+ }
278278+279279+ fn parse_literal(&mut self, expected: &str, value: JsonValue) -> Result<JsonValue, String> {
280280+ for b in expected.bytes() {
281281+ match self.advance() {
282282+ Some(got) if got == b => {}
283283+ _ => return Err(format!("expected literal '{}'", expected)),
284284+ }
285285+ }
286286+ Ok(value)
287287+ }
288288+}
289289+290290+/// Parse a JSON string into a `JsonValue`.
291291+pub fn parse(input: &str) -> Result<JsonValue, String> {
292292+ let mut parser = Parser::new(input);
293293+ let val = parser.parse_value()?;
294294+ parser.skip_ws();
295295+ if parser.pos != parser.bytes.len() {
296296+ return Err(format!("trailing data at pos {}", parser.pos));
297297+ }
298298+ Ok(val)
299299+}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parse_simple_object() {
        let parsed = parse(r#"{"a": 1, "b": "hello"}"#).unwrap();
        assert_eq!(parsed.get("a"), Some(&JsonValue::Number(1.0)));
        assert_eq!(parsed.get("b"), Some(&JsonValue::Str("hello".into())));
    }

    #[test]
    fn parse_array() {
        let parsed = parse(r#"[1, "two", true, null]"#).unwrap();
        let items = parsed.as_array().unwrap();
        assert_eq!(items.len(), 4);
        assert_eq!(items[2], JsonValue::Bool(true));
        assert_eq!(items[3], JsonValue::Null);
    }

    #[test]
    fn parse_nested() {
        let parsed = parse(r#"{"tests": [{"desc": "a"}]}"#).unwrap();
        assert_eq!(parsed.get("tests").unwrap().as_array().unwrap().len(), 1);
    }

    #[test]
    fn parse_string_escapes() {
        assert_eq!(
            parse(r#""hello\nworld""#).unwrap().as_str().unwrap(),
            "hello\nworld"
        );
    }

    #[test]
    fn parse_unicode_escape() {
        assert_eq!(parse(r#""\u0041""#).unwrap().as_str().unwrap(), "A");
    }
}
+31
crates/js/src/lib.rs
···11//! JavaScript engine — lexer, parser, bytecode, register VM, GC, JIT (AArch64).
22+33+use std::fmt;
/// An error produced by the JavaScript engine.
///
/// Human-readable rendering is provided by the `Display` impl in this
/// module.
#[derive(Debug)]
pub enum JsError {
    /// The engine does not yet support this feature or syntax.
    NotImplemented,
    /// A parse/syntax error in the source.
    SyntaxError(String),
    /// A runtime error during execution.
    RuntimeError(String),
}
1515+1616+impl fmt::Display for JsError {
1717+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1818+ match self {
1919+ JsError::NotImplemented => write!(f, "not implemented"),
2020+ JsError::SyntaxError(msg) => write!(f, "SyntaxError: {}", msg),
2121+ JsError::RuntimeError(msg) => write!(f, "RuntimeError: {}", msg),
2222+ }
2323+ }
2424+}
/// Evaluate a JavaScript source string and return the completion value.
///
/// This is a stub that always returns `NotImplemented`. The real
/// implementation will lex, parse, compile to bytecode, and execute.
///
/// # Errors
///
/// Currently always returns [`JsError::NotImplemented`].
pub fn evaluate(_source: &str) -> Result<(), JsError> {
    Err(JsError::NotImplemented)
}
+304
crates/js/tests/test262.rs
···11+//! Test262 test harness.
22+//!
33+//! Walks the Test262 test suite and runs each test case against our JavaScript
44+//! engine. Reports pass/fail/skip counts.
55+//!
66+//! Run with: `cargo test -p we-js --test test262`
/// Workspace root relative to the crate directory.
///
/// Resolved at compile time from `CARGO_MANIFEST_DIR`, i.e.
/// `crates/js/../../`, so it works regardless of the test's working
/// directory.
const WORKSPACE_ROOT: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/../../");
/// Metadata extracted from a Test262 test file's YAML frontmatter.
///
/// Note: only some fields are consumed by the harness so far
/// (`negative_phase_parse` in `run_test`; `is_async`/`is_module` in
/// `should_skip`) — the rest are collected for future use.
struct TestMeta {
    /// If true, the test expects a parse/early error.
    negative_phase_parse: bool,
    /// If true, the test expects a runtime error. (Collected but not
    /// yet used to check the error's phase.)
    negative_phase_runtime: bool,
    /// The expected error type for negative tests (e.g. "SyntaxError").
    /// (Collected but not yet matched against the actual error.)
    negative_type: Option<String>,
    /// If true, this is an async test.
    is_async: bool,
    /// If true, this test should be run as a module.
    is_module: bool,
    /// If true, skip the harness preamble.
    is_raw: bool,
    /// Required features.
    features: Vec<String>,
    /// Required harness includes.
    includes: Vec<String>,
}
impl TestMeta {
    /// Whether the harness should skip this test entirely.
    fn should_skip(&self) -> bool {
        // Skip async tests and module tests for now.
        self.is_async || self.is_module
    }
}
3737+3838+/// Parse the YAML-ish frontmatter from a Test262 test file.
3939+///
4040+/// The frontmatter is between `/*---` and `---*/`.
4141+fn parse_frontmatter(source: &str) -> TestMeta {
4242+ let mut meta = TestMeta {
4343+ negative_phase_parse: false,
4444+ negative_phase_runtime: false,
4545+ negative_type: None,
4646+ is_async: false,
4747+ is_module: false,
4848+ is_raw: false,
4949+ features: Vec::new(),
5050+ includes: Vec::new(),
5151+ };
5252+5353+ let start = match source.find("/*---") {
5454+ Some(i) => i + 5,
5555+ None => return meta,
5656+ };
5757+ let end = match source[start..].find("---*/") {
5858+ Some(i) => start + i,
5959+ None => return meta,
6060+ };
6161+ let yaml = &source[start..end];
6262+6363+ // Very simple line-by-line YAML extraction.
6464+ let mut in_negative = false;
6565+ let mut in_features = false;
6666+ let mut in_includes = false;
6767+ let mut in_flags = false;
6868+6969+ for line in yaml.lines() {
7070+ let trimmed = line.trim();
7171+7272+ // Detect top-level keys (not indented or with specific indent).
7373+ if !trimmed.is_empty() && !trimmed.starts_with('-') && !line.starts_with(' ') {
7474+ in_negative = false;
7575+ in_features = false;
7676+ in_includes = false;
7777+ in_flags = false;
7878+ }
7979+8080+ if trimmed.starts_with("negative:") {
8181+ in_negative = true;
8282+ continue;
8383+ }
8484+ if trimmed.starts_with("features:") {
8585+ in_features = true;
8686+ // Check for inline list: features: [a, b]
8787+ if let Some(rest) = trimmed.strip_prefix("features:") {
8888+ let rest = rest.trim();
8989+ if rest.starts_with('[') && rest.ends_with(']') {
9090+ let inner = &rest[1..rest.len() - 1];
9191+ for item in inner.split(',') {
9292+ let item = item.trim();
9393+ if !item.is_empty() {
9494+ meta.features.push(item.to_string());
9595+ }
9696+ }
9797+ in_features = false;
9898+ }
9999+ }
100100+ continue;
101101+ }
102102+ if trimmed.starts_with("includes:") {
103103+ in_includes = true;
104104+ if let Some(rest) = trimmed.strip_prefix("includes:") {
105105+ let rest = rest.trim();
106106+ if rest.starts_with('[') && rest.ends_with(']') {
107107+ let inner = &rest[1..rest.len() - 1];
108108+ for item in inner.split(',') {
109109+ let item = item.trim();
110110+ if !item.is_empty() {
111111+ meta.includes.push(item.to_string());
112112+ }
113113+ }
114114+ in_includes = false;
115115+ }
116116+ }
117117+ continue;
118118+ }
119119+ if trimmed.starts_with("flags:") {
120120+ in_flags = true;
121121+ if let Some(rest) = trimmed.strip_prefix("flags:") {
122122+ let rest = rest.trim();
123123+ if rest.starts_with('[') && rest.ends_with(']') {
124124+ let inner = &rest[1..rest.len() - 1];
125125+ for item in inner.split(',') {
126126+ let flag = item.trim();
127127+ match flag {
128128+ "async" => meta.is_async = true,
129129+ "module" => meta.is_module = true,
130130+ "raw" => meta.is_raw = true,
131131+ _ => {}
132132+ }
133133+ }
134134+ in_flags = false;
135135+ }
136136+ }
137137+ continue;
138138+ }
139139+140140+ // Handle list items under current key.
141141+ if let Some(item) = trimmed.strip_prefix("- ") {
142142+ if in_features {
143143+ meta.features.push(item.to_string());
144144+ } else if in_includes {
145145+ meta.includes.push(item.to_string());
146146+ } else if in_flags {
147147+ match item {
148148+ "async" => meta.is_async = true,
149149+ "module" => meta.is_module = true,
150150+ "raw" => meta.is_raw = true,
151151+ _ => {}
152152+ }
153153+ }
154154+ continue;
155155+ }
156156+157157+ // Handle sub-keys under negative.
158158+ if in_negative {
159159+ if let Some(rest) = trimmed.strip_prefix("phase:") {
160160+ let phase = rest.trim();
161161+ match phase {
162162+ "parse" | "early" => meta.negative_phase_parse = true,
163163+ "runtime" | "resolution" => meta.negative_phase_runtime = true,
164164+ _ => {}
165165+ }
166166+ }
167167+ if let Some(rest) = trimmed.strip_prefix("type:") {
168168+ meta.negative_type = Some(rest.trim().to_string());
169169+ }
170170+ }
171171+ }
172172+173173+ meta
174174+}
/// Recursively collect all `.js` test files under `dir`, depth-first
/// with entries sorted by file name at each level.
///
/// `_FIXTURE` files (test helpers, not tests themselves) and
/// non-`.js` files are excluded; unreadable directories are silently
/// skipped.
fn collect_test_files(dir: &std::path::Path, files: &mut Vec<std::path::PathBuf>) {
    let entries = match std::fs::read_dir(dir) {
        Ok(iter) => iter,
        Err(_) => return,
    };
    let mut children: Vec<_> = entries.filter_map(Result::ok).collect();
    children.sort_by_key(|c| c.file_name());

    for child in children {
        let path = child.path();
        if path.is_dir() {
            collect_test_files(&path, files);
            continue;
        }
        if !path.extension().map_or(false, |ext| ext == "js") {
            continue;
        }
        let is_fixture = path
            .file_name()
            .unwrap()
            .to_string_lossy()
            .contains("_FIXTURE");
        if !is_fixture {
            files.push(path);
        }
    }
}
198198+199199+/// Run a single Test262 test file. Returns (pass, fail, skip).
200200+fn run_test(path: &std::path::Path) -> (usize, usize, usize) {
201201+ let source = match std::fs::read_to_string(path) {
202202+ Ok(s) => s,
203203+ Err(_) => return (0, 0, 1),
204204+ };
205205+206206+ let meta = parse_frontmatter(&source);
207207+208208+ if meta.should_skip() {
209209+ return (0, 0, 1);
210210+ }
211211+212212+ // For negative parse tests, if our evaluate returns an error, that's a pass.
213213+ // For positive tests, evaluate should succeed (return Ok).
214214+ let result = we_js::evaluate(&source);
215215+216216+ if meta.negative_phase_parse {
217217+ // We expect a parse error. If our engine returns any error, count as pass.
218218+ match result {
219219+ Err(_) => (1, 0, 0),
220220+ Ok(()) => (0, 1, 0),
221221+ }
222222+ } else {
223223+ // We expect success.
224224+ match result {
225225+ Ok(()) => (1, 0, 0),
226226+ Err(_) => (0, 1, 0),
227227+ }
228228+ }
229229+}
/// Harness entry point: walk `tests/test262/test/language`, run every
/// test file, and print per-group plus total pass/fail/skip counts.
///
/// Reports progress without failing the suite so CI can always run it,
/// even before the engine is implemented.
#[test]
fn test262_language_tests() {
    let test_dir = std::path::PathBuf::from(WORKSPACE_ROOT).join("tests/test262/test/language");

    if !test_dir.exists() {
        eprintln!(
            "test262 submodule not checked out at {}",
            test_dir.display()
        );
        eprintln!("Run: git submodule update --init tests/test262");
        return;
    }

    let mut files = Vec::new();
    collect_test_files(&test_dir, &mut files);

    let (mut total_pass, mut total_fail, mut total_skip) = (0, 0, 0);

    // Results are reported per top-level subdirectory (e.g.
    // "expressions", "literals"); files arrive sorted, so each group is
    // a contiguous run.
    let mut group_name = String::new();
    let (mut group_pass, mut group_fail, mut group_skip) = (0, 0, 0);

    for path in &files {
        let rel = path.strip_prefix(&test_dir).unwrap_or(path);
        let group = rel
            .components()
            .next()
            .map(|c| c.as_os_str().to_string_lossy().to_string())
            .unwrap_or_default();

        if group != group_name {
            // Flush the previous group before starting a new one.
            if !group_name.is_empty() {
                eprintln!(
                    " {}: {} pass, {} fail, {} skip",
                    group_name, group_pass, group_fail, group_skip
                );
            }
            group_name = group;
            group_pass = 0;
            group_fail = 0;
            group_skip = 0;
        }

        let (p, f, s) = run_test(path);
        group_pass += p;
        group_fail += f;
        group_skip += s;
        total_pass += p;
        total_fail += f;
        total_skip += s;
    }

    // Flush the final group.
    if !group_name.is_empty() {
        eprintln!(
            " {}: {} pass, {} fail, {} skip",
            group_name, group_pass, group_fail, group_skip
        );
    }

    eprintln!();
    eprintln!(
        "Test262 language totals: {} pass, {} fail, {} skip ({} total)",
        total_pass,
        total_fail,
        total_skip,
        total_pass + total_fail + total_skip
    );
}