");
assert_eq!(
tokens,
vec![Token::StartTag {
name: "div".to_string(),
attributes: vec![("id".to_string(), "main".to_string())],
self_closing: false,
}]
);
}
#[test]
fn tag_with_unquoted_attribute() {
    // An unquoted attribute value runs until whitespace or '>'.
    // (Input literal restored: it had been stripped from the source.)
    let tokens = tokenize("<input type=text>");
    assert_eq!(
        tokens,
        vec![Token::StartTag {
            name: "input".to_string(),
            attributes: vec![("type".to_string(), "text".to_string())],
            self_closing: false,
        }]
    );
}
#[test]
fn comment() {
    // Comment data is everything between "<!--" and "-->", including the
    // surrounding spaces.
    let tokens = tokenize("<!-- comment -->");
    assert_eq!(tokens, vec![Token::Comment(" comment ".to_string())]);
}
#[test]
fn empty_comment() {
    // "<!---->" is a well-formed comment with empty data.
    let tokens = tokenize("<!---->");
    assert_eq!(tokens, vec![Token::Comment("".to_string())]);
}
#[test]
fn doctype_html() {
    // The standard HTML5 doctype: name only, no public/system ids.
    let tokens = tokenize("<!DOCTYPE html>");
    assert_eq!(
        tokens,
        vec![Token::Doctype {
            name: Some("html".to_string()),
            public_id: None,
            system_id: None,
            force_quirks: false,
        }]
    );
}
#[test]
fn doctype_case_insensitive() {
    // Both the DOCTYPE keyword and the doctype name are matched
    // case-insensitively; the name is emitted lowercased.
    let tokens = tokenize("<!doctype HTML>");
    assert_eq!(
        tokens,
        vec![Token::Doctype {
            name: Some("html".to_string()),
            public_id: None,
            system_id: None,
            force_quirks: false,
        }]
    );
}
#[test]
fn char_ref_named() {
    // Named character references decode to their Unicode characters.
    // (Input literal restored: the refs had been decoded in place, which
    // also made the previous line invalid Rust.)
    let tokens = tokenize("&amp;&lt;&gt;&quot;");
    assert_eq!(tokens, vec![Token::Character("&<>\"".to_string())]);
}
#[test]
fn char_ref_numeric_decimal() {
    // "&#65;" is decimal 65 == 'A'.
    let tokens = tokenize("&#65;");
    assert_eq!(tokens, vec![Token::Character("A".to_string())]);
}
#[test]
fn char_ref_numeric_hex() {
    // "&#x41;" is hex 0x41 == 'A'.
    let tokens = tokenize("&#x41;");
    assert_eq!(tokens, vec![Token::Character("A".to_string())]);
}
#[test]
fn char_ref_numeric_hex_uppercase() {
    // The hex marker may be an uppercase 'X' as well.
    let tokens = tokenize("&#X41;");
    assert_eq!(tokens, vec![Token::Character("A".to_string())]);
}
#[test]
fn full_html_document() {
    // End-to-end check: a minimal document produces the expected interleaving
    // of start tags, character data, and end tags.
    let tokens =
        tokenize("<html><head><title>Test</title></head><body><p>Hello</p></body></html>");
    assert_eq!(
        tokens,
        vec![
            Token::StartTag {
                name: "html".to_string(),
                attributes: vec![],
                self_closing: false,
            },
            Token::StartTag {
                name: "head".to_string(),
                attributes: vec![],
                self_closing: false,
            },
            Token::StartTag {
                name: "title".to_string(),
                attributes: vec![],
                self_closing: false,
            },
            Token::Character("Test".to_string()),
            Token::EndTag {
                name: "title".to_string(),
            },
            Token::EndTag {
                name: "head".to_string(),
            },
            Token::StartTag {
                name: "body".to_string(),
                attributes: vec![],
                self_closing: false,
            },
            Token::StartTag {
                name: "p".to_string(),
                attributes: vec![],
                self_closing: false,
            },
            Token::Character("Hello".to_string()),
            Token::EndTag {
                name: "p".to_string(),
            },
            Token::EndTag {
                name: "body".to_string(),
            },
            Token::EndTag {
                name: "html".to_string(),
            },
        ]
    );
}
#[test]
fn uppercase_tag_names_lowercased() {
    // Tag names are ASCII-lowercased in both start and end tags.
    let tokens = tokenize("<DIV></DIV>");
    assert_eq!(
        tokens,
        vec![
            Token::StartTag {
                name: "div".to_string(),
                attributes: vec![],
                self_closing: false,
            },
            Token::EndTag {
                name: "div".to_string(),
            },
        ]
    );
}
#[test]
fn uppercase_attribute_names_lowercased() {
    // Attribute names are ASCII-lowercased; values keep their case.
    let tokens = tokenize(r#"<div CLASS="x">"#);
    assert_eq!(
        tokens,
        vec![Token::StartTag {
            name: "div".to_string(),
            attributes: vec![("class".to_string(), "x".to_string())],
            self_closing: false,
        }]
    );
}
#[test]
fn duplicate_attributes_first_wins() {
    // A repeated attribute name is a parse error; the first occurrence is
    // kept and the duplicate dropped.
    let tokens = tokenize(r#"<div class="a" class="b">"#);
    assert_eq!(
        tokens,
        vec![Token::StartTag {
            name: "div".to_string(),
            attributes: vec![("class".to_string(), "a".to_string())],
            self_closing: false,
        }]
    );
}
#[test]
fn char_ref_in_attribute() {
    // Character references are also decoded inside attribute values.
    let tokens = tokenize(r#"<a href="?a=1&amp;b=2">"#);
    assert_eq!(
        tokens,
        vec![Token::StartTag {
            name: "a".to_string(),
            attributes: vec![("href".to_string(), "?a=1&b=2".to_string())],
            self_closing: false,
        }]
    );
}
#[test]
fn multiple_attributes() {
    // Attributes are collected in source order.
    let tokens = tokenize(r#"<input type="text" name="foo" value="bar">"#);
    assert_eq!(
        tokens,
        vec![Token::StartTag {
            name: "input".to_string(),
            attributes: vec![
                ("type".to_string(), "text".to_string()),
                ("name".to_string(), "foo".to_string()),
                ("value".to_string(), "bar".to_string()),
            ],
            self_closing: false,
        }]
    );
}
#[test]
fn boolean_attribute() {
    // A valueless (boolean) attribute gets an empty-string value.
    let tokens = tokenize("<input disabled>");
    assert_eq!(
        tokens,
        vec![Token::StartTag {
            name: "input".to_string(),
            attributes: vec![("disabled".to_string(), "".to_string())],
            self_closing: false,
        }]
    );
}
#[test]
fn mixed_content() {
    // Text on either side of a comment is emitted as separate character
    // tokens around the comment token.
    let tokens = tokenize("Hello <!-- comment --> World");
    assert_eq!(
        tokens,
        vec![
            Token::Character("Hello ".to_string()),
            Token::Comment(" comment ".to_string()),
            Token::Character(" World".to_string()),
        ]
    );
}
#[test]
fn doctype_with_public_id() {
    // A legacy doctype carrying both a PUBLIC and a SYSTEM identifier.
    let tokens = tokenize(
        r#"<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">"#,
    );
    assert_eq!(
        tokens,
        vec![Token::Doctype {
            name: Some("html".to_string()),
            public_id: Some("-//W3C//DTD XHTML 1.0 Strict//EN".to_string()),
            system_id: Some("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd".to_string()),
            force_quirks: false,
        }]
    );
}
#[test]
fn null_in_text() {
    // A NUL byte in character data is replaced with U+FFFD.
    assert_eq!(
        tokenize("a\0b"),
        vec![Token::Character("a\u{FFFD}b".to_string())]
    );
}
#[test]
fn windows_1252_numeric_refs() {
    // Numeric references in the C1 control range are remapped per the
    // spec's Windows-1252 table: &#128; should map to the Euro sign.
    let tokens = tokenize("&#128;");
    assert_eq!(tokens, vec![Token::Character("\u{20AC}".to_string())]);
}
#[test]
fn attribute_with_empty_value() {
    // An explicitly empty quoted value is preserved as "".
    let tokens = tokenize(r#"<div class="">"#);
    assert_eq!(
        tokens,
        vec![Token::StartTag {
            name: "div".to_string(),
            attributes: vec![("class".to_string(), "".to_string())],
            self_closing: false,
        }]
    );
}
#[test]
fn adjacent_tags() {
    // Back-to-back elements with no intervening text produce only tag
    // tokens, in order.
    let tokens = tokenize("<b></b><i></i>");
    assert_eq!(
        tokens,
        vec![
            Token::StartTag {
                name: "b".to_string(),
                attributes: vec![],
                self_closing: false,
            },
            Token::EndTag {
                name: "b".to_string(),
            },
            Token::StartTag {
                name: "i".to_string(),
                attributes: vec![],
                self_closing: false,
            },
            Token::EndTag {
                name: "i".to_string(),
            },
        ]
    );
}
#[test]
fn newlines_in_text() {
    // Newlines are ordinary character data and are preserved verbatim.
    let input = "line1\nline2\nline3";
    assert_eq!(tokenize(input), vec![Token::Character(input.to_string())]);
}
#[test]
fn self_closing_with_attribute() {
    // A trailing '/' before '>' sets the self_closing flag on the start tag.
    let tokens = tokenize(r#"<img src="test.png"/>"#);
    assert_eq!(
        tokens,
        vec![Token::StartTag {
            name: "img".to_string(),
            attributes: vec![("src".to_string(), "test.png".to_string())],
            self_closing: true,
        }]
    );
}
#[test]
fn less_than_in_text_not_tag() {
    // A bare '<' not followed by an ASCII letter is emitted as text.
    assert_eq!(tokenize("1 < 2"), vec![Token::Character("1 < 2".to_string())]);
}
#[test]
fn ampersand_not_entity() {
    // An '&' that does not begin a character reference is plain text.
    assert_eq!(tokenize("a & b"), vec![Token::Character("a & b".to_string())]);
}
#[test]
fn cdata_in_html_becomes_comment() {
    // In HTML (non-foreign) context, CDATA is a parse error → bogus comment
    // whose data is everything after "<!" up to (not including) the '>'.
    let tokens = tokenize("<![CDATA[hello]]>");
    assert_eq!(tokens, vec![Token::Comment("[CDATA[hello]]".to_string())]);
}
}