// web engine - experimental web browser
// at poly1305-h4-fix 981 lines 27 kB view raw
//! CSS tokenizer per CSS Syntax Module Level 3 §4.
//!
//! Consumes a stream of code points and produces CSS tokens.

/// A CSS token produced by the tokenizer.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// An identifier such as `color` or `--custom`.
    Ident(String),
    /// A name immediately followed by `(`; the payload is the name without the parenthesis.
    Function(String),
    /// `@` followed by an identifier; the payload is the name without the `@`.
    AtKeyword(String),
    /// `#` followed by a name; the payload is the name without the `#`.
    Hash(String, HashType),
    /// A quoted string with escapes resolved and the quotes removed.
    String(String),
    /// A quoted string that was cut short by an unescaped newline.
    BadString,
    /// The contents of an unquoted `url(...)`.
    Url(String),
    /// An unquoted `url(...)` whose contents were malformed.
    BadUrl,
    /// A number without a unit.
    Number(f64, NumericType),
    /// A number immediately followed by `%`.
    Percentage(f64),
    /// A number immediately followed by an identifier (the unit).
    Dimension(f64, NumericType, String),
    /// A run of one or more whitespace code points.
    Whitespace,
    /// `:`
    Colon,
    /// `;`
    Semicolon,
    /// `,`
    Comma,
    /// `[`
    LeftBracket,
    /// `]`
    RightBracket,
    /// `(`
    LeftParen,
    /// `)`
    RightParen,
    /// `{`
    LeftBrace,
    /// `}`
    RightBrace,
    /// Any single code point that does not form another token.
    Delim(char),
    /// `<!--`
    Cdo,
    /// `-->`
    Cdc,
    /// End of input.
    Eof,
}

/// Whether a `<hash-token>` is "id" (valid identifier) or "unrestricted".
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HashType {
    /// The name after `#` would start an identifier.
    Id,
    /// The name after `#` would not start an identifier (e.g. `#123`).
    Unrestricted,
}

/// Whether a number is integer or number (float).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NumericType {
    /// The representation had neither a fractional part nor an exponent.
    Integer,
    /// The representation had a fractional part and/or an exponent.
    Number,
}

/// CSS tokenizer state machine.
#[derive(Debug, Clone)]
pub struct Tokenizer {
    /// Preprocessed input code points.
    input: Vec<char>,
    /// Index into `input` of the next unconsumed code point.
    pos: usize,
}

impl Tokenizer {
    /// Creates a tokenizer over `input`.
    ///
    /// The input is preprocessed (§3.3): `\r\n`, `\r` and form feed each
    /// become a single `\n`, and NUL becomes U+FFFD.
    pub fn new(input: &str) -> Self {
        let mut chars = Vec::with_capacity(input.len());
        let mut raw = input.chars().peekable();
        while let Some(c) = raw.next() {
            match c {
                '\r' => {
                    // A CRLF pair collapses into one newline.
                    if raw.peek() == Some(&'\n') {
                        raw.next();
                    }
                    chars.push('\n');
                }
                '\x0C' => chars.push('\n'),
                '\0' => chars.push('\u{FFFD}'),
                other => chars.push(other),
            }
        }
        Self {
            input: chars,
            pos: 0,
        }
    }

    /// Tokenize the entire input into a list of tokens (excluding EOF).
    pub fn tokenize(input: &str) -> Vec<Token> {
        let mut tokenizer = Self::new(input);
        let mut tokens = Vec::new();
        loop {
            match tokenizer.next_token() {
                Token::Eof => break,
                token => tokens.push(token),
            }
        }
        tokens
    }

    /// Consume and return the next token.
    ///
    /// Comments preceding the token are skipped. Once the input is
    /// exhausted every call returns [`Token::Eof`].
    pub fn next_token(&mut self) -> Token {
        self.consume_comments();
        self.consume_token()
    }

    /// Returns the next code point without consuming it, or `'\0'` at end of input.
    ///
    /// `'\0'` is usable as an end-of-input sentinel because preprocessing in
    /// [`Tokenizer::new`] replaces every NUL in the input with U+FFFD.
    fn peek(&self) -> char {
        self.peek_at(0)
    }

    /// Returns the code point `offset` positions ahead, or `'\0'` past the end.
    fn peek_at(&self, offset: usize) -> char {
        self.input.get(self.pos + offset).copied().unwrap_or('\0')
    }

    /// Consumes and returns the next code point; at end of input returns
    /// `'\0'` and leaves the position unchanged.
    fn advance(&mut self) -> char {
        let c = self.peek();
        if !self.is_eof() {
            self.pos += 1;
        }
        c
    }

    /// Consumes one code point and returns `token`.
    fn emit(&mut self, token: Token) -> Token {
        self.advance();
        token
    }

    /// Returns `true` once every code point has been consumed.
    fn is_eof(&self) -> bool {
        self.pos >= self.input.len()
    }

    /// Skips any number of consecutive `/* ... */` comments.
    ///
    /// An unterminated comment swallows the rest of the input.
    fn consume_comments(&mut self) {
        while self.peek() == '/' && self.peek_at(1) == '*' {
            self.pos += 2;
            loop {
                if self.is_eof() {
                    return;
                }
                if self.peek() == '*' && self.peek_at(1) == '/' {
                    self.pos += 2;
                    break;
                }
                self.pos += 1;
            }
        }
    }

    /// Consumes a single token, dispatching on the next code point.
    fn consume_token(&mut self) -> Token {
        if self.is_eof() {
            return Token::Eof;
        }

        let c = self.peek();
        match c {
            _ if is_whitespace(c) => {
                self.consume_whitespace();
                Token::Whitespace
            }
            '"' | '\'' => self.consume_string(c),
            '#' => {
                self.advance();
                if is_name_char(self.peek()) || self.starts_valid_escape() {
                    // The type flag must be decided before the name is consumed.
                    let hash_type = if self.would_start_ident() {
                        HashType::Id
                    } else {
                        HashType::Unrestricted
                    };
                    Token::Hash(self.consume_name(), hash_type)
                } else {
                    Token::Delim('#')
                }
            }
            // Signs and dots only begin a number when digits follow; otherwise
            // they fall through to the more specific arms or to `Delim`.
            '+' | '-' | '.' if self.starts_number() => self.consume_numeric(),
            _ if c.is_ascii_digit() => self.consume_numeric(),
            '-' if self.peek_at(1) == '-' && self.peek_at(2) == '>' => {
                self.pos += 3;
                Token::Cdc
            }
            '-' if self.would_start_ident() => self.consume_ident_like(),
            '<' if self.peek_at(1) == '!' && self.peek_at(2) == '-' && self.peek_at(3) == '-' => {
                self.pos += 4;
                Token::Cdo
            }
            '@' => {
                self.advance();
                if self.would_start_ident() {
                    Token::AtKeyword(self.consume_name())
                } else {
                    Token::Delim('@')
                }
            }
            '\\' if self.starts_valid_escape() => self.consume_ident_like(),
            _ if is_name_start_char(c) => self.consume_ident_like(),
            '(' => self.emit(Token::LeftParen),
            ')' => self.emit(Token::RightParen),
            ',' => self.emit(Token::Comma),
            ':' => self.emit(Token::Colon),
            ';' => self.emit(Token::Semicolon),
            '[' => self.emit(Token::LeftBracket),
            ']' => self.emit(Token::RightBracket),
            '{' => self.emit(Token::LeftBrace),
            '}' => self.emit(Token::RightBrace),
            _ => self.emit(Token::Delim(c)),
        }
    }

    /// Consumes a maximal run of whitespace.
    fn consume_whitespace(&mut self) {
        while is_whitespace(self.peek()) {
            self.advance();
        }
    }

    /// Consumes a quoted string whose opening quote (`ending`) is the next code point.
    ///
    /// Returns [`Token::String`] on the closing quote or at end of input, and
    /// [`Token::BadString`] on an unescaped newline (which is left unconsumed).
    fn consume_string(&mut self, ending: char) -> Token {
        self.advance(); // opening quote
        let mut value = String::new();
        while !self.is_eof() {
            match self.advance() {
                c if c == ending => return Token::String(value),
                '\n' => {
                    // Reconsume the newline so it is tokenized as whitespace.
                    self.pos -= 1;
                    return Token::BadString;
                }
                // Backslash at end of input contributes nothing.
                '\\' if self.is_eof() => {}
                // Backslash-newline is a line continuation.
                '\\' if self.peek() == '\n' => {
                    self.advance();
                }
                '\\' => value.push(self.consume_escaped_char()),
                c => value.push(c),
            }
        }
        Token::String(value)
    }

    /// Consumes an escaped code point; the backslash must already be consumed.
    ///
    /// Up to six hex digits form a code point, optionally followed by one
    /// whitespace code point that is consumed as well. Zero, surrogates and
    /// values above U+10FFFF yield U+FFFD, as does end of input. Any other
    /// code point escapes to itself.
    fn consume_escaped_char(&mut self) -> char {
        if self.is_eof() {
            return '\u{FFFD}';
        }
        let c = self.advance();
        let mut code_point = match c.to_digit(16) {
            Some(digit) => digit,
            None => return c,
        };
        // At most five further digits: six hex digits always fit in a u32.
        for _ in 0..5 {
            match self.peek().to_digit(16) {
                Some(digit) => {
                    code_point = code_point * 16 + digit;
                    self.advance();
                }
                None => break,
            }
        }
        if is_whitespace(self.peek()) {
            self.advance();
        }
        // `from_u32` already rejects surrogates and out-of-range values.
        char::from_u32(code_point)
            .filter(|&ch| ch != '\0')
            .unwrap_or('\u{FFFD}')
    }

    /// Returns `true` if the next two code points are a valid escape:
    /// a backslash not followed by a newline.
    fn starts_valid_escape(&self) -> bool {
        self.starts_valid_escape_at(0)
    }

    /// Same as [`Tokenizer::starts_valid_escape`], looking `offset` code points ahead.
    fn starts_valid_escape_at(&self, offset: usize) -> bool {
        self.peek_at(offset) == '\\' && self.peek_at(offset + 1) != '\n'
    }

    /// Check if the next chars would start an identifier (§4.3.9).
    fn would_start_ident(&self) -> bool {
        self.would_start_ident_at(0)
    }

    /// Same as [`Tokenizer::would_start_ident`], looking `offset` code points ahead.
    fn would_start_ident_at(&self, offset: usize) -> bool {
        match self.peek_at(offset) {
            '-' => {
                let next = self.peek_at(offset + 1);
                is_name_start_char(next)
                    || next == '-'
                    || self.starts_valid_escape_at(offset + 1)
            }
            '\\' => self.starts_valid_escape_at(offset),
            c => is_name_start_char(c),
        }
    }

    /// Check if the next chars would start a number (§4.3.10).
    fn starts_number(&self) -> bool {
        match self.peek() {
            '+' | '-' => {
                let next = self.peek_at(1);
                next.is_ascii_digit() || (next == '.' && self.peek_at(2).is_ascii_digit())
            }
            '.' => self.peek_at(1).is_ascii_digit(),
            c => c.is_ascii_digit(),
        }
    }

    /// Consumes a maximal run of name code points and valid escapes.
    fn consume_name(&mut self) -> String {
        let mut name = String::new();
        while !self.is_eof() {
            let c = self.peek();
            if is_name_char(c) {
                name.push(c);
                self.advance();
            } else if self.starts_valid_escape() {
                self.advance(); // backslash
                name.push(self.consume_escaped_char());
            } else {
                break;
            }
        }
        name
    }

    /// Consumes a number and classifies it as a dimension, percentage or plain number.
    fn consume_numeric(&mut self) -> Token {
        let (value, num_type) = self.consume_number();

        if self.would_start_ident() {
            let unit = self.consume_name();
            return Token::Dimension(value, num_type, unit);
        }

        if self.peek() == '%' {
            self.advance();
            return Token::Percentage(value);
        }

        Token::Number(value, num_type)
    }

    /// Appends a maximal run of ASCII digits to `repr`.
    fn consume_digits(&mut self, repr: &mut String) {
        while self.peek().is_ascii_digit() {
            repr.push(self.advance());
        }
    }

    /// Consumes the textual representation of a number and converts it to `f64`.
    ///
    /// The type is [`NumericType::Integer`] unless a fractional part or an
    /// exponent was consumed. A representation that fails to parse yields `0.0`.
    fn consume_number(&mut self) -> (f64, NumericType) {
        let mut repr = String::new();
        let mut num_type = NumericType::Integer;

        // Sign
        if matches!(self.peek(), '+' | '-') {
            repr.push(self.advance());
        }

        // Integer part
        self.consume_digits(&mut repr);

        // Fractional part
        if self.peek() == '.' && self.peek_at(1).is_ascii_digit() {
            repr.push(self.advance()); // '.'
            num_type = NumericType::Number;
            self.consume_digits(&mut repr);
        }

        // Exponent: only when digits follow, otherwise `e` begins a unit.
        if matches!(self.peek(), 'e' | 'E') {
            let next = self.peek_at(1);
            let signed_digits = matches!(next, '+' | '-') && self.peek_at(2).is_ascii_digit();
            if next.is_ascii_digit() || signed_digits {
                repr.push(self.advance()); // 'e'/'E'
                num_type = NumericType::Number;
                if matches!(self.peek(), '+' | '-') {
                    repr.push(self.advance());
                }
                self.consume_digits(&mut repr);
            }
        }

        let value = repr.parse::<f64>().unwrap_or(0.0);
        (value, num_type)
    }

    /// Consumes a name and decides between ident, function and url tokens.
    fn consume_ident_like(&mut self) -> Token {
        let name = self.consume_name();

        if self.peek() != '(' {
            return Token::Ident(name);
        }
        self.advance(); // '('

        if name.eq_ignore_ascii_case("url") {
            let after_paren = self.pos;
            self.consume_whitespace();
            if matches!(self.peek(), '"' | '\'') {
                // url("...") is a plain function token; rewind so the skipped
                // whitespace is tokenized normally and the parser sees the string.
                self.pos = after_paren;
                return Token::Function(name);
            }
            return self.consume_url();
        }

        Token::Function(name)
    }

    /// Consumes the body of an unquoted `url(`, whose `(` is already consumed.
    ///
    /// Returns [`Token::Url`] on `)` or at end of input, and [`Token::BadUrl`]
    /// on interior whitespace, quotes, `(`, an invalid escape or a
    /// non-printable code point.
    fn consume_url(&mut self) -> Token {
        let mut value = String::new();
        self.consume_whitespace();

        while !self.is_eof() {
            match self.peek() {
                ')' => {
                    self.advance();
                    return Token::Url(value);
                }
                c if is_whitespace(c) => {
                    self.consume_whitespace();
                    // Trailing whitespace is only allowed before `)` or end of
                    // input, both of which the next iteration handles.
                    if !self.is_eof() && self.peek() != ')' {
                        return self.bad_url();
                    }
                }
                '"' | '\'' | '(' => return self.bad_url(),
                '\\' if self.starts_valid_escape() => {
                    self.advance();
                    value.push(self.consume_escaped_char());
                }
                '\\' => return self.bad_url(),
                c if is_non_printable(c) => return self.bad_url(),
                _ => value.push(self.advance()),
            }
        }
        Token::Url(value)
    }

    /// Discards the rest of a malformed url and returns [`Token::BadUrl`].
    fn bad_url(&mut self) -> Token {
        self.consume_bad_url_remnants();
        Token::BadUrl
    }

    /// Skips input up to and including the next unescaped `)`, or to end of input.
    fn consume_bad_url_remnants(&mut self) {
        while !self.is_eof() {
            match self.advance() {
                ')' => return,
                // Skip the escaped code point so that `\)` does not end the url.
                '\\' if self.peek() != '\n' => {
                    self.advance();
                }
                _ => {}
            }
        }
    }
}

/// Whitespace after preprocessing: space, tab or newline.
fn is_whitespace(c: char) -> bool {
    matches!(c, ' ' | '\t' | '\n')
}

/// ASCII letter, underscore, or any non-ASCII code point.
fn is_name_start_char(c: char) -> bool {
    c.is_ascii_alphabetic() || !c.is_ascii() || c == '_'
}

/// A name-start code point, an ASCII digit, or `-`.
fn is_name_char(c: char) -> bool {
    is_name_start_char(c) || c.is_ascii_digit() || c == '-'
}

/// Control code points that are not allowed inside an unquoted url.
fn is_non_printable(c: char) -> bool {
    matches!(c, '\x00'..='\x08' | '\x0B' | '\x0E'..='\x1F' | '\x7F')
}

#[cfg(test)]
mod tests {
    use super::*;

    fn tokenize(input: &str) -> Vec<Token> {
        Tokenizer::tokenize(input)
    }

    #[test]
    fn test_empty() {
        assert_eq!(tokenize(""), Vec::<Token>::new());
    }

    #[test]
    fn test_whitespace() {
        assert_eq!(tokenize(" \t\n "), vec![Token::Whitespace]);
    }

    #[test]
    fn test_ident() {
        assert_eq!(tokenize("color"), vec![Token::Ident("color".into())]);
        assert_eq!(tokenize("div"), vec![Token::Ident("div".into())]);
        assert_eq!(tokenize("--custom"), vec![Token::Ident("--custom".into())]);
        assert_eq!(tokenize("_foo"), vec![Token::Ident("_foo".into())]);
        assert_eq!(
            tokenize("-webkit-foo"),
            vec![Token::Ident("-webkit-foo".into())]
        );
    }

    #[test]
    fn test_function() {
        assert_eq!(tokenize("rgb("), vec![Token::Function("rgb".into())]);
        let tokens = tokenize("rgb(255, 0, 0)");
        assert_eq!(tokens[0], Token::Function("rgb".into()));
        assert_eq!(tokenize("calc("), vec![Token::Function("calc".into())]);
    }

    #[test]
    fn test_at_keyword() {
        assert_eq!(tokenize("@media"), vec![Token::AtKeyword("media".into())]);
        assert_eq!(tokenize("@import"), vec![Token::AtKeyword("import".into())]);
    }

    #[test]
    fn test_hash() {
        assert_eq!(
            tokenize("#id"),
            vec![Token::Hash("id".into(), HashType::Id)]
        );
        assert_eq!(
            tokenize("#fff"),
            vec![Token::Hash("fff".into(), HashType::Id)]
        );
        assert_eq!(
            tokenize("#123"),
            vec![Token::Hash("123".into(), HashType::Unrestricted)]
        );
    }

    #[test]
    fn test_string_double_quote() {
        assert_eq!(tokenize(r#""hello""#), vec![Token::String("hello".into())]);
    }

    #[test]
    fn test_string_single_quote() {
        assert_eq!(tokenize("'world'"), vec![Token::String("world".into())]);
    }

    #[test]
    fn test_string_escape() {
        assert_eq!(tokenize(r#""he\6Co""#), vec![Token::String("helo".into())]);
    }

    #[test]
    fn test_string_newline_escape() {
        assert_eq!(
            tokenize("\"line\\\ncontinued\""),
            vec![Token::String("linecontinued".into())]
        );
    }

    #[test]
    fn test_bad_string() {
        let tokens = tokenize("\"unterminated\n");
        assert_eq!(tokens[0], Token::BadString);
    }

    #[test]
    fn test_number_integer() {
        assert_eq!(
            tokenize("42"),
            vec![Token::Number(42.0, NumericType::Integer)]
        );
    }

    #[test]
    fn test_number_float() {
        assert_eq!(
            tokenize("3.14"),
            vec![Token::Number(3.14, NumericType::Number)]
        );
    }

    #[test]
    fn test_number_signed() {
        assert_eq!(
            tokenize("+10"),
            vec![Token::Number(10.0, NumericType::Integer)]
        );
        assert_eq!(
            tokenize("-5"),
            vec![Token::Number(-5.0, NumericType::Integer)]
        );
    }

    #[test]
    fn test_number_exponent() {
        assert_eq!(
            tokenize("1e2"),
            vec![Token::Number(100.0, NumericType::Number)]
        );
        assert_eq!(
            tokenize("2E+3"),
            vec![Token::Number(2000.0, NumericType::Number)]
        );
    }

    #[test]
    fn test_percentage() {
        assert_eq!(tokenize("50%"), vec![Token::Percentage(50.0)]);
    }

    #[test]
    fn test_dimension() {
        assert_eq!(
            tokenize("10px"),
            vec![Token::Dimension(10.0, NumericType::Integer, "px".into())]
        );
        assert_eq!(
            tokenize("2em"),
            vec![Token::Dimension(2.0, NumericType::Integer, "em".into())]
        );
        assert_eq!(
            tokenize("1.5rem"),
            vec![Token::Dimension(1.5, NumericType::Number, "rem".into())]
        );
    }

    #[test]
    fn test_delimiters() {
        assert_eq!(tokenize(":"), vec![Token::Colon]);
        assert_eq!(tokenize(";"), vec![Token::Semicolon]);
        assert_eq!(tokenize(","), vec![Token::Comma]);
        assert_eq!(tokenize("("), vec![Token::LeftParen]);
        assert_eq!(tokenize(")"), vec![Token::RightParen]);
        assert_eq!(tokenize("["), vec![Token::LeftBracket]);
        assert_eq!(tokenize("]"), vec![Token::RightBracket]);
        assert_eq!(tokenize("{"), vec![Token::LeftBrace]);
        assert_eq!(tokenize("}"), vec![Token::RightBrace]);
    }

    #[test]
    fn test_delim_tokens() {
        assert_eq!(tokenize("."), vec![Token::Delim('.')]);
        assert_eq!(tokenize(">"), vec![Token::Delim('>')]);
        assert_eq!(tokenize("+"), vec![Token::Delim('+')]);
        assert_eq!(tokenize("~"), vec![Token::Delim('~')]);
        assert_eq!(tokenize("*"), vec![Token::Delim('*')]);
    }

    #[test]
    fn test_cdo_cdc() {
        assert_eq!(tokenize("<!--"), vec![Token::Cdo]);
        assert_eq!(tokenize("-->"), vec![Token::Cdc]);
    }

    #[test]
    fn test_comments() {
        assert_eq!(
            tokenize("/* comment */color"),
            vec![Token::Ident("color".into())]
        );
        assert_eq!(
            tokenize("a/* x */b"),
            vec![Token::Ident("a".into()), Token::Ident("b".into())]
        );
    }

    #[test]
    fn test_unclosed_comment() {
        assert_eq!(tokenize("/* unclosed"), Vec::<Token>::new());
    }

    #[test]
    fn test_url_token() {
        assert_eq!(
            tokenize("url(https://example.com)"),
            vec![Token::Url("https://example.com".into())]
        );
    }

    #[test]
    fn test_url_with_whitespace() {
        assert_eq!(
            tokenize("url( foo.png )"),
            vec![Token::Url("foo.png".into())]
        );
    }

    #[test]
    fn test_url_function_with_quotes() {
        let tokens = tokenize("url(\"foo.png\")");
        assert_eq!(tokens[0], Token::Function("url".into()));
    }

    #[test]
    fn test_bad_url() {
        let tokens = tokenize("url(foo bar)");
        assert_eq!(tokens[0], Token::BadUrl);
    }

    #[test]
    fn test_escape_in_ident() {
        assert_eq!(tokenize(r"c\6Flor"), vec![Token::Ident("color".into())]);
    }

    #[test]
    fn test_css_rule() {
        let tokens = tokenize("div { color: red; }");
        assert_eq!(
            tokens,
            vec![
                Token::Ident("div".into()),
                Token::Whitespace,
                Token::LeftBrace,
                Token::Whitespace,
                Token::Ident("color".into()),
                Token::Colon,
                Token::Whitespace,
                Token::Ident("red".into()),
                Token::Semicolon,
                Token::Whitespace,
                Token::RightBrace,
            ]
        );
    }

    #[test]
    fn test_selector_with_class() {
        let tokens = tokenize("div.foo");
        assert_eq!(
            tokens,
            vec![
                Token::Ident("div".into()),
                Token::Delim('.'),
                Token::Ident("foo".into()),
            ]
        );
    }

    #[test]
    fn test_selector_with_id() {
        let tokens = tokenize("#main");
        assert_eq!(tokens, vec![Token::Hash("main".into(), HashType::Id)]);
    }

    #[test]
    fn test_dimension_with_float() {
        assert_eq!(
            tokenize("0.5em"),
            vec![Token::Dimension(0.5, NumericType::Number, "em".into())]
        );
    }

    #[test]
    fn test_multiple_numbers() {
        let tokens = tokenize("10px 20px");
        assert_eq!(
            tokens,
            vec![
                Token::Dimension(10.0, NumericType::Integer, "px".into()),
                Token::Whitespace,
                Token::Dimension(20.0, NumericType::Integer, "px".into()),
            ]
        );
    }

    #[test]
    fn test_at_rule() {
        let tokens = tokenize("@media screen");
        assert_eq!(
            tokens,
            vec![
                Token::AtKeyword("media".into()),
                Token::Whitespace,
                Token::Ident("screen".into()),
            ]
        );
    }

    #[test]
    fn test_function_with_args() {
        let tokens = tokenize("calc(100% - 20px)");
        assert_eq!(
            tokens,
            vec![
                Token::Function("calc".into()),
                Token::Percentage(100.0),
                Token::Whitespace,
                Token::Delim('-'),
                Token::Whitespace,
                Token::Dimension(20.0, NumericType::Integer, "px".into()),
                Token::RightParen,
            ]
        );
    }

    #[test]
    fn test_color_hex() {
        let tokens = tokenize("#ff0000");
        assert_eq!(tokens, vec![Token::Hash("ff0000".into(), HashType::Id)]);
    }

    #[test]
    fn test_negative_dimension() {
        assert_eq!(
            tokenize("-10px"),
            vec![Token::Dimension(-10.0, NumericType::Integer, "px".into())]
        );
    }

    #[test]
    fn test_unicode_ident() {
        assert_eq!(tokenize("côté"), vec![Token::Ident("côté".into())]);
    }

    #[test]
    fn test_null_replacement() {
        let tokens = tokenize("a\0b");
        assert_eq!(tokens, vec![Token::Ident("a\u{FFFD}b".into())]);
    }

    #[test]
    fn test_crlf_normalization() {
        let tokens = tokenize("a\r\nb");
        assert_eq!(
            tokens,
            vec![
                Token::Ident("a".into()),
                Token::Whitespace,
                Token::Ident("b".into()),
            ]
        );
    }

    #[test]
    fn test_escape_hex_with_trailing_space() {
        // \41 followed by space should produce 'A'
        assert_eq!(tokenize(r"\41 B"), vec![Token::Ident("AB".into())]);
    }

    #[test]
    fn test_at_sign_alone() {
        assert_eq!(tokenize("@"), vec![Token::Delim('@')]);
    }

    #[test]
    fn test_hash_alone() {
        // # followed by non-name char
        assert_eq!(tokenize("# "), vec![Token::Delim('#'), Token::Whitespace]);
    }

    #[test]
    fn test_nested_comments() {
        // CSS comments don't nest, so "/* /* */" closes at first */
        let tokens = tokenize("/* /* */ a");
        assert_eq!(tokens, vec![Token::Whitespace, Token::Ident("a".into())]);
    }
}