web engine - experimental web browser
1//! CSS tokenizer per CSS Syntax Module Level 3 §4.
2//!
3//! Consumes a stream of code points and produces CSS tokens.
4
5/// A CSS token produced by the tokenizer.
6#[derive(Debug, Clone, PartialEq)]
7pub enum Token {
8 Ident(String),
9 Function(String),
10 AtKeyword(String),
11 Hash(String, HashType),
12 String(String),
13 BadString,
14 Url(String),
15 BadUrl,
16 Number(f64, NumericType),
17 Percentage(f64),
18 Dimension(f64, NumericType, String),
19 Whitespace,
20 Colon,
21 Semicolon,
22 Comma,
23 LeftBracket,
24 RightBracket,
25 LeftParen,
26 RightParen,
27 LeftBrace,
28 RightBrace,
29 Delim(char),
30 Cdo,
31 Cdc,
32 Eof,
33}
34
/// Flag carried by a `<hash-token>`.
///
/// The tokenizer picks `Id` when the characters after `#` would start an
/// identifier, and `Unrestricted` otherwise (for example `#123`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HashType {
    /// The name after `#` is a valid identifier.
    Id,
    /// The name after `#` is not a valid identifier.
    Unrestricted,
}
41
/// Records how a numeric token was written in the source.
///
/// The tokenizer starts from `Integer` and switches to `Number` as soon as it
/// consumes a fractional part or an exponent.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NumericType {
    /// Digits only, with an optional sign.
    Integer,
    /// Written with a fractional part and/or an exponent.
    Number,
}
48
/// CSS tokenizer state machine.
///
/// Holds the preprocessed input as a vector of code points together with a
/// cursor into it. `Debug` is derived so the state can be inspected in logs
/// and assertion failures; `Clone` lets a caller snapshot the cursor and
/// resume from that point later.
#[derive(Debug, Clone)]
pub struct Tokenizer {
    /// Preprocessed input, one entry per code point.
    input: Vec<char>,
    /// Index into `input` of the next code point to be consumed.
    pos: usize,
}
54
55impl Tokenizer {
56 pub fn new(input: &str) -> Self {
57 // Preprocessing: replace \r\n, \r, \f with \n (§3.3)
58 let mut chars = Vec::with_capacity(input.len());
59 let raw: Vec<char> = input.chars().collect();
60 let mut i = 0;
61 while i < raw.len() {
62 match raw[i] {
63 '\r' => {
64 chars.push('\n');
65 if i + 1 < raw.len() && raw[i + 1] == '\n' {
66 i += 1;
67 }
68 }
69 '\x0C' => chars.push('\n'),
70 '\0' => chars.push('\u{FFFD}'),
71 c => chars.push(c),
72 }
73 i += 1;
74 }
75 Self {
76 input: chars,
77 pos: 0,
78 }
79 }
80
81 /// Tokenize the entire input into a list of tokens (excluding EOF).
82 pub fn tokenize(input: &str) -> Vec<Token> {
83 let mut tokenizer = Self::new(input);
84 let mut tokens = Vec::new();
85 loop {
86 let token = tokenizer.next_token();
87 if token == Token::Eof {
88 break;
89 }
90 tokens.push(token);
91 }
92 tokens
93 }
94
95 /// Consume and return the next token.
96 pub fn next_token(&mut self) -> Token {
97 self.consume_comments();
98 self.consume_token()
99 }
100
101 fn peek(&self) -> char {
102 self.input.get(self.pos).copied().unwrap_or('\0')
103 }
104
105 fn peek_at(&self, offset: usize) -> char {
106 self.input.get(self.pos + offset).copied().unwrap_or('\0')
107 }
108
109 fn advance(&mut self) -> char {
110 let c = self.peek();
111 if self.pos < self.input.len() {
112 self.pos += 1;
113 }
114 c
115 }
116
117 fn is_eof(&self) -> bool {
118 self.pos >= self.input.len()
119 }
120
121 fn consume_comments(&mut self) {
122 while self.peek() == '/' && self.peek_at(1) == '*' {
123 self.pos += 2;
124 loop {
125 if self.is_eof() {
126 return;
127 }
128 if self.peek() == '*' && self.peek_at(1) == '/' {
129 self.pos += 2;
130 break;
131 }
132 self.pos += 1;
133 }
134 }
135 }
136
137 fn consume_token(&mut self) -> Token {
138 if self.is_eof() {
139 return Token::Eof;
140 }
141
142 let c = self.peek();
143
144 // Whitespace
145 if is_whitespace(c) {
146 self.consume_whitespace();
147 return Token::Whitespace;
148 }
149
150 // String
151 if c == '"' || c == '\'' {
152 return self.consume_string(c);
153 }
154
155 // Hash
156 if c == '#' {
157 self.advance();
158 if is_name_char(self.peek()) || self.starts_valid_escape() {
159 let hash_type = if self.would_start_ident() {
160 HashType::Id
161 } else {
162 HashType::Unrestricted
163 };
164 let name = self.consume_name();
165 return Token::Hash(name, hash_type);
166 }
167 return Token::Delim('#');
168 }
169
170 // Left paren
171 if c == '(' {
172 self.advance();
173 return Token::LeftParen;
174 }
175
176 // Right paren
177 if c == ')' {
178 self.advance();
179 return Token::RightParen;
180 }
181
182 // Plus sign
183 if c == '+' {
184 if self.starts_number() {
185 return self.consume_numeric();
186 }
187 self.advance();
188 return Token::Delim('+');
189 }
190
191 // Comma
192 if c == ',' {
193 self.advance();
194 return Token::Comma;
195 }
196
197 // Hyphen-minus
198 if c == '-' {
199 if self.starts_number() {
200 return self.consume_numeric();
201 }
202 if self.peek_at(1) == '-' && self.peek_at(2) == '>' {
203 self.pos += 3;
204 return Token::Cdc;
205 }
206 if self.would_start_ident() {
207 return self.consume_ident_like();
208 }
209 self.advance();
210 return Token::Delim('-');
211 }
212
213 // Full stop
214 if c == '.' {
215 if self.starts_number() {
216 return self.consume_numeric();
217 }
218 self.advance();
219 return Token::Delim('.');
220 }
221
222 // Colon
223 if c == ':' {
224 self.advance();
225 return Token::Colon;
226 }
227
228 // Semicolon
229 if c == ';' {
230 self.advance();
231 return Token::Semicolon;
232 }
233
234 // Less-than sign
235 if c == '<' {
236 if self.peek_at(1) == '!' && self.peek_at(2) == '-' && self.peek_at(3) == '-' {
237 self.pos += 4;
238 return Token::Cdo;
239 }
240 self.advance();
241 return Token::Delim('<');
242 }
243
244 // At sign
245 if c == '@' {
246 self.advance();
247 if self.would_start_ident() {
248 let name = self.consume_name();
249 return Token::AtKeyword(name);
250 }
251 return Token::Delim('@');
252 }
253
254 // Left bracket
255 if c == '[' {
256 self.advance();
257 return Token::LeftBracket;
258 }
259
260 // Backslash
261 if c == '\\' {
262 if self.starts_valid_escape() {
263 return self.consume_ident_like();
264 }
265 self.advance();
266 return Token::Delim('\\');
267 }
268
269 // Right bracket
270 if c == ']' {
271 self.advance();
272 return Token::RightBracket;
273 }
274
275 // Left brace
276 if c == '{' {
277 self.advance();
278 return Token::LeftBrace;
279 }
280
281 // Right brace
282 if c == '}' {
283 self.advance();
284 return Token::RightBrace;
285 }
286
287 // Digit
288 if c.is_ascii_digit() {
289 return self.consume_numeric();
290 }
291
292 // Name start
293 if is_name_start_char(c) {
294 return self.consume_ident_like();
295 }
296
297 // Anything else
298 self.advance();
299 Token::Delim(c)
300 }
301
302 fn consume_whitespace(&mut self) {
303 while !self.is_eof() && is_whitespace(self.peek()) {
304 self.advance();
305 }
306 }
307
308 fn consume_string(&mut self, ending: char) -> Token {
309 self.advance(); // consume opening quote
310 let mut value = String::new();
311 loop {
312 if self.is_eof() {
313 return Token::String(value);
314 }
315 let c = self.advance();
316 match c {
317 c if c == ending => return Token::String(value),
318 '\n' => {
319 // Unescaped newline in string → bad string
320 self.pos -= 1; // reconsume
321 return Token::BadString;
322 }
323 '\\' => {
324 if self.is_eof() {
325 // Backslash at end of input: do nothing
326 } else if self.peek() == '\n' {
327 self.advance(); // consume newline (line continuation)
328 } else {
329 value.push(self.consume_escaped_char());
330 }
331 }
332 _ => value.push(c),
333 }
334 }
335 }
336
337 fn consume_escaped_char(&mut self) -> char {
338 if self.is_eof() {
339 return '\u{FFFD}';
340 }
341 let c = self.advance();
342 if c.is_ascii_hexdigit() {
343 let mut hex = String::new();
344 hex.push(c);
345 for _ in 0..5 {
346 if !self.is_eof() && self.peek().is_ascii_hexdigit() {
347 hex.push(self.advance());
348 } else {
349 break;
350 }
351 }
352 // Consume optional trailing whitespace
353 if !self.is_eof() && is_whitespace(self.peek()) {
354 self.advance();
355 }
356 let code_point = u32::from_str_radix(&hex, 16).unwrap_or(0);
357 if code_point == 0 || code_point > 0x10FFFF || (0xD800..=0xDFFF).contains(&code_point) {
358 '\u{FFFD}'
359 } else {
360 char::from_u32(code_point).unwrap_or('\u{FFFD}')
361 }
362 } else {
363 c
364 }
365 }
366
367 fn starts_valid_escape(&self) -> bool {
368 self.peek() == '\\' && self.peek_at(1) != '\n'
369 }
370
371 fn starts_valid_escape_at(&self, offset: usize) -> bool {
372 self.peek_at(offset) == '\\' && self.peek_at(offset + 1) != '\n'
373 }
374
375 /// Check if the next chars would start an identifier (§4.3.9).
376 fn would_start_ident(&self) -> bool {
377 self.would_start_ident_at(0)
378 }
379
380 fn would_start_ident_at(&self, offset: usize) -> bool {
381 let c = self.peek_at(offset);
382 if is_name_start_char(c) {
383 return true;
384 }
385 if c == '-' {
386 let next = self.peek_at(offset + 1);
387 return is_name_start_char(next)
388 || next == '-'
389 || self.starts_valid_escape_at(offset + 1);
390 }
391 if c == '\\' {
392 return self.starts_valid_escape_at(offset);
393 }
394 false
395 }
396
397 /// Check if the next chars would start a number (§4.3.10).
398 fn starts_number(&self) -> bool {
399 let c = self.peek();
400 if c == '+' || c == '-' {
401 let next = self.peek_at(1);
402 if next.is_ascii_digit() {
403 return true;
404 }
405 if next == '.' && self.peek_at(2).is_ascii_digit() {
406 return true;
407 }
408 return false;
409 }
410 if c == '.' {
411 return self.peek_at(1).is_ascii_digit();
412 }
413 c.is_ascii_digit()
414 }
415
416 fn consume_name(&mut self) -> String {
417 let mut name = String::new();
418 loop {
419 if self.is_eof() {
420 break;
421 }
422 let c = self.peek();
423 if is_name_char(c) {
424 name.push(c);
425 self.advance();
426 } else if self.starts_valid_escape() {
427 self.advance(); // consume backslash
428 name.push(self.consume_escaped_char());
429 } else {
430 break;
431 }
432 }
433 name
434 }
435
436 fn consume_numeric(&mut self) -> Token {
437 let (value, num_type) = self.consume_number();
438
439 if self.would_start_ident() {
440 let unit = self.consume_name();
441 return Token::Dimension(value, num_type, unit);
442 }
443
444 if self.peek() == '%' {
445 self.advance();
446 return Token::Percentage(value);
447 }
448
449 Token::Number(value, num_type)
450 }
451
452 fn consume_number(&mut self) -> (f64, NumericType) {
453 let mut repr = String::new();
454 let mut num_type = NumericType::Integer;
455
456 // Sign
457 if self.peek() == '+' || self.peek() == '-' {
458 repr.push(self.advance());
459 }
460
461 // Integer part
462 while !self.is_eof() && self.peek().is_ascii_digit() {
463 repr.push(self.advance());
464 }
465
466 // Fractional part
467 if self.peek() == '.' && self.peek_at(1).is_ascii_digit() {
468 repr.push(self.advance()); // '.'
469 num_type = NumericType::Number;
470 while !self.is_eof() && self.peek().is_ascii_digit() {
471 repr.push(self.advance());
472 }
473 }
474
475 // Exponent
476 if self.peek() == 'e' || self.peek() == 'E' {
477 let next = self.peek_at(1);
478 if next.is_ascii_digit()
479 || ((next == '+' || next == '-') && self.peek_at(2).is_ascii_digit())
480 {
481 repr.push(self.advance()); // 'e'/'E'
482 num_type = NumericType::Number;
483 if self.peek() == '+' || self.peek() == '-' {
484 repr.push(self.advance());
485 }
486 while !self.is_eof() && self.peek().is_ascii_digit() {
487 repr.push(self.advance());
488 }
489 }
490 }
491
492 let value = repr.parse::<f64>().unwrap_or(0.0);
493 (value, num_type)
494 }
495
496 fn consume_ident_like(&mut self) -> Token {
497 let name = self.consume_name();
498
499 // Check for url( function
500 if name.eq_ignore_ascii_case("url") && self.peek() == '(' {
501 self.advance(); // consume '('
502 // Skip whitespace
503 let saved = self.pos;
504 self.consume_whitespace();
505 if self.peek() == '"' || self.peek() == '\'' {
506 // url("...") → treat as function token, parser handles the rest
507 self.pos = saved;
508 return Token::Function(name);
509 }
510 return self.consume_url();
511 }
512
513 if self.peek() == '(' {
514 self.advance();
515 return Token::Function(name);
516 }
517
518 Token::Ident(name)
519 }
520
521 fn consume_url(&mut self) -> Token {
522 let mut value = String::new();
523 self.consume_whitespace();
524
525 loop {
526 if self.is_eof() {
527 return Token::Url(value);
528 }
529 match self.peek() {
530 ')' => {
531 self.advance();
532 return Token::Url(value);
533 }
534 c if is_whitespace(c) => {
535 self.consume_whitespace();
536 if self.is_eof() || self.peek() == ')' {
537 if !self.is_eof() {
538 self.advance();
539 }
540 return Token::Url(value);
541 }
542 self.consume_bad_url_remnants();
543 return Token::BadUrl;
544 }
545 '"' | '\'' | '(' => {
546 self.consume_bad_url_remnants();
547 return Token::BadUrl;
548 }
549 '\\' => {
550 if self.starts_valid_escape() {
551 self.advance();
552 value.push(self.consume_escaped_char());
553 } else {
554 self.consume_bad_url_remnants();
555 return Token::BadUrl;
556 }
557 }
558 c if is_non_printable(c) => {
559 self.consume_bad_url_remnants();
560 return Token::BadUrl;
561 }
562 _ => {
563 value.push(self.advance());
564 }
565 }
566 }
567 }
568
569 fn consume_bad_url_remnants(&mut self) {
570 loop {
571 if self.is_eof() {
572 return;
573 }
574 let c = self.advance();
575 if c == ')' {
576 return;
577 }
578 if self.peek_at(0) != '\n' && c == '\\' {
579 self.advance(); // consume escaped char
580 }
581 }
582 }
583}
584
/// True for the characters the tokenizer treats as whitespace: space, tab
/// and line feed.
fn is_whitespace(c: char) -> bool {
    c == ' ' || c == '\t' || c == '\n'
}
588
/// True when `c` may begin a name: an ASCII letter, an underscore, or any
/// non-ASCII character.
fn is_name_start_char(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '_') || c > '\x7F'
}
592
593fn is_name_char(c: char) -> bool {
594 is_name_start_char(c) || c.is_ascii_digit() || c == '-'
595}
596
/// True for the control characters that may not appear in an unquoted URL:
/// U+0000..=U+0008, U+000B, U+000E..=U+001F and U+007F.
fn is_non_printable(c: char) -> bool {
    let code = c as u32;
    code <= 0x08 || code == 0x0B || (0x0E..=0x1F).contains(&code) || code == 0x7F
}
600
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the tokenizer over the whole of `input`.
    fn tokenize(input: &str) -> Vec<Token> {
        Tokenizer::tokenize(input)
    }

    /// Asserts that `input` tokenizes to exactly `expected`.
    fn assert_tokens(input: &str, expected: &[Token]) {
        assert_eq!(tokenize(input), expected, "input: {:?}", input);
    }

    fn ident(name: &str) -> Token {
        Token::Ident(name.to_string())
    }

    fn function(name: &str) -> Token {
        Token::Function(name.to_string())
    }

    fn at_keyword(name: &str) -> Token {
        Token::AtKeyword(name.to_string())
    }

    fn hash(name: &str, kind: HashType) -> Token {
        Token::Hash(name.to_string(), kind)
    }

    fn string(text: &str) -> Token {
        Token::String(text.to_string())
    }

    fn url(text: &str) -> Token {
        Token::Url(text.to_string())
    }

    fn integer(value: f64) -> Token {
        Token::Number(value, NumericType::Integer)
    }

    fn float(value: f64) -> Token {
        Token::Number(value, NumericType::Number)
    }

    fn dimension(value: f64, kind: NumericType, unit: &str) -> Token {
        Token::Dimension(value, kind, unit.to_string())
    }

    #[test]
    fn test_empty() {
        assert_tokens("", &[]);
    }

    #[test]
    fn test_whitespace() {
        assert_tokens(" \t\n ", &[Token::Whitespace]);
    }

    #[test]
    fn test_ident() {
        let names = ["color", "div", "--custom", "_foo", "-webkit-foo"];
        for name in names.iter() {
            assert_tokens(name, &[ident(name)]);
        }
    }

    #[test]
    fn test_function() {
        assert_tokens("rgb(", &[function("rgb")]);
        assert_eq!(tokenize("rgb(255, 0, 0)")[0], function("rgb"));
        assert_tokens("calc(", &[function("calc")]);
    }

    #[test]
    fn test_at_keyword() {
        assert_tokens("@media", &[at_keyword("media")]);
        assert_tokens("@import", &[at_keyword("import")]);
    }

    #[test]
    fn test_hash() {
        assert_tokens("#id", &[hash("id", HashType::Id)]);
        assert_tokens("#fff", &[hash("fff", HashType::Id)]);
        assert_tokens("#123", &[hash("123", HashType::Unrestricted)]);
    }

    #[test]
    fn test_string_double_quote() {
        assert_tokens(r#""hello""#, &[string("hello")]);
    }

    #[test]
    fn test_string_single_quote() {
        assert_tokens("'world'", &[string("world")]);
    }

    #[test]
    fn test_string_escape() {
        assert_tokens(r#""he\6Co""#, &[string("helo")]);
    }

    #[test]
    fn test_string_newline_escape() {
        assert_tokens("\"line\\\ncontinued\"", &[string("linecontinued")]);
    }

    #[test]
    fn test_bad_string() {
        assert_eq!(tokenize("\"unterminated\n")[0], Token::BadString);
    }

    #[test]
    fn test_number_integer() {
        assert_tokens("42", &[integer(42.0)]);
    }

    #[test]
    fn test_number_float() {
        assert_tokens("3.14", &[float(3.14)]);
    }

    #[test]
    fn test_number_signed() {
        assert_tokens("+10", &[integer(10.0)]);
        assert_tokens("-5", &[integer(-5.0)]);
    }

    #[test]
    fn test_number_exponent() {
        assert_tokens("1e2", &[float(100.0)]);
        assert_tokens("2E+3", &[float(2000.0)]);
    }

    #[test]
    fn test_percentage() {
        assert_tokens("50%", &[Token::Percentage(50.0)]);
    }

    #[test]
    fn test_dimension() {
        assert_tokens("10px", &[dimension(10.0, NumericType::Integer, "px")]);
        assert_tokens("2em", &[dimension(2.0, NumericType::Integer, "em")]);
        assert_tokens("1.5rem", &[dimension(1.5, NumericType::Number, "rem")]);
    }

    #[test]
    fn test_delimiters() {
        let cases = [
            (":", Token::Colon),
            (";", Token::Semicolon),
            (",", Token::Comma),
            ("(", Token::LeftParen),
            (")", Token::RightParen),
            ("[", Token::LeftBracket),
            ("]", Token::RightBracket),
            ("{", Token::LeftBrace),
            ("}", Token::RightBrace),
        ];
        for (input, expected) in cases.iter() {
            assert_tokens(input, std::slice::from_ref(expected));
        }
    }

    #[test]
    fn test_delim_tokens() {
        for &symbol in ['.', '>', '+', '~', '*'].iter() {
            assert_tokens(&symbol.to_string(), &[Token::Delim(symbol)]);
        }
    }

    #[test]
    fn test_cdo_cdc() {
        assert_tokens("<!--", &[Token::Cdo]);
        assert_tokens("-->", &[Token::Cdc]);
    }

    #[test]
    fn test_comments() {
        assert_tokens("/* comment */color", &[ident("color")]);
        assert_tokens("a/* x */b", &[ident("a"), ident("b")]);
    }

    #[test]
    fn test_unclosed_comment() {
        assert_tokens("/* unclosed", &[]);
    }

    #[test]
    fn test_url_token() {
        assert_tokens("url(https://example.com)", &[url("https://example.com")]);
    }

    #[test]
    fn test_url_with_whitespace() {
        assert_tokens("url( foo.png )", &[url("foo.png")]);
    }

    #[test]
    fn test_url_function_with_quotes() {
        assert_eq!(tokenize("url(\"foo.png\")")[0], function("url"));
    }

    #[test]
    fn test_bad_url() {
        assert_eq!(tokenize("url(foo bar)")[0], Token::BadUrl);
    }

    #[test]
    fn test_escape_in_ident() {
        assert_tokens(r"c\6Flor", &[ident("color")]);
    }

    #[test]
    fn test_css_rule() {
        assert_tokens(
            "div { color: red; }",
            &[
                ident("div"),
                Token::Whitespace,
                Token::LeftBrace,
                Token::Whitespace,
                ident("color"),
                Token::Colon,
                Token::Whitespace,
                ident("red"),
                Token::Semicolon,
                Token::Whitespace,
                Token::RightBrace,
            ],
        );
    }

    #[test]
    fn test_selector_with_class() {
        assert_tokens("div.foo", &[ident("div"), Token::Delim('.'), ident("foo")]);
    }

    #[test]
    fn test_selector_with_id() {
        assert_tokens("#main", &[hash("main", HashType::Id)]);
    }

    #[test]
    fn test_dimension_with_float() {
        assert_tokens("0.5em", &[dimension(0.5, NumericType::Number, "em")]);
    }

    #[test]
    fn test_multiple_numbers() {
        assert_tokens(
            "10px 20px",
            &[
                dimension(10.0, NumericType::Integer, "px"),
                Token::Whitespace,
                dimension(20.0, NumericType::Integer, "px"),
            ],
        );
    }

    #[test]
    fn test_at_rule() {
        assert_tokens(
            "@media screen",
            &[at_keyword("media"), Token::Whitespace, ident("screen")],
        );
    }

    #[test]
    fn test_function_with_args() {
        assert_tokens(
            "calc(100% - 20px)",
            &[
                function("calc"),
                Token::Percentage(100.0),
                Token::Whitespace,
                Token::Delim('-'),
                Token::Whitespace,
                dimension(20.0, NumericType::Integer, "px"),
                Token::RightParen,
            ],
        );
    }

    #[test]
    fn test_color_hex() {
        assert_tokens("#ff0000", &[hash("ff0000", HashType::Id)]);
    }

    #[test]
    fn test_negative_dimension() {
        assert_tokens("-10px", &[dimension(-10.0, NumericType::Integer, "px")]);
    }

    #[test]
    fn test_unicode_ident() {
        assert_tokens("côté", &[ident("côté")]);
    }

    #[test]
    fn test_null_replacement() {
        assert_tokens("a\0b", &[ident("a\u{FFFD}b")]);
    }

    #[test]
    fn test_crlf_normalization() {
        assert_tokens("a\r\nb", &[ident("a"), Token::Whitespace, ident("b")]);
    }

    #[test]
    fn test_escape_hex_with_trailing_space() {
        // `\41` followed by a space decodes to 'A'; the space is part of the escape.
        assert_tokens(r"\41 B", &[ident("AB")]);
    }

    #[test]
    fn test_at_sign_alone() {
        assert_tokens("@", &[Token::Delim('@')]);
    }

    #[test]
    fn test_hash_alone() {
        // '#' followed by a character that cannot be part of a name.
        assert_tokens("# ", &[Token::Delim('#'), Token::Whitespace]);
    }

    #[test]
    fn test_nested_comments() {
        // CSS comments do not nest, so "/* /* */" ends at the first "*/".
        assert_tokens("/* /* */ a", &[Token::Whitespace, ident("a")]);
    }
}
981}