//! Lexer for `mipl`. mod delimiter; use delimiter::*; /// Groups items relevant to delimiters. pub mod del { pub use super::delimiter::{ Delimiters, DiscardDelimiters, KeepDelimiters, DelimitersParam, }; } /// The lexer prelude. pub mod prelude { pub use super::*; pub use super::del::*; } /// Type for marking expected [Token::End] cases. #[derive(Debug, PartialEq, Clone)] pub struct EndToken; /// Type for marking expected [Token::Newline] cases. #[derive(Debug, PartialEq, Clone)] pub struct NewlineToken; /// Type for marking expected [Token::Tab] cases. #[derive(Debug, PartialEq, Clone)] pub struct TabToken; /// The core token type. #[derive(Debug, PartialEq, Clone)] pub enum Token { /// A newline. Newline(NewlineToken), /// A tab character Tab(TabToken), /// The end of the tokens stream. End(EndToken), /// A mere string. Str(String) } use std::collections::VecDeque; /// Iterator of tokens, and the tokenizer. #[derive(Debug, Clone)] pub struct Tokens(VecDeque); impl Tokens { fn chars_buf_into_tok( chars_buf: &mut Vec, inner: &mut VecDeque ) { let new_tok: String = chars_buf.iter().collect(); chars_buf.drain(..); inner.push_back(Token::Str( new_tok )); } fn char_into_tok(ch: char) -> Token { match ch { '\n' => Token::Newline(NewlineToken), '\t' => Token::Tab(TabToken), _ => Token::Str(String::from(ch)) } } /// Add a token to the list ad-hoc. pub fn add_token(&mut self, token: Token) { self.0.push_back(token) } /// Get [Tokens] from a vector of [Token] items. pub fn from_vec(toks: Vec) -> Tokens { Tokens( VecDeque::from(toks) ) } /// Get a new empty list of tokens. pub fn new_empty() -> Tokens { Tokens(VecDeque::new()) } /// Tokenize string input. pub fn new>( value: T, delimiters: DelimitersParam ) -> Tokens { let mut inner = VecDeque::::new(); let mut chars_buf: Vec = Vec::new(); let chars = value.as_ref().chars(); for ch in chars { let is_d_del = delimiters.discard.contains(ch); let is_k_del = delimiters.keep.contains(ch); let is_del = is_d_del | is_k_del; if is_del { if !chars_buf.is_empty() { Tokens::chars_buf_into_tok(&mut chars_buf, &mut inner); } if is_k_del { inner.push_back(Tokens::char_into_tok(ch)) } } else { chars_buf.push(ch) } } if !chars_buf.is_empty() { Tokens::chars_buf_into_tok(&mut chars_buf, &mut inner); } inner.push_back(Token::End(EndToken)); Tokens(inner) } } impl Iterator for Tokens { type Item = Token; fn next(&mut self) -> Option { self.0.pop_front() } } #[cfg(test)] mod tests { use super::*; #[test] fn test_tokens_new() { let d_dels = DiscardDelimiters::new( vec![' '] ); let del_params = DelimitersParam{ keep: KeepDelimiters::new( vec!['\n'] ), discard: d_dels }; let mut toks = Tokens::new( "A line.\nA quick brown fox.", del_params ); println!("{:#?}", toks); toks.0.pop_front(); toks.0.pop_front(); let newline = toks.0.pop_front().unwrap(); toks.0.pop_front(); let val = match toks.0.pop_front().unwrap() { Token::Str(s) => s, _ => panic!("Expected string.") }; assert_eq!("quick", val); assert_eq!(Token::Newline(NewlineToken), newline); } }