// Minimal Imperative Parsing Library | https://docs.rs/mipl
//! Lexer for `mipl`.

3mod delimiter;
4use delimiter::*;
5
6/// Groups items relevant to delimiters.
7pub mod del {
8 pub use super::delimiter::{
9 Delimiters,
10 DiscardDelimiters,
11 KeepDelimiters,
12 DelimitersParam,
13 };
14}
15/// The lexer prelude.
16pub mod prelude {
17 pub use super::*;
18 pub use super::del::*;
19}
20
/// Type for marking expected [`Token::End`] cases.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub struct EndToken;

/// Type for marking expected [`Token::Newline`] cases.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub struct NewlineToken;

/// Type for marking expected [`Token::Tab`] cases.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub struct TabToken;

/// The core token type.
///
/// Each non-string variant carries a zero-sized marker type of its own, so
/// the "kind" of an expected token can also be named as a type.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Token {
    /// A newline.
    Newline(NewlineToken),
    /// A tab character.
    Tab(TabToken),
    /// The end of the token stream.
    End(EndToken),
    /// A mere string.
    Str(String),
}

44use std::collections::VecDeque;
45/// Iterator of tokens, and the tokenizer.
46#[derive(Debug, Clone)]
47pub struct Tokens(VecDeque<Token>);
48impl Tokens {
49 fn chars_buf_into_tok(
50 chars_buf: &mut Vec<char>,
51 inner: &mut VecDeque<Token>
52 ) {
53 let new_tok: String = chars_buf.iter().collect();
54
55 chars_buf.drain(..);
56
57 inner.push_back(Token::Str(
58 new_tok
59 ));
60 }
61
62 fn char_into_tok(ch: char) -> Token {
63 match ch {
64 '\n' => Token::Newline(NewlineToken),
65 '\t' => Token::Tab(TabToken),
66 _ => Token::Str(String::from(ch))
67 }
68 }
69
70 /// Add a token to the list ad-hoc.
71 pub fn add_token(&mut self, token: Token) {
72 self.0.push_back(token)
73 }
74
75 /// Get [Tokens] from a vector of [Token] items.
76 pub fn from_vec(toks: Vec<Token>) -> Tokens {
77 Tokens(
78 VecDeque::from(toks)
79 )
80 }
81
82 /// Get a new empty list of tokens.
83 pub fn new_empty() -> Tokens {
84 Tokens(VecDeque::new())
85 }
86
87 /// Tokenize string input.
88 pub fn new<T: AsRef<str>>(
89 value: T,
90 delimiters: DelimitersParam
91 ) -> Tokens {
92 let mut inner = VecDeque::<Token>::new();
93
94 let mut chars_buf: Vec<char> = Vec::new();
95
96 let chars = value.as_ref().chars();
97
98 for ch in chars {
99 let is_d_del = delimiters.discard.contains(ch);
100 let is_k_del = delimiters.keep.contains(ch);
101 let is_del = is_d_del | is_k_del;
102
103 if is_del {
104 if !chars_buf.is_empty() {
105 Tokens::chars_buf_into_tok(&mut chars_buf, &mut inner);
106 }
107
108 if is_k_del {
109 inner.push_back(Tokens::char_into_tok(ch))
110 }
111 } else {
112 chars_buf.push(ch)
113 }
114 }
115
116 if !chars_buf.is_empty() {
117 Tokens::chars_buf_into_tok(&mut chars_buf, &mut inner);
118 }
119
120 inner.push_back(Token::End(EndToken));
121
122 Tokens(inner)
123 }
124}
125impl Iterator for Tokens {
126 type Item = Token;
127
128 fn next(&mut self) -> Option<Self::Item> {
129 self.0.pop_front()
130 }
131}
132
#[cfg(test)]
mod tests {
    use super::*;

    /// With a discarded space and a kept newline, the input must be split on
    /// both, the newline must appear as its own token, and the stream must be
    /// terminated by `Token::End`.
    #[test]
    fn test_tokens_new() {
        let del_params = DelimitersParam {
            keep: KeepDelimiters::new(vec!['\n']),
            discard: DiscardDelimiters::new(vec![' ']),
        };

        let toks = Tokens::new("A line.\nA quick brown fox.", del_params);

        // Check the complete sequence rather than sampling single positions,
        // so a regression anywhere in the stream is caught.
        let expected = vec![
            Token::Str(String::from("A")),
            Token::Str(String::from("line.")),
            Token::Newline(NewlineToken),
            Token::Str(String::from("A")),
            Token::Str(String::from("quick")),
            Token::Str(String::from("brown")),
            Token::Str(String::from("fox.")),
            Token::End(EndToken),
        ];
        let actual: Vec<Token> = toks.collect();

        assert_eq!(expected, actual);
    }
}
165}