A JavaScript lexer and syntax highlighter for Gleam!

Switch to using `next` function

+100 -161

src/just.gleam  +98 -161
···
 }

 fn do_tokenise(lexer: Lexer, tokens: List(Token)) -> List(Token) {
+  case next(lexer) {
+    #(_, token.EndOfFile) -> list.reverse([token.EndOfFile, ..tokens])
+    #(lexer, token) -> do_tokenise(lexer, [token, ..tokens])
+  }
+}
+
+fn next(lexer: Lexer) -> #(Lexer, Token) {
   case lexer.source {
-    "" -> list.reverse(tokens)
+    "" -> #(lexer, token.EndOfFile)

     "\u{0009}" as space <> source
     | "\u{000B}" as space <> source
···
     | "\u{205F}" as space <> source
     | "\u{3000}" as space <> source
     | "\u{FEFF}" as space <> source -> {
-      let #(lexer, tokens) = whitespace(advance(lexer, source), tokens, space)
-      do_tokenise(lexer, tokens)
+      whitespace(advance(lexer, source), space)
     }

     "\u{000A}" as space <> source
     | "\u{000D}" as space <> source
     | "\u{2028}" as space <> source
-    | "\u{2029}" as space <> source ->
-      do_tokenise(advance(lexer, source), case lexer.ignore_whitespace {
-        True -> tokens
-        False -> [token.LineTerminator(space), ..tokens]
-      })
+    | "\u{2029}" as space <> source -> {
+      let lexer = advance(lexer, source)
+      case lexer.ignore_whitespace {
+        True -> next(lexer)
+        False -> #(lexer, token.LineTerminator(space))
+      }
+    }

-    "0b" as prefix <> source -> {
-      let #(lexer, token) =
-        lex_radix_number(advance(lexer, source), 2, prefix, False)
-      do_tokenise(lexer, [token, ..tokens])
-    }
-    "0o" as prefix <> source -> {
-      let #(lexer, token) =
-        lex_radix_number(advance(lexer, source), 8, prefix, False)
-      do_tokenise(lexer, [token, ..tokens])
-    }
-    "0x" as prefix <> source -> {
-      let #(lexer, token) =
-        lex_radix_number(advance(lexer, source), 16, prefix, False)
-      do_tokenise(lexer, [token, ..tokens])
-    }
+    "0b" as prefix <> source ->
+      lex_radix_number(advance(lexer, source), 2, prefix, False)
+
+    "0o" as prefix <> source ->
+      lex_radix_number(advance(lexer, source), 8, prefix, False)
+
+    "0x" as prefix <> source ->
+      lex_radix_number(advance(lexer, source), 16, prefix, False)

     "00" as digit <> source
     | "01" as digit <> source
···
     | "04" as digit <> source
     | "05" as digit <> source
     | "06" as digit <> source
-    | "07" as digit <> source -> {
-      let #(lexer, token) =
-        lex_leading_zero_number(advance(lexer, source), digit)
-      do_tokenise(lexer, [token, ..tokens])
-    }
+    | "07" as digit <> source ->
+      lex_leading_zero_number(advance(lexer, source), digit)

     "1" as digit <> source
     | "2" as digit <> source
···
     | "7" as digit <> source
     | "8" as digit <> source
     | "9" as digit <> source
-    | "0" as digit <> source -> {
-      let #(lexer, token) =
-        lex_number(advance(lexer, source), digit, Initial, AfterNumber)
-      do_tokenise(lexer, [token, ..tokens])
-    }
+    | "0" as digit <> source ->
+      lex_number(advance(lexer, source), digit, Initial, AfterNumber)

     ".1" as digit <> source
     | ".2" as digit <> source
···
     | ".7" as digit <> source
     | ".8" as digit <> source
     | ".9" as digit <> source
-    | ".0" as digit <> source -> {
-      let #(lexer, token) =
-        lex_number(advance(lexer, source), digit, Decimal, AfterNumber)
-      do_tokenise(lexer, [token, ..tokens])
-    }
+    | ".0" as digit <> source ->
+      lex_number(advance(lexer, source), digit, Decimal, AfterNumber)

-    "{" <> source ->
-      do_tokenise(advance(lexer, source), [token.LeftBrace, ..tokens])
-    "}" <> source ->
-      do_tokenise(advance(lexer, source), [token.RightBrace, ..tokens])
-    "(" <> source ->
-      do_tokenise(advance(lexer, source), [token.LeftParen, ..tokens])
-    ")" <> source ->
-      do_tokenise(advance(lexer, source), [token.RightParen, ..tokens])
-    "[" <> source ->
-      do_tokenise(advance(lexer, source), [token.LeftSquare, ..tokens])
-    "]" <> source ->
-      do_tokenise(advance(lexer, source), [token.RightSquare, ..tokens])
+    "{" <> source -> #(advance(lexer, source), token.LeftBrace)
+    "}" <> source -> #(advance(lexer, source), token.RightBrace)
+    "(" <> source -> #(advance(lexer, source), token.LeftParen)
+    ")" <> source -> #(advance(lexer, source), token.RightParen)
+    "[" <> source -> #(advance(lexer, source), token.LeftSquare)
+    "]" <> source -> #(advance(lexer, source), token.RightSquare)

-    "..." <> source ->
-      do_tokenise(advance(lexer, source), [token.TripleDot, ..tokens])
-    "." <> source -> do_tokenise(advance(lexer, source), [token.Dot, ..tokens])
-    ";" <> source ->
-      do_tokenise(advance(lexer, source), [token.Semicolon, ..tokens])
-    "," <> source ->
-      do_tokenise(advance(lexer, source), [token.Comma, ..tokens])
-    ":" <> source ->
-      do_tokenise(advance(lexer, source), [token.Colon, ..tokens])
-    "=>" <> source ->
-      do_tokenise(advance(lexer, source), [token.Arrow, ..tokens])
+    "..." <> source -> #(advance(lexer, source), token.TripleDot)
+    "." <> source -> #(advance(lexer, source), token.Dot)
+    ";" <> source -> #(advance(lexer, source), token.Semicolon)
+    "," <> source -> #(advance(lexer, source), token.Comma)
+    ":" <> source -> #(advance(lexer, source), token.Colon)
+    "=>" <> source -> #(advance(lexer, source), token.Arrow)

-    "<=" <> source ->
-      do_tokenise(advance(lexer, source), [token.LessEqual, ..tokens])
-    ">=" <> source ->
-      do_tokenise(advance(lexer, source), [token.GreaterEqual, ..tokens])
-    "===" <> source ->
-      do_tokenise(advance(lexer, source), [token.TripleEqual, ..tokens])
-    "!==" <> source ->
-      do_tokenise(advance(lexer, source), [token.BangDoubleEqual, ..tokens])
-    "==" <> source ->
-      do_tokenise(advance(lexer, source), [token.DoubleEqual, ..tokens])
-    "!=" <> source ->
-      do_tokenise(advance(lexer, source), [token.BangEqual, ..tokens])
+    "<=" <> source -> #(advance(lexer, source), token.LessEqual)
+    ">=" <> source -> #(advance(lexer, source), token.GreaterEqual)
+    "===" <> source -> #(advance(lexer, source), token.TripleEqual)
+    "!==" <> source -> #(advance(lexer, source), token.BangDoubleEqual)
+    "==" <> source -> #(advance(lexer, source), token.DoubleEqual)
+    "!=" <> source -> #(advance(lexer, source), token.BangEqual)

-    "=" <> source ->
-      do_tokenise(advance(lexer, source), [token.Equal, ..tokens])
-    "+=" <> source ->
-      do_tokenise(advance(lexer, source), [token.PlusEqual, ..tokens])
-    "-=" <> source ->
-      do_tokenise(advance(lexer, source), [token.MinusEqual, ..tokens])
-    "*=" <> source ->
-      do_tokenise(advance(lexer, source), [token.StarEqual, ..tokens])
-    "/=" <> source ->
-      do_tokenise(advance(lexer, source), [token.SlashEqual, ..tokens])
-    "%=" <> source ->
-      do_tokenise(advance(lexer, source), [token.PercentEqual, ..tokens])
-    "**=" <> source ->
-      do_tokenise(advance(lexer, source), [token.DoubleStarEqual, ..tokens])
-    "<<=" <> source ->
-      do_tokenise(advance(lexer, source), [token.DoubleLessEqual, ..tokens])
-    ">>=" <> source ->
-      do_tokenise(advance(lexer, source), [token.DoubleGreaterEqual, ..tokens])
-    ">>>=" <> source ->
-      do_tokenise(advance(lexer, source), [token.TripleGreaterEqual, ..tokens])
-    "&=" <> source ->
-      do_tokenise(advance(lexer, source), [token.AmpersandEqual, ..tokens])
-    "|=" <> source ->
-      do_tokenise(advance(lexer, source), [token.PipeEqual, ..tokens])
-    "^=" <> source ->
-      do_tokenise(advance(lexer, source), [token.CaratEqual, ..tokens])
-    "&&=" <> source ->
-      do_tokenise(advance(lexer, source), [token.DoubleAmpersandEqual, ..tokens])
-    "||=" <> source ->
-      do_tokenise(advance(lexer, source), [token.DoublePipeEqual, ..tokens])
-    "??=" <> source ->
-      do_tokenise(advance(lexer, source), [token.DoubleQuestionEqual, ..tokens])
+    "=" <> source -> #(advance(lexer, source), token.Equal)
+    "+=" <> source -> #(advance(lexer, source), token.PlusEqual)
+    "-=" <> source -> #(advance(lexer, source), token.MinusEqual)
+    "*=" <> source -> #(advance(lexer, source), token.StarEqual)
+    "/=" <> source -> #(advance(lexer, source), token.SlashEqual)
+    "%=" <> source -> #(advance(lexer, source), token.PercentEqual)
+    "**=" <> source -> #(advance(lexer, source), token.DoubleStarEqual)
+    "<<=" <> source -> #(advance(lexer, source), token.DoubleLessEqual)
+    ">>=" <> source -> #(advance(lexer, source), token.DoubleGreaterEqual)
+    ">>>=" <> source -> #(advance(lexer, source), token.TripleGreaterEqual)
+    "&=" <> source -> #(advance(lexer, source), token.AmpersandEqual)
+    "|=" <> source -> #(advance(lexer, source), token.PipeEqual)
+    "^=" <> source -> #(advance(lexer, source), token.CaratEqual)
+    "&&=" <> source -> #(advance(lexer, source), token.DoubleAmpersandEqual)
+    "||=" <> source -> #(advance(lexer, source), token.DoublePipeEqual)
+    "??=" <> source -> #(advance(lexer, source), token.DoubleQuestionEqual)

-    "<<" <> source ->
-      do_tokenise(advance(lexer, source), [token.DoubleLess, ..tokens])
-    ">>>" <> source ->
-      do_tokenise(advance(lexer, source), [token.TripleGreater, ..tokens])
-    ">>" <> source ->
-      do_tokenise(advance(lexer, source), [token.DoubleGreater, ..tokens])
+    "<<" <> source -> #(advance(lexer, source), token.DoubleLess)
+    ">>>" <> source -> #(advance(lexer, source), token.TripleGreater)
+    ">>" <> source -> #(advance(lexer, source), token.DoubleGreater)

-    "!" <> source -> do_tokenise(advance(lexer, source), [token.Bang, ..tokens])
-    "&&" <> source ->
-      do_tokenise(advance(lexer, source), [token.DoubleAmpersand, ..tokens])
-    "||" <> source ->
-      do_tokenise(advance(lexer, source), [token.DoublePipe, ..tokens])
-    "??" <> source ->
-      do_tokenise(advance(lexer, source), [token.DoubleQuestion, ..tokens])
-    "?." <> source ->
-      do_tokenise(advance(lexer, source), [token.QuestionDot, ..tokens])
-    "?" <> source ->
-      do_tokenise(advance(lexer, source), [token.Question, ..tokens])
+    "!" <> source -> #(advance(lexer, source), token.Bang)
+    "&&" <> source -> #(advance(lexer, source), token.DoubleAmpersand)
+    "||" <> source -> #(advance(lexer, source), token.DoublePipe)
+    "??" <> source -> #(advance(lexer, source), token.DoubleQuestion)
+    "?." <> source -> #(advance(lexer, source), token.QuestionDot)
+    "?" <> source -> #(advance(lexer, source), token.Question)

-    "<" <> source -> do_tokenise(advance(lexer, source), [token.Less, ..tokens])
-    ">" <> source ->
-      do_tokenise(advance(lexer, source), [token.Greater, ..tokens])
+    "<" <> source -> #(advance(lexer, source), token.Less)
+    ">" <> source -> #(advance(lexer, source), token.Greater)

-    "**" <> source ->
-      do_tokenise(advance(lexer, source), [token.DoubleStar, ..tokens])
-    "++" <> source ->
-      do_tokenise(advance(lexer, source), [token.DoublePlus, ..tokens])
-    "--" <> source ->
-      do_tokenise(advance(lexer, source), [token.DoubleMinus, ..tokens])
-    "+" <> source -> do_tokenise(advance(lexer, source), [token.Plus, ..tokens])
-    "-" <> source ->
-      do_tokenise(advance(lexer, source), [token.Minus, ..tokens])
-    "*" <> source -> do_tokenise(advance(lexer, source), [token.Star, ..tokens])
-    "/" <> source ->
-      do_tokenise(advance(lexer, source), [token.Slash, ..tokens])
-    "%" <> source ->
-      do_tokenise(advance(lexer, source), [token.Percent, ..tokens])
-    "&" <> source ->
-      do_tokenise(advance(lexer, source), [token.Ampersand, ..tokens])
-    "|" <> source -> do_tokenise(advance(lexer, source), [token.Pipe, ..tokens])
-    "^" <> source ->
-      do_tokenise(advance(lexer, source), [token.Caret, ..tokens])
-    "~" <> source ->
-      do_tokenise(advance(lexer, source), [token.Tilde, ..tokens])
+    "**" <> source -> #(advance(lexer, source), token.DoubleStar)
+    "++" <> source -> #(advance(lexer, source), token.DoublePlus)
+    "--" <> source -> #(advance(lexer, source), token.DoubleMinus)
+    "+" <> source -> #(advance(lexer, source), token.Plus)
+    "-" <> source -> #(advance(lexer, source), token.Minus)
+    "*" <> source -> #(advance(lexer, source), token.Star)
+    "/" <> source -> #(advance(lexer, source), token.Slash)
+    "%" <> source -> #(advance(lexer, source), token.Percent)
+    "&" <> source -> #(advance(lexer, source), token.Ampersand)
+    "|" <> source -> #(advance(lexer, source), token.Pipe)
+    "^" <> source -> #(advance(lexer, source), token.Caret)
+    "~" <> source -> #(advance(lexer, source), token.Tilde)

     "#" <> source -> {
       let #(lexer, name) = lex_identifier(advance(lexer, source), "")
-      do_tokenise(lexer, [token.PrivateIdentifier(name), ..tokens])
+      #(lexer, token.PrivateIdentifier(name))
     }

     "_" as character <> source
···
       let #(lexer, name) = lex_identifier(advance(lexer, source), character)

       let token = identifier_token(name, lexer.strict_mode)
-
-      do_tokenise(lexer, [token, ..tokens])
+      #(lexer, token)
     }

     "'" as quote <> source | "\"" as quote <> source -> {
       let #(lexer, string) = lex_string(advance(lexer, source), quote, "")
-      do_tokenise(lexer, [token.String(quote, string), ..tokens])
+      #(lexer, token.String(quote, string))
     }

-    _ -> list.reverse(tokens)
+    _ -> #(lexer, token.EndOfFile)
   }
 }
···
   }
 }

-fn whitespace(
-  lexer: Lexer,
-  tokens: List(Token),
-  lexed: String,
-) -> #(Lexer, List(Token)) {
+fn whitespace(lexer: Lexer, lexed: String) -> #(Lexer, Token) {
   case lexer.source {
     "\t" as space <> source
     | "\u{000B}" as space <> source
···
     | "\u{205F}" as space <> source
     | "\u{3000}" as space <> source
     | "\u{FEFF}" as space <> source ->
-      whitespace(advance(lexer, source), tokens, lexed <> space)
+      whitespace(advance(lexer, source), lexed <> space)

-    _ -> #(lexer, case lexer.ignore_whitespace {
-      True -> tokens
-      False -> [token.Whitespace(lexed), ..tokens]
-    })
+    _ ->
+      case lexer.ignore_whitespace {
+        True -> next(lexer)
+        False -> #(lexer, token.Whitespace(lexed))
+      }
   }
 }
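The payoff of this refactor is that lexing becomes resumable: each call to `next` consumes exactly one token's worth of input and hands back the updated lexer, and `do_tokenise` reduces to a loop over it. Below is a minimal, self-contained sketch of that shape; the `Word` token and list-of-words `Lexer` are hypothetical stand-ins for illustration, not the library's real types.

```gleam
import gleam/list

// Hypothetical stand-ins: the real Lexer wraps a source string and
// options such as ignore_whitespace, not a list of words.
pub type Token {
  Word(String)
  EndOfFile
}

pub type Lexer {
  Lexer(source: List(String))
}

// One lexing step: the next token plus the advanced lexer state,
// mirroring the shape of the new `next` function.
fn next(lexer: Lexer) -> #(Lexer, Token) {
  case lexer.source {
    [] -> #(lexer, EndOfFile)
    [word, ..rest] -> #(Lexer(source: rest), Word(word))
  }
}

// The driving loop, same shape as the rewritten `do_tokenise`: keep
// pulling tokens until `next` reports the end of the input.
fn tokenise(lexer: Lexer, tokens: List(Token)) -> List(Token) {
  case next(lexer) {
    #(_, EndOfFile) -> list.reverse([EndOfFile, ..tokens])
    #(lexer, token) -> tokenise(lexer, [token, ..tokens])
  }
}

pub fn main() {
  tokenise(Lexer(source: ["let", "x"]), [])
  // -> [Word("let"), Word("x"), EndOfFile]
}
```

A side effect of this shape is visible in the whitespace and line-terminator cases above: when `ignore_whitespace` is set, a branch can simply call `next` again to skip to the following token, instead of threading the token accumulator through every arm of the match.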
src/just/token.gleam  +2
···
   HashBangComment(String)
   Whitespace(String)
   LineTerminator(String)
+  EndOfFile

   // Literals
   Identifier(String)
···
     HashBangComment(value) -> "#!" <> value
     Whitespace(value) -> value
     LineTerminator(value) -> value
+    EndOfFile -> ""

     // Literals
     Identifier(value) -> value
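The new `EndOfFile` variant stringifies to the empty string, so a token stream that ends with it still reconstructs the original source exactly. A small sketch of that property, using a hypothetical cut-down `Token` type and a `to_source` function that mirrors the conversion shown in the diff:

```gleam
import gleam/list
import gleam/string

// Hypothetical cut-down token type; the real one has many more variants.
pub type Token {
  Identifier(String)
  Whitespace(String)
  EndOfFile
}

// Mirrors the token-to-source conversion above for these variants.
fn to_source(token: Token) -> String {
  case token {
    Identifier(value) -> value
    Whitespace(value) -> value
    // EndOfFile contributes nothing to the reconstructed source.
    EndOfFile -> ""
  }
}

pub fn main() {
  [Identifier("x"), Whitespace(" "), Identifier("y"), EndOfFile]
  |> list.map(to_source)
  |> string.concat
  // -> "x y"
}
```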