A JavaScript lexer and syntax highlighter for Gleam!

Implement highlighting

+348
+2
gleam.toml
··· 14 14 15 15 [dependencies] 16 16 gleam_stdlib = ">= 0.44.0 and < 2.0.0" 17 + gleam_community_ansi = ">= 1.4.3 and < 2.0.0" 18 + houdini = ">= 1.1.0 and < 2.0.0" 17 19 18 20 [dev-dependencies] 19 21 gleeunit = ">= 1.0.0 and < 2.0.0"
+7
manifest.toml
··· 2 2 # You typically do not need to edit this file 3 3 4 4 packages = [ 5 + { name = "gleam_community_ansi", version = "1.4.3", build_tools = ["gleam"], requirements = ["gleam_community_colour", "gleam_regexp", "gleam_stdlib"], otp_app = "gleam_community_ansi", source = "hex", outer_checksum = "8A62AE9CC6EA65BEA630D95016D6C07E4F9973565FA3D0DE68DC4200D8E0DD27" }, 6 + { name = "gleam_community_colour", version = "2.0.0", build_tools = ["gleam"], requirements = ["gleam_json", "gleam_stdlib"], otp_app = "gleam_community_colour", source = "hex", outer_checksum = "FDD6AC62C6EC8506C005949A4FCEF032038191D5EAAEC3C9A203CD53AE956ACA" }, 7 + { name = "gleam_json", version = "2.3.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_json", source = "hex", outer_checksum = "C55C5C2B318533A8072D221C5E06E5A75711C129E420DD1CE463342106012E5D" }, 8 + { name = "gleam_regexp", version = "1.1.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_regexp", source = "hex", outer_checksum = "7F5E0C0BBEB3C58E57C9CB05FA9002F970C85AD4A63BA1E55CBCB35C15809179" }, 5 9 { name = "gleam_stdlib", version = "0.58.0", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "091F2D2C4A3A4E2047986C47E2C2C9D728A4E068ABB31FDA17B0D347E6248467" }, 6 10 { name = "gleeunit", version = "1.3.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = "0E6C83834BA65EDCAAF4FE4FB94AC697D9262D83E6F58A750D63C9F6C8A9D9FF" }, 11 + { name = "houdini", version = "1.1.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "houdini", source = "hex", outer_checksum = "5BA517E5179F132F0471CB314F27FE210A10407387DA1EA4F6FD084F74469FC2" }, 7 12 ] 8 13 9 14 [requirements] 15 + gleam_community_ansi = { version = ">= 1.4.3 and < 2.0.0" } 10 16 gleam_stdlib = { version = ">= 0.44.0 and < 2.0.0" } 11 17 gleeunit = { version = ">= 1.0.0 and < 2.0.0" } 18 + houdini = { version = ">= 1.1.0 and < 2.0.0" }
+339
src/just/highlight.gleam
import gleam/list
import gleam_community/ansi
import houdini
import just
import just/token as t

/// A highlighting token, containing information about the kind of syntax
/// being used. Many similar tokens (e.g. all keywords) are grouped together
/// to simplify them.
///
/// For syntax tokens, see `just/token.{type Token}`.
///
pub type Token {
  Whitespace(String)
  Keyword(String)
  Variable(String)
  Class(String)
  String(String)
  Regexp(String)
  Number(String)
  Function(String)
  Operator(String)
  Comment(String)
  Punctuation(String)
}

/// Convert a string of JavaScript source code into ansi highlighting.
///
/// Colours taken from [`contour`](https://hexdocs.pm/contour):
/// | Token                             | Colour      |
/// | --------------------------------- | ----------- |
/// | Keyword                           | Yellow      |
/// | Class                             | Cyan        |
/// | Function                          | Blue        |
/// | Operator                          | Magenta     |
/// | Comment                           | Italic grey |
/// | String, Number, Regexp            | Green       |
/// | Whitespace, Variable, Punctuation | No colour   |
///
/// If you wish to use other colours or another format, use `to_tokens`.
///
pub fn to_ansi(code: String) -> String {
  // Fold the token stream into one string, wrapping each token's text in
  // the appropriate ansi escape codes as we go.
  to_tokens(code)
  |> list.fold("", fn(code, token) {
    code
    <> case token {
      // `ansi.reset` leaves the text uncoloured.
      Whitespace(s) -> ansi.reset(s)
      Keyword(s) -> ansi.yellow(s)
      Variable(s) -> ansi.reset(s)
      Class(s) -> ansi.cyan(s)
      String(s) -> ansi.green(s)
      Regexp(s) -> ansi.green(s)
      Number(s) -> ansi.green(s)
      Function(s) -> ansi.blue(s)
      Operator(s) -> ansi.magenta(s)
      Comment(s) -> ansi.italic(ansi.gray(s))
      Punctuation(s) -> ansi.reset(s)
    }
  })
}

/// Convert a string of JavaScript source code into an HTML string.
/// Each token is wrapped in a `<span>` with a class indicating the type of token.
///
/// Class names taken from [`contour`](https://hexdocs.pm/contour):
/// | Token       | CSS class      |
/// | ----------- | -------------- |
/// | Keyword     | hl-keyword     |
/// | Variable    | hl-variable    |
/// | Class       | hl-class       |
/// | Function    | hl-function    |
/// | Operator    | hl-operator    |
/// | Punctuation | hl-punctuation |
/// | Comment     | hl-comment     |
/// | String      | hl-string      |
/// | Regexp      | hl-regexp      |
/// | Number      | hl-number      |
/// | Whitespace  | no class       |
///
/// Place the output within a `<pre><code>...</code></pre>` and add styling for
/// these CSS classes to get highlighting on your website. Here's some CSS you
/// could use:
///
/// ```css
/// pre code .hl-comment { color: #d4d4d4; font-style: italic }
/// pre code .hl-function { color: #9ce7ff }
/// pre code .hl-keyword { color: #ffd596 }
/// pre code .hl-operator { color: #ffaff3 }
/// pre code .hl-string { color: #c8ffa7 }
/// pre code .hl-number { color: #c8ffa7 }
/// pre code .hl-regexp { color: #c8ffa7 }
/// pre code .hl-class { color: #ffddfa }
/// ```
///
/// If you wish to use another format see `to_ansi` or `to_tokens`.
///
pub fn to_html(code: String) -> String {
  to_tokens(code)
  |> list.fold("", fn(acc, token) {
    case token {
      // Whitespace is emitted without a span and without escaping — assumed
      // to contain only whitespace characters from the lexer (TODO confirm
      // against `just.tokenise`).
      Whitespace(s) -> acc <> s
      // Every other token's text is HTML-escaped via houdini before being
      // wrapped in its classed span.
      Keyword(s) ->
        acc <> "<span class=hl-keyword>" <> houdini.escape(s) <> "</span>"
      Variable(s) ->
        acc <> "<span class=hl-variable>" <> houdini.escape(s) <> "</span>"
      Class(s) ->
        acc <> "<span class=hl-class>" <> houdini.escape(s) <> "</span>"
      String(s) ->
        acc <> "<span class=hl-string>" <> houdini.escape(s) <> "</span>"
      Regexp(s) ->
        acc <> "<span class=hl-regexp>" <> houdini.escape(s) <> "</span>"
      Number(s) ->
        acc <> "<span class=hl-number>" <> houdini.escape(s) <> "</span>"
      Function(s) ->
        acc <> "<span class=hl-function>" <> houdini.escape(s) <> "</span>"
      Operator(s) ->
        acc <> "<span class=hl-operator>" <> houdini.escape(s) <> "</span>"
      Comment(s) ->
        acc <> "<span class=hl-comment>" <> houdini.escape(s) <> "</span>"
      Punctuation(s) ->
        acc <> "<span class=hl-punctuation>" <> houdini.escape(s) <> "</span>"
    }
  })
}

/// Convert a string of JavaScript source code into highlighting tokens.
/// Highlighting tokens only contain information about the kind of syntax
/// being used, grouping similar tokens (e.g. all keywords) into one category.
///
/// To convert code into syntax tokens, see `just.tokenise`.
///
pub fn to_tokens(code: String) -> List(Token) {
  let lexer = just.new(code)
  do_to_tokens(just.tokenise(lexer), [])
}

// Tail-recursive translation of lexer tokens into highlighting tokens.
// Tokens are prepended to the `out` accumulator, which is reversed once the
// input is exhausted.
//
// NOTE: pattern order matters. The multi-token patterns at the top (e.g.
// `name(` as a function call, `new`/`class`/`extends`/`instanceof` followed
// by an identifier as a class name) must come before the single-token
// fallbacks further down, or those fallbacks would match first.
fn do_to_tokens(in: List(t.Token), out: List(Token)) -> List(Token) {
  case in {
    [] -> list.reverse(out)

    // Identifiers and specific constructs
    // An identifier directly followed by `(` is highlighted as a function.
    [t.Identifier(value), t.LeftParen, ..in] ->
      do_to_tokens(in, [Punctuation("("), Function(value), ..out])
    [t.ContextualKeyword(keyword), t.LeftParen, ..in] ->
      do_to_tokens(in, [
        Punctuation("("),
        Function(t.contextual_keyword_name(keyword)),
        ..out
      ])

    // A contextual keyword used as a binding name after `let`/`const` is a
    // variable, not a keyword.
    [t.Let, t.Whitespace(space), t.ContextualKeyword(keyword), ..in] ->
      do_to_tokens(in, [
        Variable(t.contextual_keyword_name(keyword)),
        Whitespace(space),
        Keyword("let"),
        ..out
      ])
    [t.Const, t.Whitespace(space), t.ContextualKeyword(keyword), ..in] ->
      do_to_tokens(in, [
        Variable(t.contextual_keyword_name(keyword)),
        Whitespace(space),
        Keyword("const"),
        ..out
      ])

    // Identifiers following these keywords are highlighted as class names.
    [t.New, t.Whitespace(space), t.Identifier(name), ..in] ->
      do_to_tokens(in, [Class(name), Whitespace(space), Keyword("new"), ..out])
    [t.Class, t.Whitespace(space), t.Identifier(name), ..in] ->
      do_to_tokens(in, [Class(name), Whitespace(space), Keyword("class"), ..out])
    [t.Extends, t.Whitespace(space), t.Identifier(name), ..in] ->
      do_to_tokens(in, [
        Class(name),
        Whitespace(space),
        Keyword("extends"),
        ..out
      ])
    [t.Instanceof, t.Whitespace(space), t.Identifier(name), ..in] ->
      do_to_tokens(in, [
        Class(name),
        Whitespace(space),
        Keyword("instanceof"),
        ..out
      ])

    [t.Identifier(name), ..in] -> do_to_tokens(in, [Variable(name), ..out])
    [t.PrivateIdentifier(name), ..in] ->
      do_to_tokens(in, [Variable("#" <> name), ..out])

    [t.ContextualKeyword(keyword), ..in] ->
      do_to_tokens(in, [Keyword(t.contextual_keyword_name(keyword)), ..out])

    // Comments and whitespace
    // The lexer strips comment delimiters, so they are re-added here.
    [t.SingleLineComment(value), ..in] ->
      do_to_tokens(in, [Comment("//" <> value), ..out])
    [t.MultiLineComment(value), ..in] ->
      do_to_tokens(in, [Comment("/*" <> value <> "*/"), ..out])
    [t.HashBangComment(value), ..in] ->
      do_to_tokens(in, [Comment("#!" <> value), ..out])
    [t.Whitespace(value), ..in] -> do_to_tokens(in, [Whitespace(value), ..out])
    [t.LineTerminator(value), ..in] ->
      do_to_tokens(in, [Whitespace(value), ..out])
    // End-of-file produces no highlighting output.
    [t.EndOfFile, ..in] -> do_to_tokens(in, out)

    // Literals
    // As with comments, literal delimiters (quotes, slashes, `${`…`}`) are
    // reconstructed around the lexer's stripped contents.
    [t.Number(value), ..in] -> do_to_tokens(in, [Number(value), ..out])
    [t.BigInt(value), ..in] -> do_to_tokens(in, [Number(value <> "n"), ..out])
    [t.String(quote:, contents:), ..in] ->
      do_to_tokens(in, [String(quote <> contents <> quote), ..out])
    [t.RegularExpression(value), ..in] ->
      do_to_tokens(in, [Regexp("/" <> value <> "/"), ..out])
    [t.TemplateHead(value), ..in] ->
      do_to_tokens(in, [String("`" <> value <> "${"), ..out])
    [t.TemplateMiddle(value), ..in] ->
      do_to_tokens(in, [String("}" <> value <> "${"), ..out])
    [t.TemplateTail(value), ..in] ->
      do_to_tokens(in, [String("}" <> value <> "`"), ..out])

    // Keywords
    [t.Break, ..in] -> do_to_tokens(in, [Keyword("break"), ..out])
    [t.Case, ..in] -> do_to_tokens(in, [Keyword("case"), ..out])
    [t.Catch, ..in] -> do_to_tokens(in, [Keyword("catch"), ..out])
    [t.Class, ..in] -> do_to_tokens(in, [Keyword("class"), ..out])
    [t.Const, ..in] -> do_to_tokens(in, [Keyword("const"), ..out])
    [t.Continue, ..in] -> do_to_tokens(in, [Keyword("continue"), ..out])
    [t.Debugger, ..in] -> do_to_tokens(in, [Keyword("debugger"), ..out])
    [t.Default, ..in] -> do_to_tokens(in, [Keyword("default"), ..out])
    [t.Delete, ..in] -> do_to_tokens(in, [Keyword("delete"), ..out])
    [t.Do, ..in] -> do_to_tokens(in, [Keyword("do"), ..out])
    [t.Else, ..in] -> do_to_tokens(in, [Keyword("else"), ..out])
    [t.Export, ..in] -> do_to_tokens(in, [Keyword("export"), ..out])
    [t.Extends, ..in] -> do_to_tokens(in, [Keyword("extends"), ..out])
    [t.False, ..in] -> do_to_tokens(in, [Keyword("false"), ..out])
    [t.Finally, ..in] -> do_to_tokens(in, [Keyword("finally"), ..out])
    [t.For, ..in] -> do_to_tokens(in, [Keyword("for"), ..out])
    [t.Function, ..in] -> do_to_tokens(in, [Keyword("function"), ..out])
    [t.If, ..in] -> do_to_tokens(in, [Keyword("if"), ..out])
    [t.Import, ..in] -> do_to_tokens(in, [Keyword("import"), ..out])
    [t.In, ..in] -> do_to_tokens(in, [Keyword("in"), ..out])
    [t.Instanceof, ..in] -> do_to_tokens(in, [Keyword("instanceof"), ..out])
    [t.New, ..in] -> do_to_tokens(in, [Keyword("new"), ..out])
    [t.Null, ..in] -> do_to_tokens(in, [Keyword("null"), ..out])
    [t.Return, ..in] -> do_to_tokens(in, [Keyword("return"), ..out])
    [t.Super, ..in] -> do_to_tokens(in, [Keyword("super"), ..out])
    [t.Switch, ..in] -> do_to_tokens(in, [Keyword("switch"), ..out])
    [t.This, ..in] -> do_to_tokens(in, [Keyword("this"), ..out])
    [t.Throw, ..in] -> do_to_tokens(in, [Keyword("throw"), ..out])
    [t.True, ..in] -> do_to_tokens(in, [Keyword("true"), ..out])
    [t.Try, ..in] -> do_to_tokens(in, [Keyword("try"), ..out])
    [t.Typeof, ..in] -> do_to_tokens(in, [Keyword("typeof"), ..out])
    [t.Var, ..in] -> do_to_tokens(in, [Keyword("var"), ..out])
    [t.Void, ..in] -> do_to_tokens(in, [Keyword("void"), ..out])
    [t.While, ..in] -> do_to_tokens(in, [Keyword("while"), ..out])
    [t.With, ..in] -> do_to_tokens(in, [Keyword("with"), ..out])

    // Keywords in strict mode
    [t.Let, ..in] -> do_to_tokens(in, [Keyword("let"), ..out])
    [t.Static, ..in] -> do_to_tokens(in, [Keyword("static"), ..out])
    [t.Yield, ..in] -> do_to_tokens(in, [Keyword("yield"), ..out])

    // Future reserved words
    [t.Enum, ..in] -> do_to_tokens(in, [Keyword("enum"), ..out])

    // Future reserved words in strict mode
    [t.Implements, ..in] -> do_to_tokens(in, [Keyword("implements"), ..out])
    [t.Interface, ..in] -> do_to_tokens(in, [Keyword("interface"), ..out])
    [t.Package, ..in] -> do_to_tokens(in, [Keyword("package"), ..out])
    [t.Private, ..in] -> do_to_tokens(in, [Keyword("private"), ..out])
    [t.Protected, ..in] -> do_to_tokens(in, [Keyword("protected"), ..out])

    // Grouping
    [t.LeftBrace, ..in] -> do_to_tokens(in, [Punctuation("{"), ..out])
    [t.RightBrace, ..in] -> do_to_tokens(in, [Punctuation("}"), ..out])
    [t.LeftParen, ..in] -> do_to_tokens(in, [Punctuation("("), ..out])
    [t.RightParen, ..in] -> do_to_tokens(in, [Punctuation(")"), ..out])
    [t.LeftSquare, ..in] -> do_to_tokens(in, [Punctuation("["), ..out])
    [t.RightSquare, ..in] -> do_to_tokens(in, [Punctuation("]"), ..out])

    // Separators
    [t.Dot, ..in] -> do_to_tokens(in, [Punctuation("."), ..out])
    [t.TripleDot, ..in] -> do_to_tokens(in, [Punctuation("..."), ..out])
    [t.Semicolon, ..in] -> do_to_tokens(in, [Punctuation(";"), ..out])
    [t.Comma, ..in] -> do_to_tokens(in, [Punctuation(","), ..out])
    [t.Colon, ..in] -> do_to_tokens(in, [Punctuation(":"), ..out])
    [t.Arrow, ..in] -> do_to_tokens(in, [Punctuation("=>"), ..out])

    // Comparison
    [t.Less, ..in] -> do_to_tokens(in, [Operator("<"), ..out])
    [t.Greater, ..in] -> do_to_tokens(in, [Operator(">"), ..out])
    [t.LessEqual, ..in] -> do_to_tokens(in, [Operator("<="), ..out])
    [t.GreaterEqual, ..in] -> do_to_tokens(in, [Operator(">="), ..out])
    [t.DoubleEqual, ..in] -> do_to_tokens(in, [Operator("=="), ..out])
    [t.BangEqual, ..in] -> do_to_tokens(in, [Operator("!="), ..out])
    [t.TripleEqual, ..in] -> do_to_tokens(in, [Operator("==="), ..out])
    [t.BangDoubleEqual, ..in] -> do_to_tokens(in, [Operator("!=="), ..out])

    // Arithmetic
    [t.Plus, ..in] -> do_to_tokens(in, [Operator("+"), ..out])
    [t.Minus, ..in] -> do_to_tokens(in, [Operator("-"), ..out])
    [t.Star, ..in] -> do_to_tokens(in, [Operator("*"), ..out])
    [t.Slash, ..in] -> do_to_tokens(in, [Operator("/"), ..out])
    [t.Percent, ..in] -> do_to_tokens(in, [Operator("%"), ..out])
    [t.DoubleStar, ..in] -> do_to_tokens(in, [Operator("**"), ..out])
    [t.DoublePlus, ..in] -> do_to_tokens(in, [Operator("++"), ..out])
    [t.DoubleMinus, ..in] -> do_to_tokens(in, [Operator("--"), ..out])
    [t.DoubleLess, ..in] -> do_to_tokens(in, [Operator("<<"), ..out])
    [t.DoubleGreater, ..in] -> do_to_tokens(in, [Operator(">>"), ..out])
    [t.TripleGreater, ..in] -> do_to_tokens(in, [Operator(">>>"), ..out])
    [t.Ampersand, ..in] -> do_to_tokens(in, [Operator("&"), ..out])
    [t.Pipe, ..in] -> do_to_tokens(in, [Operator("|"), ..out])
    [t.Caret, ..in] -> do_to_tokens(in, [Operator("^"), ..out])
    [t.Tilde, ..in] -> do_to_tokens(in, [Operator("~"), ..out])

    // Logic
    [t.Bang, ..in] -> do_to_tokens(in, [Operator("!"), ..out])
    [t.DoubleAmpersand, ..in] -> do_to_tokens(in, [Operator("&&"), ..out])
    [t.DoublePipe, ..in] -> do_to_tokens(in, [Operator("||"), ..out])
    [t.Question, ..in] -> do_to_tokens(in, [Operator("?"), ..out])
    [t.DoubleQuestion, ..in] -> do_to_tokens(in, [Operator("??"), ..out])
    [t.QuestionDot, ..in] -> do_to_tokens(in, [Operator("?."), ..out])

    // Assignment
    [t.Equal, ..in] -> do_to_tokens(in, [Operator("="), ..out])
    [t.PlusEqual, ..in] -> do_to_tokens(in, [Operator("+="), ..out])
    [t.MinusEqual, ..in] -> do_to_tokens(in, [Operator("-="), ..out])
    [t.StarEqual, ..in] -> do_to_tokens(in, [Operator("*="), ..out])
    [t.SlashEqual, ..in] -> do_to_tokens(in, [Operator("/="), ..out])
    [t.PercentEqual, ..in] -> do_to_tokens(in, [Operator("%="), ..out])
    [t.DoubleStarEqual, ..in] -> do_to_tokens(in, [Operator("**="), ..out])
    [t.DoubleLessEqual, ..in] -> do_to_tokens(in, [Operator("<<="), ..out])
    [t.DoubleGreaterEqual, ..in] -> do_to_tokens(in, [Operator(">>="), ..out])
    [t.TripleGreaterEqual, ..in] -> do_to_tokens(in, [Operator(">>>="), ..out])
    [t.AmpersandEqual, ..in] -> do_to_tokens(in, [Operator("&="), ..out])
    [t.PipeEqual, ..in] -> do_to_tokens(in, [Operator("|="), ..out])
    [t.CaratEqual, ..in] -> do_to_tokens(in, [Operator("^="), ..out])
    [t.DoubleAmpersandEqual, ..in] -> do_to_tokens(in, [Operator("&&="), ..out])
    [t.DoublePipeEqual, ..in] -> do_to_tokens(in, [Operator("||="), ..out])
    [t.DoubleQuestionEqual, ..in] -> do_to_tokens(in, [Operator("??="), ..out])
  }
}