this repo has no description

rewritten parser

12Me21 9425587d b8dbae11

+422 -1048
+1 -1
index.html
··· 4 4 <title>Markup2 Demo</title> 5 5 6 6 <script src=langs.js></script> 7 - <script src=parse2.js></script> 7 + <script src=parse.js></script> 8 8 <script src=legacy.js></script> 9 9 <script src=render.js></script> 10 10 <script src=runtime.js></script>
+417 -410
parse.js
··· 8 8 **/ 9 9 class Markup_12y2 { constructor() { 10 10 11 - // TokenType ๐Ÿท enum 12 - // BlockType ๐Ÿท enum 13 - // Text ๐Ÿท string ๐Ÿ“ from input text 14 - // ArgPattern ๐Ÿท RegExp 15 - // GroupNum ๐Ÿท number - regex capturing group num 16 - // RawArgs ๐Ÿท Array - array with .named field 17 - // Block ๐Ÿท Object - has .type .args .content 18 - // CurrentBlock ๐Ÿท Object - block + other fields 19 - 20 11 // all state is stored in these vars (and REGEX.lastIndex) 21 12 let current, brackets 22 13 ··· 26 17 // elements which can survive an eol (without a body) 27 18 const IS_BLOCK = {__proto__:null, code:1, divider:1, ROOT:1, heading:1, quote:1, table:1, table_cell:1, image:1, video:1, audio:1, spoiler:1, align:1, list:1, list_item:1, youtube:1, anchor:1} 28 19 29 - // RegExp 30 - // GroupNum -> TokenType 31 - // GroupNum -> ArgPattern 32 20 const MACROS = { 33 21 '{EOL}': "(?![^\\n])", 34 22 '{BOL}': "^", ··· 36 24 '{URL_CHARS}': "[-\\w/%&=#+~@$*'!?,.;:]*", 37 25 '{URL_FINAL}': "[-\\w/%&=#+~@$*']", 38 26 } 39 - const GROUPS = [], ARGTYPES = [] 27 + const GROUPS = [] 40 28 let regi = [] 41 - function PAT({raw}, ...groups) { 29 + const PAT=({raw}, ...groups)=>{ 42 30 regi.push( 43 31 raw.join("()") 44 32 .replace(/\\`/g, "`") 45 33 .replace(/[(](?![?)])/g, "(?:") 46 34 .replace(/[{][A-Z_]+[}]/g, match=>MACROS[match]) 47 35 ) 48 - for (let g of groups) { 49 - GROUPS.push(Object.keys(g)[0]) 50 - ARGTYPES.push(Object.values(g)[0]) 51 - } 36 + GROUPS.push(...groups) 52 37 } 53 38 54 - // ArgPattern 55 - const ARGS_NORMAL = // /[...]?{?/ 56 - /(?:\[([^\]\n]*)\])?({\n?)?/y 57 - 58 - const ARGS_WORD = // /[...]?{/ or /[...] ?<word>/ or / <word>/ 59 - /(?:\[([^\]\n]*)\]|(?=[ {]))({\n?| ?([^\s`^()+=\[\]{}\\|"';:,.<>/?!*]*))/y // todo: more complex rule for word parsing //TODO: does this set the body flag right? //(what did i mean by this?) 60 - const ARGS_LINE = // /[...]?{/ or /[...] 
?/ or / / 61 - /(?:\[([^\]\n]*)\]|(?=[ {]))(?:({\n?)| ?)/y // probably dont need this, we can strip space after { in all cases instead. 62 - const ARGS_HEADING = // /[...]?{/ or /[...] ?/ or / / 63 - /(?:\[([^\]\n]*)\]|(?=[ {]))(?:({\n?)| ?)/y 64 - 65 - // this is like args_heading kinda, except always counts as a line start. maybe backport this to args heading etc.? 66 - const ARGS_ANCHOR = // /[...]{?/ 67 - /\[([^\]\n]*)\]({\n?| ?|)/y 68 - 69 - const ARGS_BODYLESS = // /[...]?/ 70 - /(?:\[([^\]\n]*)\])?/y 71 - const ARGS_TABLE = // /[...]? */ 72 - /(?:\[([^\]\n]*)\])? */y 73 - 74 - const ARGS_CODE = // ... ``` 75 - /(?: *([-\w.+#$ ]+?) *(?![^\n]))?\n?([^]*?)(?:```|$)/y 76 - 77 - PAT`[\n]?[}]${{ BLOCK_END: 0}}` 78 - PAT`[\n]${{ NEWLINE: 0}}` 79 - PAT`{BOL}[#]{1,4}${{ HEADING: ARGS_HEADING}}` 80 - PAT`{BOL}[-]{3,}{EOL}${{ DIVIDER: 0}}` 81 - PAT`([*][*]|[_][_]|[~][~]|[/])${{ STYLE: true}}` 82 - PAT`[\\][a-z]+(?![a-zA-Z0-9])${{ TAG: true}}` 83 - PAT`[\\][{][\n]?${{ NULL_ENV: 0}}` 84 - PAT`[\\]{ANY}${{ ESCAPED: 0}}` 85 - PAT`{BOL}[>]${{ QUOTE: ARGS_HEADING}}` 86 - PAT`{BOL}[\`]{3}(?=[^\n\`]*?{EOL})${{ CODE_BLOCK: ARGS_CODE}}` 87 - PAT`[\`][^\`\n]*([\`]{2}[^\`\n]*)*[\`]?${{ INLINE_CODE: 0}}` 88 - PAT`([!]${{ EMBED: ARGS_BODYLESS}})?\b(https?://|sbs:){URL_CHARS}({URL_FINAL}|[(]{URL_CHARS}[)]({URL_CHARS}{URL_FINAL})?)${{ LINK: ARGS_NORMAL}}` 89 - PAT`{BOL} *[|]${{ TABLE_START: ARGS_TABLE}}` 90 - PAT` *[|]${{ TABLE_CELL: ARGS_TABLE}}` 91 - PAT`{BOL} *[-]${{ LIST_ITEM: ARGS_HEADING}}` 39 + PAT`[\n]?[}]${'BLOCK_END'}` 40 + PAT`[\n]${'NEWLINE'}` 41 + PAT`{BOL}[#]{1,4}(?=[\[{ ])${'HEADING'}` 42 + PAT`{BOL}[>](?=[\[{ ])${'QUOTE'}` 43 + PAT`{BOL}[-]{3,}{EOL}${'DIVIDER'}` 44 + PAT`([*][*]|[_][_]|[~][~]|[/])${'STYLE'}` 45 + PAT`[\\]((https?|sbs)${'ESCAPED'}|[a-z]+)(?![a-zA-Z0-9])${'TAG'}` 46 + PAT`[\\][{][\n]?${'NULL_ENV'}` 47 + PAT`[\\]{ANY}${'ESCAPED'}` 48 + PAT`{BOL}[\`]{3}(?=[^\n\`]*?{EOL})${'CODE_BLOCK'}` 49 + PAT`[\`][^\`\n]*([\`]{2}[^\`\n]*)*[\`]?${'INLINE_CODE'}` 50 + 
//PAT`([!]${'EMBED'})?\b(https?://|sbs:){URL_CHARS}{URL_FINAL}({URL_FINAL}|[(]{URL_CHARS}[)]({URL_CHARS}{URL_FINAL})?)${'LINK'}` 51 + PAT`([!]${'EMBED'})?\b(https?://|sbs:){URL_CHARS}{URL_FINAL}([(]{URL_CHARS}[)]({URL_CHARS}{URL_FINAL})?)?${'LINK'}` 52 + //PAT`([!]${'EMBED'})?\b(https?://|sbs:)({URL_CHARS}{URL_FINAL}([(]{URL_CHARS}[)])?)+${'LINK'}` 53 + PAT`{BOL} *[|]${'TABLE_START'}` 54 + PAT` *[|]${'TABLE_CELL'}` 55 + PAT`{BOL} *[-]${'LIST_ITEM'}` 92 56 93 57 const REGEX = new RegExp(regi.join("|"), 'g') 94 58 regi = null 95 59 96 60 //todo: org tables separators? 97 61 98 - // TokenType -> ArgRegex 99 - const TAGS = { 100 - __proto__:null, 101 - '\\sub': ARGS_WORD, 102 - '\\sup': ARGS_WORD, 103 - '\\b': ARGS_WORD, 104 - '\\i': ARGS_WORD, 105 - '\\u': ARGS_WORD, 106 - '\\s': ARGS_WORD, 107 - '\\quote': ARGS_LINE, 108 - '\\align': ARGS_LINE, 109 - '\\spoiler': ARGS_LINE, '\\h': ARGS_LINE, 110 - '\\ruby': ARGS_WORD, 111 - '\\key': ARGS_WORD, 112 - '\\a': ARGS_ANCHOR, 113 - '\\link': ARGS_NORMAL, // should use arg parse mode, i think? 114 - } 115 - 116 - // process a token 117 - // ๐Ÿ“ฅ _token_type ๐Ÿท TokenType ๐Ÿ“ 118 - // ๐Ÿ“ฅ token ๐Ÿท Text ๐Ÿ“ token text, including arguments 119 - // ๐Ÿ“ฅ rarys ๐Ÿท RawArgs ๐Ÿ“ raw arguments 120 - // ๐Ÿ“ฅ body ๐Ÿท Text ๐Ÿ“ argmatch[2] (varies) 121 - // ๐Ÿ“ฅ base_token ๐Ÿท Text ๐Ÿ“ token text, without arguments 122 - function PROCESS(_token_type, token, rargs, body, args_token) { 123 - switch (_token_type) { default: { 124 - throw new TypeError("unknown token type: "+_token_type) 125 - // error 126 - } break; case 'NEWLINE': { 127 - NEWLINE(true) 128 - } break; case 'HEADING': { 129 - let level = token.length 130 - let args = {level} 131 - let id = rargs[0] 132 - args.id = id ? id.replace(/\W+/g, "-") : null 133 - // todo: anchor name (and, can this be chosen automatically based on contents?) 
134 - OPEN('heading', args, body) 135 - } break; case 'DIVIDER': { 136 - BLOCK('divider') 137 - } break; case 'BLOCK_END': { 138 - if (brackets>0) { 139 - while (!current.body) 140 - CANCEL() 141 - if ('invalid'===current.type) { 142 - if ("\n}"==token) 143 - NEWLINE(false) // false since we already closed everything 144 - TEXT("}") 145 - } 146 - CLOSE() 147 - } else { 148 - // hack: 149 - if ("\n}"==token) 150 - NEWLINE(true) 151 - TEXT("}") 152 - } 153 - } break; case 'NULL_ENV': { 154 - OPEN('null_env', null, true) 155 - current.prev = current.parent.prev 156 - } break; case 'ESCAPED': { 157 - if ("\\\n"===token) 158 - NEWLINE(false) 159 - else if ("\\."===token) { // \. is a no-op 160 - // todo: close lists too 161 - //current.content.push("") 162 - current.prev = 'block' 163 - } else 164 - TEXT(token.substring(1)) 165 - } break; case 'QUOTE': { 166 - OPEN('quote', {cite: rargs[0]}, body) 167 - } break; case 'CODE_BLOCK': { 168 - let lang = rargs 169 - BLOCK('code', {text: body, lang}) 170 - } break; case 'INLINE_CODE': { 171 - BLOCK('icode', {text: token.replace(/`(`)?/g, "$1")}) 172 - } break; case 'EMBED': { 173 - let url = token.substring(1) // ehh better 174 - let [type, args] = process_embed(url, rargs) 175 - BLOCK(type, args) 176 - } break; case 'LINK': { 177 - let url = token 178 - let args = {url} 179 - if (body) { 180 - OPEN('link', args, body) 181 - } else { 182 - args.text = rargs[0] 183 - BLOCK('simple_link', args) 184 - } 185 - } break; case 'TABLE_START': { 186 - OPEN('table_row', token+args_token) // special OPEN call 187 - OPEN('table_cell', rargs, body) 188 - } break; case 'TABLE_CELL': { 189 - while (current.type!=='table_cell') 190 - CANCEL() 191 - CLOSE() // cell 192 - // we don't know whether these are row args or cell args, 193 - // so just pass the raw args directly, and parse them later. 
194 - OPEN('table_cell', rargs, body) 195 - } break; case 'INVALID_TAG': { 196 - if (body) 197 - OPEN('invalid', {text: token+args_token, reason: "invalid tag"}, body) 198 - else 199 - BLOCK('invalid', {text: token+args_token, reason: "invalid tag"}) 200 - } break; case 'LIST_ITEM': { 201 - let indent = token.indexOf("-") 202 - OPEN('list_item', {indent}, body) 203 62 204 - } break; case '\\sub': { 205 - OPEN('subscript', null, body) 206 - } break; case '\\sup': { 207 - OPEN('superscript', null, body) 208 - } break; case '\\b': { 209 - OPEN('bold', null, body) 210 - } break; case '\\i': { 211 - OPEN('italic', null, body) 212 - } break; case '\\u': { 213 - OPEN('underline', null, body) 214 - } break; case '\\s': { 215 - OPEN('strikethrough', null, body) 216 - } break; case '\\quote': { 217 - OPEN('quote', {cite: rargs[0]}, body) 218 - } break; case '\\align': { 219 - let a = rargs[0] 220 - if (!['left', 'right', 'center'].includes(a)) 221 - a = 'center' 222 - OPEN('align', {align: a}, body) 223 - } break; case '\\spoiler': case '\\h': { 224 - let label = arg0(rargs, "spoiler") 225 - OPEN('spoiler', {label}, body) 226 - } break; case '\\ruby': { 227 - let text = arg0(rargs, "true") 228 - OPEN('ruby', {text}, body) 229 - } break; case '\\key': { 230 - OPEN('key', null, body) 231 - } break; case '\\a': { 232 - let id = rargs[0] 233 - id = id ? id.replace(/\W+/g, "-") : null 234 - OPEN('anchor', {id}, body) 235 - //BLOCK('anchor', {id}) 236 - } break; case '\\link': { 237 - let args = {url: rargs[0]} 238 - if (body) { 239 - OPEN('link', args, body) 240 - } else { 241 - args.text = args.url 242 - BLOCK('simple_link', args) 243 - } 244 - } } 245 - } 246 63 247 - function arg0(rargs, def) { 248 - if (rargs.length<1) 249 - return def 250 - return rargs[0] 251 - } 252 - 253 - 254 - 255 - const null_args = [] 256 - null_args.named = Object.freeze({}) 257 - Object.freeze(null_args) 258 - // todo: do we even need named args? 
259 - function parse_args(arglist) { 260 - // note: checks undefined AND "" (\tag AND \tag[]) 261 - if (!arglist) 262 - return null_args 263 - let list = [], named = {} 264 - list.named = named 265 - for (let arg of arglist.split(";")) { 266 - let [, name, value] = /^(?:([^=]*)=)?(.*)$/.exec(arg) 267 - // value OR =value 268 - // (this is to allow values to contain =. ex: [=1=2] is "1=2") 269 - if (!name) 270 - list.push(value) 271 - else // name=value 272 - named[name] = value 273 - } 274 - return list 275 - } 276 64 // process an embed url: !https://example.com/image.png[alt=balls] 277 65 // returns [type: String, args: Object] 278 - function process_embed(url, rargs) { 66 + const process_embed=(url, rargs)=>{ 279 67 let type 280 68 let args = {url} 281 69 for (let arg of rargs) { ··· 296 84 args.alt = rargs.named.alt 297 85 // todo: improve this 298 86 if (!type) { 299 - //let u = new URL(url, "x-relative:/") 300 - //let ext = /[.]([a-z0-9A-Z]{3,4})(?!\w)[^.]*$/.exec(url) 301 87 if (/[.](mp3|ogg|wav|m4a)\b/i.test(url)) 302 88 type = 'audio' 303 89 else if (/[.](mp4|mkv|mov)\b/i.test(url)) ··· 311 97 type = 'image' 312 98 return [type, args] 313 99 } 314 - 315 - // start a new block 316 - function OPEN(type, args, body) { 317 - current = Object.seal({ 318 - type, args, content: [], 319 - body, parent: current, 320 - prev: 'all_newline', 321 - }) 322 - if (body) 323 - brackets++ 100 + const process_cell_args=(rargs)=>{ 101 + let args = {} 102 + for (let arg of rargs) { 103 + let m 104 + if ("*"===arg || "#"===arg) 105 + args.header = true 106 + else if (['red', 'orange', 'yellow', 'green', 'blue', 'purple', 'gray'].includes(arg)) 107 + args.color = arg 108 + else if (m = /^(\d*)x(\d*)$/.exec(arg)) { 109 + let [, w, h] = m 110 + if (+w > 1) args.colspan = +w 111 + if (+h > 1) args.rowspan = +h 112 + } 113 + } 114 + return args 324 115 } 116 + const process_row_args=(rargs)=>{ 117 + let args = {} 118 + for (let arg of rargs) { 119 + if ("*"===arg || "#"===arg) 120 + 
args.header = true 121 + } 122 + return args 123 + } 124 + 325 125 // move up 326 - function pop() { 126 + const pop=()=>{ 327 127 if (current.body) 328 128 brackets-- 329 129 let o = current ··· 331 131 return o 332 132 } 333 133 334 - function CANCEL() { 335 - if ('style'===current.type) { 336 - let o = pop() 337 - current.content.push(o.args, ...o.content) 338 - current.prev = o.prev 339 - return 340 - } 341 - if ('table_cell'===current.type) { 342 - if (current.content.length) { 343 - CLOSE() // table_cell 344 - current.args = {} 345 - } else { 346 - // cancelling an empty table cell means: 347 - // it's the end of the row, so discard the cell 348 - let o = pop() 349 - // if the ROW is empty (i.e. we just have a single | ) 350 - if (!current.content.length) { 351 - let o = pop() // discard the row 352 - TEXT(o.args) 353 - return 354 - // todo: maybe also cancel rows with 1 unclosed cell? 355 - // like `| abc` -> text 356 - } 357 - // transfer args to the row, and parse as table row args: 358 - let ret = current.args = {} 359 - for (let arg of o.args) { 360 - if ("*"===arg || "#"===arg) { 361 - ret.header = true 362 - } 363 - } 364 - } 365 - // fallthrough to close the table_row 366 - } 367 - CLOSE() 368 - } 369 - 370 - function get_last(block) { 134 + const get_last=(block)=>{ 371 135 return block.content[block.content.length-1] 372 136 } 373 137 374 - function CLOSE() { 138 + const CLOSE=(cancel)=>{ 375 139 let o = pop() 140 + let type = o.type 376 141 377 - if ('null_env'===o.type) { 142 + if ('style'===type && cancel) { 143 + current.content.push(o.args, ...o.content) 144 + current.prev = o.prev 145 + return 146 + } 147 + if ('null_env'===type) { 378 148 current.content.push(...o.content) 379 149 current.prev = o.prev 380 150 return 381 151 } 382 152 153 + // cancelling an empty table cell means: 154 + // it's the end of the row, so discard the cell 155 + if ('table_cell'===type && cancel && !o.content.length) { 156 + // if the ROW is empty (i.e. 
we just have a single | ) 157 + if (!current.content.length) { 158 + let o = pop() // discard the row 159 + TEXT(o.args) 160 + return 161 + // todo: maybe also cancel rows with 1 unclosed cell? 162 + // like `| abc` -> text 163 + } 164 + // transfer args to the row, and parse as table row args: 165 + current.args = process_row_args(o.args) 166 + // FALLTHROUGH (to close the row) 167 + o = pop() 168 + type = o.type 169 + } 170 + 383 171 if ('newline'===o.prev) 384 172 o.content.push("\n") 385 - let node = {type: o.type, args: o.args, content: o.content} 173 + 174 + let node = {type: type, args: o.args, content: o.content} 386 175 let dest = current 387 176 388 - // merge list_item with preceeding list 389 - if ('list_item'===o.type) { 177 + if ('list_item'===type) { 178 + // merge list_item with preceeding list 390 179 node.args = null 391 180 let indent = o.args.indent 392 181 while (1) { ··· 402 191 if (dest.args.indent == indent) 403 192 break 404 193 } 405 - } 406 - // merge table_row with preceeding table 407 - else if ('table_row'===o.type) { 194 + } else if ('table_row'===type) { 408 195 dest = get_last(current) 409 196 if (!dest || 'table'!==dest.type) { 410 197 dest = {type:'table', args:null, content:[]} 411 198 current.content.push(dest) 412 199 } 413 - } 414 - // table cell 415 - else if ('table_cell'===o.type) { 416 - let ret = node.args = {} 417 - for (let arg of o.args) { 418 - let m 419 - if ("*"===arg || "#"===arg) 420 - ret.header = true 421 - else if (['red', 'orange', 'yellow', 'green', 'blue', 'purple', 'gray'].includes(arg)) 422 - ret.color = arg 423 - else if (m = /^(\d*)x(\d*)$/.exec(arg)) { 424 - let [, w, h] = m 425 - if (+w > 1) ret.colspan = +w 426 - if (+h > 1) ret.rowspan = +h 427 - } 428 - } 429 - } else if ('style'===o.type) { 200 + } else if ('style'===type) { 430 201 node.type = { 431 202 __proto__:null, 432 203 '**': 'bold', '__': 'underline', ··· 435 206 node.args = null 436 207 } 437 208 209 + current.prev = type in IS_BLOCK ? 
'block' : o.prev 438 210 dest.content.push(node) 439 - current.prev = o.type in IS_BLOCK ? 'block' : o.prev 211 + 212 + if ('table_cell'===type) { 213 + node.args = process_cell_args(o.args) // hack? 214 + if (cancel) { 215 + // close the row 216 + current.args = {} 217 + CLOSE() 218 + } 219 + } 440 220 } 221 + 441 222 // push text 442 - function TEXT(text) { 223 + const TEXT=(text)=>{ 443 224 if (text!=="") { 444 225 current.content.push(text) // todo: merge with surrounding textnodes? 445 226 current.prev = 'text' 446 227 } 447 228 } 448 229 // push empty tag 449 - function BLOCK(type, args) { 230 + const BLOCK=(type, args)=>{ 450 231 current.content.push({type, args}) 451 232 current.prev = type in IS_BLOCK ? 'block' : 'text' 452 233 } 453 234 454 - function NEWLINE(real) { 235 + const NEWLINE=(real)=>{ 455 236 if (real) 456 237 while (!current.body && 'ROOT'!=current.type) 457 - CANCEL() 238 + CLOSE(true) 458 239 if ('block'!==current.prev) 459 240 current.content.push("\n") 460 241 if ('all_newline'!==current.prev) 461 242 current.prev = 'newline' 462 243 } 463 244 464 - function in_table() { 465 - for (let c=current; ; c=c.parent) { 466 - if ('table_cell'===c.type) 467 - return true 468 - if ('style'!==c.type) 469 - return false 245 + const null_args = [] 246 + null_args.named = Object.freeze({}) 247 + Object.freeze(null_args) 248 + const NO_ARGS = [] 249 + NO_ARGS.named = Object.freeze({}) 250 + Object.freeze(NO_ARGS) 251 + // todo: do we even need named args? 252 + const parse_args=(arglist)=>{ 253 + // note: checks undefined AND "" (\tag AND \tag[]) 254 + if (!arglist) 255 + return null_args 256 + let list = [], named = {} 257 + list.named = named 258 + for (let arg of arglist.split(";")) { 259 + let [, name, value] = /^(?:([^=]*)=)?(.*)$/.exec(arg) 260 + // value OR =value 261 + // (this is to allow values to contain =. 
ex: [=1=2] is "1=2") 262 + if (!name) 263 + list.push(value) 264 + else // name=value 265 + named[name] = value 470 266 } 471 - } 472 - // todo: this should check for body 473 - function find_style(token) { 474 - for (let c=current; 'style'===c.type; c=c.parent) 475 - if (c.args===token) 476 - return c 267 + return list 477 268 } 478 - function do_style(token_text, before, after) { 269 + 270 + const STYLE_START 271 + = /^[ \s.'"}{(> ][^ \s,'" ]/ 272 + const STYLE_CLOSE 273 + = /^[^ \s,'" ][-\s.,:;!?'"}{)<\\ ]/ 274 + 275 + const check_style=(token_text, before, after)=>{ 276 + // END 479 277 for (let c=current; 'style'===c.type; c=c.parent) 480 278 if (c.args===token_text) { 481 - if (!after || /[^\s,'"][-\s.,:;!?'")}{]/y.test(before+after)) 279 + if (STYLE_CLOSE.test(before+after)) 482 280 return c 483 - else 484 - break 281 + break 485 282 } 486 - 487 - if (!before || /[\s.({}'"][^\s,'"]/y.test(before+after)) 283 + // START 284 + if (STYLE_START.test(before+after)) 488 285 return true 489 286 } 490 287 491 - function parse(text) { 288 + let ARG_REGEX = /.*?(?=])/y 289 + let WORD_REGEX = /[^\s`^()+=\[\]{}\\|"';:,.<>/?!*]*/y 290 + let CODE_REGEX = /(?: *([-\w.+#$ ]+?) 
*(?![^\n]))?\n?([^]*?)(?:```|$)/y // ack 291 + 292 + const parse=(text)=>{ 492 293 let tree = {type: 'ROOT', content: [], prev: 'all_newline'} 493 294 current = tree 494 295 brackets = 0 495 296 496 - // MAIN LOOP // 497 - let prev = -1 498 - let last = REGEX.lastIndex = 0 297 + // these use REGEX, text 298 + const skip_spaces=()=>{ 299 + let pos = REGEX.lastIndex 300 + while (" "===text.charAt(pos)) 301 + pos++ 302 + REGEX.lastIndex = pos 303 + } 304 + const read_code=()=>{ 305 + let pos = REGEX.lastIndex 306 + CODE_REGEX.lastIndex = pos 307 + let [, lang, code] = CODE_REGEX.exec(text) 308 + REGEX.lastIndex = CODE_REGEX.lastIndex 309 + return [lang, code] 310 + } 311 + 312 + let rargs 313 + const read_args=()=>{ 314 + let pos = REGEX.lastIndex 315 + let next = text.charAt(pos) 316 + if ("["!==next) 317 + return rargs = NO_ARGS 318 + ARG_REGEX.lastIndex = pos+1 319 + let argstr = ARG_REGEX.exec(text) 320 + if (!argstr) 321 + return rargs = NO_ARGS 322 + REGEX.lastIndex = ARG_REGEX.lastIndex+1 323 + return rargs = parse_args(argstr[0]) 324 + } 325 + 326 + let body 327 + const read_body=(space=false)=>{ 328 + let pos = REGEX.lastIndex 329 + let next = text.charAt(pos) 330 + if ("{"===next) { 331 + if ("\n"===text.charAt(pos+1)) 332 + pos++ 333 + REGEX.lastIndex = pos+1 334 + return body = true 335 + } 336 + if (space) { 337 + if (" "===next) 338 + REGEX.lastIndex = pos+1 339 + else 340 + return body = false 341 + } 342 + return body = undefined 343 + } 344 + // start a new block 345 + const OPEN=(type, args=null)=>{ 346 + current = Object.seal({ 347 + type, args, content: [], 348 + body, parent: current, 349 + prev: 'all_newline', 350 + }) 351 + if (body) 352 + brackets++ 353 + } 354 + const word_maybe=()=>{ 355 + if (!body) { 356 + TEXT(read_word()) 357 + CLOSE() 358 + } 359 + } 360 + 499 361 let match 500 - function nevermind() { 362 + let last = REGEX.lastIndex = 0 363 + const NEVERMIND=()=>{ 501 364 REGEX.lastIndex = match.index+1 502 365 } 503 - function 
accept() { 366 + const ACCEPT=()=>{ 504 367 TEXT(text.substring(last, match.index)) 505 368 last = REGEX.lastIndex 506 369 } 507 - function start_line() { 508 - text = text.substring(last) 509 - last = REGEX.lastIndex = 0 510 - prev = -1 370 + const read_word=()=>{ 371 + let pos = REGEX.lastIndex 372 + WORD_REGEX.lastIndex = pos 373 + let word = WORD_REGEX.exec(text) 374 + if (!word) 375 + return null 376 + last = REGEX.lastIndex = WORD_REGEX.lastIndex 377 + return word[0] 511 378 } 379 + 380 + let prev = -1 512 381 main: while (match = REGEX.exec(text)) { 513 382 // check for infinite loops 514 383 if (match.index===prev) 515 384 throw ["INFINITE LOOP", match] 516 385 prev = match.index 517 386 // 2: figure out which token type was matched 518 - let token_text = match[0] 387 + let token = match[0] 519 388 let group_num = match.indexOf("", 1)-1 520 - 521 - // 3: get type + argument pattern 522 389 let type = GROUPS[group_num] 523 - let argregex 524 - // 4: special cases: 525 - if ('TAG'===type) { 526 - if (token_text in TAGS) { 527 - type = token_text 528 - argregex = TAGS[type] 390 + // 3: 391 + body = null 392 + rargs = null 393 + 394 + switch (type) { 395 + case 'TAG': { 396 + read_args() 397 + if (token==='\\link') { 398 + read_body(false) 529 399 } else { 530 - type = 'INVALID_TAG' 531 - argregex = ARGS_NORMAL 400 + read_body(true) 401 + if (NO_ARGS===rargs && false===body) { 402 + NEVERMIND() 403 + continue main 404 + } 532 405 } 533 - } else if ('STYLE'===type) { 534 - let c = do_style(token_text, text.charAt(match.index-1), text.charAt(REGEX.lastIndex)) 406 + ACCEPT() 407 + switch (token) { default: { 408 + let args = {text:text.substring(match.index, last), reason:"invalid tag"} 409 + if (body) 410 + OPEN('invalid', args) 411 + else 412 + BLOCK('invalid', args) 413 + } break; case '\\sub': { 414 + OPEN('subscript') 415 + word_maybe() 416 + } break; case '\\sup': { 417 + OPEN('superscript') 418 + word_maybe() 419 + } break; case '\\b': { 420 + OPEN('bold') 
421 + word_maybe() 422 + } break; case '\\i': { 423 + OPEN('italic') 424 + word_maybe() 425 + } break; case '\\u': { 426 + OPEN('underline') 427 + word_maybe() 428 + } break; case '\\s': { 429 + OPEN('strikethrough') 430 + word_maybe() 431 + } break; case '\\quote': { 432 + OPEN('quote', {cite: rargs[0]}) 433 + } break; case '\\align': { 434 + let a = rargs[0] 435 + if (!['left', 'right', 'center'].includes(a)) 436 + a = 'center' 437 + OPEN('align', {align: a}) 438 + } break; case '\\spoiler': case '\\h': { 439 + let [label="spoiler"] = rargs 440 + OPEN('spoiler', {label}) 441 + } break; case '\\ruby': { 442 + let [txt="true"] = rargs 443 + OPEN('ruby', {text: txt}) 444 + word_maybe() 445 + } break; case '\\key': { 446 + OPEN('key') 447 + word_maybe() 448 + } break; case '\\a': { 449 + let id = rargs[0] 450 + id = id ? id.replace(/\W+/g, "-") : null 451 + OPEN('anchor', {id}) 452 + body = true // ghhhh? 453 + //BLOCK('anchor', {id}) 454 + } break; case '\\link': { 455 + let args = {url: rargs[0]} 456 + if (body) { 457 + OPEN('link', args) 458 + } else { 459 + BLOCK('simple_link', args) 460 + } 461 + }} 462 + } break; case 'STYLE': { 463 + let c = check_style(token, text.charAt(match.index-1)||"\n", text.charAt(REGEX.lastIndex)||"\n") 535 464 if (!c) { // no 536 - nevermind() 537 - } else if (true===c) { // open new 538 - accept() 539 - OPEN('style', token_text) 465 + NEVERMIND() 466 + continue main 467 + } 468 + ACCEPT() 469 + if (true===c) { // open new 470 + OPEN('style', token) 540 471 } else { // close 541 - accept() 542 472 while (current != c) 543 - CANCEL() 473 + CLOSE(true) 544 474 CLOSE() 545 475 } 546 - continue main 547 - } else if ('TABLE_CELL'===type && !in_table()) { 548 - nevermind() 549 - continue main 550 - } else { 551 - argregex = ARGTYPES[group_num] 552 - } 553 - // 5: parse args and { 554 - if (!argregex) { 555 - accept() 556 - let body = 'NULL_ENV'===type //h 557 - PROCESS(type, token_text, null, body, token_text) 558 - if (body || 
'NEWLINE'===type) 559 - start_line() 560 - } else { 561 - // try to match arguments 562 - argregex.lastIndex = REGEX.lastIndex 563 - let argmatch = argregex.exec(text) 564 - if (null===argmatch) { 565 - nevermind() 476 + } break; case 'TABLE_CELL': { 477 + for (let c=current; ; c=c.parent) { 478 + if ('table_cell'===c.type) { 479 + read_args() 480 + skip_spaces() 481 + ACCEPT() 482 + while (current!==c) 483 + CLOSE(true) 484 + CLOSE() // cell 485 + // we don't know whether these are row args or cell args, 486 + // so just pass the raw args directly, and parse them later. 487 + OPEN('table_cell', rargs) 488 + break 489 + } 490 + if ('style'!==c.type) { 491 + NEVERMIND() 492 + continue main 493 + } 494 + } 495 + } break; case 'TABLE_START': { 496 + read_args() 497 + skip_spaces() 498 + ACCEPT() 499 + let args_token = text.substring(match.index, last) 500 + OPEN('table_row', args_token, false) // special OPEN call 501 + OPEN('table_cell', rargs) 502 + } break; case 'NEWLINE': { 503 + ACCEPT() 504 + NEWLINE(true) 505 + body = true // to trigger start_line 506 + } break; case 'HEADING': { 507 + read_args() 508 + read_body(true) 509 + if (NO_ARGS===rargs && false===body) { 510 + NEVERMIND() 566 511 continue main 567 512 } 568 - REGEX.lastIndex = argregex.lastIndex 569 - accept() 570 - 571 - let args = argmatch[1] 572 - let body = argmatch[2] // flag: args with {, or word args 573 - let word = argmatch[3] // contents: word args & code block 574 - if (ARGS_CODE!==argregex) { 575 - args = parse_args(args) 576 - body = body>="{" 577 - } 578 - 579 - PROCESS(type, token_text, args, body, argmatch[0]) 580 - // word tags 581 - if (undefined!==word) { 582 - // escaping in word args? idk. todo 583 - TEXT(word.replace(/\\([^])/g, "$1")) 513 + ACCEPT() 514 + let level = token.length 515 + let args = {level} 516 + let id = rargs[0] 517 + args.id = id ? id.replace(/\W+/g, "-") : null 518 + // todo: anchor name (and, can this be chosen automatically based on contents?) 
519 + OPEN('heading', args) 520 + } break; case 'DIVIDER': { 521 + ACCEPT() 522 + BLOCK('divider') 523 + } break; case 'BLOCK_END': { 524 + ACCEPT() 525 + if (brackets>0) { 526 + while (!current.body) 527 + CLOSE(true) 528 + if ('invalid'===current.type) { 529 + if ("\n}"==token) 530 + NEWLINE(false) // false since we already closed everything 531 + TEXT("}") 532 + } 584 533 CLOSE() 534 + } else { 535 + // hack: 536 + if ("\n}"==token) 537 + NEWLINE(true) 538 + TEXT("}") 585 539 } 586 - // tags with { body 587 - else if (argmatch[2]!==undefined && ARGS_CODE!==argregex) { 588 - start_line() 540 + } break; case 'NULL_ENV': { 541 + body = true 542 + ACCEPT() 543 + OPEN('null_env') 544 + current.prev = current.parent.prev 545 + } break; case 'ESCAPED': { 546 + ACCEPT() 547 + if ("\\\n"===token) 548 + NEWLINE(false) 549 + else if ("\\."===token) { // \. is a no-op 550 + // todo: close lists too 551 + //current.content.push("") 552 + current.prev = 'block' 553 + } else 554 + TEXT(token.substring(1)) 555 + } break; case 'QUOTE': { 556 + read_args() 557 + read_body(true) 558 + if (NO_ARGS===rargs && false===body) { 559 + NEVERMIND() 560 + continue main 561 + } 562 + ACCEPT() 563 + OPEN('quote', {cite: rargs[0]}) 564 + } break; case 'CODE_BLOCK': { 565 + let [lang, code] = read_code() 566 + ACCEPT() 567 + BLOCK('code', {text:code, lang}) 568 + } break; case 'INLINE_CODE': { 569 + ACCEPT() 570 + BLOCK('icode', {text: token.replace(/`(`)?/g, "$1")}) 571 + } break; case 'EMBED': { 572 + read_args() 573 + ACCEPT() 574 + let url = token.substring(1) // ehh better 575 + let [type, args] = process_embed(url, rargs) 576 + BLOCK(type, args) 577 + } break; case 'LINK': { 578 + read_args() 579 + read_body(false) 580 + ACCEPT() 581 + let url = token 582 + let args = {url} 583 + if (body) { 584 + OPEN('link', args) 585 + } else { 586 + args.text = rargs[0] 587 + BLOCK('simple_link', args) 588 + } 589 + } break; case 'LIST_ITEM': { 590 + read_args() 591 + read_body(true) 592 + if 
(NO_ARGS===rargs && false===body) { 593 + NEVERMIND() 594 + continue main 589 595 } 596 + ACCEPT() 597 + let indent = token.indexOf("-") 598 + OPEN('list_item', {indent}) 599 + } } 600 + 601 + if (body) { 602 + text = text.substring(last) 603 + last = REGEX.lastIndex = 0 604 + prev = -1 590 605 } 591 606 } // end of main loop 592 607 593 608 TEXT(text.substring(last)) // text after last token 594 609 595 610 while ('ROOT'!==current.type) 596 - CANCEL() 611 + CLOSE(true) 597 612 if ('newline'===current.prev) //todo: this is repeated 598 613 current.content.push("\n") 599 614 615 + current = null // my the memory leak! 616 + 600 617 return tree // technically we could return `current` here and get rid of `tree` entirely 601 618 } 602 619 603 - /** 604 - Parser function 605 - (closure method) 606 - @type {Parser} 607 - @kind function 608 - **/ 609 620 this.parse = parse 610 - /** 611 - @type {Object<string,Parser>} 612 - @property {Parser} 12y2 - same as .parse 613 - **/ 614 621 this.langs = {'12y2': parse} 615 622 616 623 // what if you want to write like, "{...}". well that's fine
-633
parse2.js
/*! ๐ฆ—๐–นญ
*/

12||+typeof await/2//2; export default
/**
	12y2 markup parser factory
	Each instance exposes `.parse(text)` (returns a tree whose root has
	type 'ROOT') and `.langs`, a map from language name to parser.
	All parser state lives in closure variables of the constructor, so
	`parse` is not reentrant.
	@implements Parser_Collection
**/
class Markup_12y2 { constructor() {
	
	// all state is stored in these vars (and REGEX.lastIndex)
	let current, brackets
	
	// About __proto__ in object literals:
	// https://tc39.es/ecma262/multipage/ecmascript-language-expressions.html#sec-runtime-semantics-propertydefinitionevaluation
	
	// elements which can survive an eol (without a body)
	const IS_BLOCK = {__proto__:null, code:1, divider:1, ROOT:1, heading:1, quote:1, table:1, table_cell:1, image:1, video:1, audio:1, spoiler:1, align:1, list:1, list_item:1, youtube:1, anchor:1}
	
	// placeholders which PAT expands inside pattern sources
	const MACROS = {
		'{EOL}': "(?![^\\n])",
		'{BOL}': "^",
		'{ANY}': "[^]",
		'{URL_CHARS}': "[-\\w/%&=#+~@$*'!?,.;:]*",
		'{URL_FINAL}': "[-\\w/%&=#+~@$*']",
	}
	// GROUPS[n] is the token type of the (n+1)th capturing group in REGEX
	const GROUPS = []
	// pattern sources, joined with "|" into REGEX below
	let regi = []
	/**
		Template tag: registers one alternative of the token regex.
		Each `${'TYPE'}` substitution becomes an empty capturing group `()`,
		and its value is recorded in GROUPS at the matching index.
		Every other `(` is turned into a non-capturing `(?:`, so that the
		marker groups are the only capturing groups in the final regex.
	**/
	const PAT=({raw}, ...groups)=>{
		regi.push(
			raw.join("()")
				.replace(/\\`/g, "`")
			// skip "(?" (already special), "()" (marker group), and "(]"
			// (a literal paren inside a character class, as in `[(]`)
				.replace(/[(](?![?)\]])/g, "(?:")
				.replace(/[{][A-Z_]+[}]/g, match=>MACROS[match])
		)
		GROUPS.push(...groups)
	}
	
	// note: order matters, earlier alternatives win at a given position
	PAT`[\n]?[}]${'BLOCK_END'}`
	PAT`[\n]${'NEWLINE'}`
	PAT`{BOL}[#]{1,4}(?=[\[{ ])${'HEADING'}`
	PAT`{BOL}[>](?=[\[{ ])${'QUOTE'}`
	PAT`{BOL}[-]{3,}{EOL}${'DIVIDER'}`
	PAT`([*][*]|[_][_]|[~][~]|[/])${'STYLE'}`
	PAT`[\\]((https?|sbs)${'ESCAPED'}|[a-z]+)(?![a-zA-Z0-9])${'TAG'}`
	PAT`[\\][{][\n]?${'NULL_ENV'}`
	// backslash followed by any character (the ESCAPED handler strips the backslash)
	PAT`[\\]{ANY}${'ESCAPED'}`
	PAT`{BOL}[\`]{3}(?=[^\n\`]*?{EOL})${'CODE_BLOCK'}`
	PAT`[\`][^\`\n]*([\`]{2}[^\`\n]*)*[\`]?${'INLINE_CODE'}`
	PAT`([!]${'EMBED'})?\b(https?://|sbs:){URL_CHARS}{URL_FINAL}([(]{URL_CHARS}[)]({URL_CHARS}{URL_FINAL})?)?${'LINK'}`
	PAT`{BOL} *[|]${'TABLE_START'}`
	PAT` *[|]${'TABLE_CELL'}`
	PAT`{BOL} *[-]${'LIST_ITEM'}`
	
	// no `m` flag: {BOL} only matches at index 0, which is why the main
	// loop re-slices `text` whenever a new line/body starts
	const REGEX = new RegExp(regi.join("|"), 'g')
	regi = null
	
	//todo: org tables separators?
	
	
	
	// process an embed url: !https://example.com/image.png[alt=balls]
	// returns [type: String, args: Object]
	const process_embed=(url, rargs)=>{
		let type
		let args = {url}
		for (let arg of rargs) {
			let m
			if ('video'===arg || 'audio'===arg || 'image'===arg) {
				type = arg
			} else if (m = /^(\d+)x(\d+)$/.exec(arg)) {
				args.width = +m[1]
				args.height = +m[2]
			} else {
				// any other positional arg is alt text (joined by ";")
				if (args.alt==undefined)
					args.alt = arg
				else
					args.alt += ";"+arg
			}
		}
		// a named alt= overrides positional alt text
		if (rargs.named.alt!=undefined)
			args.alt = rargs.named.alt
		// todo: improve this
		if (!type) {
			// guess the type from the url
			if (/[.](mp3|ogg|wav|m4a)\b/i.test(url))
				type = 'audio'
			else if (/[.](mp4|mkv|mov)\b/i.test(url))
				type = 'video'
			else if (/^https?:[/][/](?:www[.])?(?:youtube[.]com[/]watch[?]v=|youtu[.]be[/]|youtube[.]com[/]shorts[/])[\w-]{11}/.test(url)) {
				// todo: accept [start-end] args maybe?
				type = 'youtube'
			}
		}
		if (!type)
			type = 'image'
		return [type, args]
	}
	// convert raw args into table cell args (header, color, colspan, rowspan)
	const process_cell_args=(rargs)=>{
		let args = {}
		for (let arg of rargs) {
			let m
			if ("*"===arg || "#"===arg)
				args.header = true
			else if (['red', 'orange', 'yellow', 'green', 'blue', 'purple', 'gray'].includes(arg))
				args.color = arg
			else if (m = /^(\d*)x(\d*)$/.exec(arg)) {
				let [, w, h] = m
				if (+w > 1) args.colspan = +w
				if (+h > 1) args.rowspan = +h
			}
		}
		return args
	}
	// convert raw args into table row args (header only)
	const process_row_args=(rargs)=>{
		let args = {}
		for (let arg of rargs) {
			if ("*"===arg || "#"===arg)
				args.header = true
		}
		return args
	}
	
	// move up: make the parent current, and return the block we left
	const pop=()=>{
		if (current.body)
			brackets--
		let o = current
		current = current.parent
		return o
	}
	
	// last child of a block (undefined if it has no content)
	const get_last=(block)=>{
		return block.content[block.content.length-1]
	}
	
	/**
		Close the current block and insert the finished node into its parent.
		`cancel` is truthy when the block is closed implicitly (end of line,
		end of input, or an outer block closing) rather than by its own
		closing token.
	**/
	const CLOSE=(cancel)=>{
		let o = pop()
		let type = o.type
		
		// unclosed style: emit the marker and the contents as plain content
		if ('style'===type && cancel) {
			current.content.push(o.args, ...o.content)
			current.prev = o.prev
			return
		}
		// \{...} is transparent: splice its contents into the parent
		if ('null_env'===type) {
			current.content.push(...o.content)
			current.prev = o.prev
			return
		}
		
		// cancelling an empty table cell means:
		// it's the end of the row, so discard the cell
		if ('table_cell'===type && cancel && !o.content.length) {
			// if the ROW is empty (i.e. we just have a single | )
			if (!current.content.length) {
				let o = pop() // discard the row
				TEXT(o.args) // the row's args hold its source text
				return
				// todo: maybe also cancel rows with 1 unclosed cell?
				// like `| abc` -> text
			}
			// transfer args to the row, and parse as table row args:
			current.args = process_row_args(o.args)
			// FALLTHROUGH (to close the row)
			o = pop()
			type = o.type
		}
		
		if ('newline'===o.prev)
			o.content.push("\n")
		
		let node = {type: type, args: o.args, content: o.content}
		let dest = current
		
		if ('list_item'===type) {
			// merge list_item with preceeding list
			node.args = null
			let indent = o.args.indent
			while (1) {
				let curr = dest
				dest = get_last(curr)
				if (!dest || dest.type!=='list' || dest.args.indent>indent) {
					// create a new level in the list
					dest = {type:'list', args:{indent}, content:[]}
					// safe because there's no newline
					curr.content.push(dest)
					break
				}
				if (dest.args.indent == indent)
					break
			}
		} else if ('table_row'===type) {
			// append to the preceeding table, or start a new one
			dest = get_last(current)
			if (!dest || 'table'!==dest.type) {
				dest = {type:'table', args:null, content:[]}
				current.content.push(dest)
			}
		} else if ('style'===type) {
			// the style's args hold its marker token
			node.type = {
				__proto__:null,
				'**': 'bold', '__': 'underline',
				'~~': 'strikethrough', '/': 'italic',
			}[o.args]
			node.args = null
		}
		
		current.prev = type in IS_BLOCK ? 'block' : o.prev
		dest.content.push(node)
		
		if ('table_cell'===type) {
			node.args = process_cell_args(o.args) // hack?
			if (cancel) {
				// close the row
				current.args = {}
				CLOSE()
			}
		}
	}
	
	// push text
	const TEXT=(text)=>{
		if (text!=="") {
			current.content.push(text) // todo: merge with surrounding textnodes?
			current.prev = 'text'
		}
	}
	// push empty tag
	const BLOCK=(type, args)=>{
		current.content.push({type, args})
		current.prev = type in IS_BLOCK ? 'block' : 'text'
	}
	
	// handle a line break. `real`: an actual newline in the input, which
	// also closes every open block that has no {body}
	const NEWLINE=(real)=>{
		if (real)
			while (!current.body && 'ROOT'!=current.type)
				CLOSE(true)
		// a newline directly after a block element is not emitted
		if ('block'!==current.prev)
			current.content.push("\n")
		if ('all_newline'!==current.prev)
			current.prev = 'newline'
	}
	
	// Two distinct (but equally empty) sentinels:
	// null_args: an arg list was present but empty (`\tag[]`)
	// NO_ARGS: there was no arg list at all (`\tag`)
	// the parser compares against NO_ARGS by identity, so keep them separate
	const null_args = []
	null_args.named = Object.freeze({})
	Object.freeze(null_args)
	const NO_ARGS = []
	NO_ARGS.named = Object.freeze({})
	Object.freeze(NO_ARGS)
	// todo: do we even need named args?
	// split "a;b;name=value" into an array of positional args, with the
	// named ones stored on its .named field
	const parse_args=(arglist)=>{
		// note: checks undefined AND "" (\tag AND \tag[])
		if (!arglist)
			return null_args
		let list = [], named = {}
		list.named = named
		for (let arg of arglist.split(";")) {
			let [, name, value] = /^(?:([^=]*)=)?(.*)$/.exec(arg)
			// value OR =value
			// (this is to allow values to contain =. ex: [=1=2] is "1=2")
			if (!name)
				list.push(value)
			else // name=value
				named[name] = value
		}
		return list
	}
	
	// these are tested against (char before token)+(char after token)
	const STYLE_START
		= /^[ \s.'"}{(> ][^ \s,'" ]/
	const STYLE_CLOSE
		= /^[^ \s,'" ][-\s.,:;!?'"}{)<\\ ]/
	
	// decide what a style token does:
	// returns the open style block which it closes, or
	// `true` if it opens a new style, or undefined if it's just text
	const check_style=(token_text, before, after)=>{
		// END
		for (let c=current; 'style'===c.type; c=c.parent)
			if (c.args===token_text) {
				if (STYLE_CLOSE.test(before+after))
					return c
				break
			}
		// START
		if (STYLE_START.test(before+after))
			return true
	}
	
	// sticky regexes: .lastIndex is always set before use
	let ARG_REGEX = /.*?(?=])/y
	let WORD_REGEX = /[^\s`^()+=\[\]{}\\|"';:,.<>/?!*]*/y
	let CODE_REGEX = /(?: *([-\w.+#$ ]+?) *(?![^\n]))?\n?([^]*?)(?:```|$)/y // ack
	
	/**
		Parse 12y2 markup.
		@param {string} text - markup source
		@return {Object} root node: {type:'ROOT', content:[...], prev}
	**/
	const parse=(text)=>{
		let tree = {type: 'ROOT', content: [], prev: 'all_newline'}
		current = tree
		brackets = 0
		
		// these use REGEX, text
		// advance REGEX.lastIndex past any spaces
		const skip_spaces=()=>{
			let pos = REGEX.lastIndex
			while (" "===text.charAt(pos))
				pos++
			REGEX.lastIndex = pos
		}
		// read the rest of a code block (after the opening ```)
		const read_code=()=>{
			let pos = REGEX.lastIndex
			CODE_REGEX.lastIndex = pos
			let [, lang, code] = CODE_REGEX.exec(text)
			REGEX.lastIndex = CODE_REGEX.lastIndex
			return [lang, code]
		}
		
		// args of the current token (set by read_args)
		let rargs
		// read an optional [arg list] at the current position
		const read_args=()=>{
			let pos = REGEX.lastIndex
			let next = text.charAt(pos)
			if ("["!==next)
				return rargs = NO_ARGS
			ARG_REGEX.lastIndex = pos+1
			let argstr = ARG_REGEX.exec(text)
			if (!argstr) // no closing bracket on this line
				return rargs = NO_ARGS
			REGEX.lastIndex = ARG_REGEX.lastIndex+1 // skip the "]"
			return rargs = parse_args(argstr[0])
		}
		
		// body state of the current token (set by read_body):
		// true: has a {body}. false: `space` was required but missing.
		// undefined: no body
		let body
		const read_body=(space=false)=>{
			let pos = REGEX.lastIndex
			let next = text.charAt(pos)
			if ("{"===next) {
				// also eat one newline directly after the "{"
				if ("\n"===text.charAt(pos+1))
					pos++
				REGEX.lastIndex = pos+1
				return body = true
			}
			if (space) {
				if (" "===next)
					REGEX.lastIndex = pos+1
				else
					return body = false
			}
			return body = undefined
		}
		// start a new block
		const OPEN=(type, args=null)=>{
			current = Object.seal({
				type, args, content: [],
				body, parent: current,
				prev: 'all_newline',
			})
			if (body)
				brackets++
		}
		// for tags like `\b word`: without a {body}, the tag contains
		// just the next word, and is closed immediately
		const word_maybe=()=>{
			if (!body) {
				TEXT(read_word())
				CLOSE()
			}
		}
		
		let match
		// `last` is the end of the text which has been consumed so far
		let last = REGEX.lastIndex = 0
		// reject the current match, and resume 1 char after its start
		const NEVERMIND=()=>{
			REGEX.lastIndex = match.index+1
		}
		// accept the current match: flush the text before it
		const ACCEPT=()=>{
			TEXT(text.substring(last, match.index))
			last = REGEX.lastIndex
		}
		const read_word=()=>{
			let pos = REGEX.lastIndex
			WORD_REGEX.lastIndex = pos
			let word = WORD_REGEX.exec(text)
			if (!word)
				return null
			last = REGEX.lastIndex = WORD_REGEX.lastIndex
			return word[0]
		}
		
		let prev = -1
		main: while (match = REGEX.exec(text)) {
			// check for infinite loops
			if (match.index===prev)
				throw ["INFINITE LOOP", match]
			prev = match.index
			// 2: figure out which token type was matched
			let token = match[0]
			// the marker groups are empty, so the first "" in `match`
			// (after the full match) tells us which one participated
			let group_num = match.indexOf("", 1)-1
			let type = GROUPS[group_num]
			// 3:
			body = null
			rargs = null
			
			switch (type) {
			case 'TAG': {
				read_args()
				if (token==='\\link') {
					read_body(false)
				} else {
					read_body(true)
					// a tag needs args, a body, or a space after it
					if (NO_ARGS===rargs && false===body) {
						NEVERMIND()
						continue main
					}
				}
				ACCEPT()
				switch (token) { default: {
					let args = {text:text.substring(match.index, last), reason:"invalid tag"}
					if (body)
						OPEN('invalid', args)
					else
						BLOCK('invalid', args)
				} break; case '\\sub': {
					OPEN('subscript')
					word_maybe()
				} break; case '\\sup': {
					OPEN('superscript')
					word_maybe()
				} break; case '\\b': {
					OPEN('bold')
					word_maybe()
				} break; case '\\i': {
					OPEN('italic')
					word_maybe()
				} break; case '\\u': {
					OPEN('underline')
					word_maybe()
				} break; case '\\s': {
					OPEN('strikethrough')
					word_maybe()
				} break; case '\\quote': {
					OPEN('quote', {cite: rargs[0]})
				} break; case '\\align': {
					let a = rargs[0]
					if (!['left', 'right', 'center'].includes(a))
						a = 'center'
					OPEN('align', {align: a})
				} break; case '\\spoiler': case '\\h': {
					let [label="spoiler"] = rargs
					OPEN('spoiler', {label})
				} break; case '\\ruby': {
					let [txt="true"] = rargs
					OPEN('ruby', {text: txt})
					word_maybe()
				} break; case '\\key': {
					OPEN('key')
					word_maybe()
				} break; case '\\a': {
					let id = rargs[0]
					id = id ? id.replace(/\W+/g, "-") : null
					OPEN('anchor', {id})
					body = true // ghhhh?
					//BLOCK('anchor', {id})
				} break; case '\\link': {
					let args = {url: rargs[0]}
					if (body) {
						OPEN('link', args)
					} else {
						BLOCK('simple_link', args)
					}
				}}
			} break; case 'STYLE': {
				let c = check_style(token, text.charAt(match.index-1)||"\n", text.charAt(REGEX.lastIndex)||"\n")
				if (!c) { // no
					NEVERMIND()
					continue main
				}
				ACCEPT()
				if (true===c) { // open new
					OPEN('style', token)
				} else { // close
					// cancel the unclosed styles nested inside it
					while (current != c)
						CLOSE(true)
					CLOSE()
				}
			} break; case 'TABLE_CELL': {
				// only valid inside a table cell (possibly within open styles)
				for (let c=current; ; c=c.parent) {
					if ('table_cell'===c.type) {
						read_args()
						skip_spaces()
						ACCEPT()
						while (current!==c)
							CLOSE(true)
						CLOSE() // cell
						// we don't know whether these are row args or cell args,
						// so just pass the raw args directly, and parse them later.
						OPEN('table_cell', rargs)
						break
					}
					if ('style'!==c.type) {
						NEVERMIND()
						continue main
					}
				}
			} break; case 'TABLE_START': {
				read_args()
				skip_spaces()
				ACCEPT()
				// the row keeps its source text as args, so it can be
				// turned back into text if the row gets discarded
				let args_token = text.substring(match.index, last)
				OPEN('table_row', args_token)
				OPEN('table_cell', rargs)
			} break; case 'NEWLINE': {
				ACCEPT()
				NEWLINE(true)
				body = true // to trigger start_line
			} break; case 'HEADING': {
				read_args()
				read_body(true)
				if (NO_ARGS===rargs && false===body) {
					NEVERMIND()
					continue main
				}
				ACCEPT()
				let level = token.length
				let args = {level}
				let id = rargs[0]
				args.id = id ? id.replace(/\W+/g, "-") : null
				// todo: anchor name (and, can this be chosen automatically based on contents?)
				OPEN('heading', args)
			} break; case 'DIVIDER': {
				ACCEPT()
				BLOCK('divider')
			} break; case 'BLOCK_END': {
				ACCEPT()
				if (brackets>0) {
					// close everything up to the nearest block with a body
					while (!current.body)
						CLOSE(true)
					if ('invalid'===current.type) {
						if ("\n}"==token)
							NEWLINE(false) // false since we already closed everything
						TEXT("}")
					}
					CLOSE()
				} else {
					// nothing to close: the "}" is just text
					// hack:
					if ("\n}"==token)
						NEWLINE(true)
					TEXT("}")
				}
			} break; case 'NULL_ENV': {
				body = true
				ACCEPT()
				OPEN('null_env')
				current.prev = current.parent.prev
			} break; case 'ESCAPED': {
				ACCEPT()
				if ("\\\n"===token)
					NEWLINE(false)
				else if ("\\."===token) { // \. is a no-op
					// todo: close lists too
					//current.content.push("")
					current.prev = 'block'
				} else
					TEXT(token.substring(1))
			} break; case 'QUOTE': {
				read_args()
				read_body(true)
				if (NO_ARGS===rargs && false===body) {
					NEVERMIND()
					continue main
				}
				ACCEPT()
				OPEN('quote', {cite: rargs[0]})
			} break; case 'CODE_BLOCK': {
				let [lang, code] = read_code()
				ACCEPT()
				BLOCK('code', {text:code, lang})
			} break; case 'INLINE_CODE': {
				ACCEPT()
				// strip the delimiters, and turn `` into `
				BLOCK('icode', {text: token.replace(/`(`)?/g, "$1")})
			} break; case 'EMBED': {
				read_args()
				ACCEPT()
				let url = token.substring(1) // ehh better
				let [type, args] = process_embed(url, rargs)
				BLOCK(type, args)
			} break; case 'LINK': {
				read_args()
				read_body(false)
				ACCEPT()
				let url = token
				let args = {url}
				if (body) {
					OPEN('link', args)
				} else {
					args.text = rargs[0]
					BLOCK('simple_link', args)
				}
			} break; case 'LIST_ITEM': {
				read_args()
				read_body(true)
				if (NO_ARGS===rargs && false===body) {
					NEVERMIND()
					continue main
				}
				ACCEPT()
				let indent = token.indexOf("-")
				OPEN('list_item', {indent})
			} }
			
			// a new line or body started: drop the consumed text, so that
			// {BOL} (which is just "^") can match at the new position
			if (body) {
				text = text.substring(last)
				last = REGEX.lastIndex = 0
				prev = -1
			}
		} // end of main loop
		
		TEXT(text.substring(last)) // text after last token
		
		while ('ROOT'!==current.type)
			CLOSE(true)
		if ('newline'===current.prev) //todo: this is repeated
			current.content.push("\n")
		
		current = null // don't keep the finished tree alive through the closure
		
		return tree // technically we could return `current` here and get rid of `tree` entirely
	}
	
	this.parse = parse
	this.langs = {'12y2': parse}
	
	// what if you want to write like, "{...}". well that's fine
	// BUT if you are inside a tag, the } will close it.
	// maybe closing tags should need some kind of special syntax?
	// \tag{ ... \} >{...\} idk..
	// or match paired {}s :
	// \tag{ ... {heck} ... } <- closes here
	
	// todo: after parsing a block element: eat the next newline directly
} }

if ('object'==typeof module && module) module.exports = Markup_12y2
+3 -3
testing/auto.html
··· 1 1 <!doctype html><html lang=en-QS><meta charset=utf-8><meta name=viewport content="width=device-width, height=device-height, initial-scale=1" id=$meta_viewport> 2 2 <title>Tests 2</title> 3 3 4 - <script src=../parse2.js></script> 4 + <script src=../parse.js></script> 5 5 <script src=../legacy.js></script> 6 6 <script src=parse-ref.js></script> 7 7 <script src=legacy-ref.js></script> ··· 93 93 console.log("got "+got+" items ("+nw+" new)") 94 94 } 95 95 96 - let lang = "12y" 96 + let lang = "12y2" 97 97 98 98 load_data([ 99 99 { ··· 105 105 type:'message', 106 106 fields:'text,values,id,createDate,contentId', 107 107 query:`!valuelike({{m}},{{"${lang}"}})`, 108 - order:'id_desc', 108 + order:'id', 109 109 }, 110 110 ]).then(async (lmm)=>{ 111 111 collect(lmm.content, false, lang)
+1 -1
testing/index.html
··· 1 1 <!doctype html><html lang=en-QS><meta charset=utf-8> 2 2 <title>Markup2 Tests</title> 3 3 4 - <script src=../parse2.js></script> 4 + <script src=../parse.js></script> 5 5 <script src=../langs.js></script> 6 6 7 7 <script src=test.js></script>