this repo has no description
1/*! 𝦗𖹭
2*/
3"use strict"
412||+typeof await/2//2; export default
5/**
6 12y2 markup parser factory
7 @implements Parser_Collection
8**/
9class Markup_12y2 { constructor() {
10
// Named fragments that can be written as `{NAME}` inside the token
// templates below; they are substituted textually into the regex source.
const MACROS = {
	'{EOL}': "(?![^\\n])",
	'{BOL}': "^",
	'{ANY}': "[^]",
	'{URL_CHARS}': "[-\\w/%&=#+~@$*'!?,.;:]*",
	'{URL_FINAL}': "[-\\w/%&=#+~@$*']",
}
// GROUPS[i] is the token name attached to capture group number i+1 of REGEX.
const GROUPS = []
// Regex source of every token pattern, in priority order.
let regi = []
/**
	Tokenizer regex, assembled by chaining tagged templates.
	Each template is one token pattern. Every `${'NAME'}` interpolation
	becomes an empty capture group `()` and records NAME in GROUPS, so
	after a match the first capture group equal to "" identifies the token.
	The final empty call `()` joins all patterns with `|` and returns the
	global RegExp, which is what REGEX ends up holding.
	@param {TemplateStringsArray} [tem] - template strings (raw text is used); omitted on the final call
	@param {...string} groups - token names, one per interpolation
	@returns {Function|RegExp} the tag function itself (to keep chaining), or the finished RegExp
*/
const REGEX = function self(tem, ...groups) {
	if (!tem)
		return new RegExp(regi.join("|"), 'g')
	regi.push(
		tem.raw.join("()")
			// \` in a template is only there to escape the backtick
			.replace(/\\`/g, "`")
			// Plain groups written in a pattern become non-capturing, so that
			// only the empty marker groups capture. `(?...` and `()` are left
			// alone, and so is a `(` right after `[`: that one is a literal
			// paren inside a character class such as `[(]`, and rewriting it
			// would silently add `?` and `:` to the class.
			.replace(/(?<!\[)[(](?![?)])/g, "(?:")
			.replace(/[{][A-Z_]+[}]/g, match=>MACROS[match])
	)
	GROUPS.push(...groups)
	return self
}
`[\n]?[}]${'BLOCK_END'}`
`[\n]${'NEWLINE'}`
`{BOL}[#]{1,4}(?=[\[{ ])${'HEADING'}`
`{BOL}[>](?=[\[{ ])${'QUOTE'}`
`{BOL}[-]{3,}{EOL}${'DIVIDER'}`
`([*][*]|[_][_]|[~][~]|[/])${'STYLE'}`
`[\\]((https?|sbs)${'ESCAPED'}|[a-z]+)(?![a-zA-Z0-9])${'TAG'}`
`[\\][{][\n]?${'NULL_ENV'}`
`[\\]{ANY}${'ESCAPED'}`
`{BOL}[\`]{3}(?!.*?[\`])${'CODE_BLOCK'}`
`[\`][^\`\n]*([\`]{2}[^\`\n]*)*[\`]?${'INLINE_CODE'}`
`([!]${'EMBED'})?\b(https?://|sbs:){URL_CHARS}{URL_FINAL}([(]{URL_CHARS}[)]({URL_CHARS}{URL_FINAL})?)?${'LINK'}`
`{BOL}[|][-][-+]*[-][|]{EOL}${'TABLE_DIVIDER'}` // `{BOL}[|][|][|]{EOL}${'TABLE_DIVIDER'}`
`{BOL} *[|]${'TABLE_START'}`
` *[|][|]?${'TABLE_CELL'}`
`{BOL} *[-]${'LIST_ITEM'}`
()
49
50 //todo: org tables separators?
51 // what if we make them enable an ascii art table parsing mode
52 // like
53 // | heck | 123 |
54 // |------+------|
55 // | line1 | aaa |
56 // | line2 | bbb |
57 // creates 2 cells, with 2 lines each, rather than 2 rows.
58 // i.e: each added row will just append its contents to the cells
59 // of the previous row.
60 // maybe this should be an arg instead? on a row, to merge it with prev or etc..
61
62
// Mutable parser state shared by the helpers below
// (the only other piece of state is REGEX.lastIndex).
let current
let brackets

// Node type -> how the node affects the text that follows it:
// 'block' for block-level elements, 'text' for inline-block elements.
// Types that are not listed are looked up as undefined.
// `__proto__: null` inside an object literal sets a null prototype, see
// https://tc39.es/ecma262/multipage/ecmascript-language-expressions.html#sec-runtime-semantics-propertydefinitionevaluation
const IS_BLOCK = {
	__proto__: null,
	code: 'block',
	divider: 'block',
	ROOT: 'block',
	heading: 'block',
	quote: 'block',
	table: 'block',
	table_cell: 'block',
	image: 'block',
	video: 'block',
	audio: 'block',
	spoiler: 'block',
	align: 'block',
	list: 'block',
	list_item: 'block',
	youtube: 'block',
	anchor: 'block',
	table_divider: 'block',
	ruby: 'text',
	key: 'text',
}
70
71
72 // argument processing //
73
// The shared "no arguments" value: a frozen empty list whose `.named`
// is a frozen empty object. It has the same shape as a parse_args result.
const NO_ARGS = Object.freeze(Object.assign([], {named: Object.freeze({})}))
77 // todo: do we even need named args?
/**
	Split the text found between `[` and `]` into arguments.
	Arguments are separated by ";". `name=value` becomes a named
	argument; a bare `value` or `=value` becomes a positional one
	(the `=value` form lets a positional value contain "=").
	@param {string} arglist - the raw argument text, without the brackets
	@returns {Array<string>} positional values, with the named ones in `.named`
*/
const parse_args=(arglist)=>{
	const positional = []
	const keyed = {}
	positional.named = keyed
	for (const piece of arglist.split(";")) {
		const found = /^(?:([-\w]*)=)?(.*)$/.exec(piece)
		const key = found[1]
		const val = found[2]
		if (key)
			keyed[key] = val // name=value
		else
			positional.push(val) // value OR =value
	}
	return positional
}
92
/**
	Work out what an embed such as
	`!https://example.com/image.png[alt=balls]` should become.
	The type comes from an explicit 'video' / 'audio' / 'image' argument
	if there is one; otherwise it is guessed from the url, and 'image'
	is the fallback.
	@param {string} url - the url being embedded
	@param {Array<string>} rargs - positional arguments, with named ones in `rargs.named`
	@returns {[string, Object]} `[type, args]` where args has `url` and optionally `width`, `height`, `alt`
*/
const process_embed=(url, rargs)=>{
	let type
	let args = {url}
	for (let arg of rargs) {
		let m
		if ('video'===arg || 'audio'===arg || 'image'===arg) {
			type = arg
		} else if (m = /^(\d+)x(\d+)$/.exec(arg)) {
			args.width = +m[1]
			args.height = +m[2]
		} else {
			// anything else is alt text; pieces that got split on ";"
			// are joined back together
			if (args.alt==undefined)
				args.alt = arg
			else
				args.alt += ";"+arg
		}
	}
	// a named alt= wins over positional alt text
	if (rargs.named.alt!=undefined)
		args.alt = rargs.named.alt
	// todo: improve this
	if (!type) {
		if (/[.](mp3|ogg|wav|m4a|flac|aac|oga|opus|wma)\b/i.test(url))
			type = 'audio'
		else if (/[.](mp4|mkv|mov|webm|avi|flv|m4v|mpeg|mpg|ogv|ogm|ogx|wmv|xvid)\b/i.test(url))
			type = 'video'
		// the dots are written as [.] so that only the real hostnames match
		else if (/^https?:[/][/](?:www[.]|music[.])?(?:youtube[.]com[/]watch[?]v=|youtu[.]be[/]|youtube[.]com[/]shorts[/])[\w-]{11}/.test(url)) {
			// todo: accept [start-end] args maybe?
			type = 'youtube'
		}
	}
	if (!type)
		type = 'image'
	return [type, args]
}
/**
	Test whether an argument is one of the recognised color names.
	@param {*} arg - the argument to test
	@returns {boolean} true for red, orange, yellow, green, blue, purple and gray
*/
const is_color=(arg)=>{
	switch (arg) {
	case 'red':
	case 'orange':
	case 'yellow':
	case 'green':
	case 'blue':
	case 'purple':
	case 'gray':
		return true
	default:
		return false
	}
}
/**
	Turn the raw arguments of a table cell into cell properties.
	Recognised arguments: "*" or "#" (header), "-div" (div),
	a color name (color), and "WxH" (colspan / rowspan, each only
	when greater than 1; either number may be left out).
	Unrecognised arguments are ignored.
	@param {Iterable<string>} rargs - the cell's positional arguments
	@returns {Object} the properties that were set
*/
const process_cell_args=(rargs)=>{
	const cell = {}
	for (const arg of rargs) {
		if ("*"===arg || "#"===arg) {
			cell.header = true
			continue
		}
		if ("-div"===arg) {
			cell.div = true
			continue
		}
		if (is_color(arg)) {
			cell.color = arg
			continue
		}
		const span = /^(\d*)x(\d*)$/.exec(arg)
		if (!span)
			continue
		const cols = +span[1]
		const rows = +span[2]
		if (cols > 1)
			cell.colspan = cols
		if (rows > 1)
			cell.rowspan = rows
	}
	return cell
}
/**
	Turn the raw arguments of a table row into row properties.
	Only "*" and "#" are recognised; either marks the row as a header.
	@param {Iterable<string>} rargs - the row's positional arguments
	@returns {Object} `{header: true}` or an empty object
*/
const process_row_args=(rargs)=>{
	const row = {}
	for (const arg of rargs) {
		switch (arg) {
		case "*":
		case "#":
			row.header = true
		}
	}
	return row
}
158
159 // tree operations //
160
/**
	Leave the current node: make its parent current again.
	`brackets` is decremented when the node that is left has a body.
	@returns {Object} the node that was current before the call
*/
const pop=()=>{
	const left = current
	if (left.body)
		brackets -= 1
	current = left.parent
	return left
}
168
/**
	@param {Object} block - a node with a `content` array
	@returns {*} the last item of `block.content`, or undefined if it is empty
*/
const get_last=(block)=>{
	const {content} = block
	const final = content.length-1
	return content[final]
}
172
/**
	Create a node and append it to `dest.content`.
	@param {Object} dest - the node that receives the new child
	@param {string} type - node type
	@param {*} args - node arguments
	@param {Array} content - the new node's children
	@returns {Object} the node that was created, `{type, args, content}`
*/
const push=(dest, type, args, content)=>{
	const created = {type, args, content}
	const siblings = dest.content
	siblings.push(created)
	return created
}
178
/**
	Append plain text to the current node.
	Leading spaces are dropped when the previous item was a block.
	Empty text is not added and leaves `prev` untouched.
	@param {string} text - the text to add
*/
const TEXT=(text)=>{
	const shown = 'block'===current.prev ? text.replace(/^ +/, "") : text
	if (""===shown)
		return
	current.content.push(shown) // todo: merge with surrounding textnodes?
	current.prev = 'text'
}
188
/**
	Close the current node: pop it, then attach what it produced to
	its parent (which is `current` again after the pop). How it is
	attached depends on the node type, see the cases below.
	@param {boolean} [cancel] - truthy when the node is closed implicitly
		instead of by its own closing token; several types then fall back
		to plain text or are merged differently
*/
const CLOSE=(cancel)=>{
	let o = pop()
	let type = o.type
	
	//if ('newline'===o.prev)
	//	o.content.push("\n")
	
	switch (type) { default: {
		// ordinary node: becomes a child of the parent as-is
		push(current, type, o.args, o.content)
	} break; case 'style': {
		if (cancel) {
			// never closed: put the marker back as text, followed by
			// whatever was parsed inside it
			TEXT(o.args)
			current.content.push(...o.content)
		} else {
			// for style nodes, args is the marker token itself
			type = {
				__proto__:null,
				'**': 'bold', '__': 'underline',
				'~~': 'strikethrough', '/': 'italic',
			}[o.args]
			push(current, type, null, o.content)
		}
	} break; case 'null_env': {
		// no node of its own: the contents go straight into the parent
		current.content.push(...o.content)
	} break; case 'table_divider': {
		// the divider produces no node, it only flags the table above it
		let above = get_last(current)
		if (above && 'table'===above.type) {
			above.args = {divider:true}
		}
	} break; case 'table_cell': {
		// push cell if not empty
		if (!cancel || o.content.length) {
			push(current, type, process_cell_args(o.args), o.content)
			current.prev = 'block'
		}
		// cancelled = next row
		if (cancel) {
			// empty cell -> parse arguments as row arguments
			if (!o.content.length) {
				// exception: empty row -> cancel table
				if (!current.content.length) {
					// pop the row too, and emit its stored source text
					// (the row's args) as plain text
					let o = pop()
					TEXT(o.args)
					return
					// todo: maybe also cancel rows with 1 unclosed cell?
					// like `| abc` -> text
				}
				current.args = process_row_args(o.args)
			} else
				current.args = {}
			// the row is current now; close it as well
			CLOSE(true)
			return
		}
	} break; case 'list_item': {
		// merge list_item with preceding list
		let dest = current
		let indent = o.args.indent
		// walk down through the last child of each level, looking for a
		// list with the same indent as this item
		do {
			let curr = dest
			dest = get_last(curr)
			if (!dest || 'list'!==dest.type || dest.args.indent>indent) {
				// create a new level in the list
				dest = push(curr, 'list', {indent, style:o.args.kind}, [])
				break
			}
		} while (dest.args.indent != indent)
		push(dest, type, null, o.content)
	} break; case 'table_row': {
		// rows are collected into the table that directly precedes them;
		// a new table is started if there is none
		let dest = get_last(current)
		if (!dest || 'table'!==dest.type) {
			dest = push(current, 'table', null, [])
		} else {
			// a divider flagged the table: move the flag onto this row
			if (dest.args && dest.args.divider) {
				delete dest.args.divider
				o.args.divider = true
			}
		}
		push(dest, type, o.args, o.content)
	} }
	
	// `type` may have been replaced above (style -> bold etc.);
	// types that are not in IS_BLOCK keep the closed node's own `prev`
	current.prev = IS_BLOCK[type] || o.prev
}
270
/**
	Append a node without content to the current node.
	`prev` becomes the type's IS_BLOCK entry, or 'text' if it has none.
	@param {string} type - node type
	@param {*} [args] - node arguments
*/
const BLOCK=(type, args)=>{
	const tag = {type, args}
	current.content.push(tag)
	const kind = IS_BLOCK[type]
	current.prev = kind ? kind : 'text'
}
276
/**
	Handle a line break.
	A "\n" is added to the current node unless the previous item was a
	block, and `prev` becomes 'newline' unless it was 'all_newline'.
	@param {boolean} real - when truthy, first cancel every open node up to
		the nearest one that has a body (or up to ROOT)
*/
const NEWLINE=(real)=>{
	if (real) {
		for (;;) {
			if (current.body || 'ROOT'==current.type)
				break
			CLOSE(true)
		}
	}
	const before = current.prev
	if ('block'!==before)
		current.content.push("\n")
	if ('all_newline'!==before)
		current.prev = 'newline'
}
286
287
288 // parsing //
289
// The four patterns below are tested against a 2-character string:
// the character before a style marker followed by the character after it.
// _START decides whether the marker may open a style,
// _END whether it may close one.
const STYLE_START
	= /^[\s,][^\s,]|^['"}{(>|][^\s,'"]/
const STYLE_END
	= /^[^\s,][-\s.,:;!?'"}{)<\\|]/
// The italic marker "/" has its own variants; their character classes
// differ from the ones above in how "/", "<" and ">" are treated.
const ITALIC_START
	= /^[\s,][^\s,/]|^['"}{(|][^\s,'"/<]/
const ITALIC_END
	= /^[^\s,/>][-\s.,:;!?'"}{)\\|]/
// wait, shouldn't \./heck/\. be allowed though? but that wouldn't work since `.` isn't allowed before..

/**
	Look for an open style node with the given marker, going up from the
	current node for as long as the nodes are style nodes.
	@param {string} token - the style marker
	@returns {Object|undefined} the matching node, if any
*/
const find_style=(token)=>{
	let node = current
	while ('style'===node.type) {
		if (token===node.args)
			return node
		node = node.parent
	}
}

/**
	Decide what a style marker does at its position.
	@param {string} token - the style marker
	@param {string} before - the character before the marker
	@param {string} after - the character after the marker
	@returns {Object|true|undefined} the open style node to close,
		`true` to open a new style, or undefined if the marker is just text
*/
const check_style=(token, before, after)=>{
	const italic = "/"===token
	const around = before+after
	const open = find_style(token)
	if (open) {
		const end_rule = italic ? ITALIC_END : STYLE_END
		if (end_rule.test(around))
			return open
	}
	const start_rule = italic ? ITALIC_START : STYLE_START
	if (start_rule.test(around))
		return true
}
// Sticky helper patterns: each is matched at an explicitly set lastIndex.
// argument text, up to (not including) the next "]" on the same line
const ARG_REGEX = /.*?(?=])/y
// a run of characters that are neither whitespace nor the listed punctuation
const WORD_REGEX = /[^\s`^()+=\[\]{}\\|"';:,.<>/?!*]*/y
// rest of a code block: optional language name on the first line (group 1),
// then the code (group 2), up to the closing ``` or the end of the text
const CODE_REGEX = /(?: *([-\w.+#$ ]+?) *(?![^\n]))?\n?([^]*?)(?:\n?```|$)/y // ack
317
/**
	Parse 12y2 markup into a tree.
	The text is scanned with REGEX; for every token that is accepted,
	the text before it is added as plain text and the token opens,
	closes or adds a node. Nodes that are still open at the end are
	cancelled.
	@param {string} text - the markup source
	@returns {Object} the root node, `{type: 'ROOT', content: [...]}`
		(it also keeps the internal `prev` field)
	@throws {Array} `["INFINITE LOOP", match]` if the same position is
		matched twice in a row
*/
const parse=(text)=>{
	let tree = {type: 'ROOT', content: [], prev: 'all_newline'}
	current = tree
	brackets = 0
	
	// these use REGEX, text
	// move the scan position past any spaces
	const skip_spaces=()=>{
		let pos = REGEX.lastIndex
		while (" "===text.charAt(pos))
			pos++
		REGEX.lastIndex = pos
	}
	// read the rest of a code block, starting at the scan position;
	// moves the scan position to the end of what CODE_REGEX matched
	const read_code=()=>{
		let pos = REGEX.lastIndex
		CODE_REGEX.lastIndex = pos
		let [, lang, code] = CODE_REGEX.exec(text)
		REGEX.lastIndex = CODE_REGEX.lastIndex
		return [lang, code]
	}
	
	// arguments of the token being processed (set by read_args)
	let rargs
	// read `[...]` at the scan position, if present.
	// without a "[" or without a closing "]", the result is NO_ARGS
	// and the scan position does not move.
	const read_args=()=>{
		let pos = REGEX.lastIndex
		let next = text.charAt(pos)
		if ("["!==next)
			return rargs = NO_ARGS
		ARG_REGEX.lastIndex = pos+1
		let argstr = ARG_REGEX.exec(text)
		if (!argstr)
			return rargs = NO_ARGS
		// +1 to step over the closing "]"
		REGEX.lastIndex = ARG_REGEX.lastIndex+1
		return rargs = parse_args(argstr[0])
	}
	
	// whether the token being processed has a `{` body (set by read_body)
	let body
	// look at what follows the token (and its args):
	// - "{" (plus one optional newline) is consumed -> body = true
	// - otherwise, with `space` set: one space is consumed -> undefined,
	//   anything else -> false
	// - otherwise, without `space` -> undefined
	const read_body=(space=false)=>{
		let pos = REGEX.lastIndex
		let next = text.charAt(pos)
		if ("{"===next) {
			if ("\n"===text.charAt(pos+1))
				pos++
			REGEX.lastIndex = pos+1
			return body = true
		}
		if (space) {
			if (" "===next)
				REGEX.lastIndex = pos+1
			else
				return body = false
		}
		return body = undefined
	}
	// start a new block
	// (it records the current value of `body`, and counts in `brackets` if set)
	const OPEN=(type, args=null)=>{
		current = Object.seal({
			type, args, content: [],
			body, parent: current,
			prev: 'all_newline',
		})
		if (body)
			brackets++
	}
	// for tags used without a `{` body: the tag applies to the next word only
	const word_maybe=()=>{
		if (!body) {
			TEXT(read_word())
			CLOSE()
		}
	}
	
	let match
	// `last` is the end of the most recently accepted token;
	// the text between `last` and the next accepted token is plain text
	let last = REGEX.lastIndex = 0
	// reject the current match: resume scanning at `index`
	// (by default, one character after the start of the match)
	const NEVERMIND=(index=match.index+1)=>{
		REGEX.lastIndex = index
	}
	// accept the current match: emit the plain text that came before it
	const ACCEPT=()=>{
		TEXT(text.substring(last, match.index))
		last = REGEX.lastIndex
	}
	// read a word at the scan position and step over it
	const read_word=()=>{
		let pos = REGEX.lastIndex
		WORD_REGEX.lastIndex = pos
		let word = WORD_REGEX.exec(text)
		if (!word)
			return null
		last = REGEX.lastIndex = WORD_REGEX.lastIndex
		return word[0]
	}
	
	// start index of the previous match
	let prev = -1
	main: while (match = REGEX.exec(text)) {
		// check for infinite loops
		if (match.index===prev)
			throw ["INFINITE LOOP", match]
		prev = match.index
		// 2: figure out which token type was matched
		let token = match[0]
		// every token type has an empty marker group;
		// the first group that captured "" identifies the type
		let group_num = match.indexOf("", 1)-1
		let type = GROUPS[group_num]
		// 3:
		body = null
		rargs = null
		
		switch (type) {
		case 'TAG': {
			read_args()
			if (token==='\\link') {
				read_body(false)
			} else {
				read_body(true)
				// a tag without args must be followed by "{" or a space
				if (NO_ARGS===rargs && false===body) {
					NEVERMIND()
					continue main
				}
			}
			ACCEPT()
			switch (token) { default: {
				// unknown tag name: keep its source text in an 'invalid' node
				let args = {text:text.substring(match.index, last), reason:"invalid tag"}
				if (body)
					OPEN('invalid', args)
				else
					BLOCK('invalid', args)
			} break; case '\\sub': {
				OPEN('subscript')
				word_maybe()
			} break; case '\\sup': {
				OPEN('superscript')
				word_maybe()
			} break; case '\\sm': {
				OPEN('small')
				word_maybe()
			} break; case '\\sc': {
				OPEN('small_caps')
				word_maybe()
			} break; case '\\ov': {
				OPEN('overline')
				word_maybe()
			} break; case '\\b': {
				OPEN('bold')
				word_maybe()
			} break; case '\\i': {
				OPEN('italic')
				word_maybe()
			} break; case '\\u': {
				OPEN('underline')
				word_maybe()
			} break; case '\\s': {
				OPEN('strikethrough')
				word_maybe()
			} break; case '\\quote': {
				OPEN('quote', {cite: rargs[0]})
			} break; case '\\align': {
				let a = rargs[0]
				if (!['left', 'right', 'center'].includes(a))
					a = 'center'
				OPEN('align', {align: a})
			} break; case '\\spoiler': case '\\h': {
				let [label="spoiler"] = rargs
				let cw = /\bcw\b|🔞/i.test(label)
				OPEN('spoiler', {label, cw})
			} break; case '\\ruby': {
				let [txt="true"] = rargs
				OPEN('ruby', {text: txt})
				word_maybe()
			} break; case '\\key': {
				OPEN('key')
				word_maybe()
			} break; case '\\a': {
				let id = rargs[0]
				id = id ? id.replace(/\W+/g, "-") : null
				OPEN('anchor', {id})
				// set after OPEN, so it only affects the restart at the
				// bottom of the loop, not the node that was just opened
				body = true // ghhhh?
				//BLOCK('anchor', {id})
			} break; case '\\link': {
				let [url=""] = rargs
				let args = {url}
				if (body) {
					OPEN('link', args)
				} else {
					BLOCK('simple_link', args)
				}
			} break; case '\\bg': {
				let color = rargs[0]
				if (!is_color(color))
					color = null
				OPEN('background_color', {color})
			} break; case '\\lang': {
				let [lang=""] = rargs
				OPEN('language', {lang})
				word_maybe()
			}}
		} break; case 'STYLE': {
			// decide from the neighbouring characters
			// (the start/end of the text counts as a newline)
			let c = check_style(token, text.charAt(match.index-1)||"\n", text.charAt(REGEX.lastIndex)||"\n")
			if (!c) { // no
				NEVERMIND()
				continue main
			}
			ACCEPT()
			if (true===c) { // open new
				OPEN('style', token)
			} else { // close
				// cancel everything that was opened inside the style first
				while (current != c)
					CLOSE(true)
				CLOSE()
			}
		} break; case 'TABLE_CELL': {
			// only valid inside a table cell, possibly with open styles in between
			for (let c=current; ; c=c.parent) {
				if ('table_cell'===c.type) {
					read_args()
					skip_spaces()
					ACCEPT()
					while (current!==c)
						CLOSE(true)
					CLOSE() // cell
					// TODO: HACK
					// a double bar marks the cell that was just closed
					if (/^ *[|][|]/.test(token)) {
						let last = current.content[current.content.length-1]
						last.args.div = true
					}
					// we don't know whether these are row args or cell args,
					// so just pass the raw args directly, and parse them later.
					OPEN('table_cell', rargs)
					break
				}
				if ('style'!==c.type) {
					// normally NEVERMIND skips one char,
					// e.g. if we parse "abc" and that matches but gets rejected, it'll try parsing at "bc".
					// but table cell tokens can look like this: "   ||"
					// if we skip 1 char (a space), it would try to parse a table cell again several times.
					// so instead we skip to the end of the token because we know it's safe in this case.
					NEVERMIND(REGEX.lastIndex)
					continue main
				}
			}
		} break; case 'TABLE_DIVIDER': {
			//skip_spaces()
			// only valid directly after a table
			let tbl = get_last(current)
			if (!tbl || 'table'!==tbl.type) {
				NEVERMIND()
				continue main
			}
			ACCEPT()
			OPEN('table_divider')
		} break; case 'TABLE_START': {
			read_args()
			skip_spaces()
			ACCEPT()
			// the row keeps the source text of its opening token (with args),
			// so that it can be turned back into text if the row is cancelled
			let args_token = text.substring(match.index, last)
			OPEN('table_row', args_token, false) // special OPEN call
			OPEN('table_cell', rargs)
		} break; case 'NEWLINE': {
			ACCEPT()
			NEWLINE(true)
			body = true // to trigger start_line
		} break; case 'HEADING': {
			read_args()
			read_body(true)
			if (NO_ARGS===rargs && false===body) {
				NEVERMIND()
				continue main
			}
			ACCEPT()
			// the token is the run of "#", so its length is the level
			let level = token.length
			let args = {level}
			let id = rargs[0]
			args.id = id ? id.replace(/\W+/g, "-") : null
			// todo: anchor name (and, can this be chosen automatically based on contents?)
			OPEN('heading', args)
		} break; case 'DIVIDER': {
			ACCEPT()
			BLOCK('divider')
		} break; case 'BLOCK_END': {
			ACCEPT()
			if (brackets>0) {
				// cancel everything up to the nearest node that has a body
				while (!current.body)
					CLOSE(true)
				// invalid tags keep their closing brace as text
				if ('invalid'===current.type) {
					if ("\n}"==token)
						NEWLINE(false) // false since we already closed everything
					TEXT("}")
				}
				CLOSE()
			} else {
				// nothing to close: the brace is plain text
				// hack:
				if ("\n}"==token)
					NEWLINE(true)
				TEXT("}")
			}
		} break; case 'NULL_ENV': {
			body = true
			ACCEPT()
			OPEN('null_env')
			current.prev = current.parent.prev
		} break; case 'ESCAPED': {
			ACCEPT()
			if ("\\\n"===token)
				NEWLINE(false)
			else if ("\\."===token) { // \. is a no-op
				// todo: close lists too
				//current.content.push("")
				//current.prev = 'block'
			} else {
				// everything after the backslash is literal text
				current.content.push(token.slice(1))
				current.prev = 'text'
			}
		} break; case 'QUOTE': {
			read_args()
			read_body(true)
			if (NO_ARGS===rargs && false===body) {
				NEVERMIND()
				continue main
			}
			ACCEPT()
			OPEN('quote', {cite: rargs[0]})
		} break; case 'CODE_BLOCK': {
			let [lang, code] = read_code()
			ACCEPT()
			BLOCK('code', {text:code, lang})
		} break; case 'INLINE_CODE': {
			ACCEPT()
			// strip the outer backticks; a doubled backtick stands for a single one
			BLOCK('icode', {text: token.replace(/^`|`$/g, "").replace(/``/g, "`")})
		} break; case 'EMBED': {
			read_args()
			ACCEPT()
			// drop the leading "!"
			let url = token.substring(1) // ehh better
			let [type, args] = process_embed(url, rargs)
			BLOCK(type, args)
		} break; case 'LINK': {
			read_args()
			read_body(false)
			ACCEPT()
			let url = token
			let args = {url}
			if (body) {
				OPEN('link', args)
			} else {
				args.text = rargs[0]
				BLOCK('simple_link', args)
			}
		} break; case 'LIST_ITEM': {
			read_args()
			read_body(true)
			if (NO_ARGS===rargs && false===body) {
				NEVERMIND()
				continue main
			}
			ACCEPT()
			// the number of characters before the "-" is the indent
			let indent = token.indexOf("-")
			OPEN('list_item', {indent, kind:rargs[0]==="1"?"1":undefined})
		} }
		
		// after a body was opened (or after a newline), drop the text that
		// was consumed and restart from index 0 of the remainder
		if (body) {
			text = text.substring(last)
			last = REGEX.lastIndex = 0
			prev = -1
		}
	} // end of main loop
	
	TEXT(text.substring(last)) // text after last token
	
	// cancel whatever is still open
	while ('ROOT'!==current.type)
		CLOSE(true)
	if ('newline'===current.prev)
		current.content.push("\n")
	
	// drop the reference to the tree
	current = null // my the memory leak!
	
	return tree
} /* parse() */
686
// Public interface of the instance: the parse function itself,
// and a table of parsers by language name.
Object.assign(this, {
	parse,
	langs: {'12y2': parse},
})
689} }
690
691export default Markup_12y2
692
693// what if you want to write like, "{...}". well that's fine
694// BUT if you are inside a tag, the } will close it.
695// maybe closing tags should need some kind of special syntax?
696// \tag{ ... \} >{...\} idk..
697// or match paired {}s :
698// \tag{ ... {heck} ... } <- closes here
699
700// todo: after parsing a block element: eat the next newline directly
701
702// idea:
703// compare ast formats:
704// memory, speed, etc.
705// {type, args, content}
706// [type, args, content]
707// [type, args, ...content]