tangled
alpha
login
or
join now
ansxor.ca
/
markup2
0
fork
atom
this repo has no description
0
fork
atom
overview
issues
1
pulls
pipelines
rewritten parser
12Me21
3 years ago
9425587d
b8dbae11
+422
-1048
5 changed files
expand all
collapse all
unified
split
index.html
parse.js
parse2.js
testing
auto.html
index.html
+1
-1
index.html
···
4
4
<title>Markup2 Demo</title>
5
5
6
6
<script src=langs.js></script>
7
7
-
<script src=parse2.js></script>
7
7
+
<script src=parse.js></script>
8
8
<script src=legacy.js></script>
9
9
<script src=render.js></script>
10
10
<script src=runtime.js></script>
+417
-410
parse.js
···
8
8
**/
9
9
class Markup_12y2 { constructor() {
10
10
11
11
-
// TokenType ๐ท enum
12
12
-
// BlockType ๐ท enum
13
13
-
// Text ๐ท string ๐ from input text
14
14
-
// ArgPattern ๐ท RegExp
15
15
-
// GroupNum ๐ท number - regex capturing group num
16
16
-
// RawArgs ๐ท Array - array with .named field
17
17
-
// Block ๐ท Object - has .type .args .content
18
18
-
// CurrentBlock ๐ท Object - block + other fields
19
19
-
20
11
// all state is stored in these vars (and REGEX.lastIndex)
21
12
let current, brackets
22
13
···
26
17
// elements which can survive an eol (without a body)
27
18
const IS_BLOCK = {__proto__:null, code:1, divider:1, ROOT:1, heading:1, quote:1, table:1, table_cell:1, image:1, video:1, audio:1, spoiler:1, align:1, list:1, list_item:1, youtube:1, anchor:1}
28
19
29
29
-
// RegExp
30
30
-
// GroupNum -> TokenType
31
31
-
// GroupNum -> ArgPattern
32
20
const MACROS = {
33
21
'{EOL}': "(?![^\\n])",
34
22
'{BOL}': "^",
···
36
24
'{URL_CHARS}': "[-\\w/%&=#+~@$*'!?,.;:]*",
37
25
'{URL_FINAL}': "[-\\w/%&=#+~@$*']",
38
26
}
39
39
-
const GROUPS = [], ARGTYPES = []
27
27
+
const GROUPS = []
40
28
let regi = []
41
41
-
function PAT({raw}, ...groups) {
29
29
+
const PAT=({raw}, ...groups)=>{
42
30
regi.push(
43
31
raw.join("()")
44
32
.replace(/\\`/g, "`")
45
33
.replace(/[(](?![?)])/g, "(?:")
46
34
.replace(/[{][A-Z_]+[}]/g, match=>MACROS[match])
47
35
)
48
48
-
for (let g of groups) {
49
49
-
GROUPS.push(Object.keys(g)[0])
50
50
-
ARGTYPES.push(Object.values(g)[0])
51
51
-
}
36
36
+
GROUPS.push(...groups)
52
37
}
53
38
54
54
-
// ArgPattern
55
55
-
const ARGS_NORMAL = // /[...]?{?/
56
56
-
/(?:\[([^\]\n]*)\])?({\n?)?/y
57
57
-
58
58
-
const ARGS_WORD = // /[...]?{/ or /[...] ?<word>/ or / <word>/
59
59
-
/(?:\[([^\]\n]*)\]|(?=[ {]))({\n?| ?([^\s`^()+=\[\]{}\\|"';:,.<>/?!*]*))/y // todo: more complex rule for word parsing //TODO: does this set the body flag right? //(what did i mean by this?)
60
60
-
const ARGS_LINE = // /[...]?{/ or /[...] ?/ or / /
61
61
-
/(?:\[([^\]\n]*)\]|(?=[ {]))(?:({\n?)| ?)/y // probably dont need this, we can strip space after { in all cases instead.
62
62
-
const ARGS_HEADING = // /[...]?{/ or /[...] ?/ or / /
63
63
-
/(?:\[([^\]\n]*)\]|(?=[ {]))(?:({\n?)| ?)/y
64
64
-
65
65
-
// this is like args_heading kinda, except always counts as a line start. maybe backport this to args heading etc.?
66
66
-
const ARGS_ANCHOR = // /[...]{?/
67
67
-
/\[([^\]\n]*)\]({\n?| ?|)/y
68
68
-
69
69
-
const ARGS_BODYLESS = // /[...]?/
70
70
-
/(?:\[([^\]\n]*)\])?/y
71
71
-
const ARGS_TABLE = // /[...]? */
72
72
-
/(?:\[([^\]\n]*)\])? */y
73
73
-
74
74
-
const ARGS_CODE = // ... ```
75
75
-
/(?: *([-\w.+#$ ]+?) *(?![^\n]))?\n?([^]*?)(?:```|$)/y
76
76
-
77
77
-
PAT`[\n]?[}]${{ BLOCK_END: 0}}`
78
78
-
PAT`[\n]${{ NEWLINE: 0}}`
79
79
-
PAT`{BOL}[#]{1,4}${{ HEADING: ARGS_HEADING}}`
80
80
-
PAT`{BOL}[-]{3,}{EOL}${{ DIVIDER: 0}}`
81
81
-
PAT`([*][*]|[_][_]|[~][~]|[/])${{ STYLE: true}}`
82
82
-
PAT`[\\][a-z]+(?![a-zA-Z0-9])${{ TAG: true}}`
83
83
-
PAT`[\\][{][\n]?${{ NULL_ENV: 0}}`
84
84
-
PAT`[\\]{ANY}${{ ESCAPED: 0}}`
85
85
-
PAT`{BOL}[>]${{ QUOTE: ARGS_HEADING}}`
86
86
-
PAT`{BOL}[\`]{3}(?=[^\n\`]*?{EOL})${{ CODE_BLOCK: ARGS_CODE}}`
87
87
-
PAT`[\`][^\`\n]*([\`]{2}[^\`\n]*)*[\`]?${{ INLINE_CODE: 0}}`
88
88
-
PAT`([!]${{ EMBED: ARGS_BODYLESS}})?\b(https?://|sbs:){URL_CHARS}({URL_FINAL}|[(]{URL_CHARS}[)]({URL_CHARS}{URL_FINAL})?)${{ LINK: ARGS_NORMAL}}`
89
89
-
PAT`{BOL} *[|]${{ TABLE_START: ARGS_TABLE}}`
90
90
-
PAT` *[|]${{ TABLE_CELL: ARGS_TABLE}}`
91
91
-
PAT`{BOL} *[-]${{ LIST_ITEM: ARGS_HEADING}}`
39
39
+
PAT`[\n]?[}]${'BLOCK_END'}`
40
40
+
PAT`[\n]${'NEWLINE'}`
41
41
+
PAT`{BOL}[#]{1,4}(?=[\[{ ])${'HEADING'}`
42
42
+
PAT`{BOL}[>](?=[\[{ ])${'QUOTE'}`
43
43
+
PAT`{BOL}[-]{3,}{EOL}${'DIVIDER'}`
44
44
+
PAT`([*][*]|[_][_]|[~][~]|[/])${'STYLE'}`
45
45
+
PAT`[\\]((https?|sbs)${'ESCAPED'}|[a-z]+)(?![a-zA-Z0-9])${'TAG'}`
46
46
+
PAT`[\\][{][\n]?${'NULL_ENV'}`
47
47
+
PAT`[\\]{ANY}${'ESCAPED'}`
48
48
+
PAT`{BOL}[\`]{3}(?=[^\n\`]*?{EOL})${'CODE_BLOCK'}`
49
49
+
PAT`[\`][^\`\n]*([\`]{2}[^\`\n]*)*[\`]?${'INLINE_CODE'}`
50
50
+
//PAT`([!]${'EMBED'})?\b(https?://|sbs:){URL_CHARS}{URL_FINAL}({URL_FINAL}|[(]{URL_CHARS}[)]({URL_CHARS}{URL_FINAL})?)${'LINK'}`
51
51
+
PAT`([!]${'EMBED'})?\b(https?://|sbs:){URL_CHARS}{URL_FINAL}([(]{URL_CHARS}[)]({URL_CHARS}{URL_FINAL})?)?${'LINK'}`
52
52
+
//PAT`([!]${'EMBED'})?\b(https?://|sbs:)({URL_CHARS}{URL_FINAL}([(]{URL_CHARS}[)])?)+${'LINK'}`
53
53
+
PAT`{BOL} *[|]${'TABLE_START'}`
54
54
+
PAT` *[|]${'TABLE_CELL'}`
55
55
+
PAT`{BOL} *[-]${'LIST_ITEM'}`
92
56
93
57
const REGEX = new RegExp(regi.join("|"), 'g')
94
58
regi = null
95
59
96
60
//todo: org tables separators?
97
61
98
98
-
// TokenType -> ArgRegex
99
99
-
const TAGS = {
100
100
-
__proto__:null,
101
101
-
'\\sub': ARGS_WORD,
102
102
-
'\\sup': ARGS_WORD,
103
103
-
'\\b': ARGS_WORD,
104
104
-
'\\i': ARGS_WORD,
105
105
-
'\\u': ARGS_WORD,
106
106
-
'\\s': ARGS_WORD,
107
107
-
'\\quote': ARGS_LINE,
108
108
-
'\\align': ARGS_LINE,
109
109
-
'\\spoiler': ARGS_LINE, '\\h': ARGS_LINE,
110
110
-
'\\ruby': ARGS_WORD,
111
111
-
'\\key': ARGS_WORD,
112
112
-
'\\a': ARGS_ANCHOR,
113
113
-
'\\link': ARGS_NORMAL, // should use arg parse mode, i think?
114
114
-
}
115
115
-
116
116
-
// process a token
117
117
-
// ๐ฅ _token_type ๐ท TokenType ๐
118
118
-
// ๐ฅ token ๐ท Text ๐ token text, including arguments
119
119
-
// ๐ฅ rarys ๐ท RawArgs ๐ raw arguments
120
120
-
// ๐ฅ body ๐ท Text ๐ argmatch[2] (varies)
121
121
-
// ๐ฅ base_token ๐ท Text ๐ token text, without arguments
122
122
-
function PROCESS(_token_type, token, rargs, body, args_token) {
123
123
-
switch (_token_type) { default: {
124
124
-
throw new TypeError("unknown token type: "+_token_type)
125
125
-
// error
126
126
-
} break; case 'NEWLINE': {
127
127
-
NEWLINE(true)
128
128
-
} break; case 'HEADING': {
129
129
-
let level = token.length
130
130
-
let args = {level}
131
131
-
let id = rargs[0]
132
132
-
args.id = id ? id.replace(/\W+/g, "-") : null
133
133
-
// todo: anchor name (and, can this be chosen automatically based on contents?)
134
134
-
OPEN('heading', args, body)
135
135
-
} break; case 'DIVIDER': {
136
136
-
BLOCK('divider')
137
137
-
} break; case 'BLOCK_END': {
138
138
-
if (brackets>0) {
139
139
-
while (!current.body)
140
140
-
CANCEL()
141
141
-
if ('invalid'===current.type) {
142
142
-
if ("\n}"==token)
143
143
-
NEWLINE(false) // false since we already closed everything
144
144
-
TEXT("}")
145
145
-
}
146
146
-
CLOSE()
147
147
-
} else {
148
148
-
// hack:
149
149
-
if ("\n}"==token)
150
150
-
NEWLINE(true)
151
151
-
TEXT("}")
152
152
-
}
153
153
-
} break; case 'NULL_ENV': {
154
154
-
OPEN('null_env', null, true)
155
155
-
current.prev = current.parent.prev
156
156
-
} break; case 'ESCAPED': {
157
157
-
if ("\\\n"===token)
158
158
-
NEWLINE(false)
159
159
-
else if ("\\."===token) { // \. is a no-op
160
160
-
// todo: close lists too
161
161
-
//current.content.push("")
162
162
-
current.prev = 'block'
163
163
-
} else
164
164
-
TEXT(token.substring(1))
165
165
-
} break; case 'QUOTE': {
166
166
-
OPEN('quote', {cite: rargs[0]}, body)
167
167
-
} break; case 'CODE_BLOCK': {
168
168
-
let lang = rargs
169
169
-
BLOCK('code', {text: body, lang})
170
170
-
} break; case 'INLINE_CODE': {
171
171
-
BLOCK('icode', {text: token.replace(/`(`)?/g, "$1")})
172
172
-
} break; case 'EMBED': {
173
173
-
let url = token.substring(1) // ehh better
174
174
-
let [type, args] = process_embed(url, rargs)
175
175
-
BLOCK(type, args)
176
176
-
} break; case 'LINK': {
177
177
-
let url = token
178
178
-
let args = {url}
179
179
-
if (body) {
180
180
-
OPEN('link', args, body)
181
181
-
} else {
182
182
-
args.text = rargs[0]
183
183
-
BLOCK('simple_link', args)
184
184
-
}
185
185
-
} break; case 'TABLE_START': {
186
186
-
OPEN('table_row', token+args_token) // special OPEN call
187
187
-
OPEN('table_cell', rargs, body)
188
188
-
} break; case 'TABLE_CELL': {
189
189
-
while (current.type!=='table_cell')
190
190
-
CANCEL()
191
191
-
CLOSE() // cell
192
192
-
// we don't know whether these are row args or cell args,
193
193
-
// so just pass the raw args directly, and parse them later.
194
194
-
OPEN('table_cell', rargs, body)
195
195
-
} break; case 'INVALID_TAG': {
196
196
-
if (body)
197
197
-
OPEN('invalid', {text: token+args_token, reason: "invalid tag"}, body)
198
198
-
else
199
199
-
BLOCK('invalid', {text: token+args_token, reason: "invalid tag"})
200
200
-
} break; case 'LIST_ITEM': {
201
201
-
let indent = token.indexOf("-")
202
202
-
OPEN('list_item', {indent}, body)
203
62
204
204
-
} break; case '\\sub': {
205
205
-
OPEN('subscript', null, body)
206
206
-
} break; case '\\sup': {
207
207
-
OPEN('superscript', null, body)
208
208
-
} break; case '\\b': {
209
209
-
OPEN('bold', null, body)
210
210
-
} break; case '\\i': {
211
211
-
OPEN('italic', null, body)
212
212
-
} break; case '\\u': {
213
213
-
OPEN('underline', null, body)
214
214
-
} break; case '\\s': {
215
215
-
OPEN('strikethrough', null, body)
216
216
-
} break; case '\\quote': {
217
217
-
OPEN('quote', {cite: rargs[0]}, body)
218
218
-
} break; case '\\align': {
219
219
-
let a = rargs[0]
220
220
-
if (!['left', 'right', 'center'].includes(a))
221
221
-
a = 'center'
222
222
-
OPEN('align', {align: a}, body)
223
223
-
} break; case '\\spoiler': case '\\h': {
224
224
-
let label = arg0(rargs, "spoiler")
225
225
-
OPEN('spoiler', {label}, body)
226
226
-
} break; case '\\ruby': {
227
227
-
let text = arg0(rargs, "true")
228
228
-
OPEN('ruby', {text}, body)
229
229
-
} break; case '\\key': {
230
230
-
OPEN('key', null, body)
231
231
-
} break; case '\\a': {
232
232
-
let id = rargs[0]
233
233
-
id = id ? id.replace(/\W+/g, "-") : null
234
234
-
OPEN('anchor', {id}, body)
235
235
-
//BLOCK('anchor', {id})
236
236
-
} break; case '\\link': {
237
237
-
let args = {url: rargs[0]}
238
238
-
if (body) {
239
239
-
OPEN('link', args, body)
240
240
-
} else {
241
241
-
args.text = args.url
242
242
-
BLOCK('simple_link', args)
243
243
-
}
244
244
-
} }
245
245
-
}
246
63
247
247
-
function arg0(rargs, def) {
248
248
-
if (rargs.length<1)
249
249
-
return def
250
250
-
return rargs[0]
251
251
-
}
252
252
-
253
253
-
254
254
-
255
255
-
const null_args = []
256
256
-
null_args.named = Object.freeze({})
257
257
-
Object.freeze(null_args)
258
258
-
// todo: do we even need named args?
259
259
-
function parse_args(arglist) {
260
260
-
// note: checks undefined AND "" (\tag AND \tag[])
261
261
-
if (!arglist)
262
262
-
return null_args
263
263
-
let list = [], named = {}
264
264
-
list.named = named
265
265
-
for (let arg of arglist.split(";")) {
266
266
-
let [, name, value] = /^(?:([^=]*)=)?(.*)$/.exec(arg)
267
267
-
// value OR =value
268
268
-
// (this is to allow values to contain =. ex: [=1=2] is "1=2")
269
269
-
if (!name)
270
270
-
list.push(value)
271
271
-
else // name=value
272
272
-
named[name] = value
273
273
-
}
274
274
-
return list
275
275
-
}
276
64
// process an embed url: !https://example.com/image.png[alt=balls]
277
65
// returns [type: String, args: Object]
278
278
-
function process_embed(url, rargs) {
66
66
+
const process_embed=(url, rargs)=>{
279
67
let type
280
68
let args = {url}
281
69
for (let arg of rargs) {
···
296
84
args.alt = rargs.named.alt
297
85
// todo: improve this
298
86
if (!type) {
299
299
-
//let u = new URL(url, "x-relative:/")
300
300
-
//let ext = /[.]([a-z0-9A-Z]{3,4})(?!\w)[^.]*$/.exec(url)
301
87
if (/[.](mp3|ogg|wav|m4a)\b/i.test(url))
302
88
type = 'audio'
303
89
else if (/[.](mp4|mkv|mov)\b/i.test(url))
···
311
97
type = 'image'
312
98
return [type, args]
313
99
}
314
314
-
315
315
-
// start a new block
316
316
-
function OPEN(type, args, body) {
317
317
-
current = Object.seal({
318
318
-
type, args, content: [],
319
319
-
body, parent: current,
320
320
-
prev: 'all_newline',
321
321
-
})
322
322
-
if (body)
323
323
-
brackets++
100
100
+
const process_cell_args=(rargs)=>{
101
101
+
let args = {}
102
102
+
for (let arg of rargs) {
103
103
+
let m
104
104
+
if ("*"===arg || "#"===arg)
105
105
+
args.header = true
106
106
+
else if (['red', 'orange', 'yellow', 'green', 'blue', 'purple', 'gray'].includes(arg))
107
107
+
args.color = arg
108
108
+
else if (m = /^(\d*)x(\d*)$/.exec(arg)) {
109
109
+
let [, w, h] = m
110
110
+
if (+w > 1) args.colspan = +w
111
111
+
if (+h > 1) args.rowspan = +h
112
112
+
}
113
113
+
}
114
114
+
return args
324
115
}
116
116
+
const process_row_args=(rargs)=>{
117
117
+
let args = {}
118
118
+
for (let arg of rargs) {
119
119
+
if ("*"===arg || "#"===arg)
120
120
+
args.header = true
121
121
+
}
122
122
+
return args
123
123
+
}
124
124
+
325
125
// move up
326
326
-
function pop() {
126
126
+
const pop=()=>{
327
127
if (current.body)
328
128
brackets--
329
129
let o = current
···
331
131
return o
332
132
}
333
133
334
334
-
function CANCEL() {
335
335
-
if ('style'===current.type) {
336
336
-
let o = pop()
337
337
-
current.content.push(o.args, ...o.content)
338
338
-
current.prev = o.prev
339
339
-
return
340
340
-
}
341
341
-
if ('table_cell'===current.type) {
342
342
-
if (current.content.length) {
343
343
-
CLOSE() // table_cell
344
344
-
current.args = {}
345
345
-
} else {
346
346
-
// cancelling an empty table cell means:
347
347
-
// it's the end of the row, so discard the cell
348
348
-
let o = pop()
349
349
-
// if the ROW is empty (i.e. we just have a single | )
350
350
-
if (!current.content.length) {
351
351
-
let o = pop() // discard the row
352
352
-
TEXT(o.args)
353
353
-
return
354
354
-
// todo: maybe also cancel rows with 1 unclosed cell?
355
355
-
// like `| abc` -> text
356
356
-
}
357
357
-
// transfer args to the row, and parse as table row args:
358
358
-
let ret = current.args = {}
359
359
-
for (let arg of o.args) {
360
360
-
if ("*"===arg || "#"===arg) {
361
361
-
ret.header = true
362
362
-
}
363
363
-
}
364
364
-
}
365
365
-
// fallthrough to close the table_row
366
366
-
}
367
367
-
CLOSE()
368
368
-
}
369
369
-
370
370
-
function get_last(block) {
134
134
+
const get_last=(block)=>{
371
135
return block.content[block.content.length-1]
372
136
}
373
137
374
374
-
function CLOSE() {
138
138
+
const CLOSE=(cancel)=>{
375
139
let o = pop()
140
140
+
let type = o.type
376
141
377
377
-
if ('null_env'===o.type) {
142
142
+
if ('style'===type && cancel) {
143
143
+
current.content.push(o.args, ...o.content)
144
144
+
current.prev = o.prev
145
145
+
return
146
146
+
}
147
147
+
if ('null_env'===type) {
378
148
current.content.push(...o.content)
379
149
current.prev = o.prev
380
150
return
381
151
}
382
152
153
153
+
// cancelling an empty table cell means:
154
154
+
// it's the end of the row, so discard the cell
155
155
+
if ('table_cell'===type && cancel && !o.content.length) {
156
156
+
// if the ROW is empty (i.e. we just have a single | )
157
157
+
if (!current.content.length) {
158
158
+
let o = pop() // discard the row
159
159
+
TEXT(o.args)
160
160
+
return
161
161
+
// todo: maybe also cancel rows with 1 unclosed cell?
162
162
+
// like `| abc` -> text
163
163
+
}
164
164
+
// transfer args to the row, and parse as table row args:
165
165
+
current.args = process_row_args(o.args)
166
166
+
// FALLTHROUGH (to close the row)
167
167
+
o = pop()
168
168
+
type = o.type
169
169
+
}
170
170
+
383
171
if ('newline'===o.prev)
384
172
o.content.push("\n")
385
385
-
let node = {type: o.type, args: o.args, content: o.content}
173
173
+
174
174
+
let node = {type: type, args: o.args, content: o.content}
386
175
let dest = current
387
176
388
388
-
// merge list_item with preceeding list
389
389
-
if ('list_item'===o.type) {
177
177
+
if ('list_item'===type) {
178
178
+
// merge list_item with preceeding list
390
179
node.args = null
391
180
let indent = o.args.indent
392
181
while (1) {
···
402
191
if (dest.args.indent == indent)
403
192
break
404
193
}
405
405
-
}
406
406
-
// merge table_row with preceeding table
407
407
-
else if ('table_row'===o.type) {
194
194
+
} else if ('table_row'===type) {
408
195
dest = get_last(current)
409
196
if (!dest || 'table'!==dest.type) {
410
197
dest = {type:'table', args:null, content:[]}
411
198
current.content.push(dest)
412
199
}
413
413
-
}
414
414
-
// table cell
415
415
-
else if ('table_cell'===o.type) {
416
416
-
let ret = node.args = {}
417
417
-
for (let arg of o.args) {
418
418
-
let m
419
419
-
if ("*"===arg || "#"===arg)
420
420
-
ret.header = true
421
421
-
else if (['red', 'orange', 'yellow', 'green', 'blue', 'purple', 'gray'].includes(arg))
422
422
-
ret.color = arg
423
423
-
else if (m = /^(\d*)x(\d*)$/.exec(arg)) {
424
424
-
let [, w, h] = m
425
425
-
if (+w > 1) ret.colspan = +w
426
426
-
if (+h > 1) ret.rowspan = +h
427
427
-
}
428
428
-
}
429
429
-
} else if ('style'===o.type) {
200
200
+
} else if ('style'===type) {
430
201
node.type = {
431
202
__proto__:null,
432
203
'**': 'bold', '__': 'underline',
···
435
206
node.args = null
436
207
}
437
208
209
209
+
current.prev = type in IS_BLOCK ? 'block' : o.prev
438
210
dest.content.push(node)
439
439
-
current.prev = o.type in IS_BLOCK ? 'block' : o.prev
211
211
+
212
212
+
if ('table_cell'===type) {
213
213
+
node.args = process_cell_args(o.args) // hack?
214
214
+
if (cancel) {
215
215
+
// close the row
216
216
+
current.args = {}
217
217
+
CLOSE()
218
218
+
}
219
219
+
}
440
220
}
221
221
+
441
222
// push text
442
442
-
function TEXT(text) {
223
223
+
const TEXT=(text)=>{
443
224
if (text!=="") {
444
225
current.content.push(text) // todo: merge with surrounding textnodes?
445
226
current.prev = 'text'
446
227
}
447
228
}
448
229
// push empty tag
449
449
-
function BLOCK(type, args) {
230
230
+
const BLOCK=(type, args)=>{
450
231
current.content.push({type, args})
451
232
current.prev = type in IS_BLOCK ? 'block' : 'text'
452
233
}
453
234
454
454
-
function NEWLINE(real) {
235
235
+
const NEWLINE=(real)=>{
455
236
if (real)
456
237
while (!current.body && 'ROOT'!=current.type)
457
457
-
CANCEL()
238
238
+
CLOSE(true)
458
239
if ('block'!==current.prev)
459
240
current.content.push("\n")
460
241
if ('all_newline'!==current.prev)
461
242
current.prev = 'newline'
462
243
}
463
244
464
464
-
function in_table() {
465
465
-
for (let c=current; ; c=c.parent) {
466
466
-
if ('table_cell'===c.type)
467
467
-
return true
468
468
-
if ('style'!==c.type)
469
469
-
return false
245
245
+
const null_args = []
246
246
+
null_args.named = Object.freeze({})
247
247
+
Object.freeze(null_args)
248
248
+
const NO_ARGS = []
249
249
+
NO_ARGS.named = Object.freeze({})
250
250
+
Object.freeze(NO_ARGS)
251
251
+
// todo: do we even need named args?
252
252
+
const parse_args=(arglist)=>{
253
253
+
// note: checks undefined AND "" (\tag AND \tag[])
254
254
+
if (!arglist)
255
255
+
return null_args
256
256
+
let list = [], named = {}
257
257
+
list.named = named
258
258
+
for (let arg of arglist.split(";")) {
259
259
+
let [, name, value] = /^(?:([^=]*)=)?(.*)$/.exec(arg)
260
260
+
// value OR =value
261
261
+
// (this is to allow values to contain =. ex: [=1=2] is "1=2")
262
262
+
if (!name)
263
263
+
list.push(value)
264
264
+
else // name=value
265
265
+
named[name] = value
470
266
}
471
471
-
}
472
472
-
// todo: this should check for body
473
473
-
function find_style(token) {
474
474
-
for (let c=current; 'style'===c.type; c=c.parent)
475
475
-
if (c.args===token)
476
476
-
return c
267
267
+
return list
477
268
}
478
478
-
function do_style(token_text, before, after) {
269
269
+
270
270
+
const STYLE_START
271
271
+
= /^[ \s.'"}{(> ][^ \s,'" ]/
272
272
+
const STYLE_CLOSE
273
273
+
= /^[^ \s,'" ][-\s.,:;!?'"}{)<\\ ]/
274
274
+
275
275
+
const check_style=(token_text, before, after)=>{
276
276
+
// END
479
277
for (let c=current; 'style'===c.type; c=c.parent)
480
278
if (c.args===token_text) {
481
481
-
if (!after || /[^\s,'"][-\s.,:;!?'")}{]/y.test(before+after))
279
279
+
if (STYLE_CLOSE.test(before+after))
482
280
return c
483
483
-
else
484
484
-
break
281
281
+
break
485
282
}
486
486
-
487
487
-
if (!before || /[\s.({}'"][^\s,'"]/y.test(before+after))
283
283
+
// START
284
284
+
if (STYLE_START.test(before+after))
488
285
return true
489
286
}
490
287
491
491
-
function parse(text) {
288
288
+
let ARG_REGEX = /.*?(?=])/y
289
289
+
let WORD_REGEX = /[^\s`^()+=\[\]{}\\|"';:,.<>/?!*]*/y
290
290
+
let CODE_REGEX = /(?: *([-\w.+#$ ]+?) *(?![^\n]))?\n?([^]*?)(?:```|$)/y // ack
291
291
+
292
292
+
const parse=(text)=>{
492
293
let tree = {type: 'ROOT', content: [], prev: 'all_newline'}
493
294
current = tree
494
295
brackets = 0
495
296
496
496
-
// MAIN LOOP //
497
497
-
let prev = -1
498
498
-
let last = REGEX.lastIndex = 0
297
297
+
// these use REGEX, text
298
298
+
const skip_spaces=()=>{
299
299
+
let pos = REGEX.lastIndex
300
300
+
while (" "===text.charAt(pos))
301
301
+
pos++
302
302
+
REGEX.lastIndex = pos
303
303
+
}
304
304
+
const read_code=()=>{
305
305
+
let pos = REGEX.lastIndex
306
306
+
CODE_REGEX.lastIndex = pos
307
307
+
let [, lang, code] = CODE_REGEX.exec(text)
308
308
+
REGEX.lastIndex = CODE_REGEX.lastIndex
309
309
+
return [lang, code]
310
310
+
}
311
311
+
312
312
+
let rargs
313
313
+
const read_args=()=>{
314
314
+
let pos = REGEX.lastIndex
315
315
+
let next = text.charAt(pos)
316
316
+
if ("["!==next)
317
317
+
return rargs = NO_ARGS
318
318
+
ARG_REGEX.lastIndex = pos+1
319
319
+
let argstr = ARG_REGEX.exec(text)
320
320
+
if (!argstr)
321
321
+
return rargs = NO_ARGS
322
322
+
REGEX.lastIndex = ARG_REGEX.lastIndex+1
323
323
+
return rargs = parse_args(argstr[0])
324
324
+
}
325
325
+
326
326
+
let body
327
327
+
const read_body=(space=false)=>{
328
328
+
let pos = REGEX.lastIndex
329
329
+
let next = text.charAt(pos)
330
330
+
if ("{"===next) {
331
331
+
if ("\n"===text.charAt(pos+1))
332
332
+
pos++
333
333
+
REGEX.lastIndex = pos+1
334
334
+
return body = true
335
335
+
}
336
336
+
if (space) {
337
337
+
if (" "===next)
338
338
+
REGEX.lastIndex = pos+1
339
339
+
else
340
340
+
return body = false
341
341
+
}
342
342
+
return body = undefined
343
343
+
}
344
344
+
// start a new block
345
345
+
const OPEN=(type, args=null)=>{
346
346
+
current = Object.seal({
347
347
+
type, args, content: [],
348
348
+
body, parent: current,
349
349
+
prev: 'all_newline',
350
350
+
})
351
351
+
if (body)
352
352
+
brackets++
353
353
+
}
354
354
+
const word_maybe=()=>{
355
355
+
if (!body) {
356
356
+
TEXT(read_word())
357
357
+
CLOSE()
358
358
+
}
359
359
+
}
360
360
+
499
361
let match
500
500
-
function nevermind() {
362
362
+
let last = REGEX.lastIndex = 0
363
363
+
const NEVERMIND=()=>{
501
364
REGEX.lastIndex = match.index+1
502
365
}
503
503
-
function accept() {
366
366
+
const ACCEPT=()=>{
504
367
TEXT(text.substring(last, match.index))
505
368
last = REGEX.lastIndex
506
369
}
507
507
-
function start_line() {
508
508
-
text = text.substring(last)
509
509
-
last = REGEX.lastIndex = 0
510
510
-
prev = -1
370
370
+
const read_word=()=>{
371
371
+
let pos = REGEX.lastIndex
372
372
+
WORD_REGEX.lastIndex = pos
373
373
+
let word = WORD_REGEX.exec(text)
374
374
+
if (!word)
375
375
+
return null
376
376
+
last = REGEX.lastIndex = WORD_REGEX.lastIndex
377
377
+
return word[0]
511
378
}
379
379
+
380
380
+
let prev = -1
512
381
main: while (match = REGEX.exec(text)) {
513
382
// check for infinite loops
514
383
if (match.index===prev)
515
384
throw ["INFINITE LOOP", match]
516
385
prev = match.index
517
386
// 2: figure out which token type was matched
518
518
-
let token_text = match[0]
387
387
+
let token = match[0]
519
388
let group_num = match.indexOf("", 1)-1
520
520
-
521
521
-
// 3: get type + argument pattern
522
389
let type = GROUPS[group_num]
523
523
-
let argregex
524
524
-
// 4: special cases:
525
525
-
if ('TAG'===type) {
526
526
-
if (token_text in TAGS) {
527
527
-
type = token_text
528
528
-
argregex = TAGS[type]
390
390
+
// 3:
391
391
+
body = null
392
392
+
rargs = null
393
393
+
394
394
+
switch (type) {
395
395
+
case 'TAG': {
396
396
+
read_args()
397
397
+
if (token==='\\link') {
398
398
+
read_body(false)
529
399
} else {
530
530
-
type = 'INVALID_TAG'
531
531
-
argregex = ARGS_NORMAL
400
400
+
read_body(true)
401
401
+
if (NO_ARGS===rargs && false===body) {
402
402
+
NEVERMIND()
403
403
+
continue main
404
404
+
}
532
405
}
533
533
-
} else if ('STYLE'===type) {
534
534
-
let c = do_style(token_text, text.charAt(match.index-1), text.charAt(REGEX.lastIndex))
406
406
+
ACCEPT()
407
407
+
switch (token) { default: {
408
408
+
let args = {text:text.substring(match.index, last), reason:"invalid tag"}
409
409
+
if (body)
410
410
+
OPEN('invalid', args)
411
411
+
else
412
412
+
BLOCK('invalid', args)
413
413
+
} break; case '\\sub': {
414
414
+
OPEN('subscript')
415
415
+
word_maybe()
416
416
+
} break; case '\\sup': {
417
417
+
OPEN('superscript')
418
418
+
word_maybe()
419
419
+
} break; case '\\b': {
420
420
+
OPEN('bold')
421
421
+
word_maybe()
422
422
+
} break; case '\\i': {
423
423
+
OPEN('italic')
424
424
+
word_maybe()
425
425
+
} break; case '\\u': {
426
426
+
OPEN('underline')
427
427
+
word_maybe()
428
428
+
} break; case '\\s': {
429
429
+
OPEN('strikethrough')
430
430
+
word_maybe()
431
431
+
} break; case '\\quote': {
432
432
+
OPEN('quote', {cite: rargs[0]})
433
433
+
} break; case '\\align': {
434
434
+
let a = rargs[0]
435
435
+
if (!['left', 'right', 'center'].includes(a))
436
436
+
a = 'center'
437
437
+
OPEN('align', {align: a})
438
438
+
} break; case '\\spoiler': case '\\h': {
439
439
+
let [label="spoiler"] = rargs
440
440
+
OPEN('spoiler', {label})
441
441
+
} break; case '\\ruby': {
442
442
+
let [txt="true"] = rargs
443
443
+
OPEN('ruby', {text: txt})
444
444
+
word_maybe()
445
445
+
} break; case '\\key': {
446
446
+
OPEN('key')
447
447
+
word_maybe()
448
448
+
} break; case '\\a': {
449
449
+
let id = rargs[0]
450
450
+
id = id ? id.replace(/\W+/g, "-") : null
451
451
+
OPEN('anchor', {id})
452
452
+
body = true // ghhhh?
453
453
+
//BLOCK('anchor', {id})
454
454
+
} break; case '\\link': {
455
455
+
let args = {url: rargs[0]}
456
456
+
if (body) {
457
457
+
OPEN('link', args)
458
458
+
} else {
459
459
+
BLOCK('simple_link', args)
460
460
+
}
461
461
+
}}
462
462
+
} break; case 'STYLE': {
463
463
+
let c = check_style(token, text.charAt(match.index-1)||"\n", text.charAt(REGEX.lastIndex)||"\n")
535
464
if (!c) { // no
536
536
-
nevermind()
537
537
-
} else if (true===c) { // open new
538
538
-
accept()
539
539
-
OPEN('style', token_text)
465
465
+
NEVERMIND()
466
466
+
continue main
467
467
+
}
468
468
+
ACCEPT()
469
469
+
if (true===c) { // open new
470
470
+
OPEN('style', token)
540
471
} else { // close
541
541
-
accept()
542
472
while (current != c)
543
543
-
CANCEL()
473
473
+
CLOSE(true)
544
474
CLOSE()
545
475
}
546
546
-
continue main
547
547
-
} else if ('TABLE_CELL'===type && !in_table()) {
548
548
-
nevermind()
549
549
-
continue main
550
550
-
} else {
551
551
-
argregex = ARGTYPES[group_num]
552
552
-
}
553
553
-
// 5: parse args and {
554
554
-
if (!argregex) {
555
555
-
accept()
556
556
-
let body = 'NULL_ENV'===type //h
557
557
-
PROCESS(type, token_text, null, body, token_text)
558
558
-
if (body || 'NEWLINE'===type)
559
559
-
start_line()
560
560
-
} else {
561
561
-
// try to match arguments
562
562
-
argregex.lastIndex = REGEX.lastIndex
563
563
-
let argmatch = argregex.exec(text)
564
564
-
if (null===argmatch) {
565
565
-
nevermind()
476
476
+
} break; case 'TABLE_CELL': {
477
477
+
for (let c=current; ; c=c.parent) {
478
478
+
if ('table_cell'===c.type) {
479
479
+
read_args()
480
480
+
skip_spaces()
481
481
+
ACCEPT()
482
482
+
while (current!==c)
483
483
+
CLOSE(true)
484
484
+
CLOSE() // cell
485
485
+
// we don't know whether these are row args or cell args,
486
486
+
// so just pass the raw args directly, and parse them later.
487
487
+
OPEN('table_cell', rargs)
488
488
+
break
489
489
+
}
490
490
+
if ('style'!==c.type) {
491
491
+
NEVERMIND()
492
492
+
continue main
493
493
+
}
494
494
+
}
495
495
+
} break; case 'TABLE_START': {
496
496
+
read_args()
497
497
+
skip_spaces()
498
498
+
ACCEPT()
499
499
+
let args_token = text.substring(match.index, last)
500
500
+
OPEN('table_row', args_token, false) // special OPEN call
501
501
+
OPEN('table_cell', rargs)
502
502
+
} break; case 'NEWLINE': {
503
503
+
ACCEPT()
504
504
+
NEWLINE(true)
505
505
+
body = true // to trigger start_line
506
506
+
} break; case 'HEADING': {
507
507
+
read_args()
508
508
+
read_body(true)
509
509
+
if (NO_ARGS===rargs && false===body) {
510
510
+
NEVERMIND()
566
511
continue main
567
512
}
568
568
-
REGEX.lastIndex = argregex.lastIndex
569
569
-
accept()
570
570
-
571
571
-
let args = argmatch[1]
572
572
-
let body = argmatch[2] // flag: args with {, or word args
573
573
-
let word = argmatch[3] // contents: word args & code block
574
574
-
if (ARGS_CODE!==argregex) {
575
575
-
args = parse_args(args)
576
576
-
body = body>="{"
577
577
-
}
578
578
-
579
579
-
PROCESS(type, token_text, args, body, argmatch[0])
580
580
-
// word tags
581
581
-
if (undefined!==word) {
582
582
-
// escaping in word args? idk. todo
583
583
-
TEXT(word.replace(/\\([^])/g, "$1"))
513
513
+
ACCEPT()
514
514
+
let level = token.length
515
515
+
let args = {level}
516
516
+
let id = rargs[0]
517
517
+
args.id = id ? id.replace(/\W+/g, "-") : null
518
518
+
// todo: anchor name (and, can this be chosen automatically based on contents?)
519
519
+
OPEN('heading', args)
520
520
+
} break; case 'DIVIDER': {
521
521
+
ACCEPT()
522
522
+
BLOCK('divider')
523
523
+
} break; case 'BLOCK_END': {
524
524
+
ACCEPT()
525
525
+
if (brackets>0) {
526
526
+
while (!current.body)
527
527
+
CLOSE(true)
528
528
+
if ('invalid'===current.type) {
529
529
+
if ("\n}"==token)
530
530
+
NEWLINE(false) // false since we already closed everything
531
531
+
TEXT("}")
532
532
+
}
584
533
CLOSE()
534
534
+
} else {
535
535
+
// hack:
536
536
+
if ("\n}"==token)
537
537
+
NEWLINE(true)
538
538
+
TEXT("}")
585
539
}
586
586
-
// tags with { body
587
587
-
else if (argmatch[2]!==undefined && ARGS_CODE!==argregex) {
588
588
-
start_line()
540
540
+
} break; case 'NULL_ENV': {
541
541
+
body = true
542
542
+
ACCEPT()
543
543
+
OPEN('null_env')
544
544
+
current.prev = current.parent.prev
545
545
+
} break; case 'ESCAPED': {
546
546
+
ACCEPT()
547
547
+
if ("\\\n"===token)
548
548
+
NEWLINE(false)
549
549
+
else if ("\\."===token) { // \. is a no-op
550
550
+
// todo: close lists too
551
551
+
//current.content.push("")
552
552
+
current.prev = 'block'
553
553
+
} else
554
554
+
TEXT(token.substring(1))
555
555
+
} break; case 'QUOTE': {
556
556
+
read_args()
557
557
+
read_body(true)
558
558
+
if (NO_ARGS===rargs && false===body) {
559
559
+
NEVERMIND()
560
560
+
continue main
561
561
+
}
562
562
+
ACCEPT()
563
563
+
OPEN('quote', {cite: rargs[0]})
564
564
+
} break; case 'CODE_BLOCK': {
565
565
+
let [lang, code] = read_code()
566
566
+
ACCEPT()
567
567
+
BLOCK('code', {text:code, lang})
568
568
+
} break; case 'INLINE_CODE': {
569
569
+
ACCEPT()
570
570
+
BLOCK('icode', {text: token.replace(/`(`)?/g, "$1")})
571
571
+
} break; case 'EMBED': {
572
572
+
read_args()
573
573
+
ACCEPT()
574
574
+
let url = token.substring(1) // ehh better
575
575
+
let [type, args] = process_embed(url, rargs)
576
576
+
BLOCK(type, args)
577
577
+
} break; case 'LINK': {
578
578
+
read_args()
579
579
+
read_body(false)
580
580
+
ACCEPT()
581
581
+
let url = token
582
582
+
let args = {url}
583
583
+
if (body) {
584
584
+
OPEN('link', args)
585
585
+
} else {
586
586
+
args.text = rargs[0]
587
587
+
BLOCK('simple_link', args)
588
588
+
}
589
589
+
} break; case 'LIST_ITEM': {
590
590
+
read_args()
591
591
+
read_body(true)
592
592
+
if (NO_ARGS===rargs && false===body) {
593
593
+
NEVERMIND()
594
594
+
continue main
589
595
}
596
596
+
ACCEPT()
597
597
+
let indent = token.indexOf("-")
598
598
+
OPEN('list_item', {indent})
599
599
+
} }
600
600
+
601
601
+
if (body) {
602
602
+
text = text.substring(last)
603
603
+
last = REGEX.lastIndex = 0
604
604
+
prev = -1
590
605
}
591
606
} // end of main loop
592
607
593
608
TEXT(text.substring(last)) // text after last token
594
609
595
610
while ('ROOT'!==current.type)
596
596
-
CANCEL()
611
611
+
CLOSE(true)
597
612
if ('newline'===current.prev) //todo: this is repeated
598
613
current.content.push("\n")
599
614
615
615
+
current = null // my the memory leak!
616
616
+
600
617
return tree // technically we could return `current` here and get rid of `tree` entirely
601
618
}
602
619
603
603
-
/**
604
604
-
Parser function
605
605
-
(closure method)
606
606
-
@type {Parser}
607
607
-
@kind function
608
608
-
**/
609
620
this.parse = parse
610
610
-
/**
611
611
-
@type {Object<string,Parser>}
612
612
-
@property {Parser} 12y2 - same as .parse
613
613
-
**/
614
621
this.langs = {'12y2': parse}
615
622
616
623
// what if you want to write like, "{...}". well that's fine
-633
parse2.js
···
1
1
-
/*! ๐ฆ๐นญ
2
2
-
*/
3
3
-
4
4
-
12||+typeof await/2//2; export default
5
5
-
/**
6
6
-
12y2 markup parser factory
7
7
-
@implements Parser_Collection
8
8
-
**/
9
9
-
class Markup_12y2 { constructor() {
10
10
-
11
11
-
// all state is stored in these vars (and REGEX.lastIndex)
12
12
-
let current, brackets
13
13
-
14
14
-
// About __proto__ in object literals:
15
15
-
// https://tc39.es/ecma262/multipage/ecmascript-language-expressions.html#sec-runtime-semantics-propertydefinitionevaluation
16
16
-
17
17
-
// elements which can survive an eol (without a body)
18
18
-
const IS_BLOCK = {__proto__:null, code:1, divider:1, ROOT:1, heading:1, quote:1, table:1, table_cell:1, image:1, video:1, audio:1, spoiler:1, align:1, list:1, list_item:1, youtube:1, anchor:1}
19
19
-
20
20
-
const MACROS = {
21
21
-
'{EOL}': "(?![^\\n])",
22
22
-
'{BOL}': "^",
23
23
-
'{ANY}': "[^]",
24
24
-
'{URL_CHARS}': "[-\\w/%&=#+~@$*'!?,.;:]*",
25
25
-
'{URL_FINAL}': "[-\\w/%&=#+~@$*']",
26
26
-
}
27
27
-
const GROUPS = []
28
28
-
let regi = []
29
29
-
const PAT=({raw}, ...groups)=>{
30
30
-
regi.push(
31
31
-
raw.join("()")
32
32
-
.replace(/\\`/g, "`")
33
33
-
.replace(/[(](?![?)])/g, "(?:")
34
34
-
.replace(/[{][A-Z_]+[}]/g, match=>MACROS[match])
35
35
-
)
36
36
-
GROUPS.push(...groups)
37
37
-
}
38
38
-
39
39
-
PAT`[\n]?[}]${'BLOCK_END'}`
40
40
-
PAT`[\n]${'NEWLINE'}`
41
41
-
PAT`{BOL}[#]{1,4}(?=[\[{ ])${'HEADING'}`
42
42
-
PAT`{BOL}[>](?=[\[{ ])${'QUOTE'}`
43
43
-
PAT`{BOL}[-]{3,}{EOL}${'DIVIDER'}`
44
44
-
PAT`([*][*]|[_][_]|[~][~]|[/])${'STYLE'}`
45
45
-
PAT`[\\]((https?|sbs)${'ESCAPED'}|[a-z]+)(?![a-zA-Z0-9])${'TAG'}`
46
46
-
PAT`[\\][{][\n]?${'NULL_ENV'}`
47
47
-
PAT`[i]{ANY}${'ESCAPED'}`
48
48
-
PAT`{BOL}[\`]{3}(?=[^\n\`]*?{EOL})${'CODE_BLOCK'}`
49
49
-
PAT`[\`][^\`\n]*([\`]{2}[^\`\n]*)*[\`]?${'INLINE_CODE'}`
50
50
-
//PAT`([!]${'EMBED'})?\b(https?://|sbs:){URL_CHARS}{URL_FINAL}({URL_FINAL}|[(]{URL_CHARS}[)]({URL_CHARS}{URL_FINAL})?)${'LINK'}`
51
51
-
PAT`([!]${'EMBED'})?\b(https?://|sbs:){URL_CHARS}{URL_FINAL}([(]{URL_CHARS}[)]({URL_CHARS}{URL_FINAL})?)?${'LINK'}`
52
52
-
//PAT`([!]${'EMBED'})?\b(https?://|sbs:)({URL_CHARS}{URL_FINAL}([(]{URL_CHARS}[)])?)+${'LINK'}`
53
53
-
PAT`{BOL} *[|]${'TABLE_START'}`
54
54
-
PAT` *[|]${'TABLE_CELL'}`
55
55
-
PAT`{BOL} *[-]${'LIST_ITEM'}`
56
56
-
57
57
-
const REGEX = new RegExp(regi.join("|"), 'g')
58
58
-
regi = null
59
59
-
60
60
-
//todo: org tables separators?
61
61
-
62
62
-
63
63
-
64
64
-
// process an embed url: !https://example.com/image.png[alt=balls]
65
65
-
// returns [type: String, args: Object]
66
66
-
const process_embed=(url, rargs)=>{
67
67
-
let type
68
68
-
let args = {url}
69
69
-
for (let arg of rargs) {
70
70
-
let m
71
71
-
if ('video'===arg || 'audio'===arg || 'image'===arg) {
72
72
-
type = arg
73
73
-
} else if (m = /^(\d+)x(\d+)$/.exec(arg)) {
74
74
-
args.width = +m[1]
75
75
-
args.height = +m[2]
76
76
-
} else {
77
77
-
if (args.alt==undefined)
78
78
-
args.alt = arg
79
79
-
else
80
80
-
args.alt += ";"+arg
81
81
-
}
82
82
-
}
83
83
-
if (rargs.named.alt!=undefined)
84
84
-
args.alt = rargs.named.alt
85
85
-
// todo: improve this
86
86
-
if (!type) {
87
87
-
if (/[.](mp3|ogg|wav|m4a)\b/i.test(url))
88
88
-
type = 'audio'
89
89
-
else if (/[.](mp4|mkv|mov)\b/i.test(url))
90
90
-
type = 'video'
91
91
-
else if (/^https?:[/][/](?:www[.])?(?:youtube.com[/]watch[?]v=|youtu[.]be[/]|youtube.com[/]shorts[/])[\w-]{11}/.test(url)) {
92
92
-
// todo: accept [start-end] args maybe?
93
93
-
type = 'youtube'
94
94
-
}
95
95
-
}
96
96
-
if (!type)
97
97
-
type = 'image'
98
98
-
return [type, args]
99
99
-
}
100
100
-
const process_cell_args=(rargs)=>{
101
101
-
let args = {}
102
102
-
for (let arg of rargs) {
103
103
-
let m
104
104
-
if ("*"===arg || "#"===arg)
105
105
-
args.header = true
106
106
-
else if (['red', 'orange', 'yellow', 'green', 'blue', 'purple', 'gray'].includes(arg))
107
107
-
args.color = arg
108
108
-
else if (m = /^(\d*)x(\d*)$/.exec(arg)) {
109
109
-
let [, w, h] = m
110
110
-
if (+w > 1) args.colspan = +w
111
111
-
if (+h > 1) args.rowspan = +h
112
112
-
}
113
113
-
}
114
114
-
return args
115
115
-
}
116
116
-
const process_row_args=(rargs)=>{
117
117
-
let args = {}
118
118
-
for (let arg of rargs) {
119
119
-
if ("*"===arg || "#"===arg)
120
120
-
args.header = true
121
121
-
}
122
122
-
return args
123
123
-
}
124
124
-
125
125
-
// move up
126
126
-
const pop=()=>{
127
127
-
if (current.body)
128
128
-
brackets--
129
129
-
let o = current
130
130
-
current = current.parent
131
131
-
return o
132
132
-
}
133
133
-
134
134
-
const get_last=(block)=>{
135
135
-
return block.content[block.content.length-1]
136
136
-
}
137
137
-
138
138
-
const CLOSE=(cancel)=>{
139
139
-
let o = pop()
140
140
-
let type = o.type
141
141
-
142
142
-
if ('style'===type && cancel) {
143
143
-
current.content.push(o.args, ...o.content)
144
144
-
current.prev = o.prev
145
145
-
return
146
146
-
}
147
147
-
if ('null_env'===type) {
148
148
-
current.content.push(...o.content)
149
149
-
current.prev = o.prev
150
150
-
return
151
151
-
}
152
152
-
153
153
-
// cancelling an empty table cell means:
154
154
-
// it's the end of the row, so discard the cell
155
155
-
if ('table_cell'===type && cancel && !o.content.length) {
156
156
-
// if the ROW is empty (i.e. we just have a single | )
157
157
-
if (!current.content.length) {
158
158
-
let o = pop() // discard the row
159
159
-
TEXT(o.args)
160
160
-
return
161
161
-
// todo: maybe also cancel rows with 1 unclosed cell?
162
162
-
// like `| abc` -> text
163
163
-
}
164
164
-
// transfer args to the row, and parse as table row args:
165
165
-
current.args = process_row_args(o.args)
166
166
-
// FALLTHROUGH (to close the row)
167
167
-
o = pop()
168
168
-
type = o.type
169
169
-
}
170
170
-
171
171
-
if ('newline'===o.prev)
172
172
-
o.content.push("\n")
173
173
-
174
174
-
let node = {type: type, args: o.args, content: o.content}
175
175
-
let dest = current
176
176
-
177
177
-
if ('list_item'===type) {
178
178
-
// merge list_item with preceding list
179
179
-
node.args = null
180
180
-
let indent = o.args.indent
181
181
-
while (1) {
182
182
-
let curr = dest
183
183
-
dest = get_last(curr)
184
184
-
if (!dest || dest.type!=='list' || dest.args.indent>indent) {
185
185
-
// create a new level in the list
186
186
-
dest = {type:'list', args:{indent}, content:[]}
187
187
-
// safe because there's no newline
188
188
-
curr.content.push(dest)
189
189
-
break
190
190
-
}
191
191
-
if (dest.args.indent == indent)
192
192
-
break
193
193
-
}
194
194
-
} else if ('table_row'===type) {
195
195
-
dest = get_last(current)
196
196
-
if (!dest || 'table'!==dest.type) {
197
197
-
dest = {type:'table', args:null, content:[]}
198
198
-
current.content.push(dest)
199
199
-
}
200
200
-
} else if ('style'===type) {
201
201
-
node.type = {
202
202
-
__proto__:null,
203
203
-
'**': 'bold', '__': 'underline',
204
204
-
'~~': 'strikethrough', '/': 'italic',
205
205
-
}[o.args]
206
206
-
node.args = null
207
207
-
}
208
208
-
209
209
-
current.prev = type in IS_BLOCK ? 'block' : o.prev
210
210
-
dest.content.push(node)
211
211
-
212
212
-
if ('table_cell'===type) {
213
213
-
node.args = process_cell_args(o.args) // hack?
214
214
-
if (cancel) {
215
215
-
// close the row
216
216
-
current.args = {}
217
217
-
CLOSE()
218
218
-
}
219
219
-
}
220
220
-
}
221
221
-
222
222
-
// push text
223
223
-
const TEXT=(text)=>{
224
224
-
if (text!=="") {
225
225
-
current.content.push(text) // todo: merge with surrounding textnodes?
226
226
-
current.prev = 'text'
227
227
-
}
228
228
-
}
229
229
-
// push empty tag
230
230
-
const BLOCK=(type, args)=>{
231
231
-
current.content.push({type, args})
232
232
-
current.prev = type in IS_BLOCK ? 'block' : 'text'
233
233
-
}
234
234
-
235
235
-
const NEWLINE=(real)=>{
236
236
-
if (real)
237
237
-
while (!current.body && 'ROOT'!=current.type)
238
238
-
CLOSE(true)
239
239
-
if ('block'!==current.prev)
240
240
-
current.content.push("\n")
241
241
-
if ('all_newline'!==current.prev)
242
242
-
current.prev = 'newline'
243
243
-
}
244
244
-
245
245
-
const null_args = []
246
246
-
null_args.named = Object.freeze({})
247
247
-
Object.freeze(null_args)
248
248
-
const NO_ARGS = []
249
249
-
NO_ARGS.named = Object.freeze({})
250
250
-
Object.freeze(NO_ARGS)
251
251
-
// todo: do we even need named args?
252
252
-
const parse_args=(arglist)=>{
253
253
-
// note: checks undefined AND "" (\tag AND \tag[])
254
254
-
if (!arglist)
255
255
-
return null_args
256
256
-
let list = [], named = {}
257
257
-
list.named = named
258
258
-
for (let arg of arglist.split(";")) {
259
259
-
let [, name, value] = /^(?:([^=]*)=)?(.*)$/.exec(arg)
260
260
-
// value OR =value
261
261
-
// (this is to allow values to contain =. ex: [=1=2] is "1=2")
262
262
-
if (!name)
263
263
-
list.push(value)
264
264
-
else // name=value
265
265
-
named[name] = value
266
266
-
}
267
267
-
return list
268
268
-
}
269
269
-
270
270
-
const STYLE_START
271
271
-
= /^[ \s.'"}{(> ][^ \s,'" ]/
272
272
-
const STYLE_CLOSE
273
273
-
= /^[^ \s,'" ][-\s.,:;!?'"}{)<\\ ]/
274
274
-
275
275
-
const check_style=(token_text, before, after)=>{
276
276
-
// END
277
277
-
for (let c=current; 'style'===c.type; c=c.parent)
278
278
-
if (c.args===token_text) {
279
279
-
if (STYLE_CLOSE.test(before+after))
280
280
-
return c
281
281
-
break
282
282
-
}
283
283
-
// START
284
284
-
if (STYLE_START.test(before+after))
285
285
-
return true
286
286
-
}
287
287
-
288
288
-
let ARG_REGEX = /.*?(?=])/y
289
289
-
let WORD_REGEX = /[^\s`^()+=\[\]{}\\|"';:,.<>/?!*]*/y
290
290
-
let CODE_REGEX = /(?: *([-\w.+#$ ]+?) *(?![^\n]))?\n?([^]*?)(?:```|$)/y // ack
291
291
-
292
292
-
const parse=(text)=>{
293
293
-
let tree = {type: 'ROOT', content: [], prev: 'all_newline'}
294
294
-
current = tree
295
295
-
brackets = 0
296
296
-
297
297
-
// these use REGEX, text
298
298
-
const skip_spaces=()=>{
299
299
-
let pos = REGEX.lastIndex
300
300
-
while (" "===text.charAt(pos))
301
301
-
pos++
302
302
-
REGEX.lastIndex = pos
303
303
-
}
304
304
-
const read_code=()=>{
305
305
-
let pos = REGEX.lastIndex
306
306
-
CODE_REGEX.lastIndex = pos
307
307
-
let [, lang, code] = CODE_REGEX.exec(text)
308
308
-
REGEX.lastIndex = CODE_REGEX.lastIndex
309
309
-
return [lang, code]
310
310
-
}
311
311
-
312
312
-
let rargs
313
313
-
const read_args=()=>{
314
314
-
let pos = REGEX.lastIndex
315
315
-
let next = text.charAt(pos)
316
316
-
if ("["!==next)
317
317
-
return rargs = NO_ARGS
318
318
-
ARG_REGEX.lastIndex = pos+1
319
319
-
let argstr = ARG_REGEX.exec(text)
320
320
-
if (!argstr)
321
321
-
return rargs = NO_ARGS
322
322
-
REGEX.lastIndex = ARG_REGEX.lastIndex+1
323
323
-
return rargs = parse_args(argstr[0])
324
324
-
}
325
325
-
326
326
-
let body
327
327
-
const read_body=(space=false)=>{
328
328
-
let pos = REGEX.lastIndex
329
329
-
let next = text.charAt(pos)
330
330
-
if ("{"===next) {
331
331
-
if ("\n"===text.charAt(pos+1))
332
332
-
pos++
333
333
-
REGEX.lastIndex = pos+1
334
334
-
return body = true
335
335
-
}
336
336
-
if (space) {
337
337
-
if (" "===next)
338
338
-
REGEX.lastIndex = pos+1
339
339
-
else
340
340
-
return body = false
341
341
-
}
342
342
-
return body = undefined
343
343
-
}
344
344
-
// start a new block
345
345
-
const OPEN=(type, args=null)=>{
346
346
-
current = Object.seal({
347
347
-
type, args, content: [],
348
348
-
body, parent: current,
349
349
-
prev: 'all_newline',
350
350
-
})
351
351
-
if (body)
352
352
-
brackets++
353
353
-
}
354
354
-
const word_maybe=()=>{
355
355
-
if (!body) {
356
356
-
TEXT(read_word())
357
357
-
CLOSE()
358
358
-
}
359
359
-
}
360
360
-
361
361
-
let match
362
362
-
let last = REGEX.lastIndex = 0
363
363
-
const NEVERMIND=()=>{
364
364
-
REGEX.lastIndex = match.index+1
365
365
-
}
366
366
-
const ACCEPT=()=>{
367
367
-
TEXT(text.substring(last, match.index))
368
368
-
last = REGEX.lastIndex
369
369
-
}
370
370
-
const read_word=()=>{
371
371
-
let pos = REGEX.lastIndex
372
372
-
WORD_REGEX.lastIndex = pos
373
373
-
let word = WORD_REGEX.exec(text)
374
374
-
if (!word)
375
375
-
return null
376
376
-
last = REGEX.lastIndex = WORD_REGEX.lastIndex
377
377
-
return word[0]
378
378
-
}
379
379
-
380
380
-
let prev = -1
381
381
-
main: while (match = REGEX.exec(text)) {
382
382
-
// check for infinite loops
383
383
-
if (match.index===prev)
384
384
-
throw ["INFINITE LOOP", match]
385
385
-
prev = match.index
386
386
-
// 2: figure out which token type was matched
387
387
-
let token = match[0]
388
388
-
let group_num = match.indexOf("", 1)-1
389
389
-
let type = GROUPS[group_num]
390
390
-
// 3:
391
391
-
body = null
392
392
-
rargs = null
393
393
-
394
394
-
switch (type) {
395
395
-
case 'TAG': {
396
396
-
read_args()
397
397
-
if (token==='\\link') {
398
398
-
read_body(false)
399
399
-
} else {
400
400
-
read_body(true)
401
401
-
if (NO_ARGS===rargs && false===body) {
402
402
-
NEVERMIND()
403
403
-
continue main
404
404
-
}
405
405
-
}
406
406
-
ACCEPT()
407
407
-
switch (token) { default: {
408
408
-
let args = {text:text.substring(match.index, last), reason:"invalid tag"}
409
409
-
if (body)
410
410
-
OPEN('invalid', args)
411
411
-
else
412
412
-
BLOCK('invalid', args)
413
413
-
} break; case '\\sub': {
414
414
-
OPEN('subscript')
415
415
-
word_maybe()
416
416
-
} break; case '\\sup': {
417
417
-
OPEN('superscript')
418
418
-
word_maybe()
419
419
-
} break; case '\\b': {
420
420
-
OPEN('bold')
421
421
-
word_maybe()
422
422
-
} break; case '\\i': {
423
423
-
OPEN('italic')
424
424
-
word_maybe()
425
425
-
} break; case '\\u': {
426
426
-
OPEN('underline')
427
427
-
word_maybe()
428
428
-
} break; case '\\s': {
429
429
-
OPEN('strikethrough')
430
430
-
word_maybe()
431
431
-
} break; case '\\quote': {
432
432
-
OPEN('quote', {cite: rargs[0]})
433
433
-
} break; case '\\align': {
434
434
-
let a = rargs[0]
435
435
-
if (!['left', 'right', 'center'].includes(a))
436
436
-
a = 'center'
437
437
-
OPEN('align', {align: a})
438
438
-
} break; case '\\spoiler': case '\\h': {
439
439
-
let [label="spoiler"] = rargs
440
440
-
OPEN('spoiler', {label})
441
441
-
} break; case '\\ruby': {
442
442
-
let [txt="true"] = rargs
443
443
-
OPEN('ruby', {text: txt})
444
444
-
word_maybe()
445
445
-
} break; case '\\key': {
446
446
-
OPEN('key')
447
447
-
word_maybe()
448
448
-
} break; case '\\a': {
449
449
-
let id = rargs[0]
450
450
-
id = id ? id.replace(/\W+/g, "-") : null
451
451
-
OPEN('anchor', {id})
452
452
-
body = true // ghhhh?
453
453
-
//BLOCK('anchor', {id})
454
454
-
} break; case '\\link': {
455
455
-
let args = {url: rargs[0]}
456
456
-
if (body) {
457
457
-
OPEN('link', args)
458
458
-
} else {
459
459
-
BLOCK('simple_link', args)
460
460
-
}
461
461
-
}}
462
462
-
} break; case 'STYLE': {
463
463
-
let c = check_style(token, text.charAt(match.index-1)||"\n", text.charAt(REGEX.lastIndex)||"\n")
464
464
-
if (!c) { // no
465
465
-
NEVERMIND()
466
466
-
continue main
467
467
-
}
468
468
-
ACCEPT()
469
469
-
if (true===c) { // open new
470
470
-
OPEN('style', token)
471
471
-
} else { // close
472
472
-
while (current != c)
473
473
-
CLOSE(true)
474
474
-
CLOSE()
475
475
-
}
476
476
-
} break; case 'TABLE_CELL': {
477
477
-
for (let c=current; ; c=c.parent) {
478
478
-
if ('table_cell'===c.type) {
479
479
-
read_args()
480
480
-
skip_spaces()
481
481
-
ACCEPT()
482
482
-
while (current!==c)
483
483
-
CLOSE(true)
484
484
-
CLOSE() // cell
485
485
-
// we don't know whether these are row args or cell args,
486
486
-
// so just pass the raw args directly, and parse them later.
487
487
-
OPEN('table_cell', rargs)
488
488
-
break
489
489
-
}
490
490
-
if ('style'!==c.type) {
491
491
-
NEVERMIND()
492
492
-
continue main
493
493
-
}
494
494
-
}
495
495
-
} break; case 'TABLE_START': {
496
496
-
read_args()
497
497
-
skip_spaces()
498
498
-
ACCEPT()
499
499
-
let args_token = text.substring(match.index, last)
500
500
-
OPEN('table_row', args_token, false) // special OPEN call
501
501
-
OPEN('table_cell', rargs)
502
502
-
} break; case 'NEWLINE': {
503
503
-
ACCEPT()
504
504
-
NEWLINE(true)
505
505
-
body = true // to trigger start_line
506
506
-
} break; case 'HEADING': {
507
507
-
read_args()
508
508
-
read_body(true)
509
509
-
if (NO_ARGS===rargs && false===body) {
510
510
-
NEVERMIND()
511
511
-
continue main
512
512
-
}
513
513
-
ACCEPT()
514
514
-
let level = token.length
515
515
-
let args = {level}
516
516
-
let id = rargs[0]
517
517
-
args.id = id ? id.replace(/\W+/g, "-") : null
518
518
-
// todo: anchor name (and, can this be chosen automatically based on contents?)
519
519
-
OPEN('heading', args)
520
520
-
} break; case 'DIVIDER': {
521
521
-
ACCEPT()
522
522
-
BLOCK('divider')
523
523
-
} break; case 'BLOCK_END': {
524
524
-
ACCEPT()
525
525
-
if (brackets>0) {
526
526
-
while (!current.body)
527
527
-
CLOSE(true)
528
528
-
if ('invalid'===current.type) {
529
529
-
if ("\n}"==token)
530
530
-
NEWLINE(false) // false since we already closed everything
531
531
-
TEXT("}")
532
532
-
}
533
533
-
CLOSE()
534
534
-
} else {
535
535
-
// hack:
536
536
-
if ("\n}"==token)
537
537
-
NEWLINE(true)
538
538
-
TEXT("}")
539
539
-
}
540
540
-
} break; case 'NULL_ENV': {
541
541
-
body = true
542
542
-
ACCEPT()
543
543
-
OPEN('null_env')
544
544
-
current.prev = current.parent.prev
545
545
-
} break; case 'ESCAPED': {
546
546
-
ACCEPT()
547
547
-
if ("\\\n"===token)
548
548
-
NEWLINE(false)
549
549
-
else if ("\\."===token) { // \. is a no-op
550
550
-
// todo: close lists too
551
551
-
//current.content.push("")
552
552
-
current.prev = 'block'
553
553
-
} else
554
554
-
TEXT(token.substring(1))
555
555
-
} break; case 'QUOTE': {
556
556
-
read_args()
557
557
-
read_body(true)
558
558
-
if (NO_ARGS===rargs && false===body) {
559
559
-
NEVERMIND()
560
560
-
continue main
561
561
-
}
562
562
-
ACCEPT()
563
563
-
OPEN('quote', {cite: rargs[0]})
564
564
-
} break; case 'CODE_BLOCK': {
565
565
-
let [lang, code] = read_code()
566
566
-
ACCEPT()
567
567
-
BLOCK('code', {text:code, lang})
568
568
-
} break; case 'INLINE_CODE': {
569
569
-
ACCEPT()
570
570
-
BLOCK('icode', {text: token.replace(/`(`)?/g, "$1")})
571
571
-
} break; case 'EMBED': {
572
572
-
read_args()
573
573
-
ACCEPT()
574
574
-
let url = token.substring(1) // ehh better
575
575
-
let [type, args] = process_embed(url, rargs)
576
576
-
BLOCK(type, args)
577
577
-
} break; case 'LINK': {
578
578
-
read_args()
579
579
-
read_body(false)
580
580
-
ACCEPT()
581
581
-
let url = token
582
582
-
let args = {url}
583
583
-
if (body) {
584
584
-
OPEN('link', args)
585
585
-
} else {
586
586
-
args.text = rargs[0]
587
587
-
BLOCK('simple_link', args)
588
588
-
}
589
589
-
} break; case 'LIST_ITEM': {
590
590
-
read_args()
591
591
-
read_body(true)
592
592
-
if (NO_ARGS===rargs && false===body) {
593
593
-
NEVERMIND()
594
594
-
continue main
595
595
-
}
596
596
-
ACCEPT()
597
597
-
let indent = token.indexOf("-")
598
598
-
OPEN('list_item', {indent})
599
599
-
} }
600
600
-
601
601
-
if (body) {
602
602
-
text = text.substring(last)
603
603
-
last = REGEX.lastIndex = 0
604
604
-
prev = -1
605
605
-
}
606
606
-
} // end of main loop
607
607
-
608
608
-
TEXT(text.substring(last)) // text after last token
609
609
-
610
610
-
while ('ROOT'!==current.type)
611
611
-
CLOSE(true)
612
612
-
if ('newline'===current.prev) //todo: this is repeated
613
613
-
current.content.push("\n")
614
614
-
615
615
-
current = null // my the memory leak!
616
616
-
617
617
-
return tree // technically we could return `current` here and get rid of `tree` entirely
618
618
-
}
619
619
-
620
620
-
this.parse = parse
621
621
-
this.langs = {'12y2': parse}
622
622
-
623
623
-
// what if you want to write like, "{...}". well that's fine
624
624
-
// BUT if you are inside a tag, the } will close it.
625
625
-
// maybe closing tags should need some kind of special syntax?
626
626
-
// \tag{ ... \} >{...\} idk..
627
627
-
// or match paired {}s :
628
628
-
// \tag{ ... {heck} ... } <- closes here
629
629
-
630
630
-
// todo: after parsing a block element: eat the next newline directly
631
631
-
} }
632
632
-
633
633
-
if ('object'==typeof module && module) module.exports = Markup_12y2
+3
-3
testing/auto.html
···
1
1
<!doctype html><html lang=en-QS><meta charset=utf-8><meta name=viewport content="width=device-width, height=device-height, initial-scale=1" id=$meta_viewport>
2
2
<title>Tests 2</title>
3
3
4
4
-
<script src=../parse2.js></script>
4
4
+
<script src=../parse.js></script>
5
5
<script src=../legacy.js></script>
6
6
<script src=parse-ref.js></script>
7
7
<script src=legacy-ref.js></script>
···
93
93
console.log("got "+got+" items ("+nw+" new)")
94
94
}
95
95
96
96
-
let lang = "12y"
96
96
+
let lang = "12y2"
97
97
98
98
load_data([
99
99
{
···
105
105
type:'message',
106
106
fields:'text,values,id,createDate,contentId',
107
107
query:`!valuelike({{m}},{{"${lang}"}})`,
108
108
-
order:'id_desc',
108
108
+
order:'id',
109
109
},
110
110
]).then(async (lmm)=>{
111
111
collect(lmm.content, false, lang)
+1
-1
testing/index.html
···
1
1
<!doctype html><html lang=en-QS><meta charset=utf-8>
2
2
<title>Markup2 Tests</title>
3
3
4
4
-
<script src=../parse2.js></script>
4
4
+
<script src=../parse.js></script>
5
5
<script src=../langs.js></script>
6
6
7
7
<script src=test.js></script>