Transpiler for HTML-in-PowerShell, PSX (like JSX)
1param (
2 $PsxFile
3)
4
5enum TokenType {
6 OPEN_ELEMENT_START # <
7 CLOSE_ELEMENT # >
8 VOID_ELEMENT_CLOSE # />
9 EQUAL # =
10 OPEN_ELEMENT_END # </
11
12 TAG_NAME # [a-zA-Z0-9_-\.]+
13 ATTR_NAME # [a-zA-Z0-9_-\.]+
14 ATTR_VALUE # plain identifier
15 ATTR_VALUE_SCRIPTBLOCK # { ... }
16 ATTR_SPLAT # @VariableName
17
18 EOF
19
20 ILLEGAL # idk
21}
22
23[Flags()] enum ElementState {
24 UNOPENED = 1 # before <
25 OPENED = 2 # <
26
27 AFTER_TAG = 4 # <div
28 AFTER_ATTRIBUTE_NAME = 8 # <div attr
29 AFTER_ATTRIBUTE_EQUALS = 16 # <div attr=
30 AFTER_ATTRIBUTE_VALUE = 32 # <div attr=val, <div
31
32 CLOSED = 64 # <div ...>
33 CLOSED_VOID = 128 # <div .../>
34
35 END_OPENED = 256 # <div ...></
36 END_AFTER_TAG = 512 # <div ...></div
37 END_CLOSED = 1024 # <div ...></div>, <div .../>
38
39 ILLEGAL = 2048
40 VALID_ATTRIBUTE_NAME_START = (4 + 8 + 32 + 2048)
41}
42
43class Token {
44 [TokenType] $Type
45 [String] $Literal
46
47 Token($Type, $Literal) {
48 $this.Type = $Type
49 $this.Literal = $Literal
50 }
51
52 [String] ToString() {
53 return "{0} {1}" -f $this.Type, $this.Literal
54 }
55
56 [Boolean] Equals($that) {
57 return (($this.Type -eq $that.Type) -and ($this.Literal -eq $that.Literal))
58 }
59}
60
61class Lexer {
62 [String] $LexInput
63 [Int] $Pos
64 [Int] $NextPos
65 [String] $Char
66
67 [String] $CurrentTag = ''
68 [ElementState] $State = [ElementState]::UNOPENED
69
70 Lexer($LexInput) {
71 $this.LexInput = $LexInput
72 $this.Pos = 0
73 $this.NextPos = 1
74 $this.Char = [String]($LexInput[0])
75 }
76
77 hidden [String] PopChar() {
78 if ($this.NextPos -ge $this.LexInput.Length) {
79 $this.Char = ''
80 } else {
81 $this.Char = $this.LexInput[$this.NextPos]
82 }
83
84 $this.Pos = $this.NextPos
85 $this.NextPos++
86
87 return $this.Char
88 }
89
90 hidden [String] PeekChar() {
91 if ($this.NextPos -ge $this.LexInput.Length) {
92 return ''
93 } else {
94 return $this.LexInput[$this.NextPos]
95 }
96 }
97
98 hidden [Boolean] CharIsIdentifier() {
99 return $this.Char -match '[a-zA-Z0-9_-]'
100 }
101
102 hidden [String] PopIdentifier() {
103 $Start = $this.Pos
104
105 while ($this.CharIsIdentifier()) {
106 $this.PopChar()
107 }
108 # js string.substring is (indexStart, indexEnd)
109 # dotnet String.SubString is (startIndex, length)
110 return $this.LexInput.SubString($Start, $this.Pos-$Start)
111 }
112
113 hidden [String] PeekIdentifier() {
114 $Start = $this.Pos
115 $OldNext = $this.NextPos
116 $OldChar = $this.Char
117
118 while ($this.CharIsIdentifier()) {
119 $this.PopChar()
120 }
121
122 $ret = $this.LexInput.SubString($Start, $this.Pos-$Start)
123
124 # there's a better way to do this but that is for later
125 $this.Pos = $Start
126 $this.NextPos = $OldNext
127 $this.Char = $OldChar
128
129 return $ret
130 }
131
132 hidden [String] PopQuotedAttrValue() {
133 if ($this.Char -notin '"',"'",'{') {
134 $this.Die($this.Char)
135 }
136 $ExpectingQuote = $this.Char -eq '{' ? '}' : $this.Char
137 $this.PopChar() # first "
138 $Start = $this.Pos
139
140 while ($this.PopChar() -ne $ExpectingQuote) {}
141
142 $End = $this.Pos
143 $this.PopChar() # final "
144
145 return $this.LexInput.SubString($Start, $End - $Start)
146 }
147
148 hidden [Void] ConsumeWhitespace() {
149 while ($this.Char -match '\s') {
150 $this.PopChar()
151 }
152 }
153
154 hidden [Void] Die([String] $Char) {
155 throw "Unexpected token $char (char $($this.Pos)) at state $($this.State)"
156 }
157
158 [Token] NextToken() {
159 $Token = $null
160
161 if ($this.Pos -eq $this.LexInput.Length) {
162 return [Token]::new( [TokenType]::EOF, '' )
163 }
164
165 if ($this.State.HasFlag([ElementState]::ILLEGAL)) {
166 throw "Lexer found in illegal state $($this.State)"
167 }
168
169 switch -Regex ($this.Char) {
170 '<' {
171 switch ($this.State) {
172 {$_ -eq [ElementState]::UNOPENED} {
173 $Token = [Token]::new( [TokenType]::OPEN_ELEMENT_START, $this.Char )
174 $this.State = [ElementState]::OPENED
175 }
176 {$_ -eq [ElementState]::CLOSED} {
177 if ($this.PeekChar() -eq '/') {
178 $Token = [Token]::new( [TokenType]::OPEN_ELEMENT_END, $this.Char + $this.PopChar() )
179 $this.State = [ElementState]::END_OPENED
180 }
181 }
182 default {
183 $this.Die('<')
184 }
185 }
186 }
187
188 '/' {
189 switch ($this.State) {
190 {[ElementState]::VALID_ATTRIBUTE_NAME_START.HasFlag($_)} {
191 if ($this.PeekChar() -eq '>') {
192 $Token = [Token]::new( [TokenType]::VOID_ELEMENT_CLOSE, $this.Char + $this.PopChar() )
193 $this.State = [ElementState]::CLOSED_VOID
194 }
195 }
196 default {
197 $this.Die('/')
198 }
199 }
200 }
201
202 '>' {
203 # check for nested psx here!!!
204 switch ($this.State) {
205 {[ElementState]::VALID_ATTRIBUTE_NAME_START.HasFlag($_)} {
206 $Token = [Token]::new( [TokenType]::CLOSE_ELEMENT, $this.Char )
207 $this.State = [ElementState]::CLOSED
208 }
209 {$_ -eq [ElementState]::END_AFTER_TAG} {
210 $Token = [Token]::new( [TokenType]::CLOSE_ELEMENT, $this.Char )
211 $this.State = [ElementState]::END_CLOSED
212 }
213 default {
214 $this.Die('>')
215 }
216 }
217
218 $Token = [Token]::new([TokenType]::CLOSE_ELEMENT, $this.Char)
219 }
220
221 {$_ -in '"', "'"} {
222 # TODO: check for nested PowerShell
223 if ($this.State -ne [ElementState]::AFTER_ATTRIBUTE_EQUALS) {
224 $this.Die($_)
225 }
226
227 $this.State = [ElementState]::AFTER_ATTRIBUTE_VALUE
228 return [Token]::new( [TokenType]::ATTR_VALUE , $this.PopQuotedAttrValue() )
229 }
230
231 '{' {
232 if ($this.State -ne [ElementState]::AFTER_ATTRIBUTE_EQUALS) {
233 $this.Die($_)
234 }
235
236 $this.State = [ElementState]::AFTER_ATTRIBUTE_VALUE
237 return [Token]::new( [TokenType]::ATTR_VALUE_SCRIPTBLOCK , $this.PopQuotedAttrValue() )
238 }
239
240 '@' {
241 $this.PopChar() # drop @
242
243 if ($this.State -notin
244 [ElementState]::AFTER_TAG,
245 [ElementState]::AFTER_ATTRIBUTE_NAME,
246 [ElementState]::AFTER_ATTRIBUTE_VALUE
247 ) {
248 $this.Die('@')
249 }
250
251 return [Token]::new( [TokenType]::ATTR_SPLAT, $this.PopIdentifier() )
252 }
253
254 '[a-zA-Z0-9_-]' {
255 $TokType = $null
256 switch ($this.State) {
257 {$_ -eq [ElementState]::OPENED} {
258 $TokType = [TokenType]::TAG_NAME
259 $this.CurrentTag = $this.PeekIdentifier()
260
261 $this.State = [ElementState]::AFTER_TAG
262 }
263 {[ElementState]::VALID_ATTRIBUTE_NAME_START.HasFlag($_)} {
264 $TokType = [TokenType]::ATTR_NAME
265
266 $this.State = [ElementState]::AFTER_ATTRIBUTE_NAME
267 }
268 {$_ -eq [ElementState]::AFTER_ATTRIBUTE_EQUALS} {
269 $TokType = [TokenType]::ATTR_VALUE
270
271 $this.State = [ElementState]::AFTER_ATTRIBUTE_VALUE
272 }
273 {$_ -eq [ElementState]::END_OPENED} {
274 $Cur = $this.CurrentTag
275 $Peek = $this.PeekIdentifier()
276 if ($Cur -ne $Peek) {
277 throw "Start tag name ($Cur) and end tag name ($Peek) doesn't match"
278 }
279
280 $TokType = [TokenType]::TAG_NAME
281
282 $this.State = [ElementState]::END_AFTER_TAG
283 }
284 default {
285 $this.Die( $this.Char )
286 }
287 }
288
289 return [Token]::new($TokType, $this.PopIdentifier())
290 }
291
292 '=' {
293 if ($this.State -ne [ElementState]::AFTER_ATTRIBUTE_NAME) {
294 $this.Die('=')
295 }
296
297 $Token = [Token]::new([TokenType]::EQUAL, $this.Char)
298
299 $this.State = [ElementState]::AFTER_ATTRIBUTE_EQUALS
300 }
301
302 '\s' {
303 $this.ConsumeWhitespace()
304 return $this.NextToken()
305 }
306
307 default {
308 $Token = [Token]::new([TokenType]::ILLEGAL, $this.LexInput.Substring($this.Pos))
309 $this.State = [ElementState]::ILLEGAL
310 }
311 }
312
313 $this.PopChar()
314 return $Token
315 }
316}