Transpiler for HTML-in-PowerShell, PSX (like JSX)
at master 316 lines 7.4 kB view raw
param (
    $PsxFile
)

# Kinds of token the PSX lexer can emit.
enum TokenType {
    OPEN_ELEMENT_START      # <
    CLOSE_ELEMENT           # >
    VOID_ELEMENT_CLOSE      # />
    EQUAL                   # =
    OPEN_ELEMENT_END        # </

    TAG_NAME                # [a-zA-Z0-9_-]+
    ATTR_NAME               # [a-zA-Z0-9_-]+
    ATTR_VALUE              # plain identifier, or a '...' / "..." quoted string
    ATTR_VALUE_SCRIPTBLOCK  # { ... }
    ATTR_SPLAT              # @VariableName

    EOF

    ILLEGAL                 # any character the lexer does not recognise
}

# Position of the lexer inside the (single) element it is reading.
# Declared as flags so that a set of states can be tested with HasFlag().
[Flags()] enum ElementState {
    UNOPENED = 1                    # before <
    OPENED = 2                      # <

    AFTER_TAG = 4                   # <div
    AFTER_ATTRIBUTE_NAME = 8        # <div attr
    AFTER_ATTRIBUTE_EQUALS = 16     # <div attr=
    AFTER_ATTRIBUTE_VALUE = 32      # <div attr=val, <div @splat

    CLOSED = 64                     # <div ...>
    CLOSED_VOID = 128               # <div .../>

    END_OPENED = 256                # <div ...></
    END_AFTER_TAG = 512             # <div ...></div
    END_CLOSED = 1024               # <div ...></div>

    ILLEGAL = 2048
    # AFTER_TAG + AFTER_ATTRIBUTE_NAME + AFTER_ATTRIBUTE_VALUE + ILLEGAL
    VALID_ATTRIBUTE_NAME_START = (4 + 8 + 32 + 2048)
}

# A single lexical token: its kind plus the text it was built from.
class Token {
    [TokenType] $Type
    [String] $Literal

    Token($Type, $Literal) {
        $this.Type = $Type
        $this.Literal = $Literal
    }

    # Renders as "<TYPE> <literal>".
    [String] ToString() {
        return "{0} {1}" -f $this.Type, $this.Literal
    }

    # Two tokens are equal when both type and literal match
    # (-eq on strings is case-insensitive in PowerShell).
    [Boolean] Equals($that) {
        return (($this.Type -eq $that.Type) -and ($this.Literal -eq $that.Literal))
    }

    # Kept consistent with Equals(): the literal is upper-cased first because
    # Equals() compares it case-insensitively.
    [Int] GetHashCode() {
        return $this.ToString().ToUpperInvariant().GetHashCode()
    }
}

# Hand-written lexer for one PSX element. Call NextToken() repeatedly until it
# returns a token of type EOF. Unexpected input makes it throw.
class Lexer {
    [String] $LexInput      # full text being lexed
    [Int] $Pos              # index of the current character
    [Int] $NextPos          # index of the character after the current one
    [String] $Char          # current character, '' once past the end of input

    [String] $CurrentTag = ''                           # name from the start tag
    [ElementState] $State = [ElementState]::UNOPENED

    Lexer($LexInput) {
        $this.LexInput = $LexInput
        $this.Pos = 0
        $this.NextPos = 1
        # Read through the typed property so that null/empty input yields ''.
        $this.Char = $this.LexInput.Length -gt 0 ? [String]$this.LexInput[0] : ''
    }

    # Advances one character and returns the new current character
    # ('' at end of input). Pos never moves past LexInput.Length.
    hidden [String] PopChar() {
        if ($this.NextPos -ge $this.LexInput.Length) {
            $this.Char = ''
            $this.Pos = $this.LexInput.Length
        } else {
            $this.Char = $this.LexInput[$this.NextPos]
            $this.Pos = $this.NextPos
        }

        $this.NextPos = $this.Pos + 1

        return $this.Char
    }

    # Returns the character after the current one without advancing.
    hidden [String] PeekChar() {
        if ($this.NextPos -ge $this.LexInput.Length) {
            return ''
        }

        return $this.LexInput[$this.NextPos]
    }

    # True when the current character may appear in a tag/attribute name.
    hidden [Boolean] CharIsIdentifier() {
        return $this.Char -match '[a-zA-Z0-9_-]'
    }

    # Consumes and returns the run of identifier characters starting at Pos.
    hidden [String] PopIdentifier() {
        $Start = $this.Pos

        while ($this.CharIsIdentifier()) {
            $this.PopChar()
        }

        # js string.substring is (indexStart, indexEnd)
        # dotnet String.SubString is (startIndex, length)
        return $this.LexInput.SubString($Start, $this.Pos - $Start)
    }

    # Returns the run of identifier characters starting at Pos without
    # consuming it (read-only scan; lexer state is not touched).
    hidden [String] PeekIdentifier() {
        $End = $this.Pos

        while (($End -lt $this.LexInput.Length) -and (([String]$this.LexInput[$End]) -match '[a-zA-Z0-9_-]')) {
            $End++
        }

        return $this.LexInput.SubString($this.Pos, $End - $this.Pos)
    }

    # Consumes a delimited attribute value ("...", '...' or { ... }) starting
    # at the opening delimiter and returns the text between the delimiters.
    # Braces nest; quotes do not. Throws when the input ends before the
    # closing delimiter is found.
    hidden [String] PopQuotedAttrValue() {
        if ($this.Char -notin '"', "'", '{') {
            $this.Die($this.Char)
        }

        $Opener = $this.Char
        $Closer = $Opener -eq '{' ? '}' : $Opener
        $OpenedAt = $this.Pos
        $Depth = 1

        $this.PopChar() # opening delimiter
        $Start = $this.Pos

        while ($true) {
            if ($this.Pos -ge $this.LexInput.Length) {
                throw "Unterminated attribute value opened by $Opener (char $OpenedAt)"
            }

            # Test the closer first: for quotes the opener and closer are equal.
            if ($this.Char -eq $Closer) {
                $Depth--
                if ($Depth -eq 0) {
                    break
                }
            } elseif (($Opener -eq '{') -and ($this.Char -eq '{')) {
                $Depth++
            }

            $this.PopChar()
        }

        $End = $this.Pos
        $this.PopChar() # closing delimiter

        return $this.LexInput.SubString($Start, $End - $Start)
    }

    # Skips over any whitespace at the current position.
    hidden [Void] ConsumeWhitespace() {
        while ($this.Char -match '\s') {
            $this.PopChar()
        }
    }

    # Aborts lexing, reporting the offending text, position and state.
    hidden [Void] Die([String] $Char) {
        throw "Unexpected token $char (char $($this.Pos)) at state $($this.State)"
    }

    # Returns the next token and advances past it. Returns an EOF token once
    # the input is exhausted. Throws on input that is not valid in the current
    # state, or when called again after an ILLEGAL token was produced.
    [Token] NextToken() {
        if ($this.Pos -ge $this.LexInput.Length) {
            return [Token]::new( [TokenType]::EOF, '' )
        }

        if ($this.State.HasFlag([ElementState]::ILLEGAL)) {
            throw "Lexer found in illegal state $($this.State)"
        }

        $Token = $null

        switch -Regex ($this.Char) {
            '<' {
                switch ($this.State) {
                    {$_ -eq [ElementState]::UNOPENED} {
                        $Token = [Token]::new( [TokenType]::OPEN_ELEMENT_START, $this.Char )
                        $this.State = [ElementState]::OPENED
                    }
                    {$_ -eq [ElementState]::CLOSED} {
                        # Only an end tag may follow '>'; nested elements are
                        # not supported, so report them instead of returning $null.
                        if ($this.PeekChar() -ne '/') {
                            $this.Die('<')
                        }

                        $this.PopChar() # now on '/', the trailing PopChar() steps past it
                        $Token = [Token]::new( [TokenType]::OPEN_ELEMENT_END, '</' )
                        $this.State = [ElementState]::END_OPENED
                    }
                    default {
                        $this.Die('<')
                    }
                }
            }

            '/' {
                switch ($this.State) {
                    {[ElementState]::VALID_ATTRIBUTE_NAME_START.HasFlag($_)} {
                        # A lone '/' is not a token; it must be part of '/>'.
                        if ($this.PeekChar() -ne '>') {
                            $this.Die('/')
                        }

                        $this.PopChar() # now on '>', the trailing PopChar() steps past it
                        $Token = [Token]::new( [TokenType]::VOID_ELEMENT_CLOSE, '/>' )
                        $this.State = [ElementState]::CLOSED_VOID
                    }
                    default {
                        $this.Die('/')
                    }
                }
            }

            '>' {
                # check for nested psx here!!!
                switch ($this.State) {
                    {[ElementState]::VALID_ATTRIBUTE_NAME_START.HasFlag($_)} {
                        $Token = [Token]::new( [TokenType]::CLOSE_ELEMENT, $this.Char )
                        $this.State = [ElementState]::CLOSED
                    }
                    {$_ -eq [ElementState]::END_AFTER_TAG} {
                        $Token = [Token]::new( [TokenType]::CLOSE_ELEMENT, $this.Char )
                        $this.State = [ElementState]::END_CLOSED
                    }
                    default {
                        $this.Die('>')
                    }
                }
            }

            {$_ -in '"', "'"} {
                # TODO: check for nested PowerShell
                if ($this.State -ne [ElementState]::AFTER_ATTRIBUTE_EQUALS) {
                    $this.Die($_)
                }

                $this.State = [ElementState]::AFTER_ATTRIBUTE_VALUE
                return [Token]::new( [TokenType]::ATTR_VALUE, $this.PopQuotedAttrValue() )
            }

            '{' {
                if ($this.State -ne [ElementState]::AFTER_ATTRIBUTE_EQUALS) {
                    $this.Die($_)
                }

                $this.State = [ElementState]::AFTER_ATTRIBUTE_VALUE
                return [Token]::new( [TokenType]::ATTR_VALUE_SCRIPTBLOCK, $this.PopQuotedAttrValue() )
            }

            '@' {
                $SplatAllowedIn = @(
                    [ElementState]::AFTER_TAG
                    [ElementState]::AFTER_ATTRIBUTE_NAME
                    [ElementState]::AFTER_ATTRIBUTE_VALUE
                )

                # Validate before consuming so Die() reports the position of '@'.
                if ($this.State -notin $SplatAllowedIn) {
                    $this.Die('@')
                }

                $this.PopChar() # drop @
                $SplatName = $this.PopIdentifier()

                if ($SplatName.Length -eq 0) {
                    $this.Die($this.Char)
                }

                # A splat stands in for complete attributes, so '=' must not follow it.
                $this.State = [ElementState]::AFTER_ATTRIBUTE_VALUE
                return [Token]::new( [TokenType]::ATTR_SPLAT, $SplatName )
            }

            '[a-zA-Z0-9_-]' {
                $TokType = $null

                # $_ stays the state on entry, so at most one branch matches
                # even though the branches assign $this.State.
                switch ($this.State) {
                    {$_ -eq [ElementState]::OPENED} {
                        $TokType = [TokenType]::TAG_NAME
                        $this.CurrentTag = $this.PeekIdentifier()

                        $this.State = [ElementState]::AFTER_TAG
                    }
                    {[ElementState]::VALID_ATTRIBUTE_NAME_START.HasFlag($_)} {
                        $TokType = [TokenType]::ATTR_NAME

                        $this.State = [ElementState]::AFTER_ATTRIBUTE_NAME
                    }
                    {$_ -eq [ElementState]::AFTER_ATTRIBUTE_EQUALS} {
                        $TokType = [TokenType]::ATTR_VALUE

                        $this.State = [ElementState]::AFTER_ATTRIBUTE_VALUE
                    }
                    {$_ -eq [ElementState]::END_OPENED} {
                        $Cur = $this.CurrentTag
                        $Peek = $this.PeekIdentifier()
                        if ($Cur -ne $Peek) {
                            throw "Start tag name ($Cur) and end tag name ($Peek) doesn't match"
                        }

                        $TokType = [TokenType]::TAG_NAME

                        $this.State = [ElementState]::END_AFTER_TAG
                    }
                    default {
                        $this.Die( $this.Char )
                    }
                }

                return [Token]::new($TokType, $this.PopIdentifier())
            }

            '=' {
                if ($this.State -ne [ElementState]::AFTER_ATTRIBUTE_NAME) {
                    $this.Die('=')
                }

                $Token = [Token]::new([TokenType]::EQUAL, $this.Char)

                $this.State = [ElementState]::AFTER_ATTRIBUTE_EQUALS
            }

            '\s' {
                $this.ConsumeWhitespace()
                return $this.NextToken()
            }

            default {
                # Hand back the unparsed remainder; the next call will throw.
                $Token = [Token]::new([TokenType]::ILLEGAL, $this.LexInput.Substring($this.Pos))
                $this.State = [ElementState]::ILLEGAL
            }
        }

        # Single-character tokens (and the second half of '</' and '/>') are
        # consumed here; multi-character tokens returned early above.
        $this.PopChar()
        return $Token
    }
}