IRC parsing, tokenization, and state handling in C#
at a1df2ed6b496cfac5770a6bd7e4806ca18bf39a5 343 lines 8.1 kB view raw
1# IRC parser tests 2# splitting messages into usable atoms 3 4# Written in 2015 by Daniel Oaks <daniel@danieloaks.net> 5# 6# To the extent possible under law, the author(s) have dedicated all copyright 7# and related and neighboring rights to this software to the public domain 8# worldwide. This software is distributed without any warranty. 9# 10# You should have received a copy of the CC0 Public Domain Dedication along 11# with this software. If not, see 12# <http://creativecommons.org/publicdomain/zero/1.0/>. 13 14# some of the tests here originate from grawity's test vectors, which is WTFPL v2 licensed 15# https://github.com/grawity/code/tree/master/lib/tests 16# some of the tests here originate from Mozilla's test vectors, which is public domain 17# https://dxr.mozilla.org/comm-central/source/chat/protocols/irc/test/test_ircMessage.js 18# some of the tests here originate from SaberUK's test vectors, which he's indicated I am free to include here 19# https://github.com/SaberUK/ircparser/tree/master/test 20 21# we follow RFC1459 with regards to multiple ascii spaces splitting atoms: 22# The prefix, command, and all parameters are 23# separated by one (or more) ASCII space character(s) (0x20). 
24# because doing it as RFC2812 says (strictly as a single ascii space) isn't sane 25 26tests: 27 # input is the string coming directly from the server to parse 28 29 # the atoms dict has the keys: 30 # * tags: tags dict 31 # tags with no value are an empty string 32 # * source: source string, without single leading colon 33 # * verb: verb string 34 # * params: params split up as a list 35 # if the params key does not exist, assume it is empty 36 # if any other keys do not exist, assume they are null 37 # a key that is null does not exist or is not specified with the 38 # given input string 39 40 # simple 41 - input: "foo bar baz asdf" 42 atoms: 43 verb: "foo" 44 params: 45 - "bar" 46 - "baz" 47 - "asdf" 48 49 # with source 50 - input: ":coolguy foo bar baz asdf" 51 atoms: 52 source: "coolguy" 53 verb: "foo" 54 params: 55 - "bar" 56 - "baz" 57 - "asdf" 58 59 # with trailing param 60 - input: "foo bar baz :asdf quux" 61 atoms: 62 verb: "foo" 63 params: 64 - "bar" 65 - "baz" 66 - "asdf quux" 67 68 - input: "foo bar baz :" 69 atoms: 70 verb: "foo" 71 params: 72 - "bar" 73 - "baz" 74 - "" 75 76 - input: "foo bar baz ::asdf" 77 atoms: 78 verb: "foo" 79 params: 80 - "bar" 81 - "baz" 82 - ":asdf" 83 84 # with source and trailing param 85 - input: ":coolguy foo bar baz :asdf quux" 86 atoms: 87 source: "coolguy" 88 verb: "foo" 89 params: 90 - "bar" 91 - "baz" 92 - "asdf quux" 93 94 - input: ":coolguy foo bar baz : asdf quux " 95 atoms: 96 source: "coolguy" 97 verb: "foo" 98 params: 99 - "bar" 100 - "baz" 101 - " asdf quux " 102 103 - input: ":coolguy PRIVMSG bar :lol :) " 104 atoms: 105 source: "coolguy" 106 verb: "PRIVMSG" 107 params: 108 - "bar" 109 - "lol :) " 110 111 - input: ":coolguy foo bar baz :" 112 atoms: 113 source: "coolguy" 114 verb: "foo" 115 params: 116 - "bar" 117 - "baz" 118 - "" 119 120 - input: ":coolguy foo bar baz : " 121 atoms: 122 source: "coolguy" 123 verb: "foo" 124 params: 125 - "bar" 126 - "baz" 127 - " " 128 129 # with tags 130 - input: 
"@a=b;c=32;k;rt=ql7 foo" 131 atoms: 132 verb: "foo" 133 tags: 134 "a": "b" 135 "c": "32" 136 "k": 137 "rt": "ql7" 138 139 # with escaped tags 140 - input: "@a=b\\\\and\\nk;c=72\\s45;d=gh\\:764 foo" 141 atoms: 142 verb: "foo" 143 tags: 144 "a": "b\\and\nk" 145 "c": "72 45" 146 "d": "gh;764" 147 148 # with tags and source 149 - input: "@c;h=;a=b :quux ab cd" 150 atoms: 151 tags: 152 "c": 153 "h": "" 154 "a": "b" 155 source: "quux" 156 verb: "ab" 157 params: 158 - "cd" 159 160 # different forms of last param 161 - input: ":src JOIN #chan" 162 atoms: 163 source: "src" 164 verb: "JOIN" 165 params: 166 - "#chan" 167 168 - input: ":src JOIN :#chan" 169 atoms: 170 source: "src" 171 verb: "JOIN" 172 params: 173 - "#chan" 174 175 # with and without last param 176 - input: ":src AWAY" 177 atoms: 178 source: "src" 179 verb: "AWAY" 180 181 - input: ":src AWAY " 182 atoms: 183 source: "src" 184 verb: "AWAY" 185 186 # tab is not considered <SPACE> 187 - input: ":cool\tguy foo bar baz" 188 atoms: 189 source: "cool\tguy" 190 verb: "foo" 191 params: 192 - "bar" 193 - "baz" 194 195 # with weird control codes in the source 196 - input: ":coolguy!ag@net\x035w\x03ork.admin PRIVMSG foo :bar baz" 197 atoms: 198 source: "coolguy!ag@net\x035w\x03ork.admin" 199 verb: "PRIVMSG" 200 params: 201 - "foo" 202 - "bar baz" 203 204 - input: ":coolguy!~ag@n\x02et\x0305w\x0fork.admin PRIVMSG foo :bar baz" 205 atoms: 206 source: "coolguy!~ag@n\x02et\x0305w\x0fork.admin" 207 verb: "PRIVMSG" 208 params: 209 - "foo" 210 - "bar baz" 211 212 - input: "@tag1=value1;tag2;vendor1/tag3=value2;vendor2/tag4= :irc.example.com COMMAND param1 param2 :param3 param3" 213 atoms: 214 tags: 215 tag1: "value1" 216 tag2: 217 vendor1/tag3: "value2" 218 vendor2/tag4: "" 219 source: "irc.example.com" 220 verb: "COMMAND" 221 params: 222 - "param1" 223 - "param2" 224 - "param3 param3" 225 226 - input: ":irc.example.com COMMAND param1 param2 :param3 param3" 227 atoms: 228 source: "irc.example.com" 229 verb: "COMMAND" 230 params: 
231 - "param1" 232 - "param2" 233 - "param3 param3" 234 235 - input: "@tag1=value1;tag2;vendor1/tag3=value2;vendor2/tag4 COMMAND param1 param2 :param3 param3" 236 atoms: 237 tags: 238 tag1: "value1" 239 tag2: 240 vendor1/tag3: "value2" 241 vendor2/tag4: 242 verb: "COMMAND" 243 params: 244 - "param1" 245 - "param2" 246 - "param3 param3" 247 248 - input: "COMMAND" 249 atoms: 250 verb: "COMMAND" 251 252 # yaml encoding + slashes is fun 253 - input: "@foo=\\\\\\\\\\:\\\\s\\s\\r\\n COMMAND" 254 atoms: 255 tags: 256 foo: "\\\\;\\s \r\n" 257 verb: "COMMAND" 258 259 # broken messages from unreal 260 - input: ":gravel.mozilla.org 432 #momo :Erroneous Nickname: Illegal characters" 261 atoms: 262 source: "gravel.mozilla.org" 263 verb: "432" 264 params: 265 - "#momo" 266 - "Erroneous Nickname: Illegal characters" 267 268 - input: ":gravel.mozilla.org MODE #tckk +n " 269 atoms: 270 source: "gravel.mozilla.org" 271 verb: "MODE" 272 params: 273 - "#tckk" 274 - "+n" 275 276 - input: ":services.esper.net MODE #foo-bar +o foobar " 277 atoms: 278 source: "services.esper.net" 279 verb: "MODE" 280 params: 281 - "#foo-bar" 282 - "+o" 283 - "foobar" 284 285 # tag values should be parsed char-at-a-time to prevent wayward replacements. 286 - input: "@tag1=value\\\\ntest COMMAND" 287 atoms: 288 tags: 289 tag1: "value\\ntest" 290 verb: "COMMAND" 291 292 # If a tag value has a slash followed by a character which doesn't need 293 # to be escaped, the slash should be dropped. 
294 - input: "@tag1=value\\1 COMMAND" 295 atoms: 296 tags: 297 tag1: "value1" 298 verb: "COMMAND" 299 300 # A slash at the end of a tag value should be dropped 301 - input: "@tag1=value1\\ COMMAND" 302 atoms: 303 tags: 304 tag1: "value1" 305 verb: "COMMAND" 306 307 # Duplicate tags: Parsers SHOULD disregard all but the final occurrence 308 - input: "@tag1=1;tag2=3;tag3=4;tag1=5 COMMAND" 309 atoms: 310 tags: 311 tag1: "5" 312 tag2: "3" 313 tag3: "4" 314 verb: "COMMAND" 315 316 # vendored tags can have the same name as a non-vendored tag 317 - input: "@tag1=1;tag2=3;tag3=4;tag1=5;vendor/tag2=8 COMMAND" 318 atoms: 319 tags: 320 tag1: "5" 321 tag2: "3" 322 tag3: "4" 323 vendor/tag2: "8" 324 verb: "COMMAND" 325 326 # Some parsers handle /MODE in a special way, make sure they do it right 327 - input: ":SomeOp MODE #channel :+i" 328 atoms: 329 source: "SomeOp" 330 verb: "MODE" 331 params: 332 - "#channel" 333 - "+i" 334 335 - input: ":SomeOp MODE #channel +oo SomeUser :AnotherUser" 336 atoms: 337 source: "SomeOp" 338 verb: "MODE" 339 params: 340 - "#channel" 341 - "+oo" 342 - "SomeUser" 343 - "AnotherUser"