IRC parsing, tokenization, and state handling in C#
1# IRC parser tests
2# splitting messages into usable atoms
3
4# Written in 2015 by Daniel Oaks <daniel@danieloaks.net>
5#
6# To the extent possible under law, the author(s) have dedicated all copyright
7# and related and neighboring rights to this software to the public domain
8# worldwide. This software is distributed without any warranty.
9#
10# You should have received a copy of the CC0 Public Domain Dedication along
11# with this software. If not, see
12# <http://creativecommons.org/publicdomain/zero/1.0/>.
13
14# some of the tests here originate from grawity's test vectors, which is WTFPL v2 licensed
15# https://github.com/grawity/code/tree/master/lib/tests
16# some of the tests here originate from Mozilla's test vectors, which is public domain
17# https://dxr.mozilla.org/comm-central/source/chat/protocols/irc/test/test_ircMessage.js
18# some of the tests here originate from SaberUK's test vectors, which he's indicated I am free to include here
19# https://github.com/SaberUK/ircparser/tree/master/test
20
21# we follow RFC1459 with regards to multiple ascii spaces splitting atoms:
22# The prefix, command, and all parameters are
23# separated by one (or more) ASCII space character(s) (0x20).
24# because doing it as RFC2812 says (strictly as a single ascii space) isn't sane
25
26tests:
27 # input is the string coming directly from the server to parse
28
29 # the atoms dict has the keys:
30 # * tags: tags dict
31 # tags with no value are an empty string
32 # * source: source string, without single leading colon
33 # * verb: verb string
34 # * params: params split up as a list
35 # if the params key does not exist, assume it is empty
36 # if any other keys do not exist, assume they are null
37 # a key that is null does not exist or is not specified with the
38 # given input string
39
40 # simple
41 - input: "foo bar baz asdf"
42 atoms:
43 verb: "foo"
44 params:
45 - "bar"
46 - "baz"
47 - "asdf"
48
49 # with source
50 - input: ":coolguy foo bar baz asdf"
51 atoms:
52 source: "coolguy"
53 verb: "foo"
54 params:
55 - "bar"
56 - "baz"
57 - "asdf"
58
59 # with trailing param
60 - input: "foo bar baz :asdf quux"
61 atoms:
62 verb: "foo"
63 params:
64 - "bar"
65 - "baz"
66 - "asdf quux"
67
68 - input: "foo bar baz :"
69 atoms:
70 verb: "foo"
71 params:
72 - "bar"
73 - "baz"
74 - ""
75
76 - input: "foo bar baz ::asdf"
77 atoms:
78 verb: "foo"
79 params:
80 - "bar"
81 - "baz"
82 - ":asdf"
83
84 # with source and trailing param
85 - input: ":coolguy foo bar baz :asdf quux"
86 atoms:
87 source: "coolguy"
88 verb: "foo"
89 params:
90 - "bar"
91 - "baz"
92 - "asdf quux"
93
94 - input: ":coolguy foo bar baz : asdf quux "
95 atoms:
96 source: "coolguy"
97 verb: "foo"
98 params:
99 - "bar"
100 - "baz"
101 - " asdf quux "
102
103 - input: ":coolguy PRIVMSG bar :lol :) "
104 atoms:
105 source: "coolguy"
106 verb: "PRIVMSG"
107 params:
108 - "bar"
109 - "lol :) "
110
111 - input: ":coolguy foo bar baz :"
112 atoms:
113 source: "coolguy"
114 verb: "foo"
115 params:
116 - "bar"
117 - "baz"
118 - ""
119
120 - input: ":coolguy foo bar baz : "
121 atoms:
122 source: "coolguy"
123 verb: "foo"
124 params:
125 - "bar"
126 - "baz"
127 - " "
128
129 # with tags
130 - input: "@a=b;c=32;k;rt=ql7 foo"
131 atoms:
132 verb: "foo"
133 tags:
134 "a": "b"
135 "c": "32"
136 "k":
137 "rt": "ql7"
138
139 # with escaped tags
140 - input: "@a=b\\\\and\\nk;c=72\\s45;d=gh\\:764 foo"
141 atoms:
142 verb: "foo"
143 tags:
144 "a": "b\\and\nk"
145 "c": "72 45"
146 "d": "gh;764"
147
148 # with tags and source
149 - input: "@c;h=;a=b :quux ab cd"
150 atoms:
151 tags:
152 "c":
153 "h": ""
154 "a": "b"
155 source: "quux"
156 verb: "ab"
157 params:
158 - "cd"
159
160 # different forms of last param
161 - input: ":src JOIN #chan"
162 atoms:
163 source: "src"
164 verb: "JOIN"
165 params:
166 - "#chan"
167
168 - input: ":src JOIN :#chan"
169 atoms:
170 source: "src"
171 verb: "JOIN"
172 params:
173 - "#chan"
174
175 # with and without last param
176 - input: ":src AWAY"
177 atoms:
178 source: "src"
179 verb: "AWAY"
180
181 - input: ":src AWAY "
182 atoms:
183 source: "src"
184 verb: "AWAY"
185
186 # tab is not considered <SPACE>
187 - input: ":cool\tguy foo bar baz"
188 atoms:
189 source: "cool\tguy"
190 verb: "foo"
191 params:
192 - "bar"
193 - "baz"
194
195 # with weird control codes in the source
196 - input: ":coolguy!ag@net\x035w\x03ork.admin PRIVMSG foo :bar baz"
197 atoms:
198 source: "coolguy!ag@net\x035w\x03ork.admin"
199 verb: "PRIVMSG"
200 params:
201 - "foo"
202 - "bar baz"
203
204 - input: ":coolguy!~ag@n\x02et\x0305w\x0fork.admin PRIVMSG foo :bar baz"
205 atoms:
206 source: "coolguy!~ag@n\x02et\x0305w\x0fork.admin"
207 verb: "PRIVMSG"
208 params:
209 - "foo"
210 - "bar baz"
211
212 - input: "@tag1=value1;tag2;vendor1/tag3=value2;vendor2/tag4= :irc.example.com COMMAND param1 param2 :param3 param3"
213 atoms:
214 tags:
215 tag1: "value1"
216 tag2:
217 vendor1/tag3: "value2"
218 vendor2/tag4: ""
219 source: "irc.example.com"
220 verb: "COMMAND"
221 params:
222 - "param1"
223 - "param2"
224 - "param3 param3"
225
226 - input: ":irc.example.com COMMAND param1 param2 :param3 param3"
227 atoms:
228 source: "irc.example.com"
229 verb: "COMMAND"
230 params:
231 - "param1"
232 - "param2"
233 - "param3 param3"
234
235 - input: "@tag1=value1;tag2;vendor1/tag3=value2;vendor2/tag4 COMMAND param1 param2 :param3 param3"
236 atoms:
237 tags:
238 tag1: "value1"
239 tag2:
240 vendor1/tag3: "value2"
241 vendor2/tag4:
242 verb: "COMMAND"
243 params:
244 - "param1"
245 - "param2"
246 - "param3 param3"
247
248 - input: "COMMAND"
249 atoms:
250 verb: "COMMAND"
251
252 # yaml encoding + slashes is fun
253 - input: "@foo=\\\\\\\\\\:\\\\s\\s\\r\\n COMMAND"
254 atoms:
255 tags:
256 foo: "\\\\;\\s \r\n"
257 verb: "COMMAND"
258
259 # broken messages from unreal
260 - input: ":gravel.mozilla.org 432 #momo :Erroneous Nickname: Illegal characters"
261 atoms:
262 source: "gravel.mozilla.org"
263 verb: "432"
264 params:
265 - "#momo"
266 - "Erroneous Nickname: Illegal characters"
267
268 - input: ":gravel.mozilla.org MODE #tckk +n "
269 atoms:
270 source: "gravel.mozilla.org"
271 verb: "MODE"
272 params:
273 - "#tckk"
274 - "+n"
275
276 - input: ":services.esper.net MODE #foo-bar +o foobar "
277 atoms:
278 source: "services.esper.net"
279 verb: "MODE"
280 params:
281 - "#foo-bar"
282 - "+o"
283 - "foobar"
284
285 # tag values should be parsed char-at-a-time to prevent wayward replacements.
286 - input: "@tag1=value\\\\ntest COMMAND"
287 atoms:
288 tags:
289 tag1: "value\\ntest"
290 verb: "COMMAND"
291
292 # If a tag value has a backslash followed by a character which doesn't need
293 # to be escaped, the backslash should be dropped.
294 - input: "@tag1=value\\1 COMMAND"
295 atoms:
296 tags:
297 tag1: "value1"
298 verb: "COMMAND"
299
300 # A backslash at the end of a tag value should be dropped
301 - input: "@tag1=value1\\ COMMAND"
302 atoms:
303 tags:
304 tag1: "value1"
305 verb: "COMMAND"
306
307 # Duplicate tags: Parsers SHOULD disregard all but the final occurrence
308 - input: "@tag1=1;tag2=3;tag3=4;tag1=5 COMMAND"
309 atoms:
310 tags:
311 tag1: "5"
312 tag2: "3"
313 tag3: "4"
314 verb: "COMMAND"
315
316 # vendored tags can have the same name as a non-vendored tag
317 - input: "@tag1=1;tag2=3;tag3=4;tag1=5;vendor/tag2=8 COMMAND"
318 atoms:
319 tags:
320 tag1: "5"
321 tag2: "3"
322 tag3: "4"
323 vendor/tag2: "8"
324 verb: "COMMAND"
325
326 # Some parsers handle /MODE in a special way, make sure they do it right
327 - input: ":SomeOp MODE #channel :+i"
328 atoms:
329 source: "SomeOp"
330 verb: "MODE"
331 params:
332 - "#channel"
333 - "+i"
334
335 - input: ":SomeOp MODE #channel +oo SomeUser :AnotherUser"
336 atoms:
337 source: "SomeOp"
338 verb: "MODE"
339 params:
340 - "#channel"
341 - "+oo"
342 - "SomeUser"
343 - "AnotherUser"