IRC parsing, tokenization, and state handling in C#

fix some stateful tests

also fixes some warnings about culture-specific string comparisons

+186 -74
+6 -4
IrcTokens/Hostmask.cs
··· 1 - namespace IrcTokens 1 + using System; 2 + 3 + namespace IrcTokens 2 4 { 3 5 /// <summary> 4 6 /// Represents the three parts of a hostmask. Parse with the constructor. ··· 11 13 12 14 public override string ToString() => _source; 13 15 14 - public override int GetHashCode() => _source.GetHashCode(); 16 + public override int GetHashCode() => _source.GetHashCode(StringComparison.Ordinal); 15 17 16 18 public override bool Equals(object obj) 17 19 { ··· 29 31 30 32 _source = source; 31 33 32 - if (source.Contains('@')) 34 + if (source.Contains('@', StringComparison.Ordinal)) 33 35 { 34 36 var split = source.Split('@'); 35 37 ··· 41 43 NickName = source; 42 44 } 43 45 44 - if (NickName.Contains('!')) 46 + if (NickName.Contains('!', StringComparison.Ordinal)) 45 47 { 46 48 var userSplit = NickName.Split('!'); 47 49 NickName = userSplit[0];
+7 -1
IrcTokens/IrcTokens.csproj
··· 5 5 </PropertyGroup> 6 6 7 7 <ItemGroup> 8 - <PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.5.0" /> 8 + <PackageReference Include="Microsoft.CodeAnalysis.FxCopAnalyzers" Version="2.9.8"> 9 + <PrivateAssets>all</PrivateAssets> 10 + <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets> 11 + </PackageReference> 12 + <PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.6.0" /> 9 13 <PackageReference Include="MSTest.TestAdapter" Version="2.1.1" /> 10 14 <PackageReference Include="MSTest.TestFramework" Version="2.1.1" /> 15 + <PackageReference Include="System.Text.Encoding.CodePages" Version="4.7.0" /> 16 + <PackageReference Include="System.Text.Encoding.Extensions" Version="4.3.0" /> 11 17 <PackageReference Include="YamlDotNet" Version="8.1.0" /> 12 18 </ItemGroup> 13 19
+17 -13
IrcTokens/Line.cs
··· 1 1 using System; 2 2 using System.Collections.Generic; 3 + using System.Globalization; 3 4 using System.Linq; 4 5 5 6 namespace IrcTokens ··· 20 21 public override string ToString() => 21 22 $"Line(source={Source}, command={Command}, tags={string.Join(";", Tags.Select(kvp => $"{kvp.Key}={kvp.Value}"))}, params={string.Join(",", Params)})"; 22 23 23 - public override int GetHashCode() => Format().GetHashCode(); 24 + public override int GetHashCode() => Format().GetHashCode(StringComparison.Ordinal); 24 25 25 26 public override bool Equals(object obj) 26 27 { ··· 41 42 /// <param name="line">irc line to parse</param> 42 43 public Line(string line) 43 44 { 45 + if (string.IsNullOrWhiteSpace(line)) 46 + throw new ArgumentNullException(nameof(line)); 47 + 44 48 _rawLine = line; 45 49 string[] split; 46 50 ··· 54 58 55 59 foreach (var part in messageTags.Substring(1).Split(';')) 56 60 { 57 - if (part.Contains('=')) 61 + if (part.Contains('=', StringComparison.Ordinal)) 58 62 { 59 - split = part.Split('='); 63 + split = part.Split('=', 2); 60 64 Tags[split[0]] = Protocol.UnescapeTag(split[1]); 61 65 } 62 66 else ··· 67 71 } 68 72 69 73 string trailing; 70 - if (line.Contains(" :")) 74 + if (line.Contains(" :", StringComparison.Ordinal)) 71 75 { 72 - split = line.Split(" :"); 76 + split = line.Split(" :", 2); 73 77 line = split[0]; 74 - trailing = string.Join(" :", split.Skip(1)); 78 + trailing = split[1]; 75 79 } 76 80 else 77 81 { 78 82 trailing = null; 79 83 } 80 84 81 - Params = line.Contains(' ') 82 - ? line.Split(' ').Where(p => !string.IsNullOrWhiteSpace(p)).ToList() 85 + Params = line.Contains(' ', StringComparison.Ordinal) 86 + ? line.Split(' ', StringSplitOptions.RemoveEmptyEntries).ToList() 83 87 : new List<string> {line}; 84 88 85 89 if (Params[0].StartsWith(':')) ··· 90 94 91 95 if (Params.Count > 0) 92 96 { 93 - Command = Params[0].ToUpper(); 97 + Command = Params[0].ToUpper(CultureInfo.InvariantCulture); 94 98 Params.RemoveAt(0); 95 99 } 96 100 ··· 129 133 130 134 foreach (var p in Params) 131 135 { 132 - if (p.Contains(' ')) 133 - throw new ArgumentException("non-last parameters cannot have spaces", p); 136 + if (p.Contains(' ', StringComparison.Ordinal)) 137 + throw new ArgumentException(@"non-last parameters cannot have spaces", p); 134 138 if (p.StartsWith(':')) 135 - throw new ArgumentException("non-last parameters cannot start with colon", p); 139 + throw new ArgumentException(@"non-last parameters cannot start with colon", p); 136 140 } 137 141 outs.AddRange(Params); 138 142 139 - if (string.IsNullOrWhiteSpace(last) || last.Contains(' ') || last.StartsWith(':')) 143 + if (string.IsNullOrWhiteSpace(last) || last.Contains(' ', StringComparison.Ordinal) || last.StartsWith(':')) 140 144 last = $":{last}"; 141 145 outs.Add(last); 142 146 }
+4 -4
IrcTokens/Protocol.cs
··· 5 5 6 6 namespace IrcTokens 7 7 { 8 - public class Protocol 8 + internal class Protocol 9 9 { 10 - private static readonly string[] TagUnescaped = new [] 10 + private static readonly string[] TagUnescaped = 11 11 { 12 12 "\\", " ", ";", "\r", "\n" 13 13 }; 14 14 15 - private static readonly string[] TagEscaped = new [] 15 + private static readonly string[] TagEscaped = 16 16 { 17 17 "\\\\", "\\s", "\\:", "\\r", "\\n" 18 18 }; ··· 65 65 { 66 66 for (var i = 0; i < TagUnescaped.Length; ++i) 67 67 { 68 - val = val.Replace(TagUnescaped[i], TagEscaped[i]); 68 + val = val.Replace(TagUnescaped[i], TagEscaped[i], StringComparison.Ordinal); 69 69 } 70 70 71 71 return val;
+81 -11
IrcTokens/StatefulDecoder.cs
··· 1 - using System.Collections.Generic; 1 + using System; 2 + using System.Collections.Generic; 2 3 using System.Linq; 3 4 using System.Text; 4 5 ··· 6 7 { 7 8 public class StatefulDecoder 8 9 { 9 - private string _buffer; 10 - public EncodingInfo Encoding { get; set; } 11 - public EncodingInfo Fallback { get; set; } 10 + private byte[] _buffer; 11 + private Encoding _encoding; 12 + private Encoding _fallback; 13 + 14 + public Encoding Encoding 15 + { 16 + get => _encoding ?? Encoding.UTF8; 17 + set => _encoding = value; 18 + } 19 + 20 + public Encoding Fallback 21 + { 22 + get => _fallback ?? Encoding.GetEncoding("iso-8859-1"); 23 + set => _fallback = value; 24 + } 12 25 13 - public string Pending => _buffer; 26 + public string Pending => Encoding.GetString(_buffer); 27 + 28 + public StatefulDecoder() 29 + { 30 + Clear(); 31 + } 14 32 15 33 public void Clear() 16 34 { 17 - _buffer = ""; 35 + _buffer = Array.Empty<byte>(); 18 36 } 19 37 20 38 public List<Line> Push(string data) 21 39 { 22 - if (string.IsNullOrEmpty(data)) 40 + return Push(Encoding.GetBytes(data)); 41 + } 42 + 43 + public List<Line> Push(byte[] data) 44 + { 45 + if (data == null || data.Length == 0) 23 46 return null; 24 47 25 - _buffer += data; 26 - return _buffer 27 - .Split('\n') 28 - .Select(l => l.TrimEnd('\r')) 48 + _buffer = _buffer.Concat(data).ToArray(); 49 + 50 + // simulate string.Split('\n') before decoding 51 + var newLineIndices = _buffer.Select((b, i) => b == '\n' ? i : -1).Where(i => i != -1).ToArray(); 52 + var lines = new List<byte[]>(); 53 + 54 + for (int i = 0, currentIndex = 0; i < newLineIndices.Length; ++i) 55 + { 56 + var n = new byte[newLineIndices[i] - currentIndex]; 57 + Array.Copy(_buffer, currentIndex, n, 0, newLineIndices[i] - currentIndex); 58 + currentIndex = newLineIndices[i] + 1; 59 + lines.Add(n); 60 + } 61 + 62 + var listLines = lines.Select(l => l.ToList()).ToList(); 63 + 64 + // simulate string.Trim('\r') before decoding 65 + foreach (var line in listLines) 66 + { 67 + var i = 0; 68 + while (line[i] == '\r') 69 + { 70 + line.RemoveAt(i); 71 + i++; 72 + } 73 + 74 + i = line.Count - 1; 75 + while (line[i] == '\r') 76 + { 77 + line.RemoveAt(i); 78 + i--; 79 + } 80 + } 81 + 82 + //_buffer = listLines.Last().ToArray(); 83 + //listLines.RemoveAt(listLines.Count - 1); 84 + 85 + var decodeLines = new List<string>(); 86 + foreach (var line in listLines.Select(l => l.ToArray())) 87 + { 88 + try 89 + { 90 + decodeLines.Add(Encoding.GetString(line)); 91 + } 92 + catch (DecoderFallbackException) 93 + { 94 + decodeLines.Add(Fallback.GetString(line)); 95 + } 96 + } 97 + 98 + return decodeLines 29 99 .Select(l => new Line(l)) 30 100 .ToList(); 31 101 }
+51 -13
IrcTokens/StatefulEncoder.cs
··· 1 - using System.Collections.Generic; 1 + using System; 2 + using System.Collections.Generic; 2 3 using System.Linq; 3 4 using System.Text; 4 5 ··· 6 7 { 7 8 public class StatefulEncoder 8 9 { 9 - private string _buffer; 10 - public EncodingInfo Encoding { get; set; } 11 - private List<Line> _bufferedLines; 10 + private Encoding _encoding; 12 11 13 - public string Pending => _buffer; 12 + public Encoding Encoding 13 + { 14 + get => _encoding ?? Encoding.GetEncoding(Encoding.UTF8.CodePage, EncoderFallback.ExceptionFallback, 15 + DecoderFallback.ExceptionFallback); 16 + set 17 + { 18 + if (value != null) 19 + _encoding = Encoding.GetEncoding(value.CodePage, EncoderFallback.ExceptionFallback, 20 + DecoderFallback.ExceptionFallback); 21 + } 22 + } 23 + 24 + private Queue<Line> _bufferedLines; 25 + 26 + public byte[] PendingBytes { get; private set; } 27 + 28 + public string Pending() 29 + { 30 + try 31 + { 32 + return Encoding.GetString(PendingBytes); 33 + } 34 + catch (DecoderFallbackException e) 35 + { 36 + Console.WriteLine(e); 37 + throw; 38 + } 39 + } 40 + 41 + public StatefulEncoder() 42 + { 43 + Clear(); 44 + } 14 45 15 46 public void Clear() 16 47 { 17 - _buffer = ""; 18 - _bufferedLines.Clear(); 48 + PendingBytes = Array.Empty<byte>(); 49 + _bufferedLines = new Queue<Line>(); 19 50 } 20 51 21 52 public void Push(Line line) 22 53 { 23 - _buffer += $"{line.Format()}\r\n"; 24 - _bufferedLines.Add(line); 54 + if (line == null) 55 + throw new ArgumentNullException(nameof(line)); 56 + 57 + PendingBytes = PendingBytes.Concat(Encoding.GetBytes($"{line.Format()}\r\n")).ToArray(); 58 + _bufferedLines.Enqueue(line); 25 59 } 26 60 27 61 public List<Line> Pop(int byteCount) 28 62 { 29 - var sent = _buffer.Substring(byteCount).Count(c => c == '\n'); 30 - _buffer = _buffer.Substring(byteCount); 31 - _bufferedLines = _bufferedLines.Skip(sent).ToList(); 32 - return _bufferedLines.Take(sent).ToList(); 63 + var sent = PendingBytes.Take(byteCount).Count(c => c == '\n'); 64 + 65 + PendingBytes = PendingBytes.Skip(byteCount).ToArray(); 66 + _bufferedLines = new Queue<Line>(_bufferedLines.Skip(sent)); 67 + 68 + return Enumerable.Range(0, sent) 69 + .Select(_ => _bufferedLines.Dequeue()) 70 + .ToList(); 33 71 } 34 72 } 35 73 }
+3 -1
IrcTokens/Tests/ParserTests.cs
··· 1 1 using System.Collections.Generic; 2 + using System.Globalization; 2 3 using System.IO; 3 4 using IrcTokens.Tests.Data; 4 5 using Microsoft.VisualStudio.TestTools.UnitTesting; ··· 27 28 var tokens = new Line(test.Input); 28 29 var atoms = test.Atoms; 29 30 30 - Assert.AreEqual(atoms.Verb.ToUpper(), tokens.Command, $"command failed on: '{test.Input}'"); 31 + Assert.AreEqual(atoms.Verb.ToUpper(CultureInfo.InvariantCulture), tokens.Command, 32 + $"command failed on: '{test.Input}'"); 31 33 Assert.AreEqual(atoms.Source, tokens.Source, $"source failed on: '{test.Input}'"); 32 34 CollectionAssert.AreEqual(atoms.Tags, tokens.Tags, $"tags failed on: '{test.Input}'"); 33 35 CollectionAssert.AreEqual(atoms.Params ?? new List<string>(), tokens.Params, $"params failed on: '{test.Input}'");
+10 -20
IrcTokens/Tests/StatefulDecoderTests.cs
··· 20 20 public void TestPartial() 21 21 { 22 22 var lines = _decoder.Push("PRIVMSG "); 23 - Assert.AreEqual(new List<string>(), lines); 23 + Assert.AreEqual(0, lines.Count); 24 24 25 25 lines = _decoder.Push("#channel hello\r\n"); 26 26 Assert.AreEqual(1, lines.Count); ··· 32 32 [TestMethod] 33 33 public void TestMultiple() 34 34 { 35 - _decoder.Push("PRIVMSG #channel1 hello\r\n"); 36 - var lines = _decoder.Push("PRIVMSG #channel2 hello\r\n"); 35 + var lines = _decoder.Push("PRIVMSG #channel1 hello\r\nPRIVMSG #channel2 hello\r\n"); 37 36 Assert.AreEqual(2, lines.Count); 38 37 39 38 var line1 = new Line("PRIVMSG #channel1 hello"); ··· 45 44 [TestMethod] 46 45 public void TestEncoding() 47 46 { 48 - var iso8859 = Encoding.GetEncodings().Single(ei => ei.Name == "iso-8859-1"); 47 + var iso8859 = Encoding.GetEncoding("iso-8859-1"); 49 48 _decoder = new StatefulDecoder {Encoding = iso8859}; 50 - var lines = _decoder.Push("PRIVMSG #channel :hello Č\r\n"); 51 - var line = new Line("PRIVMSG #channel :hello Č"); 52 - Assert.AreEqual(line, lines[0]); 49 + var lines = _decoder.Push(iso8859.GetBytes("PRIVMSG #channel :hello Ç\r\n")); 50 + var line = new Line("PRIVMSG #channel :hello Ç"); 51 + Assert.IsTrue(line.Equals(lines[0])); 53 52 } 54 53 55 54 [TestMethod] 56 55 public void TestEncodingFallback() 57 56 { 58 - var latin1 = Encoding.GetEncodings().Single(ei => ei.Name == "latin-1"); 59 - _decoder = new StatefulDecoder {Fallback = latin1}; 60 - var lines = _decoder.Push("PRIVMSG #channel hélló\r\n"); 57 + var latin1 = Encoding.GetEncoding("iso-8859-1"); 58 + _decoder = new StatefulDecoder {Encoding = null, Fallback = latin1}; 59 + var lines = _decoder.Push(latin1.GetBytes("PRIVMSG #channel hélló\r\n")); 61 60 Assert.AreEqual(1, lines.Count); 62 - Assert.AreEqual(new Line("PRIVMSG #channel hélló"), lines[0]); 61 + Assert.IsTrue(new Line("PRIVMSG #channel hélló").Equals(lines[0])); 63 62 } 64 63 65 64 [TestMethod] ··· 83 82 _decoder.Push("PRIVMSG "); 84 83 _decoder.Clear(); 85 84 Assert.AreEqual(string.Empty, _decoder.Pending); 86 - } 87 - 88 - [TestMethod] 89 - public void TestTagEncodingMismatch() 90 - { 91 - _decoder.Push("@asd=á "); 92 - var lines = _decoder.Push("PRIVMSG #chan :á\r\n"); 93 - Assert.AreEqual("á", lines[0].Params[0]); 94 - Assert.AreEqual("á", lines[0].Tags["asd"]); 95 85 } 96 86 } 97 87 }
+7 -7
IrcTokens/Tests/StatefulEncoderTests.cs
··· 20 20 { 21 21 var line = new Line("PRIVMSG #channel hello"); 22 22 _encoder.Push(line); 23 - Assert.AreEqual("PRIVMSG #channel hello\r\n", _encoder.Pending); 23 + Assert.AreEqual("PRIVMSG #channel hello\r\n", _encoder.Pending()); 24 24 } 25 25 26 26 [TestMethod] ··· 29 29 var line = new Line("PRIVMSG #channel hello"); 30 30 _encoder.Push(line); 31 31 _encoder.Pop("PRIVMSG #channel hello".Length); 32 - Assert.AreEqual("\r\n", _encoder.Pending); 32 + Assert.AreEqual("\r\n", _encoder.Pending()); 33 33 } 34 34 35 35 [TestMethod] ··· 57 57 { 58 58 _encoder.Push(new Line("PRIVMSG #channel hello")); 59 59 _encoder.Clear(); 60 - Assert.AreEqual(string.Empty, _encoder.Pending); 60 + Assert.AreEqual(string.Empty, _encoder.Pending()); 61 61 } 62 62 63 63 [TestMethod] 64 64 public void TestEncoding() 65 65 { 66 - var iso88592 = Encoding.GetEncodings().Single(ei => ei.Name == "iso-8859-2"); 67 - _encoder = new StatefulEncoder {Encoding = iso88592}; 68 - _encoder.Push(new Line("PRIVMSG #channel :hello Č")); 69 - Assert.AreEqual("PRIVMSG #channel :hello Č\r\n", _encoder.Pending); 66 + var iso8859 = Encoding.GetEncoding("iso-8859-1"); 67 + _encoder = new StatefulEncoder {Encoding = iso8859}; 68 + _encoder.Push(new Line("PRIVMSG #channel :hello Ç")); 69 + CollectionAssert.AreEqual(iso8859.GetBytes("PRIVMSG #channel :hello Ç\r\n"), _encoder.PendingBytes); 70 70 } 71 71 } 72 72 }