···11+#!/usr/bin/env python3
22+"""
33+Parse Swift Benchmark markdown (produced by `swift package benchmark ... --format markdown`) and produce compact summary
44+markdown tables for Decoding and Encoding using the Time (total CPU) p50 values.
55+66+Usage:
77+ - Read from stdin:
88+ swift package benchmark baseline compare swiftcbor --format markdown --no-progress | python3 bench_compare.py
99+ - Or read from file:
1010+ python3 bench_compare.py benchmark.md
1111+1212+The script prints two markdown tables (Decoding and Encoding) to stdout.
1313+"""
1414+1515+import sys
1616+import re
1717+from pathlib import Path
def _split_row(row):
    """Split one markdown table row into stripped cell texts, dropping the outer pipes."""
    return [cell.strip() for cell in row.split('|')][1:-1]


def _read_p50_pair(lines, start, end):
    """Return the ``(swiftcbor, Current_run)`` p50 cells of the first table in ``lines[start:end]``.

    The header row is the first line in the range that mentions ``p50``; this may be
    ``lines[start]`` itself when the metric title is the table's first header cell.
    Rows are read only until the table ends (first line that is not a ``|`` row), so
    values from a following table can never be attributed to this one.
    Either element is ``None`` when the matching row (or the p50 column) is missing.
    """
    header_idx = next((t for t in range(start, end) if 'p50' in lines[t]), None)
    if header_idx is None:
        return None, None

    p50_idx = next(
        (idx for idx, cell in enumerate(_split_row(lines[header_idx])) if cell.startswith('p50')),
        None,
    )
    if p50_idx is None:
        return None, None

    swift_val = None
    curr_val = None
    for row in lines[header_idx + 1:end]:
        if not row.strip().startswith('|'):
            break  # a blank or non-table line terminates the markdown table
        cols = _split_row(row)
        if p50_idx >= len(cols):
            continue  # short row (also covers an empty cell list)
        name = cols[0]
        if 'swiftcbor' in name:
            swift_val = cols[p50_idx]
        if 'Current_run' in name:
            curr_val = cols[p50_idx]
        if swift_val and curr_val:
            break
    return swift_val, curr_val


def parse_markdown(text):
    """Extract the "Time (total CPU)" p50 values per benchmark from benchmark markdown.

    The text is expected to contain ``## Decoding`` / ``## Encoding`` headings, each
    followed by ``### <name> metrics`` sections. For every such section the first line
    mentioning ``Time (total CPU)`` identifies the table to read; an optional
    parenthesised unit right after that title is captured.

    Args:
        text: The full markdown document as a string.

    Returns:
        ``{"Decoding": {...}, "Encoding": {...}}`` where each inner dict maps a
        benchmark name to ``(swiftcbor_p50, current_run_p50, unit)``. The p50 entries
        are the raw cell strings (or ``None`` when not found); ``unit`` is ``''`` when
        the title carries no unit. Sections without a "Time (total CPU)" line are
        omitted.
    """
    lines = text.splitlines()
    results = {"Decoding": {}, "Encoding": {}}
    mode = None

    for i, line in enumerate(lines):
        stripped = line.strip()
        if stripped.startswith('## Decoding'):
            mode = 'Decoding'
            continue
        if stripped.startswith('## Encoding'):
            mode = 'Encoding'
            continue

        m = re.match(r"###\s+(.+?)\s+metrics", line)
        if not m or mode is None:
            continue
        bench = m.group(1).strip()

        # The section runs until the next heading; nothing past it belongs to this benchmark.
        end = next(
            (j for j in range(i + 1, len(lines)) if lines[j].strip().startswith(('###', '## '))),
            len(lines),
        )
        for j in range(i + 1, end):
            m2 = re.search(r"Time\s*\(total CPU\)\s*(?:\(([^)]+)\))?", lines[j])
            if m2:
                unit = m2.group(1) or ''
                swift_val, curr_val = _read_p50_pair(lines, j, end)
                results[mode][bench] = (swift_val, curr_val, unit)
                break
    return results
def clean_num(s):
    """Return the first unsigned decimal number found in *s* as a float.

    Thousands separators (``,``) are removed before searching, so ``"1,234 ns"``
    yields ``1234.0``. Returns ``None`` when *s* is ``None`` or contains no digits.
    """
    if s is None:
        return None
    match = re.search(r"[0-9]+(?:\.[0-9]+)?", s.strip().replace(',', ''))
    # The pattern only matches text that float() accepts, so no exception guard is needed.
    return float(match.group(0)) if match else None
def fmt(n):
    """Format a number as a whole-number string, with thousands separators from 1,000 up.

    ``None`` becomes the empty string. The value is rounded *before* the thousands
    check so that e.g. ``999.6`` renders as ``"1,000"`` (consistent with ``1000.0``)
    instead of ``"1000"``.
    """
    if n is None:
        return ''
    rounded = int(round(n))
    if rounded >= 1000:
        return f"{rounded:,}"
    return str(rounded)
def render_table_section(title, rows, preferred_order=None):
    """Print one markdown comparison table for a section to stdout.

    Args:
        title: Section name placed in the ``###`` heading.
        rows: Mapping of benchmark name to ``(swiftcbor_p50, current_p50, unit)``.
        preferred_order: Optional benchmark names to list first, in the given order;
            names absent from *rows* are ignored and all remaining benchmarks follow
            alphabetically.
    """
    print(f"### {title} (cpu time)\n")
    print("| Benchmark | SwiftCBOR (p50) | CBOR (p50) | % Improvement |")
    print("|---|---:|---:|---:|")

    ordered = [name for name in (preferred_order or []) if name in rows]
    leftovers = [name for name in sorted(rows.keys()) if name not in ordered]
    ordered += leftovers

    for bench in ordered:
        swift_raw, curr_raw, unit = rows.get(bench, (None, None, ''))
        swift_num = clean_num(swift_raw)
        curr_num = clean_num(curr_raw)

        # Fall back to the raw cell text when no number could be extracted.
        if swift_num is not None:
            swift_cell = fmt(swift_num) + (' ' + unit if unit else '')
        else:
            swift_cell = swift_raw or ''
        if curr_num is not None:
            curr_cell = fmt(curr_num) + (' ' + unit if unit else '')
        else:
            curr_cell = curr_raw or ''

        # Improvement is relative to the SwiftCBOR baseline; skip it when undefined.
        if swift_num is None or curr_num is None or swift_num == 0:
            improvement = ''
        else:
            pct = round((swift_num - curr_num) / swift_num * 100)
            improvement = f"**{pct}%**"

        print(f"| {bench} | {swift_cell} | {curr_cell} | {improvement} |")
    print("\n")
def main(argv):
    """Parse benchmark markdown from a file or stdin and print the two summary tables.

    Args:
        argv: Command-line arguments without the program name. The single optional
            positional argument is a markdown file path, or ``-`` (the default) to
            read from stdin.
    """
    import argparse  # local import keeps the block self-contained

    parser = argparse.ArgumentParser(description='Parse Swift benchmark markdown and print compact p50 tables')
    parser.add_argument('file', nargs='?', default='-', help='Path to markdown file, or - for stdin (default)')
    args = parser.parse_args(argv)
    if args.file == '-':
        text = sys.stdin.read()
    else:
        # Benchmark markdown contains non-ASCII characters (e.g. "μs", "Δ"); do not
        # depend on the platform's default encoding.
        text = Path(args.file).read_text(encoding='utf-8')

    results = parse_markdown(text)

    # preferred orders to match your example (best-effort)
    dec_order = ["Array","Complex Object","Date","Dictionary","Double","Float","Indeterminate String","Int","Int Small","Simple Object","String","String Small"]
    enc_order = ["Array","Array Small","Bool","Complex Codable Object","Data","Data Small","Dictionary","Dictionary Small","Int","Int Small","Simple Codable Object","String","String Small"]

    render_table_section('Decoding', results.get('Decoding', {}), preferred_order=dec_order)
    render_table_section('Encoding', results.get('Encoding', {}), preferred_order=enc_order)


if __name__ == '__main__':
    main(sys.argv[1:])
+13
Benchmarks/bench_compare.sh
#!/usr/bin/env bash
# Run the Swift benchmark compare command and pipe its markdown output into the Python parser.
# Usage: run this from the repository root.

set -euo pipefail

# SWIFT_ARGS holds the complete argument list passed to `swift package benchmark`.
# Setting it in the environment REPLACES the default below (it does not append to it).
SWIFT_ARGS=${SWIFT_ARGS:-"baseline compare swiftcbor --format markdown --no-progress"}
PY_SCRIPT="$(dirname "$0")/bench_compare.py"

swift -version

# SWIFT_ARGS is expanded unquoted on purpose so it splits into separate arguments.
# Disable pathname expansion first so regex-like arguments (e.g. `--filter Decod.*`)
# are passed through literally instead of being globbed against the working directory.
set -f
swift package benchmark $SWIFT_ARGS | python3 "$PY_SCRIPT"
···9090let dagEncoder = DAGCBOREncoder(dateEncodingStrategy: .double)
9191```
92929393-To use, conform your internal CID type to ``CIDType``. **Do not conform standard types like `String` or `Data` to ``CIDType``**, or the encoder will attempt to encode all of those data as tagged items.
9393+To use, conform your internal CID type to ``CIDType``. Do not conform standard types like `String` or `Data` to ``CIDType``, or the encoder will attempt to encode all of those data as tagged items.
9494```swift
9595-struct CID: CIDType, Encodable {
9696- let bytes: [UInt8]
9797- func cidData() throws -> [UInt8] {
9898- // Often you'll want to re-encode your CID from a human readable
9999- // format to Base256.
100100- return bytes
9595+struct CID: CIDType {
9696+ let data: Data
9797+9898+ init(data: Data) {
9999+ self.data = data
100100+ }
101101+102102+ init(from decoder: any Decoder) throws {
103103+ let container = try decoder.singleValueContainer()
104104+ var data = try container.decode(Data.self)
105105+ data.removeFirst()
106106+ self.data = data
107107+ }
108108+109109+ func encode(to encoder: any Encoder) throws {
110110+ var container = encoder.singleValueContainer()
111111+ try container.encode(Data([0x0]) + data)
101112 }
102113}
114114+```
115115+> [!WARNING]
116116+>
117117+> You **need** to prefix your data with the `NULL` byte when encoding. This library will not handle that for you. It is invalid DAG-CBOR encoding to not include the prefixed byte.
103118104104-// Now, any time the encoder finds a `CID` type it will encode it using the
105105-// correct tag.
119119+Now, any time the encoder finds a `CID` type it will encode it using the correct tag.
120120+```swift
106121let cid = CID(data: Data([0,1,2,3,4,5,6,7,8]))
107122let data = try DAGCBOREncoder().encode(cid)
108123···112127// 4A # bytes(10)
113128// 00000102030405060708 # "\u0000\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b"
114129```
115115-You **do not** need to prefix your data with the `NULL` character once encoded. This library will handle that for you. It is invalid encoding to not include the prefixed byte, so the encoder handles it.
116116-117130> [!NOTE]
118131> DAG-CBOR does not allow tagged items (besides the CID item), and thus encoding dates must be done by encoding their 'raw' value directly. This is an application specific behavior, so ensure the encoder is using the correct date encoding behavior for compatibility. By default, the encoder will encode dates as an epoch `Double` timestamp.
119132
+27
Sources/CBOR/CustomTags/TaggedCBORItem.swift
···11+//
22+// TaggedCBORItem.swift
33+// CBOR
44+//
55+// Created by Khan Winter on 10/15/25.
66+//
77+88+/// Protocol for tagged item encoding. Conform types to this to have them encoded in a tag container.
99+///
1010+/// Types that conform to this protocol must be `Codable`, and must additionally implement the initializer and method
1111+/// required by this protocol to ensure extra containers are not created between the tag container and the real data.
1212+/// This library implements this for Foundation's ``Foundation/UUID`` type. Dates are handled as a special case.
1313+///
1414+/// For an example conformance, see ``CIDType``.
1515+public protocol TaggedCBORItem: Codable {
 /// The CBOR tag number associated with this type.
1616+ static var tag: UInt { get }
 /// Creates a value from the data held inside the tag, read from the given single value container.
1717+1818+ init<Container: SingleValueDecodingContainer>(decodeTaggedDataUsing container: Container) throws
 /// Encodes the data that goes inside the tag into the given single value container.
1919+ func encodeTaggedData<Container: SingleValueEncodingContainer>(using container: inout Container) throws
2020+}
2121+2222+/// Helper for reading the static ``tag`` member from an instance of the protocol. Without it, a runtime crash was
2323+/// sometimes observed when trying to do `T as! TaggedCBORItem.self` even if `T` is a `TaggedCBORItem`. The cause is
2424+/// not confirmed; it is suspected to be related to Foundation's handling of UUID. Going through an instance avoids it.
2525+extension TaggedCBORItem {
2626+ var __staticTagLookup: UInt { Self.tag }
2727+}
+33
Sources/CBOR/CustomTags/UUID+TaggedCBORItem.swift
···11+//
22+// UUID+TaggedCBORItem.swift
33+// CBOR
44+//
55+// Created by Khan Winter on 10/15/25.
66+//
77+88+#if canImport(FoundationEssentials)
99+import FoundationEssentials
1010+#else
1111+import Foundation
1212+#endif
 /// Encodes and decodes `UUID` as a tagged item whose content is the UUID's 16 raw bytes.
1313+1414+extension UUID: TaggedCBORItem {
 /// The tag used for UUIDs, taken from `CommonTags.uuid`.
1515+ public static var tag: UInt { CommonTags.uuid.rawValue }
 /// Decodes a UUID from a `Data` value read from `container`.
 /// - Throws: `DecodingError.dataCorrupted` when the decoded data is not exactly 16 bytes long.
1616+1717+ public init<Container: SingleValueDecodingContainer>(decodeTaggedDataUsing container: Container) throws {
1818+ let data = try container.decode(Data.self)
1919+ guard data.count == 16 else { // UUID size
2020+ throw DecodingError.dataCorruptedError(
2121+ in: container,
2222+ debugDescription: "Data decoded for UUID tag is not 16 bytes long."
2323+ )
2424+ }
 // Reinterpret the 16 decoded bytes as a UUID value.
2525+ self = data.withUnsafeBytes { ptr in ptr.load(as: UUID.self) }
2626+ }
 /// Encodes the raw bytes of this UUID as a single `Data` value.
2727+2828+ public func encodeTaggedData<Container: SingleValueEncodingContainer>(using encoder: inout Container) throws {
2929+ try withUnsafeBytes(of: self) { ptr in
3030+ try encoder.encode(Data(ptr))
3131+ }
3232+ }
3333+}
···37373838 /// Check the next type on the data stack.
3939 /// - Parameter types: The major types that are acceptable for the next item.
 /// - Parameter forType: The Swift type being decoded; reported in the thrown type mismatch error.
 /// - Throws: `DecodingError.typeMismatch` when `data.type` is not one of `types`.
4040- func checkType(_ types: MajorType...) throws {
4040+ func checkType<T>(_ types: MajorType..., forType: T.Type) throws {
4141 guard types.contains(data.type) else {
4242- throw DecodingError.typeMismatch(Bool.self, context.error("Unexpected type found: \(data.type)."))
4242+ throw DecodingError.typeMismatch(T.self, context.error("Unexpected type found: \(data.type)."))
4343+ }
4444+ }
4545+4646+ /// Check an available tag value by reading either the argument or the next int value.
 /// - Parameter tag: The tag value that is expected.
 /// - Parameter forType: The Swift type being decoded; reported in the thrown type mismatch error.
 /// - Throws: `DecodingError.typeMismatch` when the tag read from `data` differs from `tag`, or any error
 ///           thrown by `data.readInt(as:)`.
4747+ func checkTag<T>(_ tag: UInt, forType: T.Type) throws {
4848+ let tagValue = try data.readInt(as: UInt.self)
4949+ guard tagValue == tag else {
5050+ throw DecodingError.typeMismatch(
5151+ T.self,
5252+ context.error("Unexpected tag found: \(tagValue), expected \(tag) for \(T.self)")
5353+ )
4354 }
4455 }
4556}
···1111/// To use, conform your internal CID type to ``CIDType``. Do not conform standard types like `String` or `Data` to
1212/// ``CIDType``, or the encoder will attempt to encode all of those data as tagged items.
1313/// ```swift
1414-/// struct CID: CIDType, Encodable {
1515-/// let bytes: [UInt8]
1414+/// struct CID: CIDType {
1515+/// let data: Data
1616+///
1717+/// init(data: Data) {
1818+/// self.data = data
1919+/// }
1620///
1717-/// func cidData() throws -> [UInt8] {
1818-/// // Often you'll want to re-encode your CID from a human readable format to Base256.
1919-/// return bytes
2121+/// init<Container: SingleValueDecodingContainer>(decodeTaggedDataUsing container: Container) throws {
2222+/// var data = try container.decode(Data.self)
2323+/// data.removeFirst()
2424+/// self.data = data
2525+/// }
2626+///
2727+/// func encodeTaggedData<Container: SingleValueEncodingContainer>(using container: inout Container) throws {
2828+/// try container.encode(Data([0x0]) + data)
2029/// }
2130/// }
2231/// ```
2323-/// Note that you **do not** need to prefix your data with the `NULL` character once encoded. This library will
2424-/// handle that for you. It is invalid DAG-CBOR encoding to not include the prefixed byte.
2525-public protocol CIDType: Encodable {
2626- associatedtype Bytes: Collection where Bytes.Element == UInt8
2727- func cidData() throws -> Bytes
3232+/// Note that you **need** to prefix your data with the `NULL` byte (`0x00`) yourself when encoding. This library
3333+/// will not handle that for you. It is invalid DAG-CBOR encoding to not include the prefixed byte.
3434+public protocol CIDType: TaggedCBORItem {}
3535+3636+extension CIDType {
3737+ /// The tag for all CID types is `42`.
3838+ public static var tag: UInt { 42 }
2839}
···4646 }
47474848 func encode(_ value: Double) throws {
4949- guard !options.rejectInfAndNan && value.isNormal else {
4949+ if options.rejectInfAndNan && (value.isInfinite || value.isNaN) {
5050 throw EncodingError.invalidValue(
5151 value,
5252 context.error("Configured to reject Inf and NaN values. Found Infinite or NaN floating point value.")
···7272 // function for any type, only the standard library types. This is the same method Foundation uses to detect
7373 // special encoding cases. It's still lame.
7474 switch value {
7575- case let value as any CIDType:
7676- parent.register(try CIDOptimizer(value))
7775 case let value as Date:
7876 try _encodeDate(value)
7979- case let value as UUID:
8080- guard options.taggedItemsStrategy != .dagMode else {
8181- throw EncodingError.invalidValue(
8282- value,
8383- // swiftlint:disable:next line_length
8484- context.error("In DAG mode, all tagged items are rejected except tag 42. UUIDs are encoded as a tagged value by default. Override `encode` for your type and encode UUID with a different representation.")
8585- )
8686- }
8787- parent.register(UUIDOptimizer(value: value))
8877 case let value as Data:
8978 parent.register(ByteStringOptimizer(value: value))
7979+ case let value as TaggedCBORItem:
8080+ try _encodeTaggedItem(value, T.self)
9081// #if canImport(Float16)
9182// case let value as Float16:
9283// parent.register(Float16Optimizer(value: value))
···117108 parent.register(EpochDoubleDateOptimizer(value: value))
118109 }
119110 }
111111+ }
/// Encode a tagged item, registering a `TaggedItemOptimizer` for it with the parent.
///
/// Unless `options.taggedItemsStrategy` is `.accept`, only items whose tag is `42` are allowed.
/// - Parameter value: The tagged item to encode.
/// - Parameter type: The static type being encoded; used to name the offending type in the error.
/// - Throws: `EncodingError.invalidValue` when the item's tag is rejected by the configured strategy, or any
///           error thrown while creating the `TaggedItemOptimizer`.
func _encodeTaggedItem<T: Encodable>(_ value: TaggedCBORItem, _ type: T.Type) throws {
    let tag = value.__staticTagLookup
    guard options.taggedItemsStrategy == .accept || tag == 42 else {
        // The message names the actual type and tag: this path handles every tagged item, not only UUIDs.
        throw EncodingError.invalidValue(
            value,
            // swiftlint:disable:next line_length
            context.error("In DAG mode, all tagged items are rejected except tag 42. \(type) is encoded as a tagged value with tag \(tag). Override `encode` for your type and encode the value with a different representation.")
        )
    }
    parent.register(try TaggedItemOptimizer(value: value, context: context))
}
121124}