atproto utils for zig zat.dev
atproto sdk zig

fix: NSID rejects TLD starting with digit, AT-URI validates components

NSID parser now enforces that the first domain segment (TLD) must start
with a letter, per the spec regex. Previously, the parser accepted strings
like "1.0.0.127.record".

AT-URI parser now validates all components: authority must be a valid DID
or handle, collection must be a valid NSID, rkey must be a valid record
key. It also rejects URIs containing forbidden characters (space, #, ?).

Found by running official atproto-interop-tests fixtures.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

+31 -8
+26 -6
src/internal/at_uri.zig
··· 13 13 //! see: https://atproto.com/specs/at-uri-scheme 14 14 15 15 const std = @import("std"); 16 + const Did = @import("did.zig").Did; 17 + const Handle = @import("handle.zig").Handle; 18 + const Nsid = @import("nsid.zig").Nsid; 19 + const Rkey = @import("rkey.zig").Rkey; 16 20 17 21 pub const AtUri = struct { 18 22 /// the full uri string (borrowed, not owned) ··· 35 39 // must start with "at://" 36 40 if (!std.mem.startsWith(u8, s, prefix)) return null; 37 41 42 + // reject forbidden characters anywhere after prefix 43 + for (s) |c| { 44 + if (c == ' ' or c == '#' or c == '?') return null; 45 + } 46 + 38 47 // no trailing slash 39 48 if (s[s.len - 1] == '/') return null; 40 49 ··· 44 53 // find first slash (end of authority) 45 54 const authority_end_rel = std.mem.indexOfScalar(u8, after_prefix, '/'); 46 55 47 - if (authority_end_rel) |ae| { 48 - if (ae == 0) return null; // empty authority 56 + const auth_str = after_prefix[0 .. authority_end_rel orelse after_prefix.len]; 57 + if (auth_str.len == 0) return null; 58 + 59 + // authority must be a valid DID or handle 60 + if (Did.parse(auth_str) == null and Handle.parse(auth_str) == null) return null; 49 61 62 + if (authority_end_rel) |ae| { 50 63 const after_authority = after_prefix[ae + 1 ..]; 51 64 if (after_authority.len == 0) return null; // trailing slash after authority 52 65 53 66 // find second slash (end of collection) 54 67 const collection_end_rel = std.mem.indexOfScalar(u8, after_authority, '/'); 55 68 69 + const coll_str = after_authority[0 .. 
collection_end_rel orelse after_authority.len]; 70 + if (coll_str.len == 0) return null; // empty collection 71 + 72 + // collection must be a valid NSID 73 + if (Nsid.parse(coll_str) == null) return null; 74 + 56 75 if (collection_end_rel) |ce| { 57 - if (ce == 0) return null; // empty collection 58 - const after_collection = after_authority[ce + 1 ..]; 59 - if (after_collection.len == 0) return null; // trailing slash after collection 76 + const rkey_str = after_authority[ce + 1 ..]; 77 + if (rkey_str.len == 0) return null; // trailing slash after collection 60 78 61 - // full uri: authority + collection + rkey 79 + // rkey must be a valid record key 80 + if (Rkey.parse(rkey_str) == null) return null; 81 + 62 82 return .{ 63 83 .raw = s, 64 84 .authority_end = prefix.len + ae,
+5 -2
src/internal/nsid.zig
··· 41 41 if (c == '.') { 42 42 const segment = s[segment_start..i]; 43 43 // all segments except last must be valid domain segments 44 - if (!isValidDomainSegment(segment)) return null; 44 + if (!isValidDomainSegment(segment, segment_count == 0)) return null; 45 45 segment_count += 1; 46 46 last_dot = i; 47 47 segment_start = i + 1; ··· 77 77 return self.raw[self.name_start..]; 78 78 } 79 79 80 - fn isValidDomainSegment(seg: []const u8) bool { 80 + fn isValidDomainSegment(seg: []const u8, is_first: bool) bool { 81 81 // 1-63 characters 82 82 if (seg.len == 0 or seg.len > max_segment_length) return false; 83 83 84 84 // cannot start or end with hyphen 85 85 if (seg[0] == '-' or seg[seg.len - 1] == '-') return false; 86 + 87 + // first segment (TLD) must start with a letter 88 + if (is_first and !(seg[0] >= 'a' and seg[0] <= 'z')) return false; 86 89 87 90 // lowercase letters, digits, and hyphens only 88 91 for (seg) |c| {