integrate zat SDK, fix quoted phrase search · zzstoatzz.io/leaflet-search@e364d52

+6

backend/build.zig

··· 14 14 .optimize = optimize, 15 15 }); 16 16 17 + const zat = b.dependency("zat", .{ 18 + .target = target, 19 + .optimize = optimize, 20 + }); 21 + 17 22 const exe = b.addExecutable(.{ 18 23 .name = "leaflet-search", 19 24 .root_module = b.createModule(.{ ··· 23 28 .imports = &.{ 24 29 .{ .name = "websocket", .module = websocket.module("websocket") }, 25 30 .{ .name = "zql", .module = zql.module("zql") }, 31 + .{ .name = "zat", .module = zat.module("zat") }, 26 32 }, 27 33 }), 28 34 });

+4

backend/build.zig.zon

··· 12 12 .url = "https://github.com/zzstoatzz/zql/archive/main.tar.gz", 13 13 .hash = "zql-0.0.1-alpha-xNRI4IRNAABUb9gLat5FWUaZDD5HvxAxet_-elgR_A_y", 14 14 }, 15 + .zat = .{ 16 + .url = "https://github.com/zzstoatzz/zat/archive/main.tar.gz", 17 + .hash = "zat-0.0.2-5PuC7hJbAQAx5-PTup-GhBRIRAKqjbyBlIQq8YTEObTu", 18 + }, 15 19 }, 16 20 .paths = .{ 17 21 "build.zig",

+6

backend/src/db/mod.zig

··· 412 412 413 413 /// Build FTS5 query with OR between terms: "cat dog" -> "cat OR dog*" 414 414 /// Uses OR for better recall with BM25 ranking (more matches = higher score) 415 + /// Quoted queries are passed through as phrase matches: "exact phrase" -> "exact phrase" 415 416 fn buildFtsQuery(alloc: Allocator, query: []const u8) ![]const u8 { 416 417 if (query.len == 0) return ""; 417 418 ··· 423 424 if (start >= end) return ""; 424 425 425 426 const trimmed = query[start..end]; 427 + 428 + // quoted phrase: pass through to FTS5 for exact phrase matching 429 + if (trimmed.len >= 2 and trimmed[0] == '"' and trimmed[trimmed.len - 1] == '"') { 430 + return try alloc.dupe(u8, trimmed); 431 + } 426 432 427 433 // count words and total length 428 434 var word_count: usize = 0;

+73 -147

backend/src/tap.zig

··· 4 4 const posix = std.posix; 5 5 const Allocator = mem.Allocator; 6 6 const websocket = @import("websocket"); 7 + const zat = @import("zat"); 7 8 const db = @import("db/mod.zig"); 8 9 9 10 const DOCUMENT_COLLECTION = "pub.leaflet.document"; 10 11 const PUBLICATION_COLLECTION = "pub.leaflet.publication"; 11 - 12 - // domain types 13 - const Did = struct { 14 - raw: []const u8, 15 - 16 - fn parse(s: []const u8) ?Did { 17 - if (!mem.startsWith(u8, s, "did:")) return null; 18 - const rest = s[4..]; 19 - const colon = mem.indexOf(u8, rest, ":") orelse return null; 20 - if (colon == 0 or colon == rest.len - 1) return null; 21 - return .{ .raw = s }; 22 - } 23 - 24 - fn str(self: Did) []const u8 { 25 - return self.raw; 26 - } 27 - }; 28 - 29 - const AtUri = struct { 30 - raw: []const u8, 31 - did_end: usize, 32 - collection_end: usize, 33 - 34 - fn build(allocator: Allocator, d: Did, coll: []const u8, rk: []const u8) !AtUri { 35 - const raw = try std.fmt.allocPrint(allocator, "at://{s}/{s}/{s}", .{ d.raw, coll, rk }); 36 - return .{ 37 - .raw = raw, 38 - .did_end = 5 + d.raw.len, 39 - .collection_end = 5 + d.raw.len + 1 + coll.len, 40 - }; 41 - } 42 - 43 - fn did(self: AtUri) Did { 44 - return .{ .raw = self.raw[5..self.did_end] }; 45 - } 46 - 47 - fn rkey(self: AtUri) []const u8 { 48 - return self.raw[self.collection_end + 1 ..]; 49 - } 50 - 51 - fn str(self: AtUri) []const u8 { 52 - return self.raw; 53 - } 54 - }; 55 12 56 13 fn getTapHost() []const u8 { 57 14 return posix.getenv("TAP_HOST") orelse "leaflet-search-tap.fly.dev"; ··· 140 97 }; 141 98 } 142 99 100 + /// TAP record envelope - extracted via zat.json.extractAt 101 + const TapRecord = struct { 102 + collection: []const u8, 103 + action: []const u8, 104 + did: []const u8, 105 + rkey: []const u8, 106 + }; 107 + 108 + /// Leaflet document fields 109 + const LeafletDocument = struct { 110 + title: []const u8, 111 + publication: ?[]const u8 = null, 112 + publishedAt: ?[]const u8 = null, 113 + createdAt: ?[]const u8 = null, 114 + description: ?[]const u8 = null, 115 + }; 116 + 117 + /// Leaflet publication fields 118 + const LeafletPublication = struct { 119 + name: []const u8, 120 + description: ?[]const u8 = null, 121 + base_path: ?[]const u8 = null, 122 + }; 123 + 143 124 fn processMessage(allocator: Allocator, payload: []const u8) !void { 144 125 const parsed = json.parseFromSlice(json.Value, allocator, payload, .{}) catch return; 145 126 defer parsed.deinit(); 146 127 147 - const root = parsed.value.object; 128 + // check message type 129 + const msg_type = zat.json.getString(parsed.value, "type") orelse return; 130 + if (!mem.eql(u8, msg_type, "record")) return; 148 131 149 - // tap format: { "id": 123, "type": "record", "record": { ... } } 150 - const msg_type = root.get("type") orelse return; 151 - if (msg_type != .string) return; 152 - if (!mem.eql(u8, msg_type.string, "record")) return; 153 - 154 - const record_wrapper = root.get("record") orelse return; 155 - if (record_wrapper != .object) return; 156 - 157 - const rec = record_wrapper.object; 158 - 159 - const collection = rec.get("collection") orelse return; 160 - if (collection != .string) return; 161 - 162 - const action = rec.get("action") orelse return; 163 - if (action != .string) return; 164 - 165 - const did_val = rec.get("did") orelse return; 166 - if (did_val != .string) return; 167 - const did = Did.parse(did_val.string) orelse return; 132 + // extract record envelope 133 + const rec = zat.json.extractAt(TapRecord, allocator, parsed.value, .{"record"}) catch return; 168 134 169 - const rkey = rec.get("rkey") orelse return; 170 - if (rkey != .string) return; 135 + // validate DID 136 + const did = zat.Did.parse(rec.did) orelse return; 171 137 172 - const uri = AtUri.build(allocator, did, collection.string, rkey.string) catch return; 173 - defer allocator.free(uri.raw); 138 + // build AT-URI string 139 + const uri = try std.fmt.allocPrint(allocator, "at://{s}/{s}/{s}", .{ did.raw, rec.collection, rec.rkey }); 140 + defer allocator.free(uri); 174 141 175 - if (mem.eql(u8, action.string, "create") or mem.eql(u8, action.string, "update")) { 176 - const record = rec.get("record") orelse return; 177 - if (record != .object) return; 142 + if (mem.eql(u8, rec.action, "create") or mem.eql(u8, rec.action, "update")) { 143 + const record_obj = zat.json.getObject(parsed.value, "record.record") orelse return; 178 144 179 - if (mem.eql(u8, collection.string, DOCUMENT_COLLECTION)) { 180 - processDocument(allocator, uri, record.object) catch |err| { 145 + if (mem.eql(u8, rec.collection, DOCUMENT_COLLECTION)) { 146 + processDocument(allocator, uri, did.raw, rec.rkey, record_obj) catch |err| { 181 147 std.debug.print("document processing error: {}\n", .{err}); 182 148 }; 183 - } else if (mem.eql(u8, collection.string, PUBLICATION_COLLECTION)) { 184 - processPublication(uri, record.object) catch |err| { 149 + } else if (mem.eql(u8, rec.collection, PUBLICATION_COLLECTION)) { 150 + processPublication(allocator, uri, did.raw, rec.rkey, record_obj) catch |err| { 185 151 std.debug.print("publication processing error: {}\n", .{err}); 186 152 }; 187 153 } 188 - } else if (mem.eql(u8, action.string, "delete")) { 189 - if (mem.eql(u8, collection.string, DOCUMENT_COLLECTION)) { 190 - db.deleteDocument(uri.str()); 191 - std.debug.print("deleted document: {s}\n", .{uri.str()}); 192 - } else if (mem.eql(u8, collection.string, PUBLICATION_COLLECTION)) { 193 - db.deletePublication(uri.str()); 194 - std.debug.print("deleted publication: {s}\n", .{uri.str()}); 154 + } else if (mem.eql(u8, rec.action, "delete")) { 155 + if (mem.eql(u8, rec.collection, DOCUMENT_COLLECTION)) { 156 + db.deleteDocument(uri); 157 + std.debug.print("deleted document: {s}\n", .{uri}); 158 + } else if (mem.eql(u8, rec.collection, PUBLICATION_COLLECTION)) { 159 + db.deletePublication(uri); 160 + std.debug.print("deleted publication: {s}\n", .{uri}); 195 161 } 196 162 } 197 163 } 198 164 199 - fn processDocument(allocator: Allocator, uri: AtUri, record: json.ObjectMap) !void { 200 - // get title 201 - const title_val = record.get("title") orelse return; 202 - if (title_val != .string) return; 203 - const title = title_val.string; 165 + fn processDocument(allocator: Allocator, uri: []const u8, did: []const u8, rkey: []const u8, record: json.ObjectMap) !void { 166 + const record_val: json.Value = .{ .object = record }; 204 167 205 - // get publication URI 206 - const publication_uri: ?[]const u8 = blk: { 207 - if (record.get("publication")) |v| { 208 - if (v == .string) break :blk v.string; 209 - } 210 - break :blk null; 211 - }; 168 + // extract known fields via struct 169 + const doc = zat.json.extractAt(LeafletDocument, allocator, record_val, .{}) catch return; 170 + const created_at = doc.publishedAt orelse doc.createdAt; 212 171 213 - // get createdAt (optional, might be publishedAt) 214 - const created_at: ?[]const u8 = blk: { 215 - if (record.get("publishedAt")) |v| { 216 - if (v == .string) break :blk v.string; 217 - } 218 - if (record.get("createdAt")) |v| { 219 - if (v == .string) break :blk v.string; 220 - } 221 - break :blk null; 222 - }; 223 - 224 - // extract tags 172 + // extract tags array 225 173 var tags_list: std.ArrayList([]const u8) = .{}; 226 174 defer tags_list.deinit(allocator); 227 - if (record.get("tags")) |tags_val| { 228 - if (tags_val == .array) { 229 - for (tags_val.array.items) |tag_item| { 230 - if (tag_item == .string) { 231 - try tags_list.append(allocator, tag_item.string); 232 - } 175 + if (zat.json.getArray(record_val, "tags")) |tags| { 176 + for (tags) |tag_item| { 177 + if (tag_item == .string) { 178 + try tags_list.append(allocator, tag_item.string); 233 179 } 234 180 } 235 181 } 236 182 183 + // extract plaintext from pages 237 184 var content_buf: std.ArrayList(u8) = .{}; 238 185 defer content_buf.deinit(allocator); 239 186 240 - // include document description if present 241 - if (record.get("description")) |desc_val| { 242 - if (desc_val == .string and desc_val.string.len > 0) { 243 - try content_buf.appendSlice(allocator, desc_val.string); 187 + if (doc.description) |desc| { 188 + if (desc.len > 0) { 189 + try content_buf.appendSlice(allocator, desc); 244 190 } 245 191 } 246 192 247 - // extract plaintext from pages 248 - if (record.get("pages")) |pages_val| { 249 - if (pages_val == .array) { 250 - for (pages_val.array.items) |page| { 251 - if (page != .object) continue; 193 + if (zat.json.getArray(record_val, "pages")) |pages| { 194 + for (pages) |page| { 195 + if (page == .object) { 252 196 try extractPlaintextFromPage(allocator, &content_buf, page.object); 253 197 } 254 198 } 255 199 } 256 200 257 - if (content_buf.items.len == 0) { 258 - // no content extracted, skip 259 - return; 260 - } 201 + if (content_buf.items.len == 0) return; 261 202 262 - try db.insertDocument(uri.str(), uri.did().str(), uri.rkey(), title, content_buf.items, created_at, publication_uri, tags_list.items); 263 - std.debug.print("indexed document: {s} ({} chars, {} tags)\n", .{ uri.str(), content_buf.items.len, tags_list.items.len }); 203 + try db.insertDocument(uri, did, rkey, doc.title, content_buf.items, created_at, doc.publication, tags_list.items); 204 + std.debug.print("indexed document: {s} ({} chars, {} tags)\n", .{ uri, content_buf.items.len, tags_list.items.len }); 264 205 } 265 206 266 207 fn extractPlaintextFromPage(allocator: Allocator, buf: *std.ArrayList(u8), page: json.ObjectMap) !void { ··· 344 285 } 345 286 } 346 287 347 - fn processPublication(uri: AtUri, record: json.ObjectMap) !void { 348 - const name_val = record.get("name") orelse return; 349 - if (name_val != .string) return; 350 - const name = name_val.string; 288 + fn processPublication(allocator: Allocator, uri: []const u8, did: []const u8, rkey: []const u8, record: json.ObjectMap) !void { 289 + const record_val: json.Value = .{ .object = record }; 290 + const pub_data = zat.json.extractAt(LeafletPublication, allocator, record_val, .{}) catch return; 351 291 352 - const description: ?[]const u8 = blk: { 353 - if (record.get("description")) |v| { 354 - if (v == .string) break :blk v.string; 355 - } 356 - break :blk null; 357 - }; 358 - 359 - const base_path: ?[]const u8 = blk: { 360 - if (record.get("base_path")) |v| { 361 - if (v == .string) break :blk v.string; 362 - } 363 - break :blk null; 364 - }; 365 - 366 - try db.insertPublication(uri.str(), uri.did().str(), uri.rkey(), name, description, base_path); 367 - std.debug.print("indexed publication: {s} (base_path: {s})\n", .{ uri.str(), base_path orelse "none" }); 292 + try db.insertPublication(uri, did, rkey, pub_data.name, pub_data.description, pub_data.base_path); 293 + std.debug.print("indexed publication: {s} (base_path: {s})\n", .{ uri, pub_data.base_path orelse "none" }); 368 294 }

+1 -96

docs/zig-patterns.md

··· 1 1 # zig patterns 2 2 3 - notes on zig idioms learned while building leaflet-search. 4 - 5 - ## json serialization 6 - 7 - ### struct serialization vs manual building 8 - 9 - zig's `std.json.Stringify` can serialize structs directly with `jw.write(struct)`: 10 - 11 - ```zig 12 - // define types that mirror the JSON structure 13 - const Value = struct { type: []const u8 = "text", value: []const u8 }; 14 - const Stmt = struct { sql: []const u8, args: ?[]const Value = null }; 15 - const ExecuteReq = struct { type: []const u8 = "execute", stmt: Stmt }; 16 - 17 - // serialize with one call 18 - try jw.write(ExecuteReq{ .stmt = .{ .sql = sql, .args = values } }); 19 - ``` 20 - 21 - this is cleaner than manual field-by-field building: 22 - 23 - ```zig 24 - // verbose alternative 25 - try jw.beginObject(); 26 - try jw.objectField("type"); 27 - try jw.write("execute"); 28 - try jw.objectField("stmt"); 29 - try jw.beginObject(); 30 - try jw.objectField("sql"); 31 - try jw.write(sql); 32 - // ... many more lines 33 - try jw.endObject(); 34 - try jw.endObject(); 35 - ``` 36 - 37 - ### optional fields 38 - 39 - use `emit_null_optional_fields = false` to omit null optional fields instead of serializing them as `"field": null`: 40 - 41 - ```zig 42 - const Stmt = struct { 43 - sql: []const u8, 44 - args: ?[]const Value = null, // optional field 45 - }; 46 - 47 - var jw: json.Stringify = .{ 48 - .writer = &body.writer, 49 - .options = .{ .emit_null_optional_fields = false }, 50 - }; 51 - 52 - // if args is null, the field is omitted entirely 53 - try jw.write(Stmt{ .sql = "SELECT 1", .args = null }); 54 - // produces: {"sql":"SELECT 1"} 55 - // NOT: {"sql":"SELECT 1","args":null} 56 - ``` 57 - 58 - this matters when APIs reject `null` values for optional fields (like Turso/Hrana). 59 - 60 - ## file organization 61 - 62 - ### file-as-type pattern 63 - 64 - when a file IS a type (single primary struct), use `@This()`: 65 - 66 - ```zig 67 - // Client.zig 68 - const Client = @This(); 69 - 70 - allocator: Allocator, 71 - url: []const u8, 72 - // ... fields at top level 73 - 74 - pub fn init(allocator: Allocator) !Client { ... } 75 - pub fn query(self: *Client, ...) !Result { ... } 76 - ``` 77 - 78 - consumers import as: `const Client = @import("Client.zig");` 79 - 80 - ### namespace modules 81 - 82 - when a file is a namespace with multiple types, use regular exports: 83 - 84 - ```zig 85 - // result.zig 86 - pub const Result = struct { ... }; 87 - pub const Row = struct { ... }; 88 - pub const BatchResult = struct { ... }; 89 - ``` 90 - 91 - naming convention: 92 - - `TitleCase.zig` → file-as-type (the file IS the struct) 93 - - `snake_case.zig` → namespace module (exports multiple things) 94 - 95 - ## references 96 - 97 - - [zig std.json.Stringify source](https://github.com/ziglang/zig/blob/master/lib/std/json/Stringify.zig) 98 - - [zig style guide](https://ziglang.org/documentation/master/#Style-Guide) 3 + moved to [notes/languages/ziglang](https://tangled.sh/@zzstoatzz.io/notes/tree/main/languages/ziglang)