search for standard sites pub-search.waow.tech
search zig blog atproto

refactor: restructure backend src as library with clear module boundaries

Move server.zig, search.zig, and dashboard.zig into the server/ directory.
Move reconcile.zig into ingest/ as reconciler.zig alongside other
background workers. Replace top-level ingest.zig and metrics.zig
shims with mod.zig inside their directories. Root the test build at
main.zig with shared imports to fix the Zig 0.15 module path boundary restriction.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

+54 -59
+22 -28
backend/build.zig
··· 30 .optimize = optimize, 31 }); 32 33 const exe = b.addExecutable(.{ 34 .name = "leaflet-search", 35 .root_module = b.createModule(.{ 36 .root_source_file = b.path("src/main.zig"), 37 .target = target, 38 .optimize = optimize, 39 - .imports = &.{ 40 - .{ .name = "websocket", .module = websocket.module("websocket") }, 41 - .{ .name = "zql", .module = zql.module("zql") }, 42 - .{ .name = "zat", .module = zat.module("zat") }, 43 - .{ .name = "zqlite", .module = zqlite.module("zqlite") }, 44 - .{ .name = "logfire", .module = logfire.module("logfire") }, 45 - }, 46 }), 47 }); 48 ··· 57 const run_step = b.step("run", "Run the server"); 58 run_step.dependOn(&run_cmd.step); 59 60 - // test step 61 - const test_step = b.step("test", "Run unit tests"); 62 - 63 - const test_files = [_][]const u8{ 64 - "src/search.zig", 65 - "src/ingest/extractor.zig", 66 - }; 67 68 - for (test_files) |file| { 69 - const unit_tests = b.addTest(.{ 70 - .root_module = b.createModule(.{ 71 - .root_source_file = b.path(file), 72 - .target = target, 73 - .optimize = optimize, 74 - .imports = &.{ 75 - .{ .name = "zat", .module = zat.module("zat") }, 76 - }, 77 - }), 78 - }); 79 - const run_tests = b.addRunArtifact(unit_tests); 80 - test_step.dependOn(&run_tests.step); 81 - } 82 }
··· 30 .optimize = optimize, 31 }); 32 33 + const imports: []const std.Build.Module.Import = &.{ 34 + .{ .name = "websocket", .module = websocket.module("websocket") }, 35 + .{ .name = "zql", .module = zql.module("zql") }, 36 + .{ .name = "zat", .module = zat.module("zat") }, 37 + .{ .name = "zqlite", .module = zqlite.module("zqlite") }, 38 + .{ .name = "logfire", .module = logfire.module("logfire") }, 39 + }; 40 + 41 const exe = b.addExecutable(.{ 42 .name = "leaflet-search", 43 .root_module = b.createModule(.{ 44 .root_source_file = b.path("src/main.zig"), 45 .target = target, 46 .optimize = optimize, 47 + .imports = imports, 48 }), 49 }); 50 ··· 59 const run_step = b.step("run", "Run the server"); 60 run_step.dependOn(&run_cmd.step); 61 62 + // tests — rooted at main.zig so all transitive imports are discovered 63 + const unit_tests = b.addTest(.{ 64 + .root_module = b.createModule(.{ 65 + .root_source_file = b.path("src/main.zig"), 66 + .target = target, 67 + .optimize = optimize, 68 + .imports = imports, 69 + }), 70 + }); 71 + unit_tests.linkLibC(); 72 73 + const run_tests = b.addRunArtifact(unit_tests); 74 + const test_step = b.step("test", "Run unit tests"); 75 + test_step.dependOn(&run_tests.step); 76 }
+2 -2
backend/src/dashboard.zig backend/src/server/dashboard.zig
··· 1 const std = @import("std"); 2 const json = std.json; 3 const Allocator = std.mem.Allocator; 4 - const db = @import("db/mod.zig"); 5 const logfire = @import("logfire"); 6 - const timing = @import("metrics.zig").timing; 7 8 // JSON output types 9 const TagJson = struct { tag: []const u8, count: i64 };
··· 1 const std = @import("std"); 2 const json = std.json; 3 const Allocator = std.mem.Allocator; 4 + const db = @import("../db/mod.zig"); 5 const logfire = @import("logfire"); 6 + const timing = @import("../metrics/mod.zig").timing; 7 8 // JSON output types 9 const TagJson = struct { tag: []const u8, count: i64 };
-3
backend/src/ingest.zig
··· 1 - pub const tap = @import("ingest/tap.zig"); 2 - pub const embedder = @import("ingest/embedder.zig"); 3 - // indexer and extractor are internal to the pipeline
···
+4
backend/src/ingest/mod.zig
···
··· 1 + pub const tap = @import("tap.zig"); 2 + pub const embedder = @import("embedder.zig"); 3 + pub const reconciler = @import("reconciler.zig"); 4 + // indexer and extractor are internal to the pipeline
+4 -5
backend/src/main.zig
··· 5 const logfire = @import("logfire"); 6 const db = @import("db/mod.zig"); 7 const tpuf = @import("tpuf.zig"); 8 - const metrics = @import("metrics.zig"); 9 - const server = @import("server.zig"); 10 - const ingest = @import("ingest.zig"); 11 - const reconcile = @import("reconcile.zig"); 12 13 const MAX_HTTP_WORKERS = 16; 14 const SOCKET_TIMEOUT_SECS = 5; ··· 88 tpuf.init(); 89 90 // start reconciler (verifies documents still exist at source PDS) 91 - reconcile.start(allocator); 92 93 // start embedder (voyage-4-lite, 1024 dims, 1 worker) 94 ingest.embedder.start(allocator);
··· 5 const logfire = @import("logfire"); 6 const db = @import("db/mod.zig"); 7 const tpuf = @import("tpuf.zig"); 8 + const metrics = @import("metrics/mod.zig"); 9 + const server = @import("server/mod.zig"); 10 + const ingest = @import("ingest/mod.zig"); 11 12 const MAX_HTTP_WORKERS = 16; 13 const SOCKET_TIMEOUT_SECS = 5; ··· 87 tpuf.init(); 88 89 // start reconciler (verifies documents still exist at source PDS) 90 + ingest.reconciler.start(allocator); 91 92 // start embedder (voyage-4-lite, 1024 dims, 1 worker) 93 ingest.embedder.start(allocator);
-4
backend/src/metrics.zig
··· 1 - pub const stats = @import("metrics/stats.zig"); 2 - pub const buffer = @import("metrics/buffer.zig"); 3 - pub const timing = @import("metrics/timing.zig"); 4 - pub const activity = @import("metrics/activity.zig");
···
+4
backend/src/metrics/mod.zig
···
··· 1 + pub const stats = @import("stats.zig"); 2 + pub const buffer = @import("buffer.zig"); 3 + pub const timing = @import("timing.zig"); 4 + pub const activity = @import("activity.zig");
+6 -5
backend/src/reconcile.zig backend/src/ingest/reconciler.zig
··· 4 //! Documents that return 400/404 from com.atproto.repo.getRecord are 5 //! deleted from turso and turbopuffer. 6 //! 7 - //! This catches deletions missed while the tap was down — the firehose 8 - //! is ephemeral and delete events are never replayed. 9 10 const std = @import("std"); 11 const http = std.http; ··· 14 const posix = std.posix; 15 const Allocator = mem.Allocator; 16 const logfire = @import("logfire"); 17 - const db = @import("db/mod.zig"); 18 - const tpuf = @import("tpuf.zig"); 19 - const indexer = @import("ingest/indexer.zig"); 20 21 // config (env vars with defaults) 22 fn getIntervalSecs() u64 {
··· 4 //! Documents that return 400/404 from com.atproto.repo.getRecord are 5 //! deleted from turso and turbopuffer. 6 //! 7 + //! This catches deletions that tap resync cannot — resync only re-sends 8 + //! records that still exist, so documents deleted at the PDS between 9 + //! resyncs become ghosts. The reconciler verifies them directly. 10 11 const std = @import("std"); 12 const http = std.http; ··· 15 const posix = std.posix; 16 const Allocator = mem.Allocator; 17 const logfire = @import("logfire"); 18 + const db = @import("../db/mod.zig"); 19 + const tpuf = @import("../tpuf.zig"); 20 + const indexer = @import("indexer.zig"); 21 22 // config (env vars with defaults) 23 fn getIntervalSecs() u64 {
+2 -2
backend/src/search.zig backend/src/server/search.zig
··· 3 const Allocator = std.mem.Allocator; 4 const zql = @import("zql"); 5 const logfire = @import("logfire"); 6 - const db = @import("db/mod.zig"); 7 - const tpuf = @import("tpuf.zig"); 8 9 pub const SearchMode = enum { 10 keyword,
··· 3 const Allocator = std.mem.Allocator; 4 const zql = @import("zql"); 5 const logfire = @import("logfire"); 6 + const db = @import("../db/mod.zig"); 7 + const tpuf = @import("../tpuf.zig"); 8 9 pub const SearchMode = enum { 10 keyword,
+2 -2
backend/src/server.zig backend/src/server/mod.zig
··· 6 const Allocator = mem.Allocator; 7 const logfire = @import("logfire"); 8 const zql = @import("zql"); 9 - const db = @import("db/mod.zig"); 10 - const metrics = @import("metrics.zig"); 11 const search = @import("search.zig"); 12 const dashboard = @import("dashboard.zig"); 13
··· 6 const Allocator = mem.Allocator; 7 const logfire = @import("logfire"); 8 const zql = @import("zql"); 9 + const db = @import("../db/mod.zig"); 10 + const metrics = @import("../metrics/mod.zig"); 11 const search = @import("search.zig"); 12 const dashboard = @import("dashboard.zig"); 13
+2 -2
docs/reconciliation.md
··· 40 41 | file | role | 42 |------|------| 43 - | `backend/src/reconcile.zig` | background worker (~250 lines) | 44 - | `backend/src/main.zig` | wires up `reconcile.start(allocator)` after `tpuf.init()` | 45 | `backend/src/db/schema.zig` | `verified_at TEXT` column migration | 46 | `backend/src/ingest/tap.zig` | `tpuf.delete()` after `indexer.deleteDocument()` | 47
··· 40 41 | file | role | 42 |------|------| 43 + | `backend/src/ingest/reconciler.zig` | background worker (~250 lines) | 44 + | `backend/src/main.zig` | wires up `ingest.reconciler.start(allocator)` after `tpuf.init()` | 45 | `backend/src/db/schema.zig` | `verified_at TEXT` column migration | 46 | `backend/src/ingest/tap.zig` | `tpuf.delete()` after `indexer.deleteDocument()` | 47
+2 -2
docs/scratch/logfire-zig-adoption.md
··· 55 56 ## 3. replace timing.zig with spans 57 58 - current pattern in server.zig: 59 60 ```zig 61 fn handleSearch(request: *http.Server.Request, target: []const u8) !void { ··· 154 // ... 155 } 156 157 - // with logfire (in server.zig or stats.zig) 158 pub fn recordSearch(query: []const u8) void { 159 logfire.counter("search.total", 1); 160 // existing logic...
··· 55 56 ## 3. replace timing.zig with spans 57 58 + current pattern in server/mod.zig: 59 60 ```zig 61 fn handleSearch(request: *http.Server.Request, target: []const u8) !void { ··· 154 // ... 155 } 156 157 + // with logfire (in server/mod.zig or stats.zig) 158 pub fn recordSearch(query: []const u8) void { 159 logfire.counter("search.total", 1); 160 // existing logic...
+2 -2
docs/scratch/standard-search-planning.md
··· 98 99 turso (sqlite) - documents table + FTS5 + embeddings 100 101 - search.zig - FTS5 queries + vector similarity 102 103 - server.zig - HTTP API (/search, /similar, /stats) 104 ``` 105 106 leaflet-specific code:
··· 98 99 turso (sqlite) - documents table + FTS5 + embeddings 100 101 + server/search.zig - FTS5 queries + vector similarity 102 103 + server/mod.zig - HTTP API (/search, /similar, /stats) 104 ``` 105 106 leaflet-specific code:
+2 -2
docs/search-architecture.md
··· 34 35 ### what's coupled to FTS5 36 37 - all in `backend/src/search.zig`: 38 39 | component | FTS5-specific | 40 |-----------|---------------| ··· 50 - search-time dedup by `(did, title)` — collapses cross-platform duplicates 51 - ingestion-time dedup by content hash — prevents duplicates at write time 52 - caching logic 53 - - HTTP layer (server.zig just calls `search()`) 54 55 ### known limitations 56
··· 34 35 ### what's coupled to FTS5 36 37 + all in `backend/src/server/search.zig`: 38 39 | component | FTS5-specific | 40 |-----------|---------------| ··· 50 - search-time dedup by `(did, title)` — collapses cross-platform duplicates 51 - ingestion-time dedup by content hash — prevents duplicates at write time 52 - caching logic 53 + - HTTP layer (server/mod.zig just calls `search()`) 54 55 ### known limitations 56