search for standard sites
pub-search.waow.tech
search
zig
blog
atproto
1const std = @import("std");
2const Client = @import("Client.zig");
3
4/// Initialize database schema and run migrations
5pub fn init(client: *Client) !void {
6 try createTables(client);
7 try runMigrations(client);
8 std.debug.print("schema initialized\n", .{});
9}
10
11fn createTables(client: *Client) !void {
12 try client.exec(
13 \\CREATE TABLE IF NOT EXISTS documents (
14 \\ uri TEXT PRIMARY KEY,
15 \\ did TEXT NOT NULL,
16 \\ rkey TEXT NOT NULL,
17 \\ title TEXT NOT NULL,
18 \\ content TEXT NOT NULL,
19 \\ created_at TEXT,
20 \\ publication_uri TEXT
21 \\)
22 , &.{});
23
24 try client.exec(
25 \\CREATE VIRTUAL TABLE IF NOT EXISTS documents_fts USING fts5(
26 \\ uri UNINDEXED,
27 \\ title,
28 \\ content
29 \\)
30 , &.{});
31
32 try client.exec(
33 \\CREATE TABLE IF NOT EXISTS publications (
34 \\ uri TEXT PRIMARY KEY,
35 \\ did TEXT NOT NULL,
36 \\ rkey TEXT NOT NULL,
37 \\ name TEXT NOT NULL,
38 \\ description TEXT,
39 \\ base_path TEXT
40 \\)
41 , &.{});
42
43 try client.exec(
44 \\CREATE VIRTUAL TABLE IF NOT EXISTS publications_fts USING fts5(
45 \\ uri UNINDEXED,
46 \\ name,
47 \\ description
48 \\)
49 , &.{});
50
51 try client.exec(
52 \\CREATE TABLE IF NOT EXISTS document_tags (
53 \\ document_uri TEXT NOT NULL,
54 \\ tag TEXT NOT NULL,
55 \\ PRIMARY KEY (document_uri, tag)
56 \\)
57 , &.{});
58
59 client.exec(
60 "CREATE INDEX IF NOT EXISTS idx_document_tags_tag ON document_tags(tag)",
61 &.{},
62 ) catch {};
63
64 // stats table: single row for lifetime counters
65 try client.exec(
66 \\CREATE TABLE IF NOT EXISTS stats (
67 \\ id INTEGER PRIMARY KEY CHECK (id = 1),
68 \\ total_searches INTEGER DEFAULT 0,
69 \\ total_errors INTEGER DEFAULT 0,
70 \\ service_started_at INTEGER
71 \\)
72 , &.{});
73
74 // ensure the single row exists
75 client.exec("INSERT OR IGNORE INTO stats (id) VALUES (1)", &.{}) catch {};
76
77 // set service_started_at if not already set (first run ever)
78 var ts_buf: [20]u8 = undefined;
79 const ts_str = std.fmt.bufPrint(&ts_buf, "{d}", .{std.time.timestamp()}) catch "0";
80 client.exec(
81 "UPDATE stats SET service_started_at = ? WHERE id = 1 AND service_started_at IS NULL",
82 &.{ts_str},
83 ) catch {};
84
85 // popular searches tracking
86 try client.exec(
87 \\CREATE TABLE IF NOT EXISTS popular_searches (
88 \\ query TEXT PRIMARY KEY,
89 \\ count INTEGER DEFAULT 1
90 \\)
91 , &.{});
92
93 // tombstones for deleted records
94 try client.exec(
95 \\CREATE TABLE IF NOT EXISTS tombstones (
96 \\ uri TEXT PRIMARY KEY,
97 \\ record_type TEXT NOT NULL,
98 \\ deleted_at INTEGER NOT NULL
99 \\)
100 , &.{});
101
102 // similarity cache: stores precomputed similar documents
103 // invalidated when doc_count changes (new docs added/removed)
104 try client.exec(
105 \\CREATE TABLE IF NOT EXISTS similarity_cache (
106 \\ source_uri TEXT PRIMARY KEY,
107 \\ results TEXT NOT NULL,
108 \\ doc_count INTEGER NOT NULL,
109 \\ computed_at INTEGER NOT NULL
110 \\)
111 , &.{});
112}
113
114fn runMigrations(client: *Client) !void {
115 // these may fail if columns already exist - that's fine
116 client.exec("ALTER TABLE documents ADD COLUMN publication_uri TEXT", &.{}) catch {};
117 client.exec("ALTER TABLE publications ADD COLUMN base_path TEXT", &.{}) catch {};
118 client.exec("ALTER TABLE stats ADD COLUMN service_started_at INTEGER", &.{}) catch {};
119 client.exec("ALTER TABLE stats ADD COLUMN cache_hits INTEGER DEFAULT 0", &.{}) catch {};
120 client.exec("ALTER TABLE stats ADD COLUMN cache_misses INTEGER DEFAULT 0", &.{}) catch {};
121
122 // multi-platform support: track source platform and collection
123 client.exec("ALTER TABLE documents ADD COLUMN platform TEXT DEFAULT 'leaflet'", &.{}) catch {};
124 client.exec("ALTER TABLE documents ADD COLUMN source_collection TEXT DEFAULT 'pub.leaflet.document'", &.{}) catch {};
125
126 // backfill existing records (idempotent - only updates NULLs)
127 client.exec("UPDATE documents SET platform = 'leaflet' WHERE platform IS NULL", &.{}) catch {};
128 client.exec("UPDATE documents SET source_collection = 'pub.leaflet.document' WHERE source_collection IS NULL", &.{}) catch {};
129
130 // vector embeddings column already added by backfill script
131}