search for standard sites pub-search.waow.tech
search zig blog atproto
at multi-platform-schema 131 lines 4.5 kB view raw
1const std = @import("std"); 2const Client = @import("Client.zig"); 3 4/// Initialize database schema and run migrations 5pub fn init(client: *Client) !void { 6 try createTables(client); 7 try runMigrations(client); 8 std.debug.print("schema initialized\n", .{}); 9} 10 11fn createTables(client: *Client) !void { 12 try client.exec( 13 \\CREATE TABLE IF NOT EXISTS documents ( 14 \\ uri TEXT PRIMARY KEY, 15 \\ did TEXT NOT NULL, 16 \\ rkey TEXT NOT NULL, 17 \\ title TEXT NOT NULL, 18 \\ content TEXT NOT NULL, 19 \\ created_at TEXT, 20 \\ publication_uri TEXT 21 \\) 22 , &.{}); 23 24 try client.exec( 25 \\CREATE VIRTUAL TABLE IF NOT EXISTS documents_fts USING fts5( 26 \\ uri UNINDEXED, 27 \\ title, 28 \\ content 29 \\) 30 , &.{}); 31 32 try client.exec( 33 \\CREATE TABLE IF NOT EXISTS publications ( 34 \\ uri TEXT PRIMARY KEY, 35 \\ did TEXT NOT NULL, 36 \\ rkey TEXT NOT NULL, 37 \\ name TEXT NOT NULL, 38 \\ description TEXT, 39 \\ base_path TEXT 40 \\) 41 , &.{}); 42 43 try client.exec( 44 \\CREATE VIRTUAL TABLE IF NOT EXISTS publications_fts USING fts5( 45 \\ uri UNINDEXED, 46 \\ name, 47 \\ description 48 \\) 49 , &.{}); 50 51 try client.exec( 52 \\CREATE TABLE IF NOT EXISTS document_tags ( 53 \\ document_uri TEXT NOT NULL, 54 \\ tag TEXT NOT NULL, 55 \\ PRIMARY KEY (document_uri, tag) 56 \\) 57 , &.{}); 58 59 client.exec( 60 "CREATE INDEX IF NOT EXISTS idx_document_tags_tag ON document_tags(tag)", 61 &.{}, 62 ) catch {}; 63 64 // stats table: single row for lifetime counters 65 try client.exec( 66 \\CREATE TABLE IF NOT EXISTS stats ( 67 \\ id INTEGER PRIMARY KEY CHECK (id = 1), 68 \\ total_searches INTEGER DEFAULT 0, 69 \\ total_errors INTEGER DEFAULT 0, 70 \\ service_started_at INTEGER 71 \\) 72 , &.{}); 73 74 // ensure the single row exists 75 client.exec("INSERT OR IGNORE INTO stats (id) VALUES (1)", &.{}) catch {}; 76 77 // set service_started_at if not already set (first run ever) 78 var ts_buf: [20]u8 = undefined; 79 const ts_str = std.fmt.bufPrint(&ts_buf, "{d}", .{std.time.timestamp()}) catch "0"; 80 client.exec( 81 "UPDATE stats SET service_started_at = ? WHERE id = 1 AND service_started_at IS NULL", 82 &.{ts_str}, 83 ) catch {}; 84 85 // popular searches tracking 86 try client.exec( 87 \\CREATE TABLE IF NOT EXISTS popular_searches ( 88 \\ query TEXT PRIMARY KEY, 89 \\ count INTEGER DEFAULT 1 90 \\) 91 , &.{}); 92 93 // tombstones for deleted records 94 try client.exec( 95 \\CREATE TABLE IF NOT EXISTS tombstones ( 96 \\ uri TEXT PRIMARY KEY, 97 \\ record_type TEXT NOT NULL, 98 \\ deleted_at INTEGER NOT NULL 99 \\) 100 , &.{}); 101 102 // similarity cache: stores precomputed similar documents 103 // invalidated when doc_count changes (new docs added/removed) 104 try client.exec( 105 \\CREATE TABLE IF NOT EXISTS similarity_cache ( 106 \\ source_uri TEXT PRIMARY KEY, 107 \\ results TEXT NOT NULL, 108 \\ doc_count INTEGER NOT NULL, 109 \\ computed_at INTEGER NOT NULL 110 \\) 111 , &.{}); 112} 113 114fn runMigrations(client: *Client) !void { 115 // these may fail if columns already exist - that's fine 116 client.exec("ALTER TABLE documents ADD COLUMN publication_uri TEXT", &.{}) catch {}; 117 client.exec("ALTER TABLE publications ADD COLUMN base_path TEXT", &.{}) catch {}; 118 client.exec("ALTER TABLE stats ADD COLUMN service_started_at INTEGER", &.{}) catch {}; 119 client.exec("ALTER TABLE stats ADD COLUMN cache_hits INTEGER DEFAULT 0", &.{}) catch {}; 120 client.exec("ALTER TABLE stats ADD COLUMN cache_misses INTEGER DEFAULT 0", &.{}) catch {}; 121 122 // multi-platform support: track source platform and collection 123 client.exec("ALTER TABLE documents ADD COLUMN platform TEXT DEFAULT 'leaflet'", &.{}) catch {}; 124 client.exec("ALTER TABLE documents ADD COLUMN source_collection TEXT DEFAULT 'pub.leaflet.document'", &.{}) catch {}; 125 126 // backfill existing records (idempotent - only updates NULLs) 127 client.exec("UPDATE documents SET platform = 'leaflet' WHERE platform IS NULL", &.{}) catch {}; 128 client.exec("UPDATE documents SET source_collection = 'pub.leaflet.document' WHERE source_collection IS NULL", &.{}) catch {}; 129 130 // vector embeddings column already added by backfill script 131}