backend/src/db/schema.zig at multi-platform-schema

zzstoatzz.io / leaflet-search
fork atom
search for standard sites pub-search.waow.tech
search zig blog atproto
fork atom
leaflet-search / backend / src / db / schema.zig
at multi-platform-schema 131 lines 4.5 kB view raw
wrap content
zzstoatzz.io add platform and source_collection columns to documents table 2mo ago
588921c1
  1const std = @import("std");
  2const Client = @import("Client.zig");
  3
  4/// Initialize database schema and run migrations
  5pub fn init(client: *Client) !void {
  6    try createTables(client);
  7    try runMigrations(client);
  8    std.debug.print("schema initialized\n", .{});
  9}
 10
 11fn createTables(client: *Client) !void {
 12    try client.exec(
 13        \\CREATE TABLE IF NOT EXISTS documents (
 14        \\  uri TEXT PRIMARY KEY,
 15        \\  did TEXT NOT NULL,
 16        \\  rkey TEXT NOT NULL,
 17        \\  title TEXT NOT NULL,
 18        \\  content TEXT NOT NULL,
 19        \\  created_at TEXT,
 20        \\  publication_uri TEXT
 21        \\)
 22    , &.{});
 23
 24    try client.exec(
 25        \\CREATE VIRTUAL TABLE IF NOT EXISTS documents_fts USING fts5(
 26        \\  uri UNINDEXED,
 27        \\  title,
 28        \\  content
 29        \\)
 30    , &.{});
 31
 32    try client.exec(
 33        \\CREATE TABLE IF NOT EXISTS publications (
 34        \\  uri TEXT PRIMARY KEY,
 35        \\  did TEXT NOT NULL,
 36        \\  rkey TEXT NOT NULL,
 37        \\  name TEXT NOT NULL,
 38        \\  description TEXT,
 39        \\  base_path TEXT
 40        \\)
 41    , &.{});
 42
 43    try client.exec(
 44        \\CREATE VIRTUAL TABLE IF NOT EXISTS publications_fts USING fts5(
 45        \\  uri UNINDEXED,
 46        \\  name,
 47        \\  description
 48        \\)
 49    , &.{});
 50
 51    try client.exec(
 52        \\CREATE TABLE IF NOT EXISTS document_tags (
 53        \\  document_uri TEXT NOT NULL,
 54        \\  tag TEXT NOT NULL,
 55        \\  PRIMARY KEY (document_uri, tag)
 56        \\)
 57    , &.{});
 58
 59    client.exec(
 60        "CREATE INDEX IF NOT EXISTS idx_document_tags_tag ON document_tags(tag)",
 61        &.{},
 62    ) catch {};
 63
 64    // stats table: single row for lifetime counters
 65    try client.exec(
 66        \\CREATE TABLE IF NOT EXISTS stats (
 67        \\  id INTEGER PRIMARY KEY CHECK (id = 1),
 68        \\  total_searches INTEGER DEFAULT 0,
 69        \\  total_errors INTEGER DEFAULT 0,
 70        \\  service_started_at INTEGER
 71        \\)
 72    , &.{});
 73
 74    // ensure the single row exists
 75    client.exec("INSERT OR IGNORE INTO stats (id) VALUES (1)", &.{}) catch {};
 76
 77    // set service_started_at if not already set (first run ever)
 78    var ts_buf: [20]u8 = undefined;
 79    const ts_str = std.fmt.bufPrint(&ts_buf, "{d}", .{std.time.timestamp()}) catch "0";
 80    client.exec(
 81        "UPDATE stats SET service_started_at = ? WHERE id = 1 AND service_started_at IS NULL",
 82        &.{ts_str},
 83    ) catch {};
 84
 85    // popular searches tracking
 86    try client.exec(
 87        \\CREATE TABLE IF NOT EXISTS popular_searches (
 88        \\  query TEXT PRIMARY KEY,
 89        \\  count INTEGER DEFAULT 1
 90        \\)
 91    , &.{});
 92
 93    // tombstones for deleted records
 94    try client.exec(
 95        \\CREATE TABLE IF NOT EXISTS tombstones (
 96        \\  uri TEXT PRIMARY KEY,
 97        \\  record_type TEXT NOT NULL,
 98        \\  deleted_at INTEGER NOT NULL
 99        \\)
100    , &.{});
101
102    // similarity cache: stores precomputed similar documents
103    // invalidated when doc_count changes (new docs added/removed)
104    try client.exec(
105        \\CREATE TABLE IF NOT EXISTS similarity_cache (
106        \\  source_uri TEXT PRIMARY KEY,
107        \\  results TEXT NOT NULL,
108        \\  doc_count INTEGER NOT NULL,
109        \\  computed_at INTEGER NOT NULL
110        \\)
111    , &.{});
112}
113
114fn runMigrations(client: *Client) !void {
115    // these may fail if columns already exist - that's fine
116    client.exec("ALTER TABLE documents ADD COLUMN publication_uri TEXT", &.{}) catch {};
117    client.exec("ALTER TABLE publications ADD COLUMN base_path TEXT", &.{}) catch {};
118    client.exec("ALTER TABLE stats ADD COLUMN service_started_at INTEGER", &.{}) catch {};
119    client.exec("ALTER TABLE stats ADD COLUMN cache_hits INTEGER DEFAULT 0", &.{}) catch {};
120    client.exec("ALTER TABLE stats ADD COLUMN cache_misses INTEGER DEFAULT 0", &.{}) catch {};
121
122    // multi-platform support: track source platform and collection
123    client.exec("ALTER TABLE documents ADD COLUMN platform TEXT DEFAULT 'leaflet'", &.{}) catch {};
124    client.exec("ALTER TABLE documents ADD COLUMN source_collection TEXT DEFAULT 'pub.leaflet.document'", &.{}) catch {};
125
126    // backfill existing records (idempotent - only updates NULLs)
127    client.exec("UPDATE documents SET platform = 'leaflet' WHERE platform IS NULL", &.{}) catch {};
128    client.exec("UPDATE documents SET source_collection = 'pub.leaflet.document' WHERE source_collection IS NULL", &.{}) catch {};
129
130    // vector embeddings column already added by backfill script
131}