search for standard sites pub-search.waow.tech
search zig blog atproto

restore voyage-3-lite embedder + /similar endpoint with cached cosine similarity

re-enables the pre-a6660bc setup: a background embedder (1 worker, batches of 20,
512-dim voyage-3-lite) and a brute-force cosine /similar endpoint with a two-level
cache (local SQLite + turso). the keyword search path is untouched.

cleared stale embeddings in turso before deploy so embedder starts fresh.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

+177 -3
+3
backend/src/main.zig
··· 82 82 // start stats buffer (background sync to Turso) 83 83 metrics.buffer.init(); 84 84 85 + // start embedder (voyage-3-lite, 512 dims, 1 worker) 86 + ingest.embedder.start(allocator); 87 + 85 88 // start tap consumer 86 89 ingest.tap.consumer(allocator); 87 90 }
+152
backend/src/search.zig
// ··· (beginning of search.zig elided in the diff)
const zql = @import("zql");
const logfire = @import("logfire");
const db = @import("db/mod.zig");
const metrics = @import("metrics.zig");

// Cached count of documents that have an embedding, and the unix timestamp
// (seconds) at which it was last refreshed. A timestamp of 0 means the count
// has never been fetched.
var cached_doc_count: std.atomic.Value(i64) = std.atomic.Value(i64).init(0);
var doc_count_updated_at: std.atomic.Value(i64) = std.atomic.Value(i64).init(0);
const DOC_COUNT_CACHE_SECS = 300; // 5 minutes

// ··· (elided in the diff: SearchResultJson, the JSON output type for search
// results, and the existing keyword-search code)

/// Find documents similar to the one identified by `uri`, ordered by cosine
/// distance between stored embeddings (brute force, no vector index needed).
///
/// Returns a JSON array as a string allocated with `alloc`; the caller owns it.
/// Lookup order: local cache, then Turso cache, then a fresh similarity query
/// whose result is written back to both caches.
///
/// Cache entries are keyed by `uri` and validated against the embedded-document
/// count, so an entry stops matching once that count changes.
/// NOTE(review): `limit` is not part of the cache key, so a cached entry is
/// returned as-is even if it was computed with a different limit.
///
/// Errors: `error.NotInitialized` when no db client is available,
/// `error.QueryFailed` when the embedded-document count cannot be read, plus
/// allocation/write failures while building the JSON. A failed similarity
/// query is logged and answered with "[]", which is not cached.
pub fn findSimilar(alloc: Allocator, uri: []const u8, limit: usize) ![]const u8 {
    const c = db.getClient() orelse return error.NotInitialized;

    // get cached doc count (rarely hits Turso - refreshes every 5 min)
    const doc_count = getEmbeddedDocCountCached(c) orelse return error.QueryFailed;

    // check LOCAL cache first
    if (db.getLocalDb()) |local| {
        if (getCachedSimilarLocal(alloc, local, uri, doc_count)) |cached| {
            metrics.stats.recordCacheHit();
            return cached;
        }
    }

    // check Turso cache (slower, but needed if local is empty)
    if (getCachedSimilar(alloc, c, uri, doc_count)) |cached| {
        metrics.stats.recordCacheHit();
        // also write to local cache for next time
        if (db.getLocalDb()) |local| {
            cacheSimilarResultsLocal(local, uri, cached, doc_count);
        }
        return cached;
    }
    metrics.stats.recordCacheMiss();

    // cache miss - compute similarity
    var output: std.Io.Writer.Allocating = .init(alloc);
    errdefer output.deinit();

    // 20 bytes hold any 64-bit usize, so the "5" fallback is not expected to
    // be taken; previously an 8-byte buffer silently turned large limits into 5.
    var limit_buf: [20]u8 = undefined;
    const limit_str = std.fmt.bufPrint(&limit_buf, "{d}", .{limit}) catch "5";

    // brute-force cosine similarity search (no vector index needed)
    var res = c.query(
        \\SELECT d2.uri, d2.did, d2.title, '' as snippet,
        \\  d2.created_at, d2.rkey, d2.base_path, d2.has_publication,
        \\  d2.platform, COALESCE(d2.path, '') as path
        \\FROM documents d1, documents d2
        \\WHERE d1.uri = ?
        \\  AND d2.uri != d1.uri
        \\  AND d1.embedding IS NOT NULL
        \\  AND d2.embedding IS NOT NULL
        \\ORDER BY vector_distance_cos(d1.embedding, d2.embedding)
        \\LIMIT ?
    , &.{ uri, limit_str }) catch |err| {
        // Degrade to an empty result, but leave a trace of the failure.
        std.log.warn("findSimilar: similarity query failed: {s}", .{@errorName(err)});
        try output.writer.writeAll("[]");
        return try output.toOwnedSlice();
    };
    defer res.deinit();

    var jw: json.Stringify = .{ .writer = &output.writer };
    try jw.beginArray();
    for (res.rows) |row| try jw.write(Doc.fromRow(row).toJson());
    try jw.endArray();

    const results = try output.toOwnedSlice();

    // cache to LOCAL db
    if (db.getLocalDb()) |local| {
        cacheSimilarResultsLocal(local, uri, results, doc_count);
    }

    // cache to Turso (fire and forget - still useful for durability)
    cacheSimilarResults(c, uri, results, doc_count);

    return results;
}

/// Count the documents that have a non-NULL embedding.
/// Returns null when the query fails or yields no rows.
fn getEmbeddedDocCount(c: *db.Client) ?i64 {
    var res = c.query("SELECT COUNT(*) FROM documents WHERE embedding IS NOT NULL", &.{}) catch return null;
    defer res.deinit();
    if (res.rows.len == 0) return null;
    return res.rows[0].int(0);
}

/// Return the embedded-document count, re-querying at most once every
/// `DOC_COUNT_CACHE_SECS` seconds. Returns null only when a refresh is needed
/// and the underlying query fails.
fn getEmbeddedDocCountCached(c: *db.Client) ?i64 {
    const now = std.time.timestamp();
    const last_update = doc_count_updated_at.load(.acquire);

    // Use the cached value while it is fresh. `last_update` starts at 0, so the
    // very first call always refreshes. A legitimate count of 0 is cached like
    // any other value; otherwise every request would re-run COUNT(*) while no
    // document has an embedding yet.
    if (now - last_update < DOC_COUNT_CACHE_SECS) {
        return cached_doc_count.load(.acquire);
    }

    // refresh from Turso; store the count before the timestamp so a reader
    // that observes the new timestamp also observes the new count
    const count = getEmbeddedDocCount(c) orelse return null;
    cached_doc_count.store(count, .release);
    doc_count_updated_at.store(now, .release);
    return count;
}

/// Look up cached results for `uri` in Turso that were stored under
/// `current_doc_count`. Returns a copy allocated with `alloc` (caller owns it),
/// or null on a miss, a query failure, or an allocation failure.
fn getCachedSimilar(alloc: Allocator, c: *db.Client, uri: []const u8, current_doc_count: i64) ?[]const u8 {
    var count_buf: [20]u8 = undefined;
    const count_str = std.fmt.bufPrint(&count_buf, "{d}", .{current_doc_count}) catch return null;

    var res = c.query(
        "SELECT results FROM similarity_cache WHERE source_uri = ? AND doc_count = ?",
        &.{ uri, count_str },
    ) catch return null;
    defer res.deinit();

    if (res.rows.len == 0) return null;
    return alloc.dupe(u8, res.rows[0].text(0)) catch null;
}

/// Store `results` for `uri` in the Turso similarity cache, replacing any
/// existing row. Best effort: a failed write is logged and otherwise ignored.
fn cacheSimilarResults(c: *db.Client, uri: []const u8, results: []const u8, doc_count: i64) void {
    var count_buf: [20]u8 = undefined;
    const count_str = std.fmt.bufPrint(&count_buf, "{d}", .{doc_count}) catch return;

    var ts_buf: [20]u8 = undefined;
    const ts_str = std.fmt.bufPrint(&ts_buf, "{d}", .{std.time.timestamp()}) catch return;

    c.exec(
        "INSERT OR REPLACE INTO similarity_cache (source_uri, results, doc_count, computed_at) VALUES (?, ?, ?, ?)",
        &.{ uri, results, count_str, ts_str },
    ) catch |err| std.log.warn("similarity cache write (turso) failed: {s}", .{@errorName(err)});
}

/// Look up cached results for `uri` in the local db. The entry is used only
/// when its stored doc_count equals `current_doc_count`. Returns a copy
/// allocated with `alloc` (caller owns it), or null on a miss, a stale entry,
/// a query failure, or an allocation failure.
fn getCachedSimilarLocal(alloc: Allocator, local: *db.LocalDb, uri: []const u8, current_doc_count: i64) ?[]const u8 {
    var rows = local.query(
        "SELECT results, doc_count FROM similarity_cache WHERE source_uri = ?",
        .{uri},
    ) catch return null;
    defer rows.deinit();

    const row = rows.next() orelse return null;
    // check doc_count matches for cache validity
    if (row.int(1) != current_doc_count) return null;
    return alloc.dupe(u8, row.text(0)) catch null;
}

/// Store `results` for `uri` in the local similarity cache, replacing any
/// existing row. Best effort: a failed write is logged and otherwise ignored.
fn cacheSimilarResultsLocal(local: *db.LocalDb, uri: []const u8, results: []const u8, doc_count: i64) void {
    var count_buf: [20]u8 = undefined;
    const count_str = std.fmt.bufPrint(&count_buf, "{d}", .{doc_count}) catch return;

    var ts_buf: [20]u8 = undefined;
    const ts_str = std.fmt.bufPrint(&ts_buf, "{d}", .{std.time.timestamp()}) catch return;

    local.exec(
        "INSERT OR REPLACE INTO similarity_cache (source_uri, results, doc_count, computed_at) VALUES (?, ?, ?, ?)",
        .{ uri, results, count_str, ts_str },
    ) catch |err| std.log.warn("similarity cache write (local) failed: {s}", .{@errorName(err)});
}

// ··· (elided in the diff) the next declaration in search.zig is documented as:
// Build FTS5 query with OR between terms: "cat dog" -> "cat OR dog*"
// Uses OR for better recall with BM25 ranking (more matches = higher score)
+22 -3
backend/src/server.zig
// ··· (preceding code in server.zig elided in the diff)

/// Handle the /similar endpoint: read the `uri` query parameter from `target`
/// and respond with a JSON array of documents similar to it.
///
/// Responds with a JSON error object when the `uri` parameter cannot be
/// parsed, and with "[]" when the similarity lookup fails (the failure is
/// logged). Errors from writing the response are returned to the caller.
fn handleSimilar(request: *http.Server.Request, target: []const u8) !void {
    // number of similar documents returned per request
    const similar_limit: usize = 5;

    const start_time = std.time.microTimestamp();
    defer metrics.timing.record(.similar, start_time);

    // per-request arena: everything allocated below is freed on return
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    const uri = parseQueryParam(alloc, target, "uri") catch {
        try sendJson(request, "{\"error\":\"missing uri parameter\"}");
        return;
    };

    // span attributes are copied internally, safe to use arena strings
    const span = logfire.span("http.similar", .{ .uri = uri });
    defer span.end();

    const results = search.findSimilar(alloc, uri, similar_limit) catch |err| {
        // Still answer with an empty list, but record why so that a backend
        // failure is distinguishable from "no similar documents".
        std.log.warn("handleSimilar: findSimilar failed: {s}", .{@errorName(err)});
        try sendJson(request, "[]");
        return;
    };

    try sendJson(request, results);
}

// ··· (handleActivity and the rest of server.zig follow, elided in the diff)