search for standard sites pub-search.waow.tech
search zig blog atproto

feat: add author filter across the stack

- backend: parse the ?author= query param; values not starting with
"did:" are treated as handles and resolved via zat's HandleResolver
(HTTP .well-known first, DNS-over-HTTPS fallback); post-filter all
search modes by DID; add DocsByAuthor/DocsByAuthorAndPlatform queries
for browse-by-author
- frontend: currentAuthor state, setAuthor/clearAuthor, author chip in
active filters, click author name to filter, ?author= URL param support
- MCP: author param on search/search_semantic/search_hybrid tools
- OG tags: pass author through to title/description/image URL
- deps: upgrade zat v0.1.9 → v0.2.13, align websocket to zat's fork

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

+219 -36
+4 -4
backend/build.zig.zon
··· 5 .minimum_zig_version = "0.15.0", 6 .dependencies = .{ 7 .websocket = .{ 8 - .url = "https://github.com/karlseguin/websocket.zig/archive/refs/heads/master.tar.gz", 9 - .hash = "websocket-0.1.0-ZPISdRlzAwBB_Bz2UMMqxYqF6YEVTIBoFsbzwPUJTHIc", 10 }, 11 .zql = .{ 12 .url = "https://github.com/zzstoatzz/zql/archive/main.tar.gz", 13 .hash = "zql-0.0.1-alpha-xNRI4IRNAABUb9gLat5FWUaZDD5HvxAxet_-elgR_A_y", 14 }, 15 .zat = .{ 16 - .url = "https://tangled.sh/zat.dev/zat/archive/v0.1.9", 17 - .hash = "zat-0.1.9-5PuC7tL5AwAgHHJXdOHTCy373NtwQW7cE2nfB7rq4yx_", 18 }, 19 .zqlite = .{ 20 .url = "https://github.com/karlseguin/zqlite.zig/archive/refs/heads/master.tar.gz",
··· 5 .minimum_zig_version = "0.15.0", 6 .dependencies = .{ 7 .websocket = .{ 8 + .url = "https://github.com/zzstoatzz/websocket.zig/archive/9e6d732b207bdb0cb5fe5efb37a8173ac9638051.tar.gz", 9 + .hash = "websocket-0.1.0-ZPISdeJ2AwC8rczCVo9NwFzIzW7EdvoXlNkNR_P-bdaf", 10 }, 11 .zql = .{ 12 .url = "https://github.com/zzstoatzz/zql/archive/main.tar.gz", 13 .hash = "zql-0.0.1-alpha-xNRI4IRNAABUb9gLat5FWUaZDD5HvxAxet_-elgR_A_y", 14 }, 15 .zat = .{ 16 + .url = "https://tangled.sh/zat.dev/zat/archive/v0.2.13", 17 + .hash = "zat-0.2.13-5PuC7tDBBAAchi_u_Myjr1hVhDbOollod03nbXqXHFn_", 18 }, 19 .zqlite = .{ 20 .url = "https://github.com/karlseguin/zqlite.zig/archive/refs/heads/master.tar.gz",
+28 -2
backend/src/server.zig
··· 6 const Allocator = mem.Allocator; 7 const logfire = @import("logfire"); 8 const zql = @import("zql"); 9 const db = @import("db.zig"); 10 const metrics = @import("metrics.zig"); 11 const search = @import("server/search.zig"); ··· 105 const tag_filter = parseQueryParam(alloc, target, "tag") catch null; 106 const platform_filter = parseQueryParam(alloc, target, "platform") catch null; 107 const since_filter = parseQueryParam(alloc, target, "since") catch null; 108 const mode_str = parseQueryParam(alloc, target, "mode") catch null; 109 const mode = search.SearchMode.fromString(mode_str); 110 const format = parseQueryParam(alloc, target, "format") catch "v1"; ··· 113 const limit = if (limit_str) |s| std.fmt.parseInt(usize, s, 10) catch 20 else 20; 114 const offset = if (offset_str) |s| std.fmt.parseInt(usize, s, 10) catch 0 else 0; 115 116 // record per-mode latency 117 const timing_endpoint: metrics.timing.Endpoint = switch (mode) { 118 .keyword => .search_keyword, ··· 126 .query = query, 127 .tag = tag_filter, 128 .platform = platform_filter, 129 .mode = @tagName(mode), 130 }); 131 defer span.end(); 132 133 - if (query.len == 0 and tag_filter == null) { 134 try sendJson(request, "{\"error\":\"enter a search term\"}"); 135 return; 136 } 137 138 // perform search - arena handles cleanup 139 - const results = search.search(alloc, query, tag_filter, platform_filter, since_filter, mode) catch |err| { 140 logfire.err("search failed: {}", .{err}); 141 metrics.stats.recordError(); 142 return err; ··· 564 } 565 try jw.endArray(); 566 return try output.toOwnedSlice(); 567 } 568 569 fn handleActivity(request: *http.Server.Request) !void {
··· 6 const Allocator = mem.Allocator; 7 const logfire = @import("logfire"); 8 const zql = @import("zql"); 9 + const zat = @import("zat"); 10 const db = @import("db.zig"); 11 const metrics = @import("metrics.zig"); 12 const search = @import("server/search.zig"); ··· 106 const tag_filter = parseQueryParam(alloc, target, "tag") catch null; 107 const platform_filter = parseQueryParam(alloc, target, "platform") catch null; 108 const since_filter = parseQueryParam(alloc, target, "since") catch null; 109 + const author_param = parseQueryParam(alloc, target, "author") catch null; 110 const mode_str = parseQueryParam(alloc, target, "mode") catch null; 111 const mode = search.SearchMode.fromString(mode_str); 112 const format = parseQueryParam(alloc, target, "format") catch "v1"; ··· 115 const limit = if (limit_str) |s| std.fmt.parseInt(usize, s, 10) catch 20 else 20; 116 const offset = if (offset_str) |s| std.fmt.parseInt(usize, s, 10) catch 0 else 0; 117 118 + // resolve author param: if it's a handle (not a DID), resolve via AT Protocol 119 + const author_filter: ?[]const u8 = if (author_param) |ap| blk: { 120 + if (mem.startsWith(u8, ap, "did:")) break :blk ap; 121 + break :blk resolveHandle(alloc, ap) catch null; 122 + } else null; 123 + 124 // record per-mode latency 125 const timing_endpoint: metrics.timing.Endpoint = switch (mode) { 126 .keyword => .search_keyword, ··· 134 .query = query, 135 .tag = tag_filter, 136 .platform = platform_filter, 137 + .author = author_filter, 138 .mode = @tagName(mode), 139 }); 140 defer span.end(); 141 142 + if (query.len == 0 and tag_filter == null and author_filter == null) { 143 try sendJson(request, "{\"error\":\"enter a search term\"}"); 144 return; 145 } 146 147 // perform search - arena handles cleanup 148 + const results = search.search(alloc, query, tag_filter, platform_filter, since_filter, author_filter, mode) catch |err| { 149 logfire.err("search failed: {}", .{err}); 150 metrics.stats.recordError(); 151 return err; ··· 
573 } 574 try jw.endArray(); 575 return try output.toOwnedSlice(); 576 + } 577 + 578 + /// Resolve an AT Protocol handle to a DID via zat's HandleResolver. 579 + /// Tries HTTP .well-known first, falls back to DNS-over-HTTPS. 580 + fn resolveHandle(alloc: std.mem.Allocator, handle: []const u8) ![]const u8 { 581 + const parsed = zat.Handle.parse(handle) orelse { 582 + logfire.warn("resolveHandle: invalid handle: {s}", .{handle}); 583 + return error.InvalidHandle; 584 + }; 585 + 586 + var resolver = zat.HandleResolver.init(alloc); 587 + defer resolver.deinit(); 588 + 589 + return resolver.resolve(parsed) catch |err| { 590 + logfire.warn("resolveHandle: failed for {s}: {}", .{ handle, err }); 591 + return error.ResolveFailed; 592 + }; 593 } 594 595 fn handleActivity(request: *http.Server.Request) !void {
+92 -14
backend/src/server/search.zig
··· 204 \\ORDER BY d.created_at DESC LIMIT 40 205 ); 206 207 // Find documents by their publication's base_path (subdomain search) 208 // e.g., searching "gyst" finds all docs on gyst.leaflet.pub 209 // Uses recency decay: recent docs rank higher than old ones with same match ··· 321 \\ORDER BY rank + (julianday('now') - julianday(p.created_at)) / 30.0 LIMIT 10 322 ); 323 324 - pub fn search(alloc: Allocator, query: []const u8, tag_filter: ?[]const u8, platform_filter: ?[]const u8, since_filter: ?[]const u8, mode: SearchMode) ![]const u8 { 325 - if (mode == .hybrid) return searchHybrid(alloc, query, tag_filter, platform_filter, since_filter); 326 - if (mode == .semantic) return searchSemantic(alloc, query, platform_filter); 327 - return searchKeyword(alloc, query, tag_filter, platform_filter, since_filter); 328 } 329 330 /// Check if we've already seen a result from the same author with the same title. ··· 341 } 342 343 /// Keyword search: FTS5 via local SQLite or Turso fallback. 344 - fn searchKeyword(alloc: Allocator, query: []const u8, tag_filter: ?[]const u8, platform_filter: ?[]const u8, since_filter: ?[]const u8) ![]const u8 { 345 // try local SQLite first (faster for FTS queries) 346 if (db.getLocalDb()) |local| { 347 - if (searchLocal(alloc, local, query, tag_filter, platform_filter, since_filter)) |result| { 348 logfire.info("search.local hit", .{}); 349 return result; 350 } else |err| { ··· 378 var seen_authors = std.StringHashMap(void).init(alloc); 379 defer seen_authors.deinit(); 380 381 // build batch of queries to execute in single HTTP request 382 var statements: [3]db.Client.Statement = undefined; 383 var stmt_count: usize = 0; ··· 429 if (doc_sql != null) { 430 for (batch.get(query_idx)) |row| { 431 const doc = Doc.fromRow(row); 432 if (try isDuplicateAuthorTitle(&seen_authors, alloc, doc.did, doc.title)) continue; 433 const uri_dupe = try alloc.dupe(u8, doc.uri); 434 try seen_uris.put(uri_dupe, {}); ··· 441 if (run_basepath) { 442 for 
(batch.get(query_idx)) |row| { 443 const doc = Doc.fromRow(row); 444 if (!seen_uris.contains(doc.uri) and !try isDuplicateAuthorTitle(&seen_authors, alloc, doc.did, doc.title)) { 445 try jw.write(doc.toJson()); 446 } ··· 451 // process query 2: publication results 452 if (run_pubs) { 453 for (batch.get(query_idx)) |row| { 454 - try jw.write(Pub.fromRow(row).toJson()); 455 } 456 } 457 ··· 461 462 /// Local SQLite search (FTS queries only, no vector similarity) 463 /// Simplified version - just handles basic FTS query case to get started 464 - fn searchLocal(alloc: Allocator, local: *db.LocalDb, query: []const u8, tag_filter: ?[]const u8, platform_filter: ?[]const u8, since_filter: ?[]const u8) ![]const u8 { 465 // only handle basic FTS queries for now (most common case) 466 - // fall back to Turso for complex filter combinations 467 if (query.len == 0 or tag_filter != null) { 468 return error.UnsupportedQuery; 469 } ··· 501 502 while (rows.next()) |row| { 503 const doc = Doc.fromLocalRow(row); 504 if (since_filter) |since| { 505 if (doc.createdAt.len > 0 and std.mem.order(u8, doc.createdAt, since) == .lt) continue; 506 } ··· 526 527 while (bp_rows.next()) |row| { 528 const doc = Doc.fromLocalRow(row); 529 if (since_filter) |since| { 530 if (doc.createdAt.len > 0 and std.mem.order(u8, doc.createdAt, since) == .lt) continue; 531 } ··· 554 var doc_count: u32 = 0; 555 while (rows.next()) |row| { 556 const doc = Doc.fromLocalRow(row); 557 if (since_filter) |since| { 558 if (doc.createdAt.len > 0 and std.mem.order(u8, doc.createdAt, since) == .lt) continue; 559 } ··· 586 var bp_count: u32 = 0; 587 while (bp_rows.next()) |row| { 588 const doc = Doc.fromLocalRow(row); 589 if (since_filter) |since| { 590 if (doc.createdAt.len > 0 and std.mem.order(u8, doc.createdAt, since) == .lt) { 591 bp_count += 1; ··· 617 defer iter_span.end(); 618 var pub_count: u32 = 0; 619 while (pub_rows.next()) |row| { 620 - try jw.write(Pub.fromLocalRow(row).toJson()); 621 pub_count += 1; 622 } 623 
logfire.info("search.iterate.pubs_fts rows={d}", .{pub_count}); ··· 782 783 /// Hybrid search: run keyword + semantic, merge with Reciprocal Rank Fusion. 784 /// score(doc) = 1/(k + rank_keyword) + 1/(k + rank_semantic), k=60 785 - fn searchHybrid(alloc: Allocator, query: []const u8, tag_filter: ?[]const u8, platform_filter: ?[]const u8, since_filter: ?[]const u8) ![]const u8 { 786 if (query.len == 0) return try alloc.dupe(u8, "[]"); 787 788 const span = logfire.span("search.hybrid", .{}); 789 defer span.end(); 790 791 // 1. keyword search (~10ms via local SQLite) 792 - const kw_json = searchKeyword(alloc, query, tag_filter, platform_filter, since_filter) catch |err| blk: { 793 logfire.warn("search.hybrid: keyword failed: {}", .{err}); 794 break :blk try alloc.dupe(u8, "[]"); 795 }; 796 797 // 2. semantic search (~550ms via voyage + tpuf) 798 - const sem_json = searchSemantic(alloc, query, platform_filter) catch |err| blk: { 799 logfire.warn("search.hybrid: semantic failed: {}", .{err}); 800 break :blk try alloc.dupe(u8, "[]"); 801 }; ··· 1002 } 1003 1004 /// Semantic search: embed query via Voyage, ANN search via turbopuffer. 1005 - fn searchSemantic(alloc: Allocator, query: []const u8, platform_filter: ?[]const u8) ![]const u8 { 1006 if (query.len == 0) return try alloc.dupe(u8, "[]"); 1007 1008 if (!tpuf.isSemanticEnabled()) { ··· 1054 if (r.title.len == 0) continue; 1055 if (platform_filter) |pf| { 1056 if (!std.mem.eql(u8, r.platform, pf)) continue; 1057 } 1058 var is_dup = false; 1059 for (seen[0..seen_count]) |s| {
··· 204 \\ORDER BY d.created_at DESC LIMIT 40 205 ); 206 207 + const DocsByAuthor = zql.Query( 208 + \\SELECT d.uri, d.did, d.title, '' as snippet, 209 + \\ d.created_at, d.rkey, d.base_path, d.has_publication, 210 + \\ d.platform, COALESCE(d.path, '') as path, 211 + \\ COALESCE(d.cover_image, '') as cover_image 212 + \\FROM documents d 213 + \\WHERE d.did = :author 214 + \\ORDER BY d.created_at DESC LIMIT 40 215 + ); 216 + 217 + const DocsByAuthorAndPlatform = zql.Query( 218 + \\SELECT d.uri, d.did, d.title, '' as snippet, 219 + \\ d.created_at, d.rkey, d.base_path, d.has_publication, 220 + \\ d.platform, COALESCE(d.path, '') as path, 221 + \\ COALESCE(d.cover_image, '') as cover_image 222 + \\FROM documents d 223 + \\WHERE d.did = :author AND d.platform = :platform 224 + \\ORDER BY d.created_at DESC LIMIT 40 225 + ); 226 + 227 // Find documents by their publication's base_path (subdomain search) 228 // e.g., searching "gyst" finds all docs on gyst.leaflet.pub 229 // Uses recency decay: recent docs rank higher than old ones with same match ··· 341 \\ORDER BY rank + (julianday('now') - julianday(p.created_at)) / 30.0 LIMIT 10 342 ); 343 344 + pub fn search(alloc: Allocator, query: []const u8, tag_filter: ?[]const u8, platform_filter: ?[]const u8, since_filter: ?[]const u8, author_filter: ?[]const u8, mode: SearchMode) ![]const u8 { 345 + if (mode == .hybrid) return searchHybrid(alloc, query, tag_filter, platform_filter, since_filter, author_filter); 346 + if (mode == .semantic) return searchSemantic(alloc, query, platform_filter, author_filter); 347 + return searchKeyword(alloc, query, tag_filter, platform_filter, since_filter, author_filter); 348 } 349 350 /// Check if we've already seen a result from the same author with the same title. ··· 361 } 362 363 /// Keyword search: FTS5 via local SQLite or Turso fallback. 
364 + fn searchKeyword(alloc: Allocator, query: []const u8, tag_filter: ?[]const u8, platform_filter: ?[]const u8, since_filter: ?[]const u8, author_filter: ?[]const u8) ![]const u8 { 365 // try local SQLite first (faster for FTS queries) 366 if (db.getLocalDb()) |local| { 367 + if (searchLocal(alloc, local, query, tag_filter, platform_filter, since_filter, author_filter)) |result| { 368 logfire.info("search.local hit", .{}); 369 return result; 370 } else |err| { ··· 398 var seen_authors = std.StringHashMap(void).init(alloc); 399 defer seen_authors.deinit(); 400 401 + // author-only browse: no FTS query needed, just fetch by DID 402 + if (author_filter != null and !has_query and !has_tag) { 403 + if (has_platform) { 404 + var res = c.query(DocsByAuthorAndPlatform.positional, &.{ author_filter.?, platform_filter.? }) catch { 405 + try jw.endArray(); 406 + return try output.toOwnedSlice(); 407 + }; 408 + defer res.deinit(); 409 + for (res.rows) |row| { 410 + const doc = Doc.fromRow(row); 411 + if (try isDuplicateAuthorTitle(&seen_authors, alloc, doc.did, doc.title)) continue; 412 + try jw.write(doc.toJson()); 413 + } 414 + } else { 415 + var res = c.query(DocsByAuthor.positional, &.{author_filter.?}) catch { 416 + try jw.endArray(); 417 + return try output.toOwnedSlice(); 418 + }; 419 + defer res.deinit(); 420 + for (res.rows) |row| { 421 + const doc = Doc.fromRow(row); 422 + if (try isDuplicateAuthorTitle(&seen_authors, alloc, doc.did, doc.title)) continue; 423 + try jw.write(doc.toJson()); 424 + } 425 + } 426 + try jw.endArray(); 427 + return try output.toOwnedSlice(); 428 + } 429 + 430 // build batch of queries to execute in single HTTP request 431 var statements: [3]db.Client.Statement = undefined; 432 var stmt_count: usize = 0; ··· 478 if (doc_sql != null) { 479 for (batch.get(query_idx)) |row| { 480 const doc = Doc.fromRow(row); 481 + if (author_filter) |af| { 482 + if (!std.mem.eql(u8, doc.did, af)) continue; 483 + } 484 if (try 
isDuplicateAuthorTitle(&seen_authors, alloc, doc.did, doc.title)) continue; 485 const uri_dupe = try alloc.dupe(u8, doc.uri); 486 try seen_uris.put(uri_dupe, {}); ··· 493 if (run_basepath) { 494 for (batch.get(query_idx)) |row| { 495 const doc = Doc.fromRow(row); 496 + if (author_filter) |af| { 497 + if (!std.mem.eql(u8, doc.did, af)) continue; 498 + } 499 if (!seen_uris.contains(doc.uri) and !try isDuplicateAuthorTitle(&seen_authors, alloc, doc.did, doc.title)) { 500 try jw.write(doc.toJson()); 501 } ··· 506 // process query 2: publication results 507 if (run_pubs) { 508 for (batch.get(query_idx)) |row| { 509 + const pub_result = Pub.fromRow(row); 510 + if (author_filter) |af| { 511 + if (!std.mem.eql(u8, pub_result.did, af)) continue; 512 + } 513 + try jw.write(pub_result.toJson()); 514 } 515 } 516 ··· 520 521 /// Local SQLite search (FTS queries only, no vector similarity) 522 /// Simplified version - just handles basic FTS query case to get started 523 + fn searchLocal(alloc: Allocator, local: *db.LocalDb, query: []const u8, tag_filter: ?[]const u8, platform_filter: ?[]const u8, since_filter: ?[]const u8, author_filter: ?[]const u8) ![]const u8 { 524 // only handle basic FTS queries for now (most common case) 525 + // fall back to Turso for complex filter combinations and author-only browse 526 if (query.len == 0 or tag_filter != null) { 527 return error.UnsupportedQuery; 528 } ··· 560 561 while (rows.next()) |row| { 562 const doc = Doc.fromLocalRow(row); 563 + if (author_filter) |af| { 564 + if (!std.mem.eql(u8, doc.did, af)) continue; 565 + } 566 if (since_filter) |since| { 567 if (doc.createdAt.len > 0 and std.mem.order(u8, doc.createdAt, since) == .lt) continue; 568 } ··· 588 589 while (bp_rows.next()) |row| { 590 const doc = Doc.fromLocalRow(row); 591 + if (author_filter) |af| { 592 + if (!std.mem.eql(u8, doc.did, af)) continue; 593 + } 594 if (since_filter) |since| { 595 if (doc.createdAt.len > 0 and std.mem.order(u8, doc.createdAt, since) == .lt) 
continue; 596 } ··· 619 var doc_count: u32 = 0; 620 while (rows.next()) |row| { 621 const doc = Doc.fromLocalRow(row); 622 + if (author_filter) |af| { 623 + if (!std.mem.eql(u8, doc.did, af)) continue; 624 + } 625 if (since_filter) |since| { 626 if (doc.createdAt.len > 0 and std.mem.order(u8, doc.createdAt, since) == .lt) continue; 627 } ··· 654 var bp_count: u32 = 0; 655 while (bp_rows.next()) |row| { 656 const doc = Doc.fromLocalRow(row); 657 + if (author_filter) |af| { 658 + if (!std.mem.eql(u8, doc.did, af)) { bp_count += 1; continue; } 659 + } 660 if (since_filter) |since| { 661 if (doc.createdAt.len > 0 and std.mem.order(u8, doc.createdAt, since) == .lt) { 662 bp_count += 1; ··· 688 defer iter_span.end(); 689 var pub_count: u32 = 0; 690 while (pub_rows.next()) |row| { 691 + const pub_result = Pub.fromLocalRow(row); 692 + if (author_filter) |af| { 693 + if (!std.mem.eql(u8, pub_result.did, af)) { pub_count += 1; continue; } 694 + } 695 + try jw.write(pub_result.toJson()); 696 pub_count += 1; 697 } 698 logfire.info("search.iterate.pubs_fts rows={d}", .{pub_count}); ··· 857 858 /// Hybrid search: run keyword + semantic, merge with Reciprocal Rank Fusion. 859 /// score(doc) = 1/(k + rank_keyword) + 1/(k + rank_semantic), k=60 860 + fn searchHybrid(alloc: Allocator, query: []const u8, tag_filter: ?[]const u8, platform_filter: ?[]const u8, since_filter: ?[]const u8, author_filter: ?[]const u8) ![]const u8 { 861 if (query.len == 0) return try alloc.dupe(u8, "[]"); 862 863 const span = logfire.span("search.hybrid", .{}); 864 defer span.end(); 865 866 // 1. keyword search (~10ms via local SQLite) 867 + const kw_json = searchKeyword(alloc, query, tag_filter, platform_filter, since_filter, author_filter) catch |err| blk: { 868 logfire.warn("search.hybrid: keyword failed: {}", .{err}); 869 break :blk try alloc.dupe(u8, "[]"); 870 }; 871 872 // 2. 
semantic search (~550ms via voyage + tpuf) 873 + const sem_json = searchSemantic(alloc, query, platform_filter, author_filter) catch |err| blk: { 874 logfire.warn("search.hybrid: semantic failed: {}", .{err}); 875 break :blk try alloc.dupe(u8, "[]"); 876 }; ··· 1077 } 1078 1079 /// Semantic search: embed query via Voyage, ANN search via turbopuffer. 1080 + fn searchSemantic(alloc: Allocator, query: []const u8, platform_filter: ?[]const u8, author_filter: ?[]const u8) ![]const u8 { 1081 if (query.len == 0) return try alloc.dupe(u8, "[]"); 1082 1083 if (!tpuf.isSemanticEnabled()) { ··· 1129 if (r.title.len == 0) continue; 1130 if (platform_filter) |pf| { 1131 if (!std.mem.eql(u8, r.platform, pf)) continue; 1132 + } 1133 + if (author_filter) |af| { 1134 + if (!std.mem.eql(u8, r.did, af)) continue; 1135 } 1136 var is_dup = false; 1137 for (seen[0..seen_count]) |s| {
+15 -2
mcp/src/pub_search/server.py
··· 27 28 ## tools 29 30 - - `search(query, tag, platform, since)` - keyword search with filters 31 - `search_semantic(query)` - meaning-based search (natural language queries) 32 - `search_hybrid(query)` - combined keyword + semantic with source annotations 33 - `get_document(uri)` - fetch full content by AT-URI ··· 66 67 - prefix matching on last word: "cat dog" matches "cat dogs" 68 - combine filters: `search("python", tag="tutorial", platform="leaflet")` 69 - use `since="2025-01-01"` for recent content 70 - `search_semantic("natural language query")` for meaning-based search 71 - `search_hybrid("query")` for best of both — results show `source` field ··· 97 tag: str | None = None, 98 platform: Platform | None = None, 99 since: str | None = None, 100 limit: int = 5, 101 ) -> list[SearchResult]: 102 """search documents and publications. ··· 106 tag: filter by tag 107 platform: filter by platform (leaflet, pckt, offprint, greengale, whitewind, other) 108 since: ISO date - only documents created after this date 109 limit: max results (default 5, max 40) 110 111 returns: 112 list of results with uri, title, snippet, platform, and web url 113 """ 114 - if not query and not tag: 115 return [] 116 117 params: dict[str, Any] = {"format": "v2", "limit": str(limit)} ··· 123 params["platform"] = platform 124 if since: 125 params["since"] = since 126 127 async with get_http_client() as client: 128 response = await client.get("/search", params=params) ··· 137 async def search_semantic( 138 query: str, 139 platform: Platform | None = None, 140 limit: int = 5, 141 ) -> list[SearchResult]: 142 """semantic search using vector embeddings. 
··· 148 args: 149 query: natural language query 150 platform: filter by platform (leaflet, pckt, offprint, greengale, whitewind, other) 151 limit: max results (default 5, max 40) 152 153 returns: ··· 156 params: dict[str, Any] = {"q": query, "mode": "semantic", "format": "v2", "limit": str(limit)} 157 if platform: 158 params["platform"] = platform 159 160 async with get_http_client() as client: 161 response = await client.get("/search", params=params) ··· 173 async def search_hybrid( 174 query: str, 175 platform: Platform | None = None, 176 limit: int = 5, 177 ) -> list[SearchResult]: 178 """hybrid search combining keyword and semantic results. ··· 185 args: 186 query: search query 187 platform: filter by platform (leaflet, pckt, offprint, greengale, whitewind, other) 188 limit: max results (default 5, max 40) 189 190 returns: ··· 193 params: dict[str, Any] = {"q": query, "mode": "hybrid", "format": "v2", "limit": str(limit)} 194 if platform: 195 params["platform"] = platform 196 197 async with get_http_client() as client: 198 response = await client.get("/search", params=params)
··· 27 28 ## tools 29 30 + - `search(query, tag, platform, since, author)` - keyword search with filters 31 - `search_semantic(query)` - meaning-based search (natural language queries) 32 - `search_hybrid(query)` - combined keyword + semantic with source annotations 33 - `get_document(uri)` - fetch full content by AT-URI ··· 66 67 - prefix matching on last word: "cat dog" matches "cat dogs" 68 - combine filters: `search("python", tag="tutorial", platform="leaflet")` 69 + - filter by author: `search("python", author="nate.bsky.social")` or `search("", author="did:plc:xyz")` 70 - use `since="2025-01-01"` for recent content 71 - `search_semantic("natural language query")` for meaning-based search 72 - `search_hybrid("query")` for best of both — results show `source` field ··· 98 tag: str | None = None, 99 platform: Platform | None = None, 100 since: str | None = None, 101 + author: str | None = None, 102 limit: int = 5, 103 ) -> list[SearchResult]: 104 """search documents and publications. ··· 108 tag: filter by tag 109 platform: filter by platform (leaflet, pckt, offprint, greengale, whitewind, other) 110 since: ISO date - only documents created after this date 111 + author: filter by author (DID like "did:plc:xyz" or handle like "nate.bsky.social") 112 limit: max results (default 5, max 40) 113 114 returns: 115 list of results with uri, title, snippet, platform, and web url 116 """ 117 + if not query and not tag and not author: 118 return [] 119 120 params: dict[str, Any] = {"format": "v2", "limit": str(limit)} ··· 126 params["platform"] = platform 127 if since: 128 params["since"] = since 129 + if author: 130 + params["author"] = author 131 132 async with get_http_client() as client: 133 response = await client.get("/search", params=params) ··· 142 async def search_semantic( 143 query: str, 144 platform: Platform | None = None, 145 + author: str | None = None, 146 limit: int = 5, 147 ) -> list[SearchResult]: 148 """semantic search using vector embeddings. 
··· 154 args: 155 query: natural language query 156 platform: filter by platform (leaflet, pckt, offprint, greengale, whitewind, other) 157 + author: filter by author (DID like "did:plc:xyz" or handle like "nate.bsky.social") 158 limit: max results (default 5, max 40) 159 160 returns: ··· 163 params: dict[str, Any] = {"q": query, "mode": "semantic", "format": "v2", "limit": str(limit)} 164 if platform: 165 params["platform"] = platform 166 + if author: 167 + params["author"] = author 168 169 async with get_http_client() as client: 170 response = await client.get("/search", params=params) ··· 182 async def search_hybrid( 183 query: str, 184 platform: Platform | None = None, 185 + author: str | None = None, 186 limit: int = 5, 187 ) -> list[SearchResult]: 188 """hybrid search combining keyword and semantic results. ··· 195 args: 196 query: search query 197 platform: filter by platform (leaflet, pckt, offprint, greengale, whitewind, other) 198 + author: filter by author (DID like "did:plc:xyz" or handle like "nate.bsky.social") 199 limit: max results (default 5, max 40) 200 201 returns: ··· 204 params: dict[str, Any] = {"q": query, "mode": "hybrid", "format": "v2", "limit": str(limit)} 205 if platform: 206 params["platform"] = platform 207 + if author: 208 + params["author"] = author 209 210 async with get_http_client() as client: 211 response = await client.get("/search", params=params)
+7 -3
site/functions/[[path]].js
··· 24 25 let suffix = ''; 26 const modifiers = []; 27 if (params.platform) modifiers.push(`on ${params.platform}`); 28 if (params.since) { 29 const preset = presetFromSince(params.since); ··· 47 else if (params.tag) parts.push(`documents tagged #${params.tag}`); 48 else parts.push('search results'); 49 50 if (params.platform) parts.push(`on ${params.platform}`); 51 if (params.since) { 52 const preset = presetFromSince(params.since); ··· 71 const tag = url.searchParams.get('tag'); 72 const platform = url.searchParams.get('platform'); 73 const since = url.searchParams.get('since'); 74 const mode = url.searchParams.get('mode'); 75 76 // if no search params, pass through (static tags in index.html are fine) 77 - if (!q && !tag && !platform && !since) { 78 return context.next(); 79 } 80 81 - const title = buildTitle({ q, tag, platform, since }); 82 - const description = buildDescription({ q, tag, platform, since }); 83 84 // build og:image URL with same search params 85 const ogImageUrl = new URL('/og-image', url.origin); ··· 87 if (tag) ogImageUrl.searchParams.set('tag', tag); 88 if (platform) ogImageUrl.searchParams.set('platform', platform); 89 if (since) ogImageUrl.searchParams.set('since', since); 90 if (mode) ogImageUrl.searchParams.set('mode', mode); 91 92 const ogUrl = url.toString();
··· 24 25 let suffix = ''; 26 const modifiers = []; 27 + if (params.author) modifiers.push(`by ${params.author}`); 28 if (params.platform) modifiers.push(`on ${params.platform}`); 29 if (params.since) { 30 const preset = presetFromSince(params.since); ··· 48 else if (params.tag) parts.push(`documents tagged #${params.tag}`); 49 else parts.push('search results'); 50 51 + if (params.author) parts.push(`by ${params.author}`); 52 if (params.platform) parts.push(`on ${params.platform}`); 53 if (params.since) { 54 const preset = presetFromSince(params.since); ··· 73 const tag = url.searchParams.get('tag'); 74 const platform = url.searchParams.get('platform'); 75 const since = url.searchParams.get('since'); 76 + const author = url.searchParams.get('author'); 77 const mode = url.searchParams.get('mode'); 78 79 // if no search params, pass through (static tags in index.html are fine) 80 + if (!q && !tag && !platform && !since && !author) { 81 return context.next(); 82 } 83 84 + const title = buildTitle({ q, tag, platform, since, author }); 85 + const description = buildDescription({ q, tag, platform, since, author }); 86 87 // build og:image URL with same search params 88 const ogImageUrl = new URL('/og-image', url.origin); ··· 90 if (tag) ogImageUrl.searchParams.set('tag', tag); 91 if (platform) ogImageUrl.searchParams.set('platform', platform); 92 if (since) ogImageUrl.searchParams.set('since', since); 93 + if (author) ogImageUrl.searchParams.set('author', author); 94 if (mode) ogImageUrl.searchParams.set('mode', mode); 95 96 const ogUrl = url.toString();
+73 -11
site/index.html
··· 248 color: var(--text-dim); 249 } 250 251 - .author-name a { 252 color: var(--text-secondary); 253 text-decoration: none; 254 } 255 256 - .author-name a:hover { 257 color: #2a9d5c; 258 } 259 260 .entity-type { ··· 595 color: #38bdf8; 596 } 597 598 .filter-chip .x { 599 cursor: pointer; 600 opacity: 0.6; ··· 903 let currentTag = null; 904 let currentPlatform = null; 905 let currentSince = null; 906 let currentMode = 'keyword'; 907 let allTags = []; 908 let popularSearches = []; ··· 923 } 924 925 async function search(query, tag = null, platform = null, append = false) { 926 - if (!query.trim() && !tag && !platform) return; 927 928 if (!append) { 929 searchBtn.disabled = true; ··· 935 if (tag) searchUrl += `&tag=${encodeURIComponent(tag)}`; 936 if (platform) searchUrl += `&platform=${encodeURIComponent(platform)}`; 937 if (currentSince) searchUrl += `&since=${encodeURIComponent(currentSince)}`; 938 if (currentMode !== 'keyword') searchUrl += `&mode=${currentMode}`; 939 940 try { ··· 1099 if (!handle) return; 1100 const nameSpan = el.querySelector('.author-name'); 1101 if (!nameSpan || nameSpan.innerHTML) return; // already populated 1102 - nameSpan.innerHTML = `<a href="https://bsky.app/profile/${escapeHtml(did)}" target="_blank">@${escapeHtml(handle)}</a> · `; 1103 }); 1104 } 1105 ··· 1241 if (currentTag) params.set('tag', currentTag); 1242 if (currentPlatform) params.set('platform', currentPlatform); 1243 if (currentSince) params.set('since', currentSince); 1244 if (currentMode !== 'keyword') params.set('mode', currentMode); 1245 const url = params.toString() ? 
`?${params}` : '/'; 1246 history.pushState(null, '', url); ··· 1267 renderActiveFilter(); 1268 renderTags(); 1269 updateUrl(); 1270 - if (queryInput.value.trim() || currentPlatform) { 1271 search(queryInput.value, null, currentPlatform); 1272 } else { 1273 renderEmptyState(); ··· 1290 renderActiveFilter(); 1291 renderPlatformFilter(); 1292 updateUrl(); 1293 - if (queryInput.value.trim() || currentTag) { 1294 search(queryInput.value, currentTag, null); 1295 } else { 1296 renderEmptyState(); 1297 } 1298 } 1299 1300 function renderPlatformFilter() { 1301 const platforms = [ 1302 { id: 'leaflet', label: 'leaflet' }, ··· 1354 currentSince = sinceFromPreset(preset); 1355 renderDateFilter(); 1356 renderActiveFilter(); 1357 - if (queryInput.value.trim() || currentTag || currentPlatform || currentSince) { 1358 doSearch(); 1359 } 1360 } ··· 1389 renderActiveFilter(); 1390 } 1391 // trigger search if there's a query 1392 - if (queryInput.value.trim() || currentTag || currentPlatform) { 1393 doSearch(); 1394 } 1395 } ··· 1397 const DATE_PRESET_LABELS = { week: 'last week', month: 'last month', year: 'last year' }; 1398 1399 function renderActiveFilter() { 1400 - if (!currentTag && !currentPlatform && !currentSince) { 1401 activeFilterDiv.innerHTML = ''; 1402 return; 1403 } ··· 1411 if (currentSince) { 1412 const label = DATE_PRESET_LABELS[currentDatePreset] || currentSince; 1413 chips.push(`<span class="filter-chip date">${escapeHtml(label)}<span class="x" onclick="setDateFilter(null)">\u00d7</span></span>`); 1414 } 1415 activeFilterDiv.innerHTML = `<div class="active-filters"><span class="active-filters-label">active filters:</span>${chips.join('')}</div>`; 1416 } ··· 1506 currentTag = params.get('tag') || null; 1507 currentPlatform = params.get('platform') || null; 1508 currentSince = params.get('since') || null; 1509 currentDatePreset = presetFromSince(currentSince); 1510 currentMode = params.get('mode') || 'keyword'; 1511 renderActiveFilter(); ··· 1515 renderModeToggle(); 
1516 tagsDiv.style.display = currentMode === 'keyword' ? '' : 'none'; 1517 dateFilterDiv.style.display = currentMode !== 'semantic' ? '' : 'none'; 1518 - if (queryInput.value || currentTag || currentPlatform) search(queryInput.value, currentTag, currentPlatform); 1519 }); 1520 1521 // init ··· 1524 const initialTag = initialParams.get('tag'); 1525 const initialPlatform = initialParams.get('platform'); 1526 const initialSince = initialParams.get('since'); 1527 const initialMode = initialParams.get('mode'); 1528 if (initialQuery) queryInput.value = initialQuery; 1529 if (initialTag) currentTag = initialTag; 1530 if (initialPlatform) currentPlatform = initialPlatform; 1531 if (initialSince) { currentSince = initialSince; currentDatePreset = presetFromSince(initialSince); } 1532 if (initialMode) currentMode = initialMode; 1533 renderActiveFilter(); 1534 renderPlatformFilter(); ··· 1537 tagsDiv.style.display = currentMode === 'keyword' ? '' : 'none'; 1538 dateFilterDiv.style.display = currentMode !== 'semantic' ? '' : 'none'; 1539 1540 - if (initialQuery || initialTag || initialPlatform) { 1541 search(initialQuery || '', initialTag, initialPlatform); 1542 } 1543
··· 248 color: var(--text-dim); 249 } 250 251 + .author-name a, .author-name .author-link { 252 color: var(--text-secondary); 253 text-decoration: none; 254 } 255 256 + .author-name a:hover, .author-name .author-link:hover { 257 color: #2a9d5c; 258 + } 259 + 260 + .author-name .external-link { 261 + font-size: 10px; 262 + margin-left: 2px; 263 + opacity: 0.5; 264 + } 265 + 266 + .author-name .external-link:hover { 267 + opacity: 1; 268 } 269 270 .entity-type { ··· 605 color: #38bdf8; 606 } 607 608 + .filter-chip.author { 609 + background: rgba(42, 157, 92, 0.2); 610 + border: 1px solid #2a9d5c; 611 + color: #34d399; 612 + } 613 + 614 .filter-chip .x { 615 cursor: pointer; 616 opacity: 0.6; ··· 919 let currentTag = null; 920 let currentPlatform = null; 921 let currentSince = null; 922 + let currentAuthor = null; 923 let currentMode = 'keyword'; 924 let allTags = []; 925 let popularSearches = []; ··· 940 } 941 942 async function search(query, tag = null, platform = null, append = false) { 943 + if (!query.trim() && !tag && !platform && !currentAuthor) return; 944 945 if (!append) { 946 searchBtn.disabled = true; ··· 952 if (tag) searchUrl += `&tag=${encodeURIComponent(tag)}`; 953 if (platform) searchUrl += `&platform=${encodeURIComponent(platform)}`; 954 if (currentSince) searchUrl += `&since=${encodeURIComponent(currentSince)}`; 955 + if (currentAuthor) searchUrl += `&author=${encodeURIComponent(currentAuthor)}`; 956 if (currentMode !== 'keyword') searchUrl += `&mode=${currentMode}`; 957 958 try { ··· 1117 if (!handle) return; 1118 const nameSpan = el.querySelector('.author-name'); 1119 if (!nameSpan || nameSpan.innerHTML) return; // already populated 1120 + nameSpan.innerHTML = `<span class="author-link" onclick="setAuthor('${escapeHtml(did)}')" style="cursor:pointer">@${escapeHtml(handle)}</span><a href="https://bsky.app/profile/${escapeHtml(did)}" target="_blank" class="external-link" onclick="event.stopPropagation()" title="view on bsky">\u2197</a> · `; 1121 }); 
1122 } 1123 ··· 1259 if (currentTag) params.set('tag', currentTag); 1260 if (currentPlatform) params.set('platform', currentPlatform); 1261 if (currentSince) params.set('since', currentSince); 1262 + if (currentAuthor) params.set('author', currentAuthor); 1263 if (currentMode !== 'keyword') params.set('mode', currentMode); 1264 const url = params.toString() ? `?${params}` : '/'; 1265 history.pushState(null, '', url); ··· 1286 renderActiveFilter(); 1287 renderTags(); 1288 updateUrl(); 1289 + if (queryInput.value.trim() || currentPlatform || currentAuthor) { 1290 search(queryInput.value, null, currentPlatform); 1291 } else { 1292 renderEmptyState(); ··· 1309 renderActiveFilter(); 1310 renderPlatformFilter(); 1311 updateUrl(); 1312 + if (queryInput.value.trim() || currentTag || currentAuthor) { 1313 search(queryInput.value, currentTag, null); 1314 } else { 1315 renderEmptyState(); 1316 } 1317 } 1318 1319 + function setAuthor(did) { 1320 + if (currentAuthor === did) { 1321 + clearAuthor(); 1322 + return; 1323 + } 1324 + currentAuthor = did; 1325 + renderActiveFilter(); 1326 + doSearch(); 1327 + } 1328 + 1329 + function clearAuthor() { 1330 + currentAuthor = null; 1331 + renderActiveFilter(); 1332 + updateUrl(); 1333 + if (queryInput.value.trim() || currentTag || currentPlatform) { 1334 + search(queryInput.value, currentTag, currentPlatform); 1335 + } else { 1336 + renderEmptyState(); 1337 + } 1338 + } 1339 + 1340 function renderPlatformFilter() { 1341 const platforms = [ 1342 { id: 'leaflet', label: 'leaflet' }, ··· 1394 currentSince = sinceFromPreset(preset); 1395 renderDateFilter(); 1396 renderActiveFilter(); 1397 + if (queryInput.value.trim() || currentTag || currentPlatform || currentSince || currentAuthor) { 1398 doSearch(); 1399 } 1400 } ··· 1429 renderActiveFilter(); 1430 } 1431 // trigger search if there's a query 1432 + if (queryInput.value.trim() || currentTag || currentPlatform || currentAuthor) { 1433 doSearch(); 1434 } 1435 } ··· 1437 const 
DATE_PRESET_LABELS = { week: 'last week', month: 'last month', year: 'last year' }; 1438 1439 function renderActiveFilter() { 1440 + if (!currentTag && !currentPlatform && !currentSince && !currentAuthor) { 1441 activeFilterDiv.innerHTML = ''; 1442 return; 1443 } ··· 1451 if (currentSince) { 1452 const label = DATE_PRESET_LABELS[currentDatePreset] || currentSince; 1453 chips.push(`<span class="filter-chip date">${escapeHtml(label)}<span class="x" onclick="setDateFilter(null)">\u00d7</span></span>`); 1454 + } 1455 + if (currentAuthor) { 1456 + const handle = authorCache.get(currentAuthor); 1457 + const label = handle ? `@${handle}` : currentAuthor.slice(0, 20) + '...'; 1458 + chips.push(`<span class="filter-chip author">${escapeHtml(label)}<span class="x" onclick="clearAuthor()">\u00d7</span></span>`); 1459 } 1460 activeFilterDiv.innerHTML = `<div class="active-filters"><span class="active-filters-label">active filters:</span>${chips.join('')}</div>`; 1461 } ··· 1551 currentTag = params.get('tag') || null; 1552 currentPlatform = params.get('platform') || null; 1553 currentSince = params.get('since') || null; 1554 + currentAuthor = params.get('author') || null; 1555 currentDatePreset = presetFromSince(currentSince); 1556 currentMode = params.get('mode') || 'keyword'; 1557 renderActiveFilter(); ··· 1561 renderModeToggle(); 1562 tagsDiv.style.display = currentMode === 'keyword' ? '' : 'none'; 1563 dateFilterDiv.style.display = currentMode !== 'semantic' ? 
'' : 'none'; 1564 + if (queryInput.value || currentTag || currentPlatform || currentAuthor) search(queryInput.value, currentTag, currentPlatform); 1565 }); 1566 1567 // init ··· 1570 const initialTag = initialParams.get('tag'); 1571 const initialPlatform = initialParams.get('platform'); 1572 const initialSince = initialParams.get('since'); 1573 + const initialAuthor = initialParams.get('author'); 1574 const initialMode = initialParams.get('mode'); 1575 if (initialQuery) queryInput.value = initialQuery; 1576 if (initialTag) currentTag = initialTag; 1577 if (initialPlatform) currentPlatform = initialPlatform; 1578 if (initialSince) { currentSince = initialSince; currentDatePreset = presetFromSince(initialSince); } 1579 + if (initialAuthor) { 1580 + currentAuthor = initialAuthor; 1581 + // resolve author handle if it's a DID and not yet cached 1582 + if (initialAuthor.startsWith('did:') && !authorCache.has(initialAuthor)) { 1583 + fetch(`https://public.api.bsky.app/xrpc/app.bsky.actor.getProfiles?actors=${encodeURIComponent(initialAuthor)}`) 1584 + .then(r => r.ok ? r.json() : null) 1585 + .then(data => { 1586 + if (data?.profiles?.[0]) { 1587 + authorCache.set(data.profiles[0].did, data.profiles[0].handle); 1588 + renderActiveFilter(); 1589 + } 1590 + }) 1591 + .catch(() => {}); 1592 + } 1593 + } 1594 if (initialMode) currentMode = initialMode; 1595 renderActiveFilter(); 1596 renderPlatformFilter(); ··· 1599 tagsDiv.style.display = currentMode === 'keyword' ? '' : 'none'; 1600 dateFilterDiv.style.display = currentMode !== 'semantic' ? '' : 'none'; 1601 1602 + if (initialQuery || initialTag || initialPlatform || initialAuthor) { 1603 search(initialQuery || '', initialTag, initialPlatform); 1604 } 1605