search for standard sites pub-search.waow.tech
search zig blog atproto

feat: search documents by publication subdomain (base_path)

- add base_path column to publications_fts so it is covered by full-text search
- add DocsByPubBasePath queries to find docs via publication match
- deduplicate results when doc matches both content and base_path
- fix use-after-free by duplicating URIs before storing them as hash map keys (the keys must outlive the query result)

searching "gyst" now returns all docs on gyst.leaflet.pub, not just
those with "gyst" in their title or content. the base_path lookup is
skipped when a tag filter is set.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

+267 -7
+5 -1
backend/src/db/schema.zig
··· 44 44 \\CREATE VIRTUAL TABLE IF NOT EXISTS publications_fts USING fts5( 45 45 \\ uri UNINDEXED, 46 46 \\ name, 47 - \\ description 47 + \\ description, 48 + \\ base_path 48 49 \\) 49 50 , &.{}); 50 51 ··· 162 163 // URL path field for documents (e.g., "/001" for zat.dev) 163 164 // used to build full URL: publication.url + document.path 164 165 client.exec("ALTER TABLE documents ADD COLUMN path TEXT", &.{}) catch {}; 166 + 167 + // note: publications_fts was rebuilt with base_path column via scripts/rebuild-pub-fts 168 + // new publications will include base_path via insertPublication in indexer.zig 165 169 }
+3 -3
backend/src/indexer.zig
··· 81 81 &.{ uri, did, rkey, name, description orelse "", base_path orelse "" }, 82 82 ); 83 83 84 - // update FTS index 84 + // update FTS index (includes base_path for subdomain search) 85 85 c.exec("DELETE FROM publications_fts WHERE uri = ?", &.{uri}) catch {}; 86 86 c.exec( 87 - "INSERT INTO publications_fts (uri, name, description) VALUES (?, ?, ?)", 88 - &.{ uri, name, description orelse "" }, 87 + "INSERT INTO publications_fts (uri, name, description, base_path) VALUES (?, ?, ?, ?)", 88 + &.{ uri, name, description orelse "", base_path orelse "" }, 89 89 ) catch {}; 90 90 } 91 91
+62 -2
backend/src/search.zig
··· 159 159 \\ORDER BY d.created_at DESC LIMIT 40 160 160 ); 161 161 162 + // Find documents by their publication's base_path (subdomain search) 163 + // e.g., searching "gyst" finds all docs on gyst.leaflet.pub 164 + const DocsByPubBasePath = zql.Query( 165 + \\SELECT d.uri, d.did, d.title, '' as snippet, 166 + \\ d.created_at, d.rkey, 167 + \\ p.base_path, 168 + \\ 1 as has_publication, 169 + \\ d.platform, COALESCE(d.path, '') as path 170 + \\FROM documents d 171 + \\JOIN publications p ON d.publication_uri = p.uri 172 + \\JOIN publications_fts pf ON p.uri = pf.uri 173 + \\WHERE publications_fts MATCH :query 174 + \\ORDER BY d.created_at DESC LIMIT 40 175 + ); 176 + 177 + const DocsByPubBasePathAndPlatform = zql.Query( 178 + \\SELECT d.uri, d.did, d.title, '' as snippet, 179 + \\ d.created_at, d.rkey, 180 + \\ p.base_path, 181 + \\ 1 as has_publication, 182 + \\ d.platform, COALESCE(d.path, '') as path 183 + \\FROM documents d 184 + \\JOIN publications p ON d.publication_uri = p.uri 185 + \\JOIN publications_fts pf ON p.uri = pf.uri 186 + \\WHERE publications_fts MATCH :query AND d.platform = :platform 187 + \\ORDER BY d.created_at DESC LIMIT 40 188 + ); 189 + 162 190 /// Publication search result (internal) 163 191 const Pub = struct { 164 192 uri: []const u8, ··· 219 247 const has_tag = tag_filter != null; 220 248 const has_platform = platform_filter != null; 221 249 222 - // search documents - handle all filter combinations 250 + // track seen URIs for deduplication (content match + base_path match) 251 + var seen_uris = std.StringHashMap(void).init(alloc); 252 + defer seen_uris.deinit(); 253 + 254 + // search documents by content (title, content) - handle all filter combinations 223 255 var doc_result = if (has_query and has_tag and has_platform) 224 256 c.query(DocsByFtsAndTagAndPlatform.positional, DocsByFtsAndTagAndPlatform.bind(.{ 225 257 .query = fts_query, ··· 244 276 if (doc_result) |*res| { 245 277 defer res.deinit(); 246 278 for (res.rows) |row| { 
247 - try jw.write(Doc.fromRow(row).toJson()); 279 + const doc = Doc.fromRow(row); 280 + // dupe URI for hash map (outlives result) 281 + const uri_dupe = try alloc.dupe(u8, doc.uri); 282 + try seen_uris.put(uri_dupe, {}); 283 + try jw.write(doc.toJson()); 284 + } 285 + } 286 + 287 + // also search documents by publication base_path (subdomain search) 288 + // e.g., "gyst" finds all docs on gyst.leaflet.pub even if content doesn't contain "gyst" 289 + // skip if tag filter is set (tag filter is content-specific) 290 + if (has_query and !has_tag) { 291 + var basepath_result = if (has_platform) 292 + c.query(DocsByPubBasePathAndPlatform.positional, DocsByPubBasePathAndPlatform.bind(.{ 293 + .query = fts_query, 294 + .platform = platform_filter.?, 295 + })) catch null 296 + else 297 + c.query(DocsByPubBasePath.positional, DocsByPubBasePath.bind(.{ .query = fts_query })) catch null; 298 + 299 + if (basepath_result) |*res| { 300 + defer res.deinit(); 301 + for (res.rows) |row| { 302 + const doc = Doc.fromRow(row); 303 + // deduplicate: skip if already found by content search 304 + if (!seen_uris.contains(doc.uri)) { 305 + try jw.write(doc.toJson()); 306 + } 307 + } 248 308 } 249 309 } 250 310
+86
scripts/rebuild-pub-fts
··· 1 + #!/usr/bin/env -S uv run --script --quiet 2 + # /// script 3 + # requires-python = ">=3.12" 4 + # dependencies = ["httpx", "pydantic-settings"] 5 + # /// 6 + """Rebuild publications_fts with base_path column for subdomain search.""" 7 + import os 8 + import httpx 9 + from pydantic_settings import BaseSettings, SettingsConfigDict 10 + 11 + 12 + class Settings(BaseSettings): 13 + model_config = SettingsConfigDict( 14 + env_file=os.environ.get("ENV_FILE", ".env"), extra="ignore" 15 + ) 16 + turso_url: str 17 + turso_token: str 18 + 19 + @property 20 + def turso_host(self) -> str: 21 + url = self.turso_url 22 + if url.startswith("libsql://"): 23 + url = url[len("libsql://") :] 24 + return url 25 + 26 + 27 + settings = Settings() # type: ignore 28 + 29 + print("Rebuilding publications_fts with base_path column...") 30 + 31 + response = httpx.post( 32 + f"https://{settings.turso_host}/v2/pipeline", 33 + headers={ 34 + "Authorization": f"Bearer {settings.turso_token}", 35 + "Content-Type": "application/json", 36 + }, 37 + json={ 38 + "requests": [ 39 + {"type": "execute", "stmt": {"sql": "DROP TABLE IF EXISTS publications_fts"}}, 40 + { 41 + "type": "execute", 42 + "stmt": { 43 + "sql": """ 44 + CREATE VIRTUAL TABLE publications_fts USING fts5( 45 + uri UNINDEXED, 46 + name, 47 + description, 48 + base_path 49 + ) 50 + """ 51 + }, 52 + }, 53 + { 54 + "type": "execute", 55 + "stmt": { 56 + "sql": """ 57 + INSERT INTO publications_fts (uri, name, description, base_path) 58 + SELECT uri, name, COALESCE(description, ''), COALESCE(base_path, '') 59 + FROM publications 60 + """ 61 + }, 62 + }, 63 + {"type": "execute", "stmt": {"sql": "SELECT COUNT(*) FROM publications_fts"}}, 64 + {"type": "close"}, 65 + ] 66 + }, 67 + timeout=60, 68 + ) 69 + response.raise_for_status() 70 + data = response.json() 71 + 72 + for i, result in enumerate(data["results"][:-1]): # skip close 73 + if result["type"] == "error": 74 + print(f"Step {i} error: {result['error']}") 75 + elif 
result["type"] == "ok": 76 + if i == 3: # count query 77 + rows = result["response"]["result"].get("rows", []) 78 + if rows: 79 + count = ( 80 + rows[0][0].get("value", rows[0][0]) 81 + if isinstance(rows[0][0], dict) 82 + else rows[0][0] 83 + ) 84 + print(f"Rebuilt with {count} publications") 85 + 86 + print("Done!")
+111 -1
site/index.html
··· 111 111 .result-title { 112 112 color: #fff; 113 113 margin-bottom: 0.5rem; 114 + /* prevent long titles from breaking layout */ 115 + display: -webkit-box; 116 + -webkit-line-clamp: 2; 117 + -webkit-box-orient: vertical; 118 + overflow: hidden; 119 + word-break: break-word; 114 120 } 115 121 116 122 .result-title a { color: inherit; } ··· 383 389 .active-filter .clear:hover { 384 390 color: #c44; 385 391 } 392 + 393 + /* mobile improvements */ 394 + @media (max-width: 600px) { 395 + body { 396 + padding: 0.75rem; 397 + font-size: 13px; 398 + } 399 + 400 + .container { 401 + max-width: 100%; 402 + } 403 + 404 + /* ensure minimum 44px touch targets */ 405 + .tag, .platform-option, .suggestion { 406 + min-height: 44px; 407 + display: inline-flex; 408 + align-items: center; 409 + padding: 0.5rem 0.75rem; 410 + } 411 + 412 + button { 413 + min-height: 44px; 414 + padding: 0.5rem 0.75rem; 415 + } 416 + 417 + /* stack search box on very small screens */ 418 + .search-box { 419 + flex-direction: column; 420 + gap: 0.5rem; 421 + } 422 + 423 + .search-box input[type="text"] { 424 + width: 100%; 425 + } 426 + 427 + .search-box button { 428 + width: 100%; 429 + } 430 + 431 + /* result card mobile tweaks */ 432 + .result { 433 + padding: 0.75rem 0; 434 + } 435 + 436 + .result:hover { 437 + margin: 0 -0.75rem; 438 + padding: 0.75rem; 439 + } 440 + 441 + .result-title { 442 + font-size: 14px; 443 + line-height: 1.4; 444 + } 445 + 446 + .result-snippet { 447 + font-size: 12px; 448 + line-height: 1.5; 449 + } 450 + 451 + /* badges inline on mobile */ 452 + .entity-type, .platform-badge { 453 + font-size: 9px; 454 + padding: 2px 5px; 455 + margin-right: 6px; 456 + vertical-align: middle; 457 + } 458 + 459 + /* tags wrap better on mobile */ 460 + .tags-list, .platform-filter-list { 461 + gap: 0.5rem; 462 + } 463 + 464 + /* suggestions responsive */ 465 + .suggestions { 466 + line-height: 2; 467 + } 468 + 469 + /* related items more compact */ 470 + .related-item { 471 + 
max-width: 150px; 472 + font-size: 11px; 473 + padding: 0.5rem; 474 + } 475 + } 476 + 477 + /* ensure touch targets on tablets too */ 478 + @media (hover: none) and (pointer: coarse) { 479 + .tag, .platform-option, .suggestion, .related-item { 480 + min-height: 44px; 481 + display: inline-flex; 482 + align-items: center; 483 + } 484 + } 386 485 </style> 387 486 </head> 388 487 <body> ··· 460 559 if (results.length === 0) { 461 560 resultsDiv.innerHTML = ` 462 561 <div class="empty-state"> 463 - <p>no results${query ? ` for "${escapeHtml(query)}"` : ''}${tag ? ` in #${escapeHtml(tag)}` : ''}${platform ? ` on ${escapeHtml(platform)}` : ''}</p> 562 + <p>no results${query ? ` for ${formatQueryForDisplay(query)}` : ''}${tag ? ` in #${escapeHtml(tag)}` : ''}${platform ? ` on ${escapeHtml(platform)}` : ''}</p> 464 563 <p>try different keywords</p> 465 564 </div> 466 565 `; ··· 524 623 return str.replace(/[&<>"']/g, c => ({ 525 624 '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' 526 625 })[c]); 626 + } 627 + 628 + // display query without adding redundant quotes 629 + function formatQueryForDisplay(query) { 630 + if (!query) return ''; 631 + const escaped = escapeHtml(query); 632 + // if query is already fully quoted, don't add more quotes 633 + if (query.startsWith('"') && query.endsWith('"')) { 634 + return escaped; 635 + } 636 + return `"${escaped}"`; 527 637 } 528 638 529 639 // platform-specific URL patterns