search for standard sites pub-search.waow.tech
search zig blog atproto

fix: strip trailing slashes from base_path to prevent broken URLs

base_path values like "tedium.co/" combined with paths like "/some-post"
produce double-slash URLs ("https://tedium.co//some-post") that 404.

The trailing slash comes from publication URLs like "https://tedium.co/"
where stripUrlScheme preserved it. Fix in three places:
- tap.zig: stripUrlScheme strips trailing slash
- indexer.zig: HTTP fallback strips trailing slash
- indexer.zig: normalize base_path after all resolution (catches values
  already stored in the publications table with trailing slashes)

Backfill: RTRIM(base_path, '/') on both the documents and publications tables.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

+19 -5
+9 -2
backend/src/ingest/indexer.zig
··· 130 130 // our extractor reads into publication_uri. Strip the scheme to match 131 131 // base_path convention (frontend prepends "https://"). 132 132 if (base_path.len == 0 and pub_uri.len > 0) { 133 - const host = if (std.mem.startsWith(u8, pub_uri, "https://")) 133 + var host = if (std.mem.startsWith(u8, pub_uri, "https://")) 134 134 pub_uri["https://".len..] 135 135 else if (std.mem.startsWith(u8, pub_uri, "http://")) 136 136 pub_uri["http://".len..] 137 137 else 138 - ""; 138 + @as([]const u8, ""); 139 + // strip trailing slash to avoid double-slash when combined with path 140 + if (host.len > 1 and host[host.len - 1] == '/') 141 + host = host[0 .. host.len - 1]; 139 142 if (host.len > 0 and host.len <= base_path_buf.len) { 140 143 @memcpy(base_path_buf[0..host.len], host); 141 144 base_path = base_path_buf[0..host.len]; 142 145 } 143 146 } 147 + 148 + // normalize: strip trailing slash to avoid double-slash in URLs 149 + if (base_path.len > 1 and base_path[base_path.len - 1] == '/') 150 + base_path = base_path[0 .. base_path.len - 1]; 144 151 145 152 // skip .test domains (dev/staging data) 146 153 if (std.mem.endsWith(u8, base_path, ".test")) return;
+10 -3
backend/src/ingest/tap.zig
··· 514 514 515 515 fn stripUrlScheme(url: ?[]const u8) ?[]const u8 { 516 516 const u = url orelse return null; 517 - if (mem.startsWith(u8, u, "https://")) return u["https://".len..]; 518 - if (mem.startsWith(u8, u, "http://")) return u["http://".len..]; 519 - return u; 517 + const without_scheme = if (mem.startsWith(u8, u, "https://")) 518 + u["https://".len..] 519 + else if (mem.startsWith(u8, u, "http://")) 520 + u["http://".len..] 521 + else 522 + u; 523 + // strip trailing slash to avoid double-slash when combined with path 524 + if (without_scheme.len > 1 and without_scheme[without_scheme.len - 1] == '/') 525 + return without_scheme[0 .. without_scheme.len - 1]; 526 + return without_scheme; 520 527 }