ATProto relay implementation in Zig — zlay.waow.tech

fix: remove startup throttle, keep background thread

throttled batching (25 hosts / 2 s) made memory usage WORSE — it extended
the overlap of threads in the TLS handshake phase, reaching 8+ GiB.
unthrottled spawning produces a higher but shorter spike (~3 GiB for ~10 s),
as many connections fail fast and free their memory quickly. the background
thread is preserved so the HTTP server + health probes start immediately.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

+8 -12
src/slurper.zig
···
     // spawn worker startup and crawl queue in background threads so the
     // HTTP server can start immediately (health probes need it)
-    self.startup_thread = try std.Thread.spawn(.{ .stack_size = @import("main.zig").default_stack_size }, spawnWorkersThrottled, .{self});
+    self.startup_thread = try std.Thread.spawn(.{ .stack_size = @import("main.zig").default_stack_size }, spawnWorkers, .{self});
     self.crawl_thread = try std.Thread.spawn(.{ .stack_size = @import("main.zig").default_stack_size }, processCrawlQueue, .{self});
 }
···
     self.allocator.destroy(sub);
 }

-/// background thread: load hosts from DB and spawn workers in throttled batches.
-/// mirrors Go relay's ResubscribeAllHosts pattern but with batching to bound
-/// concurrent TLS handshakes and memory pressure during startup.
-fn spawnWorkersThrottled(self: *Slurper) void {
+/// background thread: load hosts from DB and spawn all workers.
+/// runs in background so HTTP server + probes come up immediately.
+/// Go relay: ResubscribeAllHosts loops with 1ms sleep per host (goroutines).
+/// we spawn all at once — the brief memory spike from concurrent TLS handshakes
+/// is shorter than a throttled ramp (many hosts fail-fast, freeing memory quickly).
+fn spawnWorkers(self: *Slurper) void {
     const hosts = self.persist.listActiveHosts(self.allocator) catch |err| {
         log.err("failed to load hosts: {s}", .{@errorName(err)});
         return;
···
         self.allocator.free(hosts);
     }

-    const batch_size: usize = 25;
-    const batch_delay_ns: u64 = 2 * std.time.ns_per_s;
-    for (hosts, 0..) |host, i| {
+    for (hosts) |host| {
         if (self.shutdown.load(.acquire)) break;
         self.spawnWorker(host.id, host.hostname) catch |err| {
             log.warn("failed to spawn worker for {s}: {s}", .{ host.hostname, @errorName(err) });
         };
-        if ((i + 1) % batch_size == 0 and i + 1 < hosts.len) {
-            log.info("spawned {d}/{d} workers, pausing...", .{ i + 1, hosts.len });
-            std.posix.nanosleep(batch_delay_ns / std.time.ns_per_s, batch_delay_ns % std.time.ns_per_s);
-        }
     }

     log.info("startup complete: {d} host(s) spawned", .{hosts.len});