fix: use tryLock for metrics to prevent contention stalls

atproto relay implementation in zig — zlay.waow.tech

during startup with ~2,700 threads, mutex contention blocks the
single-threaded metrics server indefinitely. switch all metric-
gathering methods (validator queues, LRU caches, event buffer,
ring buffer) to tryLock, so each one returns 0 instead of blocking
when its lock is contended. prometheus tolerates missing data points;
note that a contended read yields 0, which is indistinguishable from
a genuinely empty queue, cache, or buffer.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

zzstoatzz.io 6 days ago 1d80d846 27d71026

+14 -13

4 changed files

expand all

unified split

src

event_log.zig

lru.zig

ring_buffer.zig

validator.zig

+2 -2

src/event_log.zig

··· 105 105 alive: std.atomic.Value(bool) = .{ .raw = true }, 106 106 flush_cond: std.Thread.Condition = .{}, 107 107 108 - /// current evtbuf entry count (for metrics) 108 + /// current evtbuf entry count (for metrics — non-blocking, returns 0 if lock is contended) 109 109 pub fn evtbufLen(self: *DiskPersist) usize { 110 - self.mutex.lock(); 110 + if (!self.mutex.tryLock()) return 0; 111 111 defer self.mutex.unlock(); 112 112 return self.evtbuf.items.len; 113 113 }

+2 -1

src/lru.zig

··· 108 108 return self.map.contains(key); 109 109 } 110 110 111 + /// entry count (non-blocking — returns 0 if lock is contended) 111 112 pub fn count(self: *Self) u32 { 112 - self.mutex.lock(); 113 + if (!self.mutex.tryLock()) return 0; 113 114 defer self.mutex.unlock(); 114 115 return self.len; 115 116 }

+2 -2

src/ring_buffer.zig

··· 87 87 return frame; 88 88 } 89 89 90 - /// number of frames currently buffered 90 + /// number of frames currently buffered (non-blocking — returns 0 if lock is contended) 91 91 pub fn count(self: *Self) usize { 92 - self.mutex.lock(); 92 + if (!self.mutex.tryLock()) return 0; 93 93 defer self.mutex.unlock(); 94 94 return self.len; 95 95 }

+8 -8

src/validator.zig

··· 578 578 return self.cache.count(); 579 579 } 580 580 581 - /// migration queue length (for diagnostics) 581 + /// migration queue length (for diagnostics — non-blocking, returns 0 if lock is contended) 582 582 pub fn migrationQueueLen(self: *Validator) usize { 583 - self.queue_mutex.lock(); 583 + if (!self.queue_mutex.tryLock()) return 0; 584 584 defer self.queue_mutex.unlock(); 585 585 return self.migration_queue.items.len; 586 586 } 587 587 588 - /// migration pending (suppressed) count (for diagnostics) 588 + /// migration pending (suppressed) count (for diagnostics — non-blocking) 589 589 pub fn migrationPendingCount(self: *Validator) u32 { 590 - self.queue_mutex.lock(); 590 + if (!self.queue_mutex.tryLock()) return 0; 591 591 defer self.queue_mutex.unlock(); 592 592 return self.migration_pending.count(); 593 593 } 594 594 595 - /// resolve queue length (for diagnostics) 595 + /// resolve queue length (for diagnostics — non-blocking) 596 596 pub fn resolveQueueLen(self: *Validator) usize { 597 - self.queue_mutex.lock(); 597 + if (!self.queue_mutex.tryLock()) return 0; 598 598 defer self.queue_mutex.unlock(); 599 599 return self.queue.items.len; 600 600 } 601 601 602 - /// resolve dedup set size (for diagnostics) 602 + /// resolve dedup set size (for diagnostics — non-blocking) 603 603 pub fn resolveQueuedSetCount(self: *Validator) u32 { 604 - self.queue_mutex.lock(); 604 + if (!self.queue_mutex.tryLock()) return 0; 605 605 defer self.queue_mutex.unlock(); 606 606 return self.queued_set.count(); 607 607 }