AT Protocol indexer with flexible filtering, XRPC queries, and a cursor-backed event stream, built on fjall.
at-protocol
atproto
indexer
rust
fjall
#!/usr/bin/env nu
use common.nu *

# returns true when the file at `log_file` contains `needle`.
# newlines are replaced with spaces before matching, so a message that is
# split across a line break still matches a needle containing spaces.
def log-contains [log_file: string, needle: string] {
    open $log_file | str replace --all "\n" " " | str contains $needle
}

# integration test for crawler throttling.
#
# starts a mock relay and a hydrant instance configured with a max pending
# limit of 2 and a resume limit of 1, then verifies from hydrant's log that
# the crawler (a) reports throttling once pending repos exceed the limit and
# (b) reports resumption after enough repos are deleted.
#
# exits 0 on success and 1 on failure. spawned processes are always stopped;
# the temp directory is removed on success and kept on failure for inspection.
def main [] {
    # 1. ensure http-nu is installed
    if (which http-nu | is-empty) {
        print "http-nu not found, installing..."
        cargo install http-nu
    }

    # 2. setup ports and paths
    let port = 3010
    let mock_port = 3012
    let url = $"http://localhost:($port)"
    let mock_url = $"http://localhost:($mock_port)"
    let db_path = (mktemp -d -t hydrant_throttling.XXXXXX)
    let log_file = $"($db_path)/hydrant.log"

    print "testing crawler throttling..."
    print $"database path: ($db_path)"

    # 3. start mock relay (backgrounded; the shell echoes its pid back to us)
    print $"starting mock relay on ($mock_port)..."
    let mock_pid = (
        bash -c $"http-nu :($mock_port) tests/mock_relay.nu > ($db_path)/mock.log 2>&1 & echo $!"
        | str trim
        | into int
    )
    print $"mock relay pid: ($mock_pid)"

    # give mock relay a moment
    sleep 1sec

    mut success = false
    # -1 means "hydrant was never started"; cleanup skips the kill in that case
    mut hydrant_pid = -1

    # everything that can fail after the mock relay is running lives inside
    # this try, so the cleanup section below always gets a chance to run.
    try {
        # 4. start hydrant with low throttling limits
        let binary = (build-hydrant)

        print $"starting hydrant - logs at ($log_file)..."

        $hydrant_pid = (
            with-env {
                HYDRANT_DATABASE_PATH: ($db_path),
                HYDRANT_FULL_NETWORK: "true",
                HYDRANT_RELAY_HOST: ($mock_url),
                HYDRANT_DISABLE_FIREHOSE: "true",
                HYDRANT_DISABLE_BACKFILL: "true", # disable backfill so pending count stays up
                HYDRANT_API_PORT: ($port | into string),
                HYDRANT_LOG_LEVEL: "debug",
                RUST_LOG: "debug",
                HYDRANT_CRAWLER_MAX_PENDING_REPOS: "2",
                HYDRANT_CRAWLER_RESUME_PENDING_REPOS: "1"
            } {
                sh -c $"($binary) >($log_file) 2>&1 & echo $!" | str trim | into int
            }
        )
        print $"hydrant started with pid: ($hydrant_pid)"

        if (wait-for-api $url) {
            print "hydrant api is up."

            # wait for crawler to run and hit limit
            print "waiting for crawler to hit throttling limit..."

            # the mock serves 5 repos. the crawler fetches a whole page and
            # adds it to the db before it checks the pending limit, so pending
            # overshoots the max of 2 and is expected to reach 5.
            mut discovered = false
            for i in 1..30 {
                let stats = (http get $"($url)/stats?accurate=true").counts
                let pending = ($stats.pending | into int)

                print $"[($i)/30] pending: ($pending)"

                if $pending >= 5 {
                    print "crawler discovered repos."
                    $discovered = true
                    break
                }

                sleep 1sec
            }

            if not $discovered {
                print "warning: pending never reached 5 within 30s, checking logs anyway..."
            }

            # now check logs for throttling message
            print "checking logs for throttling message..."
            sleep 2sec # give logging a moment

            if (log-contains $log_file "throttling: above max pending") {
                print "CONFIRMED: crawler is throttling!"

                # now testing resumption
                print "testing resumption by removing repos..."

                # remove 4 repos to drop pending (5) to 1 (<= resume limit 1)
                # mock repos are did:web:mock1.com ... mock5.com
                curl -s -X DELETE -H "Content-Type: application/json" -d '[
                    {"did": "did:web:mock1.com"},
                    {"did": "did:web:mock2.com"},
                    {"did": "did:web:mock3.com"},
                    {"did": "did:web:mock4.com"}
                ]' $"($url)/repos"

                # poll the log instead of sleeping a fixed interval, so the
                # test finishes as soon as the crawler reports resumption.
                print "waiting for crawler to wake up (max 15s)..."
                mut resumed = false
                for attempt in 1..15 {
                    if (log-contains $log_file "throttling released") {
                        $resumed = true
                        break
                    }
                    sleep 1sec
                }

                if $resumed {
                    print "CONFIRMED: crawler resumed!"
                    $success = true
                } else {
                    print "FAILED: resumption message not found in logs"
                }
            } else {
                print "FAILED: throttling message not found in logs"
            }
        } else {
            print "hydrant failed to start."
        }
    } catch { |e|
        print $"test failed with error: ($e)"
    }

    # cleanup: best-effort, a process may already have exited
    print "stopping processes..."
    if $hydrant_pid > 0 {
        try { kill $hydrant_pid }
    }
    try { kill $mock_pid }

    if $success {
        print "test passed!"
        # nothing left to inspect on success, so drop the temp directory
        try { rm --recursive --force $db_path }
        exit 0
    } else {
        print "test failed!"
        # the log only exists if hydrant was actually launched
        if ($log_file | path exists) {
            print "hydrant logs:"
            print (open $log_file | lines | last 20 | str join "\n")
        }
        exit 1
    }
}