### Testing
- `nu tests/repo_sync_integrity.nu` - Runs the full integration test suite using Nushell. This builds the binary, starts a temporary instance, performs a backfill against a real PDS, and verifies record integrity.
- `nu tests/verify_crawler.nu` - Verifies full-network crawler functionality using a mock relay.
- `nu tests/throttling_test.nu` - Verifies crawler throttling logic when the pending queue is full.
- `nu tests/stream_test.nu` - Tests WebSocket streaming functionality. Verifies both live event streaming during backfill and historical replay with cursor.
- `nu tests/authenticated_stream_test.nu` - Tests authenticated event streaming. Verifies that create, update, and delete actions on a real account are correctly streamed by Hydrant in the correct order. Requires `TEST_REPO` and `TEST_PASSWORD` in `.env`.
- `nu tests/debug_endpoints.nu` - Tests debug/introspection endpoints (`/debug/iter`, `/debug/get`) and verifies DB content and serialization.
+148
tests/throttling_test.nu
···11+#!/usr/bin/env nu
22+use common.nu *
# Returns true when the file at `log_path` contains `needle`.
# Newlines are flattened to spaces before matching, so a line break in the
# log cannot split the text being searched for.
def log-has-message [log_path: string, needle: string] {
    open --raw $log_path | str replace --all "\n" " " | str contains $needle
}

# Integration test for crawler throttling.
#
# Starts a mock relay and a hydrant instance configured with a max-pending
# limit of 2 and a resume limit of 1, waits until the pending count reaches
# the number of mock repos, checks the hydrant log for the throttling message,
# removes four repos through the API, and then checks the log for the
# resumption message.
#
# Exits with status 0 when both log messages are observed, 1 otherwise.
def main [] {
    # 1. ensure http-nu is installed
    if (which http-nu | is-empty) {
        print "http-nu not found, installing..."
        cargo install http-nu
    }

    # 2. setup ports and paths
    let port = 3010
    let mock_port = 3012
    let url = $"http://localhost:($port)"
    let mock_url = $"http://localhost:($mock_port)"
    let db_path = (mktemp -d -t hydrant_throttling.XXXXXX)
    let log_file = $"($db_path)/hydrant.log"
    # the wait loop below expects 5 repos to be discovered (did:web:mock1.com ... mock5.com)
    let expected_pending = 5

    print $"testing crawler throttling..."
    print $"database path: ($db_path)"

    # 3. build before spawning anything, so a build failure cannot leave a
    #    background process running with nothing to clean it up
    let binary = build-hydrant

    # 4. start mock relay
    print $"starting mock relay on ($mock_port)..."
    let mock_pid = (
        bash -c $"http-nu :($mock_port) tests/mock_relay.nu > ($db_path)/mock.log 2>&1 & echo $!"
        | str trim
        | into int
    )
    print $"mock relay pid: ($mock_pid)"

    # give mock relay a moment
    sleep 1sec

    # 5. start hydrant with low throttling limits
    print $"starting hydrant - logs at ($log_file)..."

    # backfill is disabled so the pending count stays up
    let hydrant_pid = (
        with-env {
            HYDRANT_DATABASE_PATH: ($db_path),
            HYDRANT_FULL_NETWORK: "true",
            HYDRANT_RELAY_HOST: ($mock_url),
            HYDRANT_DISABLE_FIREHOSE: "true",
            HYDRANT_DISABLE_BACKFILL: "true",
            HYDRANT_API_PORT: ($port | into string),
            HYDRANT_LOG_LEVEL: "debug",
            HYDRANT_CRAWLER_MAX_PENDING_REPOS: "2",
            HYDRANT_CRAWLER_RESUME_PENDING_REPOS: "1"
        } {
            sh -c $"($binary) >($log_file) 2>&1 & echo $!" | str trim | into int
        }
    )
    print $"hydrant started with pid: ($hydrant_pid)"

    mut success = false

    try {
        if (wait-for-api $url) {
            print "hydrant api is up."

            # wait for crawler to run and hit limit
            print "waiting for crawler to hit throttling limit..."

            # the crawler fetches a whole page (5 repos) and only then adds
            # them to the DB, so pending jumps straight to 5 even though the
            # max is 2; the throttle check happens on the next loop.
            # retry check for 30s
            mut discovered = false
            for i in 1..30 {
                let stats = (http get $"($url)/stats?accurate=true").counts
                let pending = ($stats.pending | into int)

                print $"[($i)/30] pending: ($pending)"

                if $pending >= $expected_pending {
                    print "crawler discovered repos."
                    $discovered = true
                    break
                }

                sleep 1sec
            }

            if not $discovered {
                # without this the later checks would run against an
                # unexpected state and report a misleading failure
                print $"FAILED: pending count never reached ($expected_pending) within 30s"
            } else {
                # now check logs for throttling message
                print "checking logs for throttling message..."
                sleep 2sec # give logging a moment

                if (log-has-message $log_file "crawler throttling: pending repos") {
                    print "CONFIRMED: crawler is throttling!"

                    # now testing resumption
                    print "testing resumption by removing repos..."

                    # remove 4 repos to drop pending (5) to 1 (<= resume limit 1)
                    # mock repos are did:web:mock1.com ... mock5.com
                    let to_remove = {
                        dids: [
                            "did:web:mock1.com",
                            "did:web:mock2.com",
                            "did:web:mock3.com",
                            "did:web:mock4.com"
                        ]
                    }

                    http post --content-type application/json $"($url)/repo/remove" $to_remove

                    # poll instead of sleeping the full window, so the test
                    # finishes as soon as the resumption message shows up
                    print "waiting for crawler to wake up (max 15s)..."
                    mut resumed = false
                    for attempt in 1..15 {
                        if (log-has-message $log_file "crawler resuming") {
                            $resumed = true
                            break
                        }
                        sleep 1sec
                    }

                    if $resumed {
                        print "CONFIRMED: crawler resumed!"
                        $success = true
                    } else {
                        print "FAILED: resumption message not found in logs"
                    }
                } else {
                    print "FAILED: throttling message not found in logs"
                }
            }
        } else {
            print "hydrant failed to start."
        }
    } catch { |e|
        print $"test failed with error: ($e)"
    }

    # cleanup
    print "stopping processes..."
    try { kill $hydrant_pid }
    try { kill $mock_pid }

    if $success {
        # the temp directory is only removed on success; on failure it is
        # kept so the logs can be inspected
        try { rm --recursive --force $db_path }
        print "test passed!"
        exit 0
    } else {
        print "test failed!"
        print "hydrant logs:"
        try {
            print (open --raw $log_file | lines | last 20 | str join "\n")
        } catch {
            print $"could not read ($log_file)"
        }
        exit 1
    }
}