at protocol indexer with flexible filtering, xrpc queries, and a cursor-backed event stream, built on fjall
at-protocol atproto indexer rust fjall

[tests] add throttling integration test

ptr.pet 07946248 c8ad7421

verified
+149
+1
AGENTS.md
··· 88 88 ### Testing 89 89 - `nu tests/repo_sync_integrity.nu` - Runs the full integration test suite using Nushell. This builds the binary, starts a temporary instance, performs a backfill against a real PDS, and verifies record integrity. 90 90 - `nu tests/verify_crawler.nu` - Verifies full-network crawler functionality using a mock relay. 91 + - `nu tests/throttling_test.nu` - Verifies crawler throttling logic when pending queue is full. 91 92 - `nu tests/stream_test.nu` - Tests WebSocket streaming functionality. Verifies both live event streaming during backfill and historical replay with cursor. 92 93 - `nu tests/authenticated_stream_test.nu` - Tests authenticated event streaming. Verifies that create, update, and delete actions on a real account are correctly streamed by Hydrant in the correct order. Requires `TEST_REPO` and `TEST_PASSWORD` in `.env`. 93 94 - `nu tests/debug_endpoints.nu` - Tests debug/introspection endpoints (`/debug/iter`, `/debug/get`) and verifies DB content and serialization.
+148
tests/throttling_test.nu
#!/usr/bin/env nu
use common.nu *

# Returns true when the log file at `log_path` contains `needle`.
# Newlines are flattened to spaces before searching, so a message that is
# broken across lines at a space boundary still matches.
def log-has-message [log_path: string, needle: string] {
    open --raw $log_path | str replace --all "\n" " " | str contains $needle
}

# Integration test for crawler throttling.
#
# Starts a mock relay and a hydrant instance configured with a very small
# pending-repo limit, then checks hydrant's log for two messages:
#   1. "crawler throttling: pending repos" once the pending count exceeds the limit
#   2. "crawler resuming" after enough repos are removed to drop below the resume limit
#
# Exits 0 when both messages are seen, 1 otherwise (after printing the log tail).
def main [] {
    # 1. ensure http-nu is installed
    if (which http-nu | is-empty) {
        print "http-nu not found, installing..."
        cargo install http-nu
    }

    # 2. setup ports and paths
    let port = 3010
    let mock_port = 3012
    let url = $"http://localhost:($port)"
    let mock_url = $"http://localhost:($mock_port)"
    let db_path = (mktemp -d -t hydrant_throttling.XXXXXX)

    print $"testing crawler throttling..."
    print $"database path: ($db_path)"

    # 3. start mock relay in the background and capture its pid via the shell's `$!`
    print $"starting mock relay on ($mock_port)..."
    let mock_pid = (
        bash -c $"http-nu :($mock_port) tests/mock_relay.nu > ($db_path)/mock.log 2>&1 & echo $!"
        | str trim
        | into int
    )
    print $"mock relay pid: ($mock_pid)"

    # give mock relay a moment
    sleep 1sec

    # 4. start hydrant with low throttling limits
    let binary = build-hydrant

    let log_file = $"($db_path)/hydrant.log"
    print $"starting hydrant - logs at ($log_file)..."

    # backfill is disabled so that the pending count stays up once repos are discovered
    let hydrant_pid = (
        with-env {
            HYDRANT_DATABASE_PATH: ($db_path),
            HYDRANT_FULL_NETWORK: "true",
            HYDRANT_RELAY_HOST: ($mock_url),
            HYDRANT_DISABLE_FIREHOSE: "true",
            HYDRANT_DISABLE_BACKFILL: "true",
            HYDRANT_API_PORT: ($port | into string),
            HYDRANT_LOG_LEVEL: "debug",
            HYDRANT_CRAWLER_MAX_PENDING_REPOS: "2",
            HYDRANT_CRAWLER_RESUME_PENDING_REPOS: "1"
        } {
            sh -c $"($binary) >($log_file) 2>&1 & echo $!" | str trim | into int
        }
    )
    print $"hydrant started with pid: ($hydrant_pid)"

    mut success = false

    try {
        if (wait-for-api $url) {
            print "hydrant api is up."

            # wait for crawler to run and hit limit
            print "waiting for crawler to hit throttling limit..."

            # Poll the stats endpoint for up to 30s.
            # The mock relay is expected to serve 5 repos while max pending is 2.
            # Per the original author's note, the crawler fetches a whole page
            # before adding it to the DB and only checks the limit on the next
            # loop, so pending is expected to jump straight to 5.
            for i in 1..30 {
                let stats = (http get $"($url)/stats?accurate=true").counts
                let pending = ($stats.pending | into int)

                print $"[($i)/30] pending: ($pending)"

                if $pending >= 5 {
                    print "crawler discovered repos."
                    break
                }

                sleep 1sec
            }

            # now check logs for throttling message
            print "checking logs for throttling message..."
            sleep 2sec # give logging a moment

            if (log-has-message $log_file "crawler throttling: pending repos") {
                print "CONFIRMED: crawler is throttling!"

                # now testing resumption
                print "testing resumption by removing repos..."

                # remove 4 repos to drop pending (5) to 1 (<= resume limit 1)
                # mock repos are did:web:mock1.com ... mock5.com
                let to_remove = {
                    dids: [
                        "did:web:mock1.com",
                        "did:web:mock2.com",
                        "did:web:mock3.com",
                        "did:web:mock4.com"
                    ]
                }

                http post --content-type application/json $"($url)/repo/remove" $to_remove

                # Poll the log once per second instead of sleeping a fixed
                # interval: the test finishes as soon as the crawler resumes,
                # and the printed bound matches the actual wait.
                print "waiting for crawler to wake up (max 15s)..."
                mut resumed = false
                for attempt in 1..15 {
                    sleep 1sec
                    if (log-has-message $log_file "crawler resuming") {
                        $resumed = true
                        break
                    }
                }

                if $resumed {
                    print "CONFIRMED: crawler resumed!"
                    $success = true
                } else {
                    print "FAILED: resumption message not found in logs"
                }

            } else {
                print "FAILED: throttling message not found in logs"
            }

        } else {
            print "hydrant failed to start."
        }
    } catch { |e|
        print $"test failed with error: ($e)"
    }

    # cleanup: best-effort, either process may already have exited
    print "stopping processes..."
    try { kill $hydrant_pid }
    try { kill $mock_pid }

    if $success {
        print "test passed!"
        exit 0
    } else {
        print "test failed!"
        print "hydrant logs:"
        # print the last 20 log lines explicitly so they are always shown
        open --raw $log_file | lines | last 20 | str join "\n" | print
        exit 1
    }
}