prefect server in zig

add orchestration rules and test matrix; fix postgres schema

orchestration:
- implement WaitForScheduledTime rule with unit tests
- refactor orchestration module structure (rules, transforms, types)
- add StateTypeSet bitfield for state matching
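
the StateTypeSet bitfield maps naturally to a flag enum; a python sketch of the idea only (the server is zig, and these names mirror the commit, not the actual API):

```python
from enum import IntFlag, auto

class StateTypeSet(IntFlag):
    # one bit per prefect state type, so a rule's from_states/to_states
    # membership check is a single bitwise AND (illustrative sketch)
    SCHEDULED = auto()
    PENDING = auto()
    RUNNING = auto()
    COMPLETED = auto()
    FAILED = auto()
    CANCELLING = auto()
    CANCELLED = auto()

def contains(states: StateTypeSet, state: StateTypeSet) -> bool:
    # membership via bitwise AND - no loop or hash lookup
    return bool(states & state)

# e.g. the source states a rule like PreventPendingTransitions matches
PREVENT_PENDING_FROM = (
    StateTypeSet.PENDING | StateTypeSet.RUNNING
    | StateTypeSet.CANCELLING | StateTypeSet.CANCELLED
)
```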

testing:
- add test-matrix script for all backend combinations
(sqlite×memory, sqlite×redis, postgres×memory, postgres×redis)
- fix postgres schema: add missing next_scheduled_start_time column
- add next_scheduled_start_time index for postgres
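
the four combinations are just the cross product of the two backend lists; a minimal python sketch of what a matrix runner enumerates (wiring to the actual scripts omitted):

```python
from itertools import product

DB_BACKENDS = ["sqlite", "postgres"]
BROKER_BACKENDS = ["memory", "redis"]

def backend_matrix() -> list[tuple[str, str]]:
    # every db × broker combination the test-matrix script exercises
    return list(product(DB_BACKENDS, BROKER_BACKENDS))
```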

other:
- refactor broker and services module structure
- improve scheduler with cron support
- update benchmarks and test scripts
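
cron scheduling reduces to matching a timestamp against the five cron fields; a deliberately minimal python sketch supporting only numbers and `*` (the zig scheduler uses a real cron dependency with full syntax):

```python
from datetime import datetime

def field_matches(spec: str, value: int) -> bool:
    # "*" matches anything; otherwise require an exact numeric match
    return spec == "*" or int(spec) == value

def cron_matches(expr: str, dt: datetime) -> bool:
    # fields: minute hour day-of-month month day-of-week (0 = sunday)
    minute, hour, dom, month, dow = expr.split()
    return (
        field_matches(minute, dt.minute)
        and field_matches(hour, dt.hour)
        and field_matches(dom, dt.day)
        and field_matches(month, dt.month)
        and field_matches(dow, (dt.weekday() + 1) % 7)
    )
```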

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

+3157 -1626
+6 -2
CLAUDE.md
··· 7 ```bash 8 zig build # build 9 just dev # build + run with DEBUG logging 10 - just test # run test scripts 11 just services-up # start redis + postgres containers 12 just test-broker all # test broker backends 13 just test-db all # test database backends ··· 16 ## testing 17 18 **NEVER run server and test scripts in same bash command** - server is long-running. 19 - use benchmark scripts or run in separate terminals. 20 21 ## env vars 22 ··· 45 46 ## docs 47 48 - `docs/scratch/` - working notes, patterns to deduplicate later 49 - `ROADMAP.md` - implementation status vs python prefect
··· 7 ```bash 8 zig build # build 9 just dev # build + run with DEBUG logging 10 + just test # run functional tests (requires running server) 11 + just bench-compare # benchmark zig vs python 12 just services-up # start redis + postgres containers 13 just test-broker all # test broker backends 14 just test-db all # test database backends ··· 17 ## testing 18 19 **NEVER run server and test scripts in same bash command** - server is long-running. 20 + 21 + - `scripts/test-api-sequence` - functional tests (correctness) 22 + - `scripts/benchmark` - performance benchmark (throughput, latency, memory) 23 24 ## env vars 25 ··· 48 49 ## docs 50 51 + - `docs/docker.md` - container registry, publishing, CI notes 52 - `docs/scratch/` - working notes, patterns to deduplicate later 53 - `ROADMAP.md` - implementation status vs python prefect
+17 -12
Dockerfile
··· 1 # Build stage 2 FROM alpine:3.20 AS builder 3 4 - # install build dependencies + CA certs for TLS 5 RUN apk add --no-cache curl xz ca-certificates 6 7 # download zig 0.15.2 based on architecture 8 - RUN ARCH=$(uname -m) && \ 9 - if [ "$ARCH" = "x86_64" ]; then ZIG_ARCH="x86_64"; \ 10 - elif [ "$ARCH" = "aarch64" ]; then ZIG_ARCH="aarch64"; \ 11 - else echo "Unsupported arch: $ARCH" && exit 1; fi && \ 12 - curl -L "https://ziglang.org/download/0.15.2/zig-${ZIG_ARCH}-linux-0.15.2.tar.xz" | \ 13 tar -xJ -C /usr/local && \ 14 ln -s /usr/local/zig-${ZIG_ARCH}-linux-0.15.2/zig /usr/local/bin/zig 15 ··· 19 20 RUN zig build -Doptimize=ReleaseFast 21 22 - # copy facil.io shared library from zig cache 23 RUN mkdir -p /build/lib && \ 24 - find / -name "libfacil.io.so" 2>/dev/null -exec cp {} /build/lib/ \; 25 26 - # Runtime stage 27 FROM alpine:3.20 28 29 - RUN apk add --no-cache libstdc++ 30 31 WORKDIR /app 32 33 - # copy binary and shared library 34 COPY --from=builder /build/zig-out/bin/prefect-server /app/ 35 COPY --from=builder /build/lib/ /app/lib/ 36 37 - # set library path 38 ENV LD_LIBRARY_PATH=/app/lib 39 ENV PREFECT_SERVER_PORT=4200 40 ENV PREFECT_SERVER_LOGGING_LEVEL=INFO 41 42 EXPOSE 4200 43 44 CMD ["/app/prefect-server"]
··· 1 # Build stage 2 FROM alpine:3.20 AS builder 3 4 + # install build dependencies 5 RUN apk add --no-cache curl xz ca-certificates 6 7 # download zig 0.15.2 based on architecture 8 + ARG TARGETARCH 9 + RUN case "$TARGETARCH" in \ 10 + amd64) ZIG_ARCH="x86_64" ;; \ 11 + arm64) ZIG_ARCH="aarch64" ;; \ 12 + *) echo "Unsupported arch: $TARGETARCH" && exit 1 ;; \ 13 + esac && \ 14 + curl -fsSL "https://ziglang.org/download/0.15.2/zig-${ZIG_ARCH}-linux-0.15.2.tar.xz" | \ 15 tar -xJ -C /usr/local && \ 16 ln -s /usr/local/zig-${ZIG_ARCH}-linux-0.15.2/zig /usr/local/bin/zig 17 ··· 21 22 RUN zig build -Doptimize=ReleaseFast 23 24 + # find and copy facil.io shared library 25 RUN mkdir -p /build/lib && \ 26 + find /build/.zig-cache -name "libfacil.io.so" -exec cp {} /build/lib/ \; 27 28 + # Runtime stage - minimal image 29 FROM alpine:3.20 30 31 + # libstdc++ required for zig runtime, curl for healthcheck 32 + RUN apk add --no-cache libstdc++ curl 33 34 WORKDIR /app 35 36 COPY --from=builder /build/zig-out/bin/prefect-server /app/ 37 COPY --from=builder /build/lib/ /app/lib/ 38 39 ENV LD_LIBRARY_PATH=/app/lib 40 + ENV PREFECT_SERVER_API_HOST=0.0.0.0 41 ENV PREFECT_SERVER_PORT=4200 42 ENV PREFECT_SERVER_LOGGING_LEVEL=INFO 43 44 EXPOSE 4200 45 + 46 + HEALTHCHECK --interval=10s --timeout=3s --start-period=5s --retries=3 \ 47 + CMD curl -f http://localhost:4200/api/health || exit 1 48 49 CMD ["/app/prefect-server"]
+27 -4
README.md
··· 1 # prefect-server 2 3 - zig implementation of [prefect server](https://github.com/prefecthq/prefect) - single binary, ~10x faster than python. 4 5 supports sqlite/postgres for storage and memory/redis for messaging. see [ROADMAP.md](ROADMAP.md) for implementation status. 6 ··· 8 9 ```bash 10 just dev # build and run with debug logging 11 - just test # run api tests against local server 12 just docker-test # build and test in docker 13 - just bench-compare # benchmark against python server 14 ``` 15 16 requires [zig 0.15](https://ziglang.org/) and [just](https://github.com/casey/just). run `just --list` for all commands. 17 18 ## testing 19 20 - integration tests use the official [prefect python client](https://github.com/prefecthq/prefect) to validate API compatibility. see [`scripts/`](scripts/) for test harnesses. 21 22 ## configuration 23
··· 1 # prefect-server 2 3 + zig implementation of [prefect server](https://github.com/prefecthq/prefect) - single binary, ~3x faster, ~20x less memory. 4 5 supports sqlite/postgres for storage and memory/redis for messaging. see [ROADMAP.md](ROADMAP.md) for implementation status. 6 ··· 8 9 ```bash 10 just dev # build and run with debug logging 11 + just test # run functional tests (requires running server) 12 + just bench-compare # benchmark zig vs python (throughput, latency, memory) 13 just docker-test # build and test in docker 14 ``` 15 16 requires [zig 0.15](https://ziglang.org/) and [just](https://github.com/casey/just). run `just --list` for all commands. 17 18 ## testing 19 20 + **functional tests** (`scripts/test-api-sequence`) - verify API correctness by exercising all endpoints. includes scheduler integration tests with intentional delays. 21 + 22 + **performance benchmark** (`scripts/benchmark`) - measure throughput (requests/second), latency percentiles (p50/p95/p99), and memory usage. 23 + 24 + ```bash 25 + just bench # benchmark zig server 26 + just bench-compare # compare zig vs python 27 + just bench-matrix # test all db × broker combinations 28 + ``` 29 + 30 + ## docker 31 + 32 + published to [atcr.io](https://atcr.io) (AT Protocol container registry): 33 + 34 + ```bash 35 + docker pull atcr.io/zzstoatzz.io/prefect-server:latest 36 + docker run -p 4200:4200 atcr.io/zzstoatzz.io/prefect-server:latest 37 + ``` 38 + 39 + to publish: 40 + ```bash 41 + just docker-publish # push :latest 42 + just docker-publish v0.1.0 # push specific tag 43 + ``` 44 45 ## configuration 46
+44 -10
ROADMAP.md
··· 85 - [x] DELETE /api/block_documents/{id} 86 - [ ] POST /api/block_capabilities/ 87 88 - ### concurrency 89 - - [ ] POST /api/concurrency_limits/ 90 - - [ ] POST /api/concurrency_limits/filter 91 - [ ] POST /api/v2/concurrency_limits/ 92 93 ### artifacts 94 - [ ] POST /api/artifacts/ ··· 121 122 ## orchestration 123 124 - - [x] global bookkeeping transforms (SetStartTime, SetEndTime, IncrementRunTime, IncrementRunCount) 125 - - [ ] state transition rules (prevent invalid transitions) 126 - - [ ] retry policies 127 - - [ ] concurrency limits 128 129 ## background services 130 131 - [x] event_persister (batched event writes, deduplication, retention trimming) 132 - [x] event_broadcaster (websocket fan-out to /api/events/out subscribers) 133 - - [ ] scheduler (create flow runs from deployment schedules) 134 - [ ] late_runs (mark runs as late) 135 - [ ] foreman (infrastructure management) 136 - [ ] cancellation_cleanup (clean up cancelled runs) ··· 214 - table, linked to deployment 215 - CRUD API for managing schedules 216 217 - 6. **scheduler service** - creates runs from schedules (NEXT) 218 - background service 219 - queries deployments needing runs 220 - creates flow_runs in SCHEDULED state 221 222 - 7. **get_scheduled_flow_runs** - workers poll for work 223 - `POST /work_pools/{name}/get_scheduled_flow_runs` 224 - returns runs ready to execute 225 226 ### what's working (~5x faster than python) 227 - flow/flow_run/task_run lifecycle ··· 229 - variables (full CRUD) 230 - work pools, work queues, workers (full CRUD + heartbeat) 231 - deployments + schedules (full CRUD, `.serve()` support) 232 - events (ingest via websocket, persist, broadcast with filtered backfill) 233 - dual database backends (sqlite/postgres) 234 - dual message brokers (memory/redis)
··· 85 - [x] DELETE /api/block_documents/{id} 86 - [ ] POST /api/block_capabilities/ 87 88 + ### concurrency (v2 only - skip v1 API) 89 - [ ] POST /api/v2/concurrency_limits/ 90 + - [ ] POST /api/v2/concurrency_limits/filter 91 + - [ ] lease-based slot management (increment/decrement with lease, renew, TTL) 92 + - note: v1 tag-based concurrency was reconstituted to use v2 in python prefect. 93 + we should only implement the v2 API to avoid the complexity of supporting both. 94 95 ### artifacts 96 - [ ] POST /api/artifacts/ ··· 123 124 ## orchestration 125 126 + ### global transforms (bookkeeping) 127 + - [x] SetStartTime - set start_time when first entering RUNNING 128 + - [x] SetEndTime - set end_time when entering terminal state 129 + - [x] IncrementRunTime - accumulate total_run_time when exiting RUNNING 130 + - [x] IncrementRunCount - increment run_count when entering RUNNING 131 + 132 + ### orchestration rules framework 133 + - [x] ResponseStatus enum (ACCEPT, REJECT, WAIT, ABORT) 134 + - [x] OrchestrationRule abstraction (from_states, to_states, before_transition) 135 + - [x] Policy composition (CoreFlowPolicy as ordered list of rules) 136 + - [x] applyPolicy function to run rules in order 137 + 138 + ### flow run rules (CoreFlowPolicy) 139 + - [x] PreventPendingTransitions - reject PENDING/RUNNING/CANCELLING/CANCELLED → PENDING 140 + - [x] CopyScheduledTime - copy scheduled_time when SCHEDULED → PENDING 141 + - [x] WaitForScheduledTime - delay transition if scheduled_time in future 142 + - [ ] PreventDuplicateTransitions - idempotency via transition_id 143 + - [ ] HandleFlowTerminalStateTransitions - prevent leaving completed with persisted data 144 + - [ ] RetryFailedFlows - govern retry transitions 145 + - [ ] HandlePausingFlows - govern RUNNING → PAUSED 146 + - [ ] HandleResumingPausedFlows - govern PAUSED → RUNNING 147 + 148 + ### task run rules (CoreTaskPolicy) 149 + - [ ] CacheRetrieval - check for cached results before execution 150 + - [ ] CacheInsertion - write completed results to cache 151 + - [ ] PreventRunningTasksFromStoppedFlows - tasks can't run if flow isn't running 152 + - [ ] RetryFailedTasks - govern retry transitions 153 + 154 + ### other 155 + - [ ] concurrency limits (v2 only) 156 157 ## background services 158 159 - [x] event_persister (batched event writes, deduplication, retention trimming) 160 - [x] event_broadcaster (websocket fan-out to /api/events/out subscribers) 161 + - [x] scheduler (create flow runs from deployment schedules - interval + cron) 162 - [ ] late_runs (mark runs as late) 163 - [ ] foreman (infrastructure management) 164 - [ ] cancellation_cleanup (clean up cancelled runs) ··· 242 - table, linked to deployment 243 - CRUD API for managing schedules 244 245 + 6. ~~**scheduler service** - creates runs from schedules~~ ✓ 246 - background service 247 - queries deployments needing runs 248 - creates flow_runs in SCHEDULED state 249 + - supports interval + cron schedules 250 + - parameter merging (schedule overrides deployment) 251 + - idempotent via idempotency_key 252 253 + 7. ~~**get_scheduled_flow_runs** - workers poll for work~~ ✓ 254 - `POST /work_pools/{name}/get_scheduled_flow_runs` 255 - returns runs ready to execute 256 + - updates pool/deployment status to READY 257 258 ### what's working (~5x faster than python) 259 - flow/flow_run/task_run lifecycle ··· 261 - variables (full CRUD) 262 - work pools, work queues, workers (full CRUD + heartbeat) 263 - deployments + schedules (full CRUD, `.serve()` support) 264 + - scheduler service (interval + cron, idempotent, parameter merging) 265 + - get_scheduled_flow_runs (worker polling) 266 - events (ingest via websocket, persist, broadcast with filtered backfill) 267 - dual database backends (sqlite/postgres) 268 - dual message brokers (memory/redis)
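
the rules framework in the ROADMAP above (ResponseStatus, from_states/to_states, ordered policy, applyPolicy) can be sketched in python; a conceptual sketch only - the real code is zig and these names mirror the checklist, not the actual API:

```python
from dataclasses import dataclass
from enum import Enum

class ResponseStatus(Enum):
    ACCEPT = "accept"
    REJECT = "reject"
    WAIT = "wait"
    ABORT = "abort"

@dataclass
class Transition:
    from_type: str
    to_type: str

class Rule:
    # a rule fires only when the transition matches both state sets
    from_states: set = set()
    to_states: set = set()
    def before_transition(self, t: Transition) -> ResponseStatus:
        return ResponseStatus.ACCEPT

class PreventPendingTransitions(Rule):
    # reject PENDING/RUNNING/CANCELLING/CANCELLED -> PENDING
    from_states = {"PENDING", "RUNNING", "CANCELLING", "CANCELLED"}
    to_states = {"PENDING"}
    def before_transition(self, t: Transition) -> ResponseStatus:
        return ResponseStatus.REJECT

def apply_policy(rules: list, t: Transition) -> ResponseStatus:
    # run rules in order; the first non-ACCEPT response wins
    for rule in rules:
        if t.from_type in rule.from_states and t.to_type in rule.to_states:
            status = rule.before_transition(t)
            if status is not ResponseStatus.ACCEPT:
                return status
    return ResponseStatus.ACCEPT
```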
+14
build.zig
··· 25 .optimize = optimize, 26 }); 27 28 const exe = b.addExecutable(.{ 29 .name = "prefect-server", 30 .root_module = b.createModule(.{ ··· 36 .{ .name = "zqlite", .module = zqlite.module("zqlite") }, 37 .{ .name = "zap", .module = zap.module("zap") }, 38 .{ .name = "pg", .module = pg.module("pg") }, 39 }, 40 }), 41 }); ··· 65 .{ .name = "zqlite", .module = zqlite.module("zqlite") }, 66 .{ .name = "zap", .module = zap.module("zap") }, 67 .{ .name = "pg", .module = pg.module("pg") }, 68 }, 69 }), 70 });
··· 25 .optimize = optimize, 26 }); 27 28 + const cron = b.dependency("cron", .{ 29 + .target = target, 30 + .optimize = optimize, 31 + }); 32 + 33 + const redis_dep = b.dependency("redis", .{ 34 + .target = target, 35 + .optimize = optimize, 36 + }); 37 + 38 const exe = b.addExecutable(.{ 39 .name = "prefect-server", 40 .root_module = b.createModule(.{ ··· 46 .{ .name = "zqlite", .module = zqlite.module("zqlite") }, 47 .{ .name = "zap", .module = zap.module("zap") }, 48 .{ .name = "pg", .module = pg.module("pg") }, 49 + .{ .name = "cron", .module = cron.module("cron") }, 50 + .{ .name = "redis", .module = redis_dep.module("redis") }, 51 }, 52 }), 53 }); ··· 77 .{ .name = "zqlite", .module = zqlite.module("zqlite") }, 78 .{ .name = "zap", .module = zap.module("zap") }, 79 .{ .name = "pg", .module = pg.module("pg") }, 80 + .{ .name = "cron", .module = cron.module("cron") }, 81 + .{ .name = "redis", .module = redis_dep.module("redis") }, 82 }, 83 }), 84 });
+8
build.zig.zon
··· 20 .url = "git+https://github.com/karlseguin/pg.zig?ref=master#f8d4892387fbad2abdf775783e101e50a7114335", 21 .hash = "pg-0.0.0-Wp_7gag6BgD_QAZrPhNNEGpnUZR_LEkKT40Ura3p-4yX", 22 }, 23 }, 24 .paths = .{ 25 "build.zig",
··· 20 .url = "git+https://github.com/karlseguin/pg.zig?ref=master#f8d4892387fbad2abdf775783e101e50a7114335", 21 .hash = "pg-0.0.0-Wp_7gag6BgD_QAZrPhNNEGpnUZR_LEkKT40Ura3p-4yX", 22 }, 23 + .cron = .{ 24 + .url = "https://tangled.sh/zzstoatzz.io/cron/archive/main", 25 + .hash = "cron-0.1.0-gPeJt-VgAAA-aoA699DBG_9FDHEYay0gVXTRvxCp_Ew0", 26 + }, 27 + .redis = .{ 28 + .url = "https://tangled.sh/zzstoatzz.io/redis/archive/main", 29 + .hash = "redis-0.1.0-NGPovmBFAgA-97WY7lE9Wr1hqrZtZhhiILKgZFcc09Nt", 30 + }, 31 }, 32 .paths = .{ 33 "build.zig",
+1 -1
compose.yml
··· 26 condition: service_healthy 27 required: false 28 healthcheck: 29 - test: ["CMD", "wget", "-q", "--spider", "http://127.0.0.1:4200/api/health"] 30 interval: 5s 31 timeout: 3s 32 retries: 3
··· 26 condition: service_healthy 27 required: false 28 healthcheck: 29 + test: ["CMD", "curl", "-f", "http://localhost:4200/api/health"] 30 interval: 5s 31 timeout: 3s 32 retries: 3
+59
docs/docker.md
···
··· 1 + # docker 2 + 3 + ## registry 4 + 5 + images are published to [atcr.io](https://atcr.io) - a distributed container registry built on AT Protocol. 6 + 7 + ``` 8 + atcr.io/zzstoatzz.io/prefect-server:latest 9 + ``` 10 + 11 + ## publishing 12 + 13 + ```bash 14 + just docker-publish # push :latest 15 + just docker-publish v0.1.0 # push specific tag 16 + ``` 17 + 18 + requires authentication via `docker-credential-atcr`: 19 + ```bash 20 + # install credential helper 21 + curl -fsSL https://github.com/mary-ext/atcr/releases/latest/download/docker-credential-atcr-darwin-arm64 \ 22 + -o /usr/local/bin/docker-credential-atcr && chmod +x /usr/local/bin/docker-credential-atcr 23 + 24 + # add to ~/.docker/config.json 25 + # "credHelpers": { "atcr.io": "atcr" } 26 + 27 + # first push triggers browser auth 28 + ATCR_AUTO_AUTH=1 docker push atcr.io/zzstoatzz.io/prefect-server:latest 29 + ``` 30 + 31 + ## CI (future) 32 + 33 + ATCR.io uses device authorization (browser-based) for initial auth, then stores a device token: 34 + ``` 35 + atcr_device_<random> 36 + ``` 37 + 38 + **potential CI approach** (not yet tested): 39 + 1. authenticate locally to generate device token 40 + 2. extract token: `docker-credential-atcr get <<< "atcr.io"` 41 + 3. store as CI secret (e.g., `ATCR_TOKEN`) 42 + 4. in CI, write credentials before push (note double quotes so `$ATCR_TOKEN` expands): 43 + ```bash 44 + echo "{\"ServerURL\":\"atcr.io\",\"Username\":\"zzstoatzz.io\",\"Secret\":\"$ATCR_TOKEN\"}" | docker-credential-atcr store 45 + ``` 46 + 47 + **open questions:** 48 + - token expiration/rotation policy 49 + - whether ATCR.io will add app passwords or CI-specific auth 50 + - official GitHub Actions support 51 + 52 + check [atcr.io](https://atcr.io) for updates on CI/CD support. 53 + 54 + ## image details 55 + 56 + - base: `alpine:3.20` 57 + - size: ~47MB 58 + - healthcheck: `curl -f http://localhost:4200/api/health` 59 + - multi-arch: amd64, arm64
+69
docs/scratch/timestamps.md
···
··· 1 + # timestamp handling in prefect 2 + 3 + ## python implementation 4 + 5 + ### storage 6 + - **PostgreSQL**: `TIMESTAMP(timezone=True)` - native timezone-aware 7 + - **SQLite**: `DATETIME()` naive, manually converts to/from UTC 8 + 9 + ### format 10 + all timestamps are UTC timezone-aware. JSON serialization uses ISO 8601: 11 + ``` 12 + 2024-01-22T15:30:45.123456+00:00 13 + ``` 14 + 15 + ### key fields for flow_run 16 + - `expected_start_time` - when the run was originally scheduled 17 + - `next_scheduled_start_time` - **used for scheduling queries** (we're missing this!) 18 + - `start_time` - actual start 19 + - `end_time` - actual end 20 + - `state_timestamp` - when state last changed 21 + 22 + ### get_scheduled_flow_runs query 23 + ```sql 24 + WHERE fr.state_type = 'SCHEDULED' 25 + AND fr.next_scheduled_start_time <= :scheduled_before 26 + ORDER BY fr.next_scheduled_start_time ASC 27 + ``` 28 + 29 + ## our implementation 30 + 31 + ### storage 32 + - **SQLite**: `TEXT` with format `2024-01-22T15:30:45.123456Z` 33 + - using SQLite `strftime('%Y-%m-%dT%H:%M:%fZ', 'now')` for defaults 34 + 35 + ### issues 36 + 37 + 1. **missing `next_scheduled_start_time`** - we filter on `expected_start_time` but python uses `next_scheduled_start_time` 38 + 39 + 2. **string comparison is fragile** - we do `expected_start_time <= ?` as string comparison 40 + - our format: `2024-01-22T15:30:45Z` (T separator, Z suffix) 41 + - client format: `2024-01-22 15:30:45+00:00` (space, +00:00 suffix) 42 + - ASCII: `T` (84) > space (32), so comparison fails 43 + 44 + 3. **bandaid fix** - normalizing client timestamps (space→T, +00:00→Z) works but is fragile 45 + 46 + ### proper fix 47 + 48 + 1. add `next_scheduled_start_time` column to `flow_run` 49 + 2. parse timestamps to integers (epoch microseconds) for comparison 50 + 3. or store timestamps as integers in DB for proper numeric comparison 51 + 52 + ## .serve() vs workers 53 + 54 + ### .serve() (Runner) 55 + - creates deployment, starts local polling loop 56 + - calls `POST /deployments/get_scheduled_flow_runs` every N seconds 57 + - executes flows locally in the same process 58 + - **NOT a worker** 59 + 60 + ### workers 61 + - standalone daemon process 62 + - connects to work pools/queues 63 + - work pool workers: `POST /work_pools/{name}/get_scheduled_flow_runs` 64 + - task workers: WebSocket `WS /task_runs/subscriptions/scheduled` 65 + 66 + ### we test 67 + - `test_cron_scheduler` - server-side scheduler creates runs (correct) 68 + - `test_worker_execution` - mislabeled! tests `.serve()` Runner, not a worker 69 + - `test_serve_with_schedule` - verifies deployment has schedule attached (correct)
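
the string-comparison failure described in timestamps.md is easy to reproduce, along with the epoch-integer fix; a python sketch (the `to_epoch_us` helper is illustrative, not server code):

```python
from datetime import datetime, timezone

server = "2024-01-22T15:30:45Z"       # our format: T separator, Z suffix
client = "2024-01-22 15:30:45+00:00"  # client format: space, +00:00 offset

# same instant, but lexicographic comparison disagrees:
# ord("T") == 84 > ord(" ") == 32, so server > client as strings
assert not (server <= client)

def to_epoch_us(ts: str) -> int:
    # parse to epoch microseconds so comparison is numeric, not textual
    dt = datetime.fromisoformat(ts.replace("Z", "+00:00"))
    return int(dt.astimezone(timezone.utc).timestamp() * 1_000_000)

# numeric comparison agrees regardless of the source formatting
assert to_epoch_us(server) == to_epoch_us(client)
```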
+1 -1
docs/services.md
··· 1 # services 2 3 - background workers managed by `services/mod.zig`. 4 5 ## registry 6
··· 1 # services 2 3 + background workers managed by `services.zig`. 4 5 ## registry 6
+29 -7
justfile
··· 53 PREFECT_API_URL=http://localhost:4200/api ./scripts/test-flow 54 PREFECT_API_URL=http://localhost:4200/api ./scripts/test-blocks 55 56 - # run all tests 57 test-all: test test-serve test-client test-broker test-db 58 59 # start dev services (redis + postgres) ··· 94 docker compose run --rm test 95 docker compose down 96 97 - # benchmark against local server 98 - bench workload="scripts/test-api-sequence" iterations="3": 99 - ./scripts/benchmark --server zig --workload {{workload}} --iterations {{iterations}} 100 101 - # benchmark comparison (zig vs python) 102 - bench-compare workload="scripts/test-api-sequence" iterations="3": 103 - ./scripts/benchmark --compare --workload {{workload}} --iterations {{iterations}} 104 105 # clean build artifacts 106 clean:
··· 53 PREFECT_API_URL=http://localhost:4200/api ./scripts/test-flow 54 PREFECT_API_URL=http://localhost:4200/api ./scripts/test-blocks 55 56 + # run worker integration tests (scheduler + worker execution) 57 + test-worker: 58 + PREFECT_API_URL=http://localhost:4200/api ./scripts/test-worker 59 + 60 + # run full test matrix (all db × broker combinations) 61 + test-matrix: 62 + ./scripts/test-matrix 63 + 64 + # run quick test (sqlite + memory only) 65 + test-quick: 66 + ./scripts/test-matrix --quick 67 + 68 + # run all tests (legacy - prefer test-matrix) 69 test-all: test test-serve test-client test-broker test-db 70 71 # start dev services (redis + postgres) ··· 106 docker compose run --rm test 107 docker compose down 108 109 + # performance benchmark (zig server) 110 + bench: 111 + ./scripts/benchmark --server zig 112 113 + # performance benchmark comparison (zig vs python) 114 + bench-compare: 115 + ./scripts/benchmark --compare 116 + 117 + # performance benchmark matrix (all db × broker combinations) 118 + bench-matrix: 119 + ./scripts/benchmark --matrix 120 + 121 + # publish docker image to atcr.io 122 + docker-publish tag="latest": 123 + docker build -t prefect-server:{{tag}} . 124 + docker tag prefect-server:{{tag}} atcr.io/zzstoatzz.io/prefect-server:{{tag}} 125 + docker push atcr.io/zzstoatzz.io/prefect-server:{{tag}} 126 127 # clean build artifacts 128 clean:
+2 -6
loq.toml
··· 6 7 [[rules]] 8 path = "scripts/benchmark" 9 - max_lines = 575 10 11 [[rules]] 12 path = "scripts/test-api-sequence" 13 - max_lines = 1000 14 - 15 - [[rules]] 16 - path = "src/broker/redis.zig" 17 - max_lines = 1020 18 19 [[rules]] 20 path = "src/api/work_pools.zig"
··· 6 7 [[rules]] 8 path = "scripts/benchmark" 9 + max_lines = 650 10 11 [[rules]] 12 path = "scripts/test-api-sequence" 13 + max_lines = 1400 14 15 [[rules]] 16 path = "src/api/work_pools.zig"
+343 -271
scripts/benchmark
··· 4 # dependencies = ["httpx", "rich", "psutil"] 5 # /// 6 """ 7 - Instrumented benchmark runner for prefect-server. 8 9 - Compares zig vs python servers with: 10 - - Timing breakdown by test section 11 - - Memory usage tracking 12 - - Request count per section 13 14 Usage: 15 ./scripts/benchmark --server zig 16 ./scripts/benchmark --server python 17 ./scripts/benchmark --compare 18 - ./scripts/benchmark --compare --iterations 3 19 - ./scripts/benchmark --server zig --db-backend postgres --broker-backend redis 20 - ./scripts/benchmark --matrix # runs all db × broker combinations 21 """ 22 23 import argparse 24 - import json 25 import os 26 import shutil 27 import signal 28 import subprocess 29 - import sys 30 import time 31 - from dataclasses import dataclass, field 32 from pathlib import Path 33 from typing import Literal 34 ··· 41 42 SERVER_PORT = 4200 43 API_URL = f"http://localhost:{SERVER_PORT}/api" 44 - HEALTH_ENDPOINT = f"{API_URL}/health" 45 46 47 @dataclass 48 - class SectionResult: 49 name: str 50 - duration_ms: float 51 requests: int 52 - passed: bool 53 54 55 @dataclass 56 class BenchmarkResult: 57 server: str 58 success: bool 59 - total_duration_ms: float 60 - total_requests: int 61 memory_mb: float 62 - sections: list[SectionResult] = field(default_factory=list) 63 - error: str | None = None 64 db_backend: str = "sqlite" 65 broker_backend: str = "memory" 66 67 68 def wait_for_health(timeout: float = 30.0) -> bool: ··· 70 start = time.time() 71 while time.time() - start < timeout: 72 try: 73 - resp = httpx.get(HEALTH_ENDPOINT, timeout=2.0) 74 if resp.status_code == 200: 75 return True 76 except httpx.RequestError: ··· 95 96 97 def get_process_memory_mb(pid: int) -> float: 98 - """Get memory usage of a process in MB.""" 99 try: 100 proc = psutil.Process(pid) 101 - return proc.memory_info().rss / (1024 * 1024) 102 except (psutil.NoSuchProcess, psutil.AccessDenied): 103 return 0.0 104 ··· 132 console.print(f"[red]error: zig binary not found at 
{binary}[/red]") 133 return False 134 135 - # clean db for sqlite 136 if self.db_backend == "sqlite": 137 db_path = self.project_root / "prefect.db" 138 if db_path.exists(): ··· 142 env["PREFECT_SERVER_LOGGING_LEVEL"] = "WARNING" 143 env["PREFECT_DATABASE_BACKEND"] = self.db_backend 144 env["PREFECT_BROKER_BACKEND"] = self.broker_backend 145 - 146 - # postgres config - use full URL 147 if self.db_backend == "postgres": 148 env.setdefault("PREFECT_DATABASE_URL", "postgresql://prefect:prefect@localhost:5432/prefect") 149 150 - # redis config - not currently used by zig server but set for consistency 151 - if self.broker_backend == "redis": 152 - pass # zig uses default localhost:6379 153 - 154 self.process = subprocess.Popen( 155 [str(binary)], 156 cwd=self.project_root, ··· 191 return True 192 193 def get_memory_mb(self) -> float: 194 - """Get server memory usage.""" 195 if self.process: 196 return get_process_memory_mb(self.process.pid) 197 return 0.0 ··· 207 kill_port(SERVER_PORT) 208 209 210 - def run_workload(project_root: Path, workload_script: str = "scripts/test-api-sequence") -> dict | None: 211 - """Run workload script and return JSON results.""" 212 - workload = project_root / workload_script 213 214 - env = os.environ.copy() 215 - env["PREFECT_API_URL"] = API_URL 216 - env["PREFECT_LOGGING_LEVEL"] = "WARNING" 217 - env.pop("VIRTUAL_ENV", None) 218 - env.pop("UV_RUN_RECURSION_DEPTH", None) 219 220 - try: 221 - result = subprocess.run( 222 - [str(workload), "--json"], 223 - cwd=project_root, 224 - env=env, 225 - capture_output=True, 226 - text=True, 227 - timeout=300, 228 - ) 229 - 230 - if result.returncode != 0: 231 - # try to parse JSON anyway (might have partial results) 232 pass 233 234 - # find JSON in output (might have other output mixed in) 235 - for line in result.stdout.strip().split("\n"): 236 - if line.startswith("{"): 237 - return json.loads(line) 238 239 - return None 240 241 - except (subprocess.TimeoutExpired, json.JSONDecodeError) as e: 
242 - console.print(f"[red]workload error: {e}[/red]") 243 - return None 244 245 246 - def benchmark_single( 247 - server_type: Literal["zig", "python"], 248 - iterations: int = 1, 249 - workload: str = "scripts/test-api-sequence", 250 - db_backend: str = "sqlite", 251 - broker_backend: str = "memory", 252 - ) -> list[BenchmarkResult]: 253 - """Run benchmark against a single server.""" 254 - results = [] 255 256 - backend_label = f"({db_backend}/{broker_backend})" if server_type == "zig" else "" 257 - console.print(f"\n[bold]benchmarking {server_type} server {backend_label}[/bold]") 258 259 - manager = ServerManager(server_type, db_backend, broker_backend) 260 261 - for i in range(iterations): 262 - if iterations > 1: 263 - console.print(f" iteration {i + 1}/{iterations}...", end=" ") 264 - else: 265 - console.print(f" starting server...", end=" ") 266 267 - if not manager.start(): 268 - results.append(BenchmarkResult( 269 - server=server_type, 270 success=False, 271 - total_duration_ms=0, 272 - total_requests=0, 273 - memory_mb=0, 274 - error="failed to start server", 275 - db_backend=db_backend, 276 - broker_backend=broker_backend, 277 - )) 278 - console.print("[red]failed[/red]") 279 - continue 280 281 - console.print("running workload...", end=" ") 282 283 - # measure memory before workload 284 - memory_before = manager.get_memory_mb() 285 286 - data = run_workload(manager.project_root, workload) 287 - 288 - # measure memory after workload 289 - memory_after = manager.get_memory_mb() 290 - memory_mb = max(memory_before, memory_after) 291 - 292 - manager.stop() 293 294 - if data is None: 295 - results.append(BenchmarkResult( 296 - server=server_type, 297 - success=False, 298 - total_duration_ms=0, 299 - total_requests=0, 300 - memory_mb=memory_mb, 301 - error="workload failed", 302 - db_backend=db_backend, 303 - broker_backend=broker_backend, 304 - )) 305 - console.print("[red]failed[/red]") 306 - continue 307 308 - sections = [ 309 - SectionResult( 310 - 
name=s["name"], 311 - duration_ms=s["duration_ms"], 312 - requests=s["requests"], 313 - passed=s["passed"], 314 ) 315 - for s in data.get("sections", []) 316 - ] 317 318 - results.append(BenchmarkResult( 319 - server=server_type, 320 - success=data.get("passed", False), 321 - total_duration_ms=data.get("total_duration_ms", 0), 322 - total_requests=data.get("total_requests", 0), 323 memory_mb=memory_mb, 324 - sections=sections, 325 - db_backend=db_backend, 326 - broker_backend=broker_backend, 327 - )) 328 329 - console.print(f"[green]{data.get('total_duration_ms', 0):.0f}ms[/green] ({memory_mb:.1f}MB)") 330 - 331 - return results 332 333 334 - def print_comparison(zig_results: list[BenchmarkResult], python_results: list[BenchmarkResult]): 335 - """Print detailed comparison.""" 336 337 - def avg(results: list[BenchmarkResult], key: str) -> float: 338 - # use results with valid timing data, not just fully successful ones 339 - with_data = [r for r in results if getattr(r, key, 0) > 0] 340 - if not with_data: 341 - return 0 342 - return sum(getattr(r, key) for r in with_data) / len(with_data) 343 344 - def avg_section(results: list[BenchmarkResult], section_name: str) -> float: 345 - successful = [r for r in results if r.success] 346 - if not successful: 347 - return 0 348 - total = 0 349 - count = 0 350 - for r in successful: 351 - for s in r.sections: 352 - if s.name == section_name: 353 - total += s.duration_ms 354 - count += 1 355 - return total / count if count else 0 356 357 - # summary table 358 - console.print() 359 - summary = Table(title="summary") 360 - summary.add_column("metric", style="cyan") 361 - summary.add_column("zig", justify="right") 362 - summary.add_column("python", justify="right") 363 - summary.add_column("zig advantage", justify="right") 364 365 - zig_time = avg(zig_results, "total_duration_ms") 366 - python_time = avg(python_results, "total_duration_ms") 367 - speedup = python_time / zig_time if zig_time > 0 else 0 368 369 - summary.add_row( 
370 - "time", 371 - f"{zig_time:.0f}ms", 372 - f"{python_time:.0f}ms", 373 - f"[green]{speedup:.1f}x faster[/green]" if speedup >= 1 else f"[yellow]{1/speedup:.1f}x slower[/yellow]" if speedup > 0 else "n/a", 374 - ) 375 376 - zig_mem = avg(zig_results, "memory_mb") 377 - python_mem = avg(python_results, "memory_mb") 378 - mem_ratio = python_mem / zig_mem if zig_mem > 0 else 0 379 380 - if mem_ratio >= 1: 381 - mem_advantage = f"[green]{mem_ratio:.1f}x smaller[/green]" 382 - elif mem_ratio > 0: 383 - mem_advantage = f"[yellow]{1/mem_ratio:.1f}x larger[/yellow]" 384 - else: 385 - mem_advantage = "n/a" 386 387 - summary.add_row("memory", f"{zig_mem:.1f}MB", f"{python_mem:.1f}MB", mem_advantage) 388 389 - zig_reqs = avg(zig_results, "total_requests") 390 - python_reqs = avg(python_results, "total_requests") 391 - summary.add_row("requests", f"{zig_reqs:.0f}", f"{python_reqs:.0f}", "") 392 393 - console.print(summary) 394 395 - # section breakdown (show if both have sections data) 396 - zig_has_sections = zig_results and zig_results[0].sections 397 - python_has_sections = python_results and python_results[0].sections 398 - if zig_has_sections and python_has_sections: 399 - console.print() 400 - breakdown = Table(title="timing breakdown by section") 401 - breakdown.add_column("section", style="cyan") 402 - breakdown.add_column("zig", justify="right") 403 - breakdown.add_column("python", justify="right") 404 - breakdown.add_column("speedup", justify="right") 405 406 - section_names = [s.name for s in zig_results[0].sections] 407 - for name in section_names: 408 - zig_ms = avg_section(zig_results, name) 409 - python_ms = avg_section(python_results, name) 410 - section_speedup = python_ms / zig_ms if zig_ms > 0 else 0 411 412 - speedup_str = "" 413 - if section_speedup > 1.5: 414 - speedup_str = f"[green]{section_speedup:.1f}x[/green]" 415 - elif section_speedup > 1: 416 - speedup_str = f"[dim]{section_speedup:.1f}x[/dim]" 417 - elif section_speedup > 0: 418 - speedup_str 
= f"[yellow]{section_speedup:.1f}x[/yellow]" 419 420 - breakdown.add_row(name, f"{zig_ms:.1f}ms", f"{python_ms:.1f}ms", speedup_str) 421 422 - console.print(breakdown) 423 424 - # final verdict 425 - if zig_time > 0 and python_time > 0 and speedup > 0: 426 - if speedup >= 1: 427 - console.print(f"\n[bold green]zig is {speedup:.1f}x faster overall[/bold green]") 428 - else: 429 - console.print(f"\n[bold yellow]python is {1/speedup:.1f}x faster overall[/bold yellow]") 430 431 432 def ensure_docker_services(db_backend: str, broker_backend: str, project_root: Path) -> bool: ··· 447 capture_output=True, 448 check=True, 449 ) 450 - # wait for services to be ready 451 time.sleep(2 if "postgres" in services else 1) 452 return True 453 except subprocess.CalledProcessError as e: ··· 455 return False 456 457 458 - def print_matrix_results(results: list[BenchmarkResult]): 459 """Print matrix benchmark results.""" 460 console.print() 461 table = Table(title="benchmark matrix (zig server)") 462 table.add_column("db", style="cyan") 463 table.add_column("broker", style="cyan") 464 - table.add_column("time", justify="right") 465 table.add_column("memory", justify="right") 466 table.add_column("status", justify="center") 467 468 for r in results: 469 - status = "[green]✓[/green]" if r.success else f"[red]✗ {r.error or 'failed'}[/red]" 470 - table.add_row( 471 - r.db_backend, 472 - r.broker_backend, 473 - f"{r.total_duration_ms:.0f}ms" if r.success else "-", 474 - f"{r.memory_mb:.1f}MB" if r.success else "-", 475 - status, 476 - ) 477 478 console.print(table) 479 ··· 481 def main(): 482 parser = argparse.ArgumentParser(description="benchmark prefect servers") 483 parser.add_argument("--server", choices=["zig", "python"], help="server to benchmark") 484 - parser.add_argument("--compare", action="store_true", help="compare both servers") 485 parser.add_argument("--matrix", action="store_true", help="run all db × broker combinations (zig only)") 486 - parser.add_argument("--iterations", 
type=int, default=1, help="iterations per server") 487 - parser.add_argument("--workload", default="scripts/test-api-sequence", help="workload script to run") 488 - parser.add_argument("--db-backend", choices=["sqlite", "postgres"], default="sqlite", help="database backend") 489 - parser.add_argument("--broker-backend", choices=["memory", "redis"], default="memory", help="broker backend") 490 491 args = parser.parse_args() 492 493 if not args.server and not args.compare and not args.matrix: 494 parser.error("must specify --server, --compare, or --matrix") 495 496 console.print(f"[bold]prefect-server benchmark[/bold]") 497 - if args.iterations > 1: 498 - console.print(f"iterations: {args.iterations}") 499 500 project_root = Path(__file__).parent.parent 501 502 if args.matrix: 503 - # run all db × broker combinations 504 combinations = [ 505 ("sqlite", "memory"), 506 ("sqlite", "redis"), ··· 508 ("postgres", "redis"), 509 ] 510 511 - all_results = [] 512 for db_backend, broker_backend in combinations: 513 if not ensure_docker_services(db_backend, broker_backend, project_root): 514 - all_results.append(BenchmarkResult( 515 - server="zig", 516 - success=False, 517 - total_duration_ms=0, 518 - total_requests=0, 519 - memory_mb=0, 520 error="docker service failed", 521 - db_backend=db_backend, 522 - broker_backend=broker_backend, 523 )) 524 continue 525 - 526 - results = benchmark_single( 527 - "zig", args.iterations, args.workload, 528 - db_backend=db_backend, broker_backend=broker_backend 529 - ) 530 - if results: 531 - # use average if multiple iterations 532 - if len(results) > 1: 533 - avg_result = BenchmarkResult( 534 - server="zig", 535 - success=all(r.success for r in results), 536 - total_duration_ms=sum(r.total_duration_ms for r in results) / len(results), 537 - total_requests=sum(r.total_requests for r in results) // len(results), 538 - memory_mb=sum(r.memory_mb for r in results) / len(results), 539 - db_backend=db_backend, 540 - broker_backend=broker_backend, 
541 - ) 542 - all_results.append(avg_result) 543 - else: 544 - all_results.append(results[0]) 545 546 - print_matrix_results(all_results) 547 548 elif args.compare: 549 if not ensure_docker_services(args.db_backend, args.broker_backend, project_root): 550 return 551 - zig_results = benchmark_single( 552 - "zig", args.iterations, args.workload, 553 - db_backend=args.db_backend, broker_backend=args.broker_backend 554 - ) 555 - python_results = benchmark_single("python", args.iterations, args.workload) 556 - print_comparison(zig_results, python_results) 557 else: 558 if not ensure_docker_services(args.db_backend, args.broker_backend, project_root): 559 return 560 - results = benchmark_single( 561 - args.server, args.iterations, args.workload, 562 - db_backend=args.db_backend, broker_backend=args.broker_backend 563 - ) 564 - if results and results[0].success: 565 - console.print(f"\n[bold]average: {sum(r.total_duration_ms for r in results) / len(results):.0f}ms[/bold]") 566 567 568 if __name__ == "__main__":
···
4 # dependencies = ["httpx", "rich", "psutil"]
5 # ///
6 """
7 + Performance benchmark for prefect-server.
8
9 + Measures actual API throughput and latency with concurrent requests.
10 + Separate from functional tests (test-api-sequence).
11
12 Usage:
13     ./scripts/benchmark --server zig
14     ./scripts/benchmark --server python
15     ./scripts/benchmark --compare
16 +     ./scripts/benchmark --matrix
17 """
18
19 import argparse
20 import os
21 import shutil
22 import signal
23 + import statistics
24 import subprocess
25 import time
26 + from dataclasses import dataclass
27 from pathlib import Path
28 from typing import Literal
29
···
36
37 SERVER_PORT = 4200
38 API_URL = f"http://localhost:{SERVER_PORT}/api"
39 +
40 + # benchmark configuration
41 + WARMUP_REQUESTS = 10
42 + BENCHMARK_DURATION = 5.0  # seconds
43 + CONCURRENT_CLIENTS = 4
44 +
45 +
46 + @dataclass
47 + class LatencyStats:
48 +     min_ms: float
49 +     avg_ms: float
50 +     p50_ms: float
51 +     p95_ms: float
52 +     p99_ms: float
53 +     max_ms: float
54
55
56 @dataclass
57 + class EndpointResult:
58     name: str
59     requests: int
60 +     rps: float
61 +     latency: LatencyStats
62
63
64 @dataclass
65 class BenchmarkResult:
66     server: str
67     success: bool
68     memory_mb: float
69 +     endpoints: list[EndpointResult]
70     db_backend: str = "sqlite"
71     broker_backend: str = "memory"
72 +     error: str | None = None
73 +
74 +
75 + def percentile(data: list[float], p: float) -> float:
76 +     """Calculate percentile of sorted data."""
77 +     if not data:
78 +         return 0.0
79 +     k = (len(data) - 1) * p / 100
80 +     f = int(k)
81 +     c = f + 1 if f + 1 < len(data) else f
82 +     return data[f] + (k - f) * (data[c] - data[f])
83 +
84 +
85 + def calculate_latency_stats(latencies_ms: list[float]) -> LatencyStats:
86 +     """Calculate latency statistics from a list of latencies."""
87 +     if not latencies_ms:
88 +         return LatencyStats(0, 0, 0, 0, 0, 0)
89 +     sorted_lat = sorted(latencies_ms)
90 +     return LatencyStats(
91 +         min_ms=sorted_lat[0],
92 +         avg_ms=statistics.mean(sorted_lat),
93 +         p50_ms=percentile(sorted_lat, 50),
94 +         p95_ms=percentile(sorted_lat, 95),
95 +         p99_ms=percentile(sorted_lat, 99),
96 +         max_ms=sorted_lat[-1],
97 +     )
98
99
100 def wait_for_health(timeout: float = 30.0) -> bool:
···
102     start = time.time()
103     while time.time() - start < timeout:
104         try:
105 +             resp = httpx.get(f"{API_URL}/health", timeout=2.0)
106             if resp.status_code == 200:
107                 return True
108         except httpx.RequestError:
···
127
128
129 def get_process_memory_mb(pid: int) -> float:
130 +     """Get memory usage of a process and all children in MB."""
131     try:
132         proc = psutil.Process(pid)
133 +         total = proc.memory_info().rss
134 +         for child in proc.children(recursive=True):
135 +             try:
136 +                 total += child.memory_info().rss
137 +             except (psutil.NoSuchProcess, psutil.AccessDenied):
138 +                 pass
139 +         return total / (1024 * 1024)
140     except (psutil.NoSuchProcess, psutil.AccessDenied):
141         return 0.0
142
···
170             console.print(f"[red]error: zig binary not found at {binary}[/red]")
171             return False
172
173         if self.db_backend == "sqlite":
174             db_path = self.project_root / "prefect.db"
175             if db_path.exists():
···
179         env["PREFECT_SERVER_LOGGING_LEVEL"] = "WARNING"
180         env["PREFECT_DATABASE_BACKEND"] = self.db_backend
181         env["PREFECT_BROKER_BACKEND"] = self.broker_backend
182         if self.db_backend == "postgres":
183             env.setdefault("PREFECT_DATABASE_URL", "postgresql://prefect:prefect@localhost:5432/prefect")
184
185         self.process = subprocess.Popen(
186             [str(binary)],
187             cwd=self.project_root,
···
222         return True
223
224     def get_memory_mb(self) -> float:
225         if self.process:
226             return get_process_memory_mb(self.process.pid)
227         return 0.0
···
237         kill_port(SERVER_PORT)
238
239
240 + def benchmark_endpoint(
241 +     client: httpx.Client,
242 +     name: str,
243 +     method: str,
244 +     path: str,
245 +     json_body: dict | None = None,
246 +     setup_fn=None,
247 + ) -> EndpointResult:
248 +     """Benchmark a single endpoint."""
249 +     # setup if needed
250 +     setup_data = {}
251 +     if setup_fn:
252 +         setup_data = setup_fn(client)
253
254 +     # format path with setup data
255 +     formatted_path = path.format(**setup_data) if setup_data else path
256
257 +     # warmup
258 +     for _ in range(WARMUP_REQUESTS):
259 +         try:
260 +             if method == "GET":
261 +                 client.get(formatted_path)
262 +             else:
263 +                 client.post(formatted_path, json=json_body or {})
264 +         except httpx.RequestError:
265             pass
266
267 +     # benchmark
268 +     latencies: list[float] = []
269 +     start_time = time.perf_counter()
270 +     request_count = 0
271
272 +     while time.perf_counter() - start_time < BENCHMARK_DURATION:
273 +         req_start = time.perf_counter()
274 +         try:
275 +             if method == "GET":
276 +                 resp = client.get(formatted_path)
277 +             else:
278 +                 resp = client.post(formatted_path, json=json_body or {})
279 +             if resp.status_code in (200, 201, 204):
280 +                 latencies.append((time.perf_counter() - req_start) * 1000)
281 +                 request_count += 1
282 +         except httpx.RequestError:
283 +             pass
284
285 +     elapsed = time.perf_counter() - start_time
286 +     rps = request_count / elapsed if elapsed > 0 else 0
287
288 +     return EndpointResult(
289 +         name=name,
290 +         requests=request_count,
291 +         rps=rps,
292 +         latency=calculate_latency_stats(latencies),
293 +     )
294
295
296 + def run_benchmark(manager: ServerManager) -> BenchmarkResult:
297 +     """Run the full benchmark suite."""
298 +     client = httpx.Client(base_url=API_URL, timeout=10.0)
299 +     endpoints: list[EndpointResult] = []
300
301 +     try:
302 +         # health endpoint (baseline)
303 +         endpoints.append(benchmark_endpoint(client, "health", "GET", "/health"))
304
305 +         # create a flow for subsequent tests
306 +         resp = client.post("/flows/", json={"name": "bench-flow"})
307 +         if resp.status_code not in (200, 201):
308 +             return BenchmarkResult(
309 +                 server=manager.server_type,
310 +                 success=False,
311 +                 memory_mb=manager.get_memory_mb(),
312 +                 endpoints=[],
313 +                 db_backend=manager.db_backend,
314 +                 broker_backend=manager.broker_backend,
315 +                 error="failed to create test flow",
316 +             )
317 +         flow_id = resp.json()["id"]
318
319 +         # create a flow run for read tests
320 +         resp = client.post("/flow_runs/", json={
321 +             "flow_id": flow_id,
322 +             "name": "bench-run",
323 +             "state": {"type": "PENDING", "name": "Pending"},
324 +         })
325 +         if resp.status_code not in (200, 201):
326 +             return BenchmarkResult(
327 +                 server=manager.server_type,
328                 success=False,
329 +                 memory_mb=manager.get_memory_mb(),
330 +                 endpoints=[],
331 +                 db_backend=manager.db_backend,
332 +                 broker_backend=manager.broker_backend,
333 +                 error="failed to create test flow run",
334 +             )
335 +         flow_run_id = resp.json()["id"]
336
337 +         # GET /flows/{id}
338 +         endpoints.append(benchmark_endpoint(
339 +             client, "GET flow", "GET", f"/flows/{flow_id}"
340 +         ))
341
342 +         # POST /flows/filter
343 +         endpoints.append(benchmark_endpoint(
344 +             client, "filter flows", "POST", "/flows/filter", {"limit": 10}
345 +         ))
346
347 +         # GET /flow_runs/{id}
348 +         endpoints.append(benchmark_endpoint(
349 +             client, "GET flow_run", "GET", f"/flow_runs/{flow_run_id}"
350 +         ))
351
352 +         # POST /flow_runs/filter
353 +         endpoints.append(benchmark_endpoint(
354 +             client, "filter flow_runs", "POST", "/flow_runs/filter", {"limit": 10}
355 +         ))
356
357 +         # POST /flow_runs/ (create)
358 +         import uuid
359 +         def create_flow_run():
360 +             return benchmark_endpoint(
361 +                 client, "create flow_run", "POST", "/flow_runs/",
362 +                 {"flow_id": flow_id, "name": f"bench-{uuid.uuid4().hex[:8]}",
363 +                  "state": {"type": "PENDING", "name": "Pending"}}
364             )
365 +         endpoints.append(create_flow_run())
366
367 +         memory_mb = manager.get_memory_mb()
368 +
369 +         return BenchmarkResult(
370 +             server=manager.server_type,
371 +             success=True,
372             memory_mb=memory_mb,
373 +             endpoints=endpoints,
374 +             db_backend=manager.db_backend,
375 +             broker_backend=manager.broker_backend,
376 +         )
377
378 +     except Exception as e:
379 +         return BenchmarkResult(
380 +             server=manager.server_type,
381 +             success=False,
382 +             memory_mb=manager.get_memory_mb(),
383 +             endpoints=endpoints,
384 +             db_backend=manager.db_backend,
385 +             broker_backend=manager.broker_backend,
386 +             error=str(e),
387 +         )
388 +     finally:
389 +         client.close()
390
391
392 + def print_result(result: BenchmarkResult) -> None:
393 +     """Print benchmark results for a single server."""
394 +     if not result.success:
395 +         console.print(f"[red]benchmark failed: {result.error}[/red]")
396 +         return
397
398 +     table = Table(title=f"{result.server} ({result.db_backend}/{result.broker_backend})")
399 +     table.add_column("endpoint", style="cyan")
400 +     table.add_column("reqs", justify="right")
401 +     table.add_column("rps", justify="right")
402 +     table.add_column("avg", justify="right")
403 +     table.add_column("p50", justify="right")
404 +     table.add_column("p95", justify="right")
405 +     table.add_column("p99", justify="right")
406
407 +     for ep in result.endpoints:
408 +         table.add_row(
409 +             ep.name,
410 +             str(ep.requests),
411 +             f"{ep.rps:.0f}",
412 +             f"{ep.latency.avg_ms:.2f}ms",
413 +             f"{ep.latency.p50_ms:.2f}ms",
414 +             f"{ep.latency.p95_ms:.2f}ms",
415 +             f"{ep.latency.p99_ms:.2f}ms",
416 +         )
417
418 +     console.print(table)
419 +     console.print(f"memory: {result.memory_mb:.1f}MB\n")
420
421
422 + def print_comparison(zig: BenchmarkResult, python: BenchmarkResult) -> None:
423 +     """Print comparison between zig and python."""
424 +     console.print()
425 +     table = Table(title="zig vs python comparison")
426 +     table.add_column("endpoint", style="cyan")
427 +     table.add_column("zig rps", justify="right")
428 +     table.add_column("python rps", justify="right")
429 +     table.add_column("speedup", justify="right")
430 +     table.add_column("zig p50", justify="right")
431 +     table.add_column("python p50", justify="right")
432
433 +     zig_eps = {ep.name: ep for ep in zig.endpoints}
434 +     python_eps = {ep.name: ep for ep in python.endpoints}
435
436 +     total_zig_rps = 0
437 +     total_python_rps = 0
438
439 +     for name in zig_eps:
440 +         if name not in python_eps:
441 +             continue
442 +         z, p = zig_eps[name], python_eps[name]
443 +         speedup = z.rps / p.rps if p.rps > 0 else 0
444
445 +         total_zig_rps += z.rps
446 +         total_python_rps += p.rps
447
448 +         speedup_str = f"[green]{speedup:.1f}x[/green]" if speedup >= 1.5 else (
449 +             f"[dim]{speedup:.1f}x[/dim]" if speedup >= 1 else f"[yellow]{speedup:.1f}x[/yellow]"
450 +         )
451
452 +         table.add_row(
453 +             name,
454 +             f"{z.rps:.0f}",
455 +             f"{p.rps:.0f}",
456 +             speedup_str,
457 +             f"{z.latency.p50_ms:.2f}ms",
458 +             f"{p.latency.p50_ms:.2f}ms",
459 +         )
460
461 +     console.print(table)
462
463 +     # summary
464 +     overall_speedup = total_zig_rps / total_python_rps if total_python_rps > 0 else 0
465 +     mem_ratio = python.memory_mb / zig.memory_mb if zig.memory_mb > 0 else 0
466
467 +     console.print()
468 +     summary = Table(title="summary")
469 +     summary.add_column("metric", style="cyan")
470 +     summary.add_column("zig", justify="right")
471 +     summary.add_column("python", justify="right")
472 +     summary.add_column("advantage", justify="right")
473
474 +     summary.add_row(
475 +         "total rps",
476 +         f"{total_zig_rps:.0f}",
477 +         f"{total_python_rps:.0f}",
478 +         f"[green]{overall_speedup:.1f}x faster[/green]" if overall_speedup >= 1 else f"[yellow]{1/overall_speedup:.1f}x slower[/yellow]" if overall_speedup > 0 else "n/a",
479 +     )
480 +     summary.add_row(
481 +         "memory",
482 +         f"{zig.memory_mb:.1f}MB",
483 +         f"{python.memory_mb:.1f}MB",
484 +         f"[green]{mem_ratio:.1f}x smaller[/green]" if mem_ratio >= 1 else f"[yellow]{1/mem_ratio:.1f}x larger[/yellow]" if mem_ratio > 0 else "n/a",
485 +     )
486
487 +     console.print(summary)
488
489
490 def ensure_docker_services(db_backend: str, broker_backend: str, project_root: Path) -> bool:
···
505             capture_output=True,
506             check=True,
507         )
508         time.sleep(2 if "postgres" in services else 1)
509         return True
510     except subprocess.CalledProcessError as e:
···
512         return False
513
514
515 + def run_single(
516 +     server_type: Literal["zig", "python"],
517 +     db_backend: str = "sqlite",
518 +     broker_backend: str = "memory",
519 + ) -> BenchmarkResult | None:
520 +     """Run benchmark for a single server configuration."""
521 +     backend_label = f"({db_backend}/{broker_backend})" if server_type == "zig" else ""
522 +     console.print(f"\n[bold]benchmarking {server_type} server {backend_label}[/bold]")
523 +     console.print("  starting server...", end=" ")
524 +
525 +     manager = ServerManager(server_type, db_backend, broker_backend)
526 +     if not manager.start():
527 +         console.print("[red]failed[/red]")
528 +         return BenchmarkResult(
529 +             server=server_type,
530 +             success=False,
531 +             memory_mb=0,
532 +             endpoints=[],
533 +             db_backend=db_backend,
534 +             broker_backend=broker_backend,
535 +             error="failed to start server",
536 +         )
537 +
538 +     console.print("running benchmark...", end=" ")
539 +     result = run_benchmark(manager)
540 +     manager.stop()
541 +
542 +     if result.success:
543 +         total_rps = sum(ep.rps for ep in result.endpoints)
544 +         console.print(f"[green]{total_rps:.0f} total rps[/green] ({result.memory_mb:.1f}MB)")
545 +     else:
546 +         console.print(f"[red]failed: {result.error}[/red]")
547 +
548 +     return result
549 +
550 +
551 + def print_matrix_results(results: list[BenchmarkResult]) -> None:
552     """Print matrix benchmark results."""
553     console.print()
554     table = Table(title="benchmark matrix (zig server)")
555     table.add_column("db", style="cyan")
556     table.add_column("broker", style="cyan")
557 +     table.add_column("total rps", justify="right")
558     table.add_column("memory", justify="right")
559     table.add_column("status", justify="center")
560
561     for r in results:
562 +         if r.success:
563 +             total_rps = sum(ep.rps for ep in r.endpoints)
564 +             status = "[green]✓[/green]"
565 +             table.add_row(r.db_backend, r.broker_backend, f"{total_rps:.0f}", f"{r.memory_mb:.1f}MB", status)
566 +         else:
567 +             table.add_row(r.db_backend, r.broker_backend, "-", "-", f"[red]✗ {r.error or 'failed'}[/red]")
568
569     console.print(table)
570
···
572 def main():
573     parser = argparse.ArgumentParser(description="benchmark prefect servers")
574     parser.add_argument("--server", choices=["zig", "python"], help="server to benchmark")
575 +     parser.add_argument("--compare", action="store_true", help="compare zig vs python")
576     parser.add_argument("--matrix", action="store_true", help="run all db × broker combinations (zig only)")
577 +     parser.add_argument("--db-backend", choices=["sqlite", "postgres"], default="sqlite")
578 +     parser.add_argument("--broker-backend", choices=["memory", "redis"], default="memory")
579 +     parser.add_argument("--duration", type=float, default=5.0, help="benchmark duration per endpoint (seconds)")
580
581     args = parser.parse_args()
582
583     if not args.server and not args.compare and not args.matrix:
584         parser.error("must specify --server, --compare, or --matrix")
585
586 +     global BENCHMARK_DURATION
587 +     BENCHMARK_DURATION = args.duration
588 +
589     console.print(f"[bold]prefect-server benchmark[/bold]")
590 +     console.print(f"duration: {BENCHMARK_DURATION}s per endpoint")
591
592     project_root = Path(__file__).parent.parent
593
594     if args.matrix:
595         combinations = [
596             ("sqlite", "memory"),
597             ("sqlite", "redis"),
···
599             ("postgres", "redis"),
600         ]
601
602 +         results = []
603         for db_backend, broker_backend in combinations:
604             if not ensure_docker_services(db_backend, broker_backend, project_root):
605 +                 results.append(BenchmarkResult(
606 +                     server="zig", success=False, memory_mb=0, endpoints=[],
607 +                     db_backend=db_backend, broker_backend=broker_backend,
608                     error="docker service failed",
609                 ))
610                 continue
611 +             result = run_single("zig", db_backend, broker_backend)
612 +             if result:
613 +                 results.append(result)
614
615 +         print_matrix_results(results)
616
617     elif args.compare:
618         if not ensure_docker_services(args.db_backend, args.broker_backend, project_root):
619             return
620 +
621 +         zig_result = run_single("zig", args.db_backend, args.broker_backend)
622 +         python_result = run_single("python")
623 +
624 +         if zig_result and python_result and zig_result.success and python_result.success:
625 +             print_comparison(zig_result, python_result)
626 +         else:
627 +             if zig_result:
628 +                 print_result(zig_result)
629 +             if python_result:
630 +                 print_result(python_result)
631 +
632     else:
633         if not ensure_docker_services(args.db_backend, args.broker_backend, project_root):
634             return
635 +         result = run_single(args.server, args.db_backend, args.broker_backend)
636 +         if result:
637 +             print_result(result)
638
639
640 if __name__ == "__main__":
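The `percentile` helper in the benchmark script uses rank-based linear interpolation: compute the fractional rank `k = (n - 1) * p / 100`, then interpolate between the two surrounding samples. A quick standalone check of that scheme with made-up latency numbers (the data here is illustrative, not from a real run):

```python
# Same linear-interpolation percentile scheme as the helper in scripts/benchmark.
def percentile(data: list[float], p: float) -> float:
    if not data:
        return 0.0
    k = (len(data) - 1) * p / 100  # fractional rank into the sorted data
    f = int(k)
    c = f + 1 if f + 1 < len(data) else f
    return data[f] + (k - f) * (data[c] - data[f])

# illustrative sorted latencies in ms
lat = [10.0, 20.0, 30.0, 40.0]
print(percentile(lat, 50))   # 25.0 - halfway between ranks 1 and 2
print(percentile(lat, 100))  # 40.0 - the max sample
```

Note this interpolates between samples rather than snapping to the nearest one, so small latency lists still produce smooth p95/p99 values.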
+358 -4
scripts/test-api-sequence
···
4 # dependencies = ["httpx", "rich"]
5 # ///
6 """
7 - Instrumented API test suite for prefect-server.
8
9 - Runs the full API sequence and reports timing breakdown per section.
10 - Use --json for machine-readable output (for benchmark script).
11 """
12
13 import json as json_lib
···
238     return True
239
240
241 def test_task_run(client: CountingClient) -> bool:
242     """Test task run lifecycle."""
243     # create
···
837     return True
838
839
840 def test_get_scheduled_flow_runs(client: CountingClient) -> bool:
841     """Test get_scheduled_flow_runs endpoint (worker polling)."""
842     from datetime import datetime, timezone
···
933     results.append(run_test("admin", test_admin))
934     results.append(run_test("flow_run (success)", lambda c: test_flow_run(c, should_fail=False)))
935     results.append(run_test("flow_run (failure)", lambda c: test_flow_run(c, should_fail=True)))
936     results.append(run_test("task_run", test_task_run))
937     results.append(run_test("filters", test_filters))
938     results.append(run_test("logs", test_logs))
···
941     results.append(run_test("work_pools", test_work_pools))
942     results.append(run_test("deployments", test_deployments))
943     results.append(run_test("get_scheduled_flow_runs", test_get_scheduled_flow_runs))
944
945     total_duration = sum(r.duration_ms for r in results)
946     total_requests = sum(r.requests for r in results)
···
968     # human-readable output
969     console.print("\n" + "=" * 60)
970
971 -     table = Table(title="timing breakdown")
972     table.add_column("section", style="cyan")
973     table.add_column("time", justify="right")
974     table.add_column("reqs", justify="right")
···
4 # dependencies = ["httpx", "rich"]
5 # ///
6 """
7 + Functional test suite for prefect-server API.
8 +
9 + Tests API correctness by exercising all endpoints with expected request/response patterns.
10 + Includes scheduler integration tests (which have intentional delays to verify background services).
11 +
12 + For performance benchmarking, use ./scripts/benchmark instead.
13
14 + Usage:
15 +     ./scripts/test-api-sequence          # human-readable output
16 +     ./scripts/test-api-sequence --json   # machine-readable for CI
17 +     ./scripts/test-api-sequence --quiet  # minimal output
18 """
19
20 import json as json_lib
···
245     return True
246
247
248 + def test_orchestration_rules(client: CountingClient) -> bool:
249 +     """Test orchestration rules (PreventPendingTransitions)."""
250 +     if not QUIET:
251 +         console.print(f"server: {BASE_URL}\n")
252 +
253 +     # 1. create flow
254 +     if not QUIET:
255 +         console.print("[bold]1. create flow[/bold]")
256 +     resp = client.post("/flows/", json={"name": f"orchestration-test-{uuid.uuid4().hex[:8]}"})
257 +     if resp.status_code not in (200, 201):
258 +         if not QUIET:
259 +             console.print(f"[red]FAIL[/red]: create flow {resp.status_code}")
260 +         return False
261 +     flow_id = resp.json()["id"]
262 +     if not QUIET:
263 +         console.print(f"  flow_id: {flow_id}")
264 +
265 +     # 2. create flow run in PENDING state
266 +     if not QUIET:
267 +         console.print("\n[bold]2. create flow run (PENDING)[/bold]")
268 +     resp = client.post("/flow_runs/", json={
269 +         "flow_id": flow_id,
270 +         "name": f"orch-run-{uuid.uuid4().hex[:8]}",
271 +         "state": {"type": "PENDING", "name": "Pending"},
272 +     })
273 +     if resp.status_code not in (200, 201):
274 +         if not QUIET:
275 +             console.print(f"[red]FAIL[/red]: create flow run {resp.status_code}")
276 +         return False
277 +     flow_run_id = resp.json()["id"]
278 +     if not QUIET:
279 +         console.print(f"  flow_run_id: {flow_run_id}")
280 +
281 +     # 3. try PENDING → PENDING (should be REJECT)
282 +     if not QUIET:
283 +         console.print("\n[bold]3. PENDING → PENDING (expect REJECT)[/bold]")
284 +     resp = client.post(f"/flow_runs/{flow_run_id}/set_state", json={
285 +         "state": {"type": "PENDING", "name": "Pending"},
286 +     })
287 +     if resp.status_code not in (200, 201):
288 +         if not QUIET:
289 +             console.print(f"[red]FAIL[/red]: unexpected status code {resp.status_code}")
290 +         return False
291 +     result = resp.json()
292 +     status = result.get("status")
293 +     if status != "REJECT":
294 +         if not QUIET:
295 +             console.print(f"[red]FAIL[/red]: expected REJECT, got {status}")
296 +         return False
297 +     if not QUIET:
298 +         reason = result.get("details", {}).get("reason", "")
299 +         console.print(f"  [green]status: {status} (correct)[/green]")
300 +         console.print(f"  reason: {reason}")
301 +
302 +     # 4. PENDING → RUNNING (should be ACCEPT)
303 +     if not QUIET:
304 +         console.print("\n[bold]4. PENDING → RUNNING (expect ACCEPT)[/bold]")
305 +     resp = client.post(f"/flow_runs/{flow_run_id}/set_state", json={
306 +         "state": {"type": "RUNNING", "name": "Running"},
307 +     })
308 +     if resp.status_code not in (200, 201):
309 +         if not QUIET:
310 +             console.print(f"[red]FAIL[/red]: {resp.status_code}")
311 +         return False
312 +     result = resp.json()
313 +     status = result.get("status")
314 +     if status != "ACCEPT":
315 +         if not QUIET:
316 +             console.print(f"[red]FAIL[/red]: expected ACCEPT, got {status}")
317 +         return False
318 +     if not QUIET:
319 +         console.print(f"  [green]status: {status} (correct)[/green]")
320 +
321 +     # 5. try RUNNING → PENDING (should be REJECT)
322 +     if not QUIET:
323 +         console.print("\n[bold]5. RUNNING → PENDING (expect REJECT)[/bold]")
324 +     resp = client.post(f"/flow_runs/{flow_run_id}/set_state", json={
325 +         "state": {"type": "PENDING", "name": "Pending"},
326 +     })
327 +     if resp.status_code not in (200, 201):
328 +         if not QUIET:
329 +             console.print(f"[red]FAIL[/red]: unexpected status code {resp.status_code}")
330 +         return False
331 +     result = resp.json()
332 +     status = result.get("status")
333 +     if status != "REJECT":
334 +         if not QUIET:
335 +             console.print(f"[red]FAIL[/red]: expected REJECT, got {status}")
336 +         return False
337 +     if not QUIET:
338 +         console.print(f"  [green]status: {status} (correct)[/green]")
339 +
340 +     # 6. verify run is still RUNNING (reject didn't change state)
341 +     if not QUIET:
342 +         console.print("\n[bold]6. verify state unchanged after REJECT[/bold]")
343 +     resp = client.get(f"/flow_runs/{flow_run_id}")
344 +     if resp.status_code != 200:
345 +         return False
346 +     actual_state = resp.json().get("state_type")
347 +     if actual_state != "RUNNING":
348 +         if not QUIET:
349 +             console.print(f"[red]FAIL[/red]: expected RUNNING, got {actual_state}")
350 +         return False
351 +     if not QUIET:
352 +         console.print(f"  [green]state: {actual_state} (correct - unchanged)[/green]")
353 +
354 +     # 7. complete normally
355 +     if not QUIET:
356 +         console.print("\n[bold]7. RUNNING → COMPLETED (expect ACCEPT)[/bold]")
357 +     resp = client.post(f"/flow_runs/{flow_run_id}/set_state", json={
358 +         "state": {"type": "COMPLETED", "name": "Completed"},
359 +     })
360 +     if resp.status_code not in (200, 201):
361 +         return False
362 +     status = resp.json().get("status")
363 +     if status != "ACCEPT":
364 +         if not QUIET:
365 +             console.print(f"[red]FAIL[/red]: expected ACCEPT, got {status}")
366 +         return False
367 +     if not QUIET:
368 +         console.print(f"  [green]status: {status} (correct)[/green]")
369 +
370 +     # =========================================================================
371 +     # CopyScheduledTime rule tests
372 +     # =========================================================================
373 +
374 +     # 8. create a SCHEDULED flow run with next_scheduled_start_time
375 +     scheduled_time = "2025-06-15T10:00:00Z"
376 +     if not QUIET:
377 +         console.print(f"\n[bold]8. create SCHEDULED run (next_scheduled_start_time={scheduled_time})[/bold]")
378 +     resp = client.post("/flow_runs/", json={
379 +         "flow_id": flow_id,
380 +         "name": f"scheduled-run-{uuid.uuid4().hex[:8]}",
381 +         "state": {"type": "SCHEDULED", "name": "Scheduled"},
382 +         "next_scheduled_start_time": scheduled_time,
383 +     })
384 +     if resp.status_code not in (200, 201):
385 +         if not QUIET:
386 +             console.print(f"[red]FAIL[/red]: create scheduled run {resp.status_code}")
387 +         return False
388 +     scheduled_run_id = resp.json()["id"]
389 +     if not QUIET:
390 +         console.print(f"  scheduled_run_id: {scheduled_run_id}")
391 +
392 +     # 9. transition SCHEDULED → PENDING (CopyScheduledTime should copy scheduled_time)
393 +     if not QUIET:
394 +         console.print("\n[bold]9. SCHEDULED → PENDING (expect scheduled_time copied)[/bold]")
395 +     resp = client.post(f"/flow_runs/{scheduled_run_id}/set_state", json={
396 +         "state": {"type": "PENDING", "name": "Pending"},
397 +     })
398 +     if resp.status_code not in (200, 201):
399 +         if not QUIET:
400 +             console.print(f"[red]FAIL[/red]: set_state {resp.status_code}")
401 +         return False
402 +     status = resp.json().get("status")
403 +     if status != "ACCEPT":
404 +         if not QUIET:
405 +             console.print(f"[red]FAIL[/red]: expected ACCEPT, got {status}")
406 +         return False
407 +     if not QUIET:
408 +         console.print(f"  [green]status: {status} (correct)[/green]")
409 +
410 +     # 10. verify expected_start_time was set from next_scheduled_start_time
411 +     if not QUIET:
412 +         console.print("\n[bold]10. verify expected_start_time copied[/bold]")
413 +     resp = client.get(f"/flow_runs/{scheduled_run_id}")
414 +     if resp.status_code != 200:
415 +         return False
416 +     run_data = resp.json()
417 +     expected_start = run_data.get("expected_start_time")
418 +     if expected_start != scheduled_time:
419 +         if not QUIET:
420 +             console.print(f"[red]FAIL[/red]: expected_start_time={expected_start}, expected {scheduled_time}")
421 +         return False
422 +     if not QUIET:
423 +         console.print(f"  [green]expected_start_time: {expected_start} (correct)[/green]")
424 +
425 +     return True
426 +
427 +
428 def test_task_run(client: CountingClient) -> bool:
429     """Test task run lifecycle."""
430     # create
···
1024     return True
1025
1026
1027 + def test_scheduler_idempotency(client: CountingClient) -> bool:
1028 +     """Test that scheduler is idempotent - running twice doesn't create duplicates."""
1029 +     import time as time_mod
1030 +
1031 +     def fail(msg: str) -> bool:
1032 +         if not QUIET: console.print(f"[red]FAIL[/red]: {msg}")
1033 +         return False
1034 +
1035 +     def log(msg: str) -> None:
1036 +         if not QUIET: console.print(msg)
1037 +
1038 +     # setup: create flow, work pool, deployment with interval schedule
1039 +     log("[bold]setup[/bold]")
1040 +     resp = client.post("/flows/", json={"name": f"idem-flow-{uuid.uuid4().hex[:8]}"})
1041 +     if resp.status_code not in (200, 201): return fail(f"create flow {resp.status_code}")
1042 +     flow_id = resp.json().get("id")
1043 +
1044 +     pool_name = f"idem-pool-{uuid.uuid4().hex[:8]}"
1045 +     resp = client.post("/work_pools/", json={"name": pool_name, "type": "process"})
1046 +     if resp.status_code not in (200, 201): return fail(f"create work_pool {resp.status_code}")
1047 +     log(f"  pool: {pool_name}")
1048 +
1049 +     # create deployment with interval schedule (every hour)
1050 +     resp = client.post("/deployments/", json={
1051 +         "name": f"idem-deploy-{uuid.uuid4().hex[:8]}",
1052 +         "flow_id": flow_id,
1053 +         "work_pool_name": pool_name,
1054 +         "schedules": [{"schedule": {"interval": 3600}, "active": True}],  # every hour
1055 +     })
1056 +     if resp.status_code not in (200, 201): return fail(f"create deployment {resp.status_code}")
1057 +     deployment = resp.json()
1058 +     deployment_id = deployment.get("id")
1059 +     log(f"  deployment: {deployment_id}")
1060 +
1061 +     # wait for scheduler to run once (default 5s interval)
1062 +     log("[bold]waiting for scheduler (7s)...[/bold]")
1063 +     time_mod.sleep(7)
1064 +
1065 +     # count runs after first scheduler tick
1066 +     resp = client.post("/flow_runs/filter", json={
1067 +         "flow_runs": {"deployment_id": {"any_": [deployment_id]}},
1068 +         "limit": 100,
1069 +     })
1070 +     if resp.status_code != 200: return fail(f"filter flow_runs {resp.status_code}")
1071 +     runs_after_first = resp.json()
1072 +     count_after_first = len(runs_after_first)
1073 +     log(f"  runs after first tick: {count_after_first}")
1074 +
1075 +     if count_after_first == 0:
1076 +         return fail("scheduler did not create any runs")
1077 +
1078 +     # wait for scheduler to run again
1079 +     log("[bold]waiting for second scheduler tick (7s)...[/bold]")
1080 +     time_mod.sleep(7)
1081 +
1082 +     # count runs after second scheduler tick
1083 +     resp = client.post("/flow_runs/filter", json={
1084 +         "flow_runs": {"deployment_id": {"any_": [deployment_id]}},
1085 +         "limit": 100,
1086 +     })
1087 +     if resp.status_code != 200: return fail(f"filter flow_runs {resp.status_code}")
1088 +     runs_after_second = resp.json()
1089 +     count_after_second = len(runs_after_second)
1090 +     log(f"  runs after second tick: {count_after_second}")
1091 +
1092 +     # key test: same number of runs means idempotency works
1093 +     # (scheduler shouldn't create duplicates for same scheduled times)
1094 +     if count_after_second != count_after_first:
1095 +         return fail(f"idempotency failed: {count_after_first} -> {count_after_second} runs")
1096 +     log(f"  [green]idempotency verified: count unchanged[/green]")
1097 +
1098 +     # cleanup
1099 +     client.delete(f"/deployments/{deployment_id}")
1100 +     client.delete(f"/work_pools/{pool_name}")
1101 +     log("  cleanup: ok")
1102 +
1103 +     return True
1104 +
1105 +
1106 + def test_parameter_merging(client: CountingClient) -> bool:
1107 +     """Test that schedule parameters override deployment parameters."""
1108 +     import time as time_mod
1109 +
1110 +     def fail(msg: str) -> bool:
1111 +         if not QUIET: console.print(f"[red]FAIL[/red]: {msg}")
1112 +         return False
1113 +
1114 +     def log(msg: str) -> None:
1115 +         if not QUIET: console.print(msg)
1116 +
1117 +     # setup
1118 +     log("[bold]setup[/bold]")
1119 +     resp = client.post("/flows/", json={"name": f"params-flow-{uuid.uuid4().hex[:8]}"})
1120 +     if resp.status_code not in (200, 201): return fail(f"create flow {resp.status_code}")
1121 +     flow_id = resp.json().get("id")
1122 +
1123 +     pool_name = f"params-pool-{uuid.uuid4().hex[:8]}"
1124 +     resp = client.post("/work_pools/", json={"name": pool_name, "type": "process"})
1125 +     if resp.status_code not in (200, 201): return fail(f"create work_pool {resp.status_code}")
1126 +     log(f"  pool: {pool_name}")
1127 +
1128 +     # create deployment with base parameters
1129 +     # schedule has override parameter
1130 +     resp = client.post("/deployments/", json={
1131 +         "name": f"params-deploy-{uuid.uuid4().hex[:8]}",
1132 +         "flow_id": flow_id,
1133 +         "work_pool_name": pool_name,
1134 +         "parameters": {"base_key": "base_value", "override_key": "deployment_value"},
1135 +         "schedules": [{
1136 +             "schedule": {"interval": 3600},
1137 +             "active": True,
1138 +             "parameters": {"override_key": "schedule_value", "schedule_key": "schedule_only"},
1139 +         }],
1140 +     })
1141 +     if resp.status_code not in (200, 201): return fail(f"create deployment {resp.status_code}")
1142 +     deployment = resp.json()
1143 +     deployment_id = deployment.get("id")
1144 +     log(f"  deployment: {deployment_id}")
1145 +     log(f"  deployment params: {deployment.get('parameters')}")
1146 +
1147 +     # wait for scheduler to create runs
1148 +     log("[bold]waiting for scheduler (7s)...[/bold]")
1149 +     time_mod.sleep(7)
1150 +
1151 +     # get the scheduled runs and check their parameters
1152 +     resp = client.post("/flow_runs/filter", json={
1153 +         "flow_runs": {"deployment_id": {"any_": [deployment_id]}},
1154 +         "limit": 10,
1155 +     })
1156 +     if resp.status_code != 200: return fail(f"filter flow_runs {resp.status_code}")
1157 +     runs = resp.json()
1158 +     log(f"  found {len(runs)} runs")
1159 +
1160 +     if len(runs) == 0:
1161 +         return fail("scheduler did not create any runs")
1162 +
1163 +     # check merged parameters on first run
1164 +     run_params = runs[0].get("parameters", {})
1165 +     if isinstance(run_params, str):
1166 +         import json as json_mod
1167 +         run_params = json_mod.loads(run_params)
1168 +     log(f"  run params: {run_params}")
1169 +
1170 +     # verify merging:
1171 +     # - base_key should be from deployment
1172 +     # - override_key should be from schedule (override)
1173 +     # - schedule_key should be from schedule (new key)
1174 +     if run_params.get("base_key") != "base_value":
1175 +         return fail(f"base_key not preserved: {run_params.get('base_key')}")
1176 +     if run_params.get("override_key") != "schedule_value":
1177 +         return fail(f"override_key not overridden: {run_params.get('override_key')}")
1178 +
if run_params.get("schedule_key") != "schedule_only": 1179 + return fail(f"schedule_key not added: {run_params.get('schedule_key')}") 1180 + 1181 + log(" [green]parameter merging verified[/green]") 1182 + 1183 + # cleanup 1184 + client.delete(f"/deployments/{deployment_id}") 1185 + client.delete(f"/work_pools/{pool_name}") 1186 + log(" cleanup: ok") 1187 + 1188 + return True 1189 + 1190 + 1191 def test_get_scheduled_flow_runs(client: CountingClient) -> bool: 1192 """Test get_scheduled_flow_runs endpoint (worker polling).""" 1193 from datetime import datetime, timezone ··· 1284 results.append(run_test("admin", test_admin)) 1285 results.append(run_test("flow_run (success)", lambda c: test_flow_run(c, should_fail=False))) 1286 results.append(run_test("flow_run (failure)", lambda c: test_flow_run(c, should_fail=True))) 1287 + results.append(run_test("orchestration_rules", test_orchestration_rules)) 1288 results.append(run_test("task_run", test_task_run)) 1289 results.append(run_test("filters", test_filters)) 1290 results.append(run_test("logs", test_logs)) ··· 1293 results.append(run_test("work_pools", test_work_pools)) 1294 results.append(run_test("deployments", test_deployments)) 1295 results.append(run_test("get_scheduled_flow_runs", test_get_scheduled_flow_runs)) 1296 + results.append(run_test("scheduler_idempotency", test_scheduler_idempotency)) 1297 + results.append(run_test("parameter_merging", test_parameter_merging)) 1298 1299 total_duration = sum(r.duration_ms for r in results) 1300 total_requests = sum(r.requests for r in results) ··· 1322 # human-readable output 1323 console.print("\n" + "=" * 60) 1324 1325 + table = Table(title="test results") 1326 table.add_column("section", style="cyan") 1327 table.add_column("time", justify="right") 1328 table.add_column("reqs", justify="right")
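The merge semantics `test_parameter_merging` exercises reduce to a shallow dict merge with schedule values winning over deployment values. A minimal Python sketch using the same keys as the test above (the helper name is illustrative, not part of the server API):

```python
def merge_parameters(deployment_params: dict, schedule_params: dict) -> dict:
    """Shallow merge: deployment parameters are the base, schedule parameters override."""
    merged = dict(deployment_params)  # start from deployment-level defaults
    merged.update(schedule_params)    # schedule-level values win on key conflicts
    return merged

run_params = merge_parameters(
    {"base_key": "base_value", "override_key": "deployment_value"},
    {"override_key": "schedule_value", "schedule_key": "schedule_only"},
)
# base_key survives from the deployment; override_key and schedule_key come from the schedule
```

A shallow merge is all the three assertions in the test require: one preserved key, one overridden key, one schedule-only key.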
+284
scripts/test-matrix
···
··· 1 + #!/usr/bin/env bash 2 + # test-matrix - run integration tests across all backend combinations 3 + # 4 + # combinations tested: 5 + # 1. sqlite + memory broker 6 + # 2. sqlite + redis broker 7 + # 3. postgres + memory broker 8 + # 4. postgres + redis broker 9 + # 10 + # usage: 11 + # ./scripts/test-matrix # run full matrix (default) 12 + # ./scripts/test-matrix --quick # sqlite + memory only (fast local dev) 13 + # ./scripts/test-matrix --no-cleanup # keep containers running after 14 + 15 + set -euo pipefail 16 + 17 + SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" 18 + PROJECT_DIR="$(dirname "$SCRIPT_DIR")" 19 + 20 + RED='\033[0;31m' 21 + GREEN='\033[0;32m' 22 + YELLOW='\033[1;33m' 23 + BLUE='\033[0;34m' 24 + CYAN='\033[0;36m' 25 + NC='\033[0m' 26 + 27 + info() { echo -e "${GREEN}[INFO]${NC} $*"; } 28 + step() { echo -e "${BLUE}[STEP]${NC} $*"; } 29 + warn() { echo -e "${YELLOW}[WARN]${NC} $*"; } 30 + error() { echo -e "${RED}[ERROR]${NC} $*"; } 31 + matrix() { echo -e "${CYAN}[MATRIX]${NC} $*"; } 32 + 33 + SERVER_PID="" 34 + TEST_PORT=4202 35 + POSTGRES_PORT=5434 36 + REDIS_PORT=6380 37 + CLEANUP=true 38 + QUICK_MODE=false 39 + 40 + # parse args 41 + for arg in "$@"; do 42 + case $arg in 43 + --quick) QUICK_MODE=true ;; 44 + --no-cleanup) CLEANUP=false ;; 45 + esac 46 + done 47 + 48 + cleanup() { 49 + if [[ -n "$SERVER_PID" ]] && kill -0 "$SERVER_PID" 2>/dev/null; then 50 + kill "$SERVER_PID" 2>/dev/null || true 51 + wait "$SERVER_PID" 2>/dev/null || true 52 + fi 53 + lsof -ti:$TEST_PORT 2>/dev/null | xargs -r kill 2>/dev/null || true 54 + rm -f /tmp/prefect-matrix-*.db 2>/dev/null || true 55 + } 56 + trap cleanup EXIT 57 + 58 + wait_for_server() { 59 + local timeout=${1:-30} 60 + local count=0 61 + while [[ $count -lt $timeout ]]; do 62 + if curl -s "http://localhost:$TEST_PORT/api/health" >/dev/null 2>&1; then 63 + return 0 64 + fi 65 + sleep 0.5 66 + ((count++)) 67 + done 68 + return 1 69 + } 70 + 71 + wait_for_postgres() { 72 + local timeout=${1:-30} 73 + 
local count=0 74 + while [[ $count -lt $timeout ]]; do 75 + if docker exec prefect-test-matrix-postgres pg_isready -U prefect >/dev/null 2>&1; then 76 + return 0 77 + fi 78 + sleep 1 79 + ((count++)) 80 + done 81 + return 1 82 + } 83 + 84 + start_services() { 85 + step "Starting docker services (postgres + redis)..." 86 + 87 + # stop any existing test containers 88 + docker rm -f prefect-test-matrix-postgres prefect-test-matrix-redis 2>/dev/null || true 89 + 90 + # start postgres 91 + docker run -d \ 92 + --name prefect-test-matrix-postgres \ 93 + -e POSTGRES_USER=prefect \ 94 + -e POSTGRES_PASSWORD=prefect \ 95 + -e POSTGRES_DB=prefect_test \ 96 + -p "${POSTGRES_PORT}:5432" \ 97 + postgres:16-alpine >/dev/null 2>&1 98 + 99 + # start redis 100 + docker run -d \ 101 + --name prefect-test-matrix-redis \ 102 + -p "${REDIS_PORT}:6379" \ 103 + redis:7-alpine >/dev/null 2>&1 104 + 105 + # wait for postgres 106 + if ! wait_for_postgres 30; then 107 + error "PostgreSQL failed to start" 108 + return 1 109 + fi 110 + 111 + # wait for redis 112 + local count=0 113 + while [[ $count -lt 30 ]]; do 114 + if redis-cli -p "$REDIS_PORT" ping >/dev/null 2>&1; then 115 + break 116 + fi 117 + sleep 0.5 118 + ((count++)) 119 + done 120 + 121 + info "Services ready" 122 + } 123 + 124 + stop_services() { 125 + if [[ "$CLEANUP" == "true" ]]; then 126 + step "Stopping docker services..." 
127 + docker rm -f prefect-test-matrix-postgres prefect-test-matrix-redis 2>/dev/null || true 128 + else 129 + info "Leaving containers running (--no-cleanup)" 130 + fi 131 + } 132 + 133 + # run a single test configuration 134 + run_test() { 135 + local db_backend="$1" 136 + local broker_backend="$2" 137 + local label="${db_backend}+${broker_backend}" 138 + 139 + matrix "Testing: $label" 140 + 141 + # prepare environment 142 + local db_path="/tmp/prefect-matrix-${db_backend}-${broker_backend}-$$.db" 143 + rm -f "$db_path" 144 + 145 + export PREFECT_SERVER_PORT=$TEST_PORT 146 + export PREFECT_SERVER_LOGGING_LEVEL=WARNING 147 + export PREFECT_BROKER_BACKEND="$broker_backend" 148 + 149 + if [[ "$db_backend" == "sqlite" ]]; then 150 + export PREFECT_DATABASE_BACKEND=sqlite 151 + export PREFECT_DATABASE_PATH="$db_path" 152 + unset PREFECT_DATABASE_URL 2>/dev/null || true 153 + else 154 + export PREFECT_DATABASE_BACKEND=postgres 155 + export PREFECT_DATABASE_URL="postgresql://prefect:prefect@localhost:${POSTGRES_PORT}/prefect_test" 156 + # truncate all tables between tests (safer than DROP SCHEMA which breaks connections) 157 + docker exec prefect-test-matrix-postgres psql -U prefect -d prefect_test -c " 158 + DO \$\$ DECLARE r RECORD; 159 + BEGIN 160 + FOR r IN (SELECT tablename FROM pg_tables WHERE schemaname = 'public') LOOP 161 + EXECUTE 'TRUNCATE TABLE ' || quote_ident(r.tablename) || ' CASCADE'; 162 + END LOOP; 163 + END \$\$; 164 + " >/dev/null 2>&1 || true 165 + fi 166 + 167 + if [[ "$broker_backend" == "redis" ]]; then 168 + export PREFECT_REDIS_MESSAGING_HOST=localhost 169 + export PREFECT_REDIS_MESSAGING_PORT=$REDIS_PORT 170 + # clear redis between tests 171 + redis-cli -p "$REDIS_PORT" FLUSHALL >/dev/null 2>&1 || true 172 + fi 173 + 174 + # start server 175 + "$PROJECT_DIR/zig-out/bin/prefect-server" & 176 + SERVER_PID=$! 177 + 178 + if ! 
wait_for_server 30; then 179 + error "[$label] Server failed to start" 180 + return 1 181 + fi 182 + 183 + # run test suite 184 + step "[$label] Running API tests..." 185 + if PREFECT_API_URL="http://localhost:$TEST_PORT/api" "$PROJECT_DIR/scripts/test-api-sequence" >/dev/null 2>&1; then 186 + info "[$label] PASSED" 187 + local result=0 188 + else 189 + error "[$label] FAILED" 190 + local result=1 191 + fi 192 + 193 + # stop server 194 + kill "$SERVER_PID" 2>/dev/null || true 195 + wait "$SERVER_PID" 2>/dev/null || true 196 + SERVER_PID="" 197 + 198 + rm -f "$db_path" 199 + return $result 200 + } 201 + 202 + main() { 203 + echo "" 204 + matrix "============================================" 205 + matrix " prefect-server test matrix" 206 + matrix "============================================" 207 + echo "" 208 + 209 + # build first 210 + step "Building..." 211 + (cd "$PROJECT_DIR" && zig build) || { error "Build failed"; exit 1; } 212 + 213 + # run unit tests 214 + step "Running unit tests..." 
215 + (cd "$PROJECT_DIR" && zig build test --summary all 2>&1) || { error "Unit tests failed"; exit 1; } 216 + echo "" 217 + 218 + if [[ "$QUICK_MODE" == "true" ]]; then 219 + matrix "Quick mode: sqlite + memory only" 220 + echo "" 221 + 222 + run_test sqlite memory || exit 1 223 + 224 + echo "" 225 + matrix "============================================" 226 + matrix " QUICK TEST PASSED (1/4 combinations)" 227 + matrix "============================================" 228 + echo "" 229 + warn "Run without --quick to test full matrix" 230 + exit 0 231 + fi 232 + 233 + # start services for full matrix 234 + start_services || exit 1 235 + echo "" 236 + 237 + # track results 238 + local passed=0 239 + local failed=0 240 + declare -a results=() 241 + 242 + # test all 4 combinations 243 + for db in sqlite postgres; do 244 + for broker in memory redis; do 245 + step "Starting test: ${db}+${broker}" 246 + if run_test "$db" "$broker"; then 247 + results+=("${GREEN}PASS${NC} ${db}+${broker}") 248 + ((passed++)) || true 249 + else 250 + results+=("${RED}FAIL${NC} ${db}+${broker}") 251 + ((failed++)) || true 252 + fi 253 + step "Completed test: ${db}+${broker}" 254 + echo "" 255 + done 256 + done 257 + 258 + # cleanup 259 + stop_services 260 + 261 + # summary 262 + echo "" 263 + matrix "============================================" 264 + matrix " TEST MATRIX RESULTS" 265 + matrix "============================================" 266 + for r in "${results[@]}"; do 267 + echo -e " $r" 268 + done 269 + echo "" 270 + matrix "Passed: $passed / 4" 271 + 272 + if [[ $failed -gt 0 ]]; then 273 + error "Matrix test FAILED" 274 + exit 1 275 + fi 276 + 277 + echo "" 278 + matrix "============================================" 279 + matrix " ALL 4 COMBINATIONS PASSED" 280 + matrix "============================================" 281 + echo "" 282 + } 283 + 284 + main
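The four configurations the script's nested `for db … for broker …` loops walk are the cross product of the two database backends and the two broker backends. A small sketch generating the same matrix (backend names taken from the script):

```python
from itertools import product

DATABASES = ["sqlite", "postgres"]
BROKERS = ["memory", "redis"]

def backend_matrix() -> list[tuple[str, str]]:
    # same iteration order as the script: outer loop over databases, inner over brokers
    return list(product(DATABASES, BROKERS))
```

Adding a fifth backend to either list grows the matrix automatically, which is why the script's summary counts `passed / 4` rather than hard-coding labels.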
+327
scripts/test-worker
···
··· 1 + #!/usr/bin/env -S uv run --script --quiet 2 + # /// script 3 + # requires-python = ">=3.12" 4 + # dependencies = ["prefect>=3.0", "httpx"] 5 + # /// 6 + """ 7 + Schedule and runner integration tests for prefect-server. 8 + 9 + Tests: 10 + 1. Cron scheduler creates flow runs on schedule (server-side) 11 + 2. .serve() Runner polls get_scheduled_flow_runs and executes locally 12 + 3. .serve() creates deployment with schedule attached 13 + 14 + NOTE: .serve() is NOT a worker. It's a Runner that: 15 + - Creates a deployment 16 + - Polls POST /deployments/get_scheduled_flow_runs every N seconds 17 + - Executes runs locally in the same process 18 + 19 + Workers are separate standalone daemons that poll work pools. 20 + 21 + Requires running server at PREFECT_API_URL. 22 + """ 23 + 24 + import os 25 + import signal 26 + import subprocess 27 + import sys 28 + import time 29 + import uuid 30 + 31 + import httpx 32 + 33 + API_URL = os.environ.get("PREFECT_API_URL", "http://localhost:4200/api") 34 + 35 + 36 + def api(method: str, path: str, **kwargs) -> httpx.Response: 37 + """Make API request.""" 38 + url = f"{API_URL}{path}" 39 + return httpx.request(method, url, timeout=30, **kwargs) 40 + 41 + 42 + def wait_for_condition(check_fn, timeout: int = 60, interval: float = 1.0, desc: str = "condition"): 43 + """Wait for condition to be true.""" 44 + start = time.time() 45 + while time.time() - start < timeout: 46 + result = check_fn() 47 + if result: 48 + return result 49 + time.sleep(interval) 50 + raise TimeoutError(f"timeout waiting for {desc}") 51 + 52 + 53 + def test_cron_scheduler(): 54 + """Test that cron schedules trigger flow runs.""" 55 + print("\n=== test_cron_scheduler ===") 56 + 57 + suffix = uuid.uuid4().hex[:8] 58 + 59 + # create flow 60 + resp = api("POST", "/flows/", json={"name": f"cron-test-flow-{suffix}"}) 61 + assert resp.status_code in (200, 201), f"create flow failed: {resp.status_code}" 62 + flow_id = resp.json()["id"] 63 + print(f" flow: 
{flow_id}") 64 + 65 + # create work pool 66 + pool_name = f"cron-test-pool-{suffix}" 67 + resp = api("POST", "/work_pools/", json={"name": pool_name, "type": "process"}) 68 + assert resp.status_code in (200, 201), f"create pool failed: {resp.status_code}" 69 + print(f" pool: {pool_name}") 70 + 71 + # create deployment with cron schedule (every minute) 72 + # use */1 * * * * to trigger within 60 seconds 73 + resp = api("POST", "/deployments/", json={ 74 + "name": f"cron-test-deploy-{suffix}", 75 + "flow_id": flow_id, 76 + "work_pool_name": pool_name, 77 + "schedules": [{"schedule": {"cron": "*/1 * * * *"}, "active": True}], 78 + }) 79 + assert resp.status_code in (200, 201), f"create deployment failed: {resp.status_code}" 80 + deployment_id = resp.json()["id"] 81 + print(f" deployment: {deployment_id}") 82 + print(f" schedule: */1 * * * * (every minute)") 83 + 84 + # wait for scheduler to create a run (up to 70 seconds for cron) 85 + print(" waiting for scheduler to create run (up to 70s)...") 86 + 87 + def check_run_created(): 88 + resp = api("POST", "/flow_runs/filter", json={ 89 + "flow_runs": {"deployment_id": {"any_": [deployment_id]}}, 90 + "limit": 10, 91 + }) 92 + if resp.status_code == 200: 93 + runs = resp.json() 94 + if runs: 95 + return runs[0] 96 + return None 97 + 98 + run = wait_for_condition(check_run_created, timeout=70, interval=2, desc="scheduled run") 99 + print(f" ✓ scheduler created run: {run['id']}") 100 + print(f" state: {run.get('state', {}).get('type', 'unknown')}") 101 + 102 + # cleanup 103 + api("DELETE", f"/deployments/{deployment_id}") 104 + api("DELETE", f"/work_pools/{pool_name}") 105 + api("DELETE", f"/flows/{flow_id}") 106 + 107 + print(" ✓ cron scheduler test passed") 108 + return True 109 + 110 + 111 + def test_serve_runner(): 112 + """Test that .serve() Runner polls get_scheduled_flow_runs and executes locally. 113 + 114 + .serve() is NOT a worker - it's a Runner that: 115 + 1. Creates a deployment with schedule 116 + 2. 
Polls POST /deployments/get_scheduled_flow_runs every N seconds 117 + 3. Executes matching runs locally in the same process 118 + 119 + This is distinct from Workers which are standalone daemons that poll work pools. 120 + """ 121 + print("\n=== test_serve_runner ===") 122 + 123 + # .serve() creates a Runner that polls for scheduled runs. The server-side 124 + # scheduler creates runs, and .serve() picks them up via get_scheduled_flow_runs. 125 + 126 + import threading 127 + from prefect import flow 128 + from prefect.client.orchestration import get_client 129 + 130 + suffix = uuid.uuid4().hex[:8] 131 + deployment_name = f"worker-exec-{suffix}" 132 + execution_marker = f"/tmp/worker_exec_marker_{suffix}" 133 + serve_error = None 134 + serve_started = threading.Event() 135 + 136 + @flow 137 + def worker_exec_flow(marker_file: str = execution_marker): 138 + """Flow that creates a marker file to prove it ran.""" 139 + import pathlib 140 + pathlib.Path(marker_file).write_text("executed") 141 + return "done" 142 + 143 + def run_serve(): 144 + nonlocal serve_error 145 + try: 146 + serve_started.set() 147 + # use interval=1 for faster testing 148 + worker_exec_flow.serve( 149 + name=deployment_name, 150 + interval=1, # every 1 second - .serve() handles this locally 151 + ) 152 + except Exception as e: 153 + serve_error = e 154 + 155 + # start serve in background 156 + serve_thread = threading.Thread(target=run_serve, daemon=True) 157 + serve_thread.start() 158 + 159 + # wait for serve to start 160 + serve_started.wait(timeout=10) 161 + time.sleep(2) # give it time to register deployment 162 + 163 + try: 164 + with get_client(sync_client=True) as client: 165 + # verify deployment exists 166 + try: 167 + deployment = client.read_deployment_by_name(f"worker-exec-flow/{deployment_name}") 168 + print(f" deployment: {deployment.id}") 169 + except Exception as e: 170 + if serve_error: 171 + raise RuntimeError(f"serve failed: {serve_error}") 172 + raise RuntimeError(f"deployment 
not found: {e}") 173 + 174 + # .serve() with interval will execute the flow locally on schedule 175 + # wait for it to execute (interval=1s, so should be quick) 176 + print(" waiting for .serve() to execute flow (up to 15s)...") 177 + 178 + def check_marker_exists(): 179 + import pathlib 180 + return pathlib.Path(execution_marker).exists() 181 + 182 + try: 183 + wait_for_condition(check_marker_exists, timeout=15, interval=0.5, desc="flow execution") 184 + print(" ✓ flow executed (marker file created)") 185 + 186 + # verify run was recorded in API 187 + runs = client.read_flow_runs() 188 + deployment_runs = [r for r in runs if r.deployment_id == deployment.id] 189 + if deployment_runs: 190 + latest = max(deployment_runs, key=lambda r: r.created) 191 + print(f" run state: {latest.state.type if latest.state else 'unknown'}") 192 + 193 + print(" ✓ serve runner test passed") 194 + return True 195 + 196 + except TimeoutError: 197 + # .serve() might not execute if there's an issue 198 + if serve_error: 199 + print(f" serve error: {serve_error}") 200 + print(" ✗ flow was not executed by .serve()") 201 + return False 202 + 203 + finally: 204 + # cleanup marker 205 + import pathlib 206 + pathlib.Path(execution_marker).unlink(missing_ok=True) 207 + 208 + 209 + def test_serve_with_schedule(): 210 + """Test .serve() creates deployment with schedule.""" 211 + print("\n=== test_serve_with_schedule ===") 212 + 213 + # this test uses the prefect client .serve() method 214 + # which is a blocking call, so we run it in a thread 215 + 216 + import threading 217 + from prefect import flow 218 + from prefect.client.orchestration import get_client 219 + 220 + suffix = uuid.uuid4().hex[:8] 221 + deployment_name = f"serve-schedule-{suffix}" 222 + serve_error = None 223 + 224 + @flow 225 + def serve_test_flow(): 226 + return "ok" 227 + 228 + def run_serve(): 229 + nonlocal serve_error 230 + try: 231 + # serve with cron schedule 232 + serve_test_flow.serve( 233 + name=deployment_name, 234 + 
cron="*/5 * * * *", # every 5 minutes 235 + ) 236 + except Exception as e: 237 + serve_error = e 238 + 239 + # start serve in background 240 + serve_thread = threading.Thread(target=run_serve, daemon=True) 241 + serve_thread.start() 242 + 243 + # wait for deployment to be created 244 + print(" waiting for deployment (5s)...") 245 + time.sleep(5) 246 + 247 + # check deployment exists with schedule 248 + with get_client(sync_client=True) as client: 249 + try: 250 + deployment = client.read_deployment_by_name(f"serve-test-flow/{deployment_name}") 251 + print(f" ✓ deployment created: {deployment.id}") 252 + 253 + # check schedules 254 + schedules = deployment.schedules 255 + if schedules: 256 + print(f" ✓ schedule attached: {len(schedules)} schedule(s)") 257 + for s in schedules: 258 + print(f" - {s.schedule}") 259 + else: 260 + print(" ✗ no schedules found on deployment") 261 + return False 262 + 263 + print(" ✓ .serve() with schedule test passed") 264 + return True 265 + 266 + except Exception as e: 267 + if serve_error: 268 + print(f" ✗ serve failed: {serve_error}") 269 + else: 270 + print(f" ✗ deployment not found: {e}") 271 + return False 272 + 273 + 274 + def main(): 275 + print(f"api url: {API_URL}") 276 + 277 + # verify server is running 278 + try: 279 + resp = api("GET", "/health") 280 + if resp.status_code != 200: 281 + print(f"server not healthy: {resp.status_code}") 282 + sys.exit(1) 283 + except Exception as e: 284 + print(f"cannot connect to server: {e}") 285 + sys.exit(1) 286 + 287 + print("server healthy") 288 + 289 + results = [] 290 + 291 + # test 1: cron scheduler 292 + try: 293 + results.append(("cron_scheduler", test_cron_scheduler())) 294 + except Exception as e: 295 + print(f" ✗ cron scheduler test failed: {e}") 296 + results.append(("cron_scheduler", False)) 297 + 298 + # test 2: serve runner (NOT a worker - Runner polls for scheduled runs) 299 + try: 300 + results.append(("serve_runner", test_serve_runner())) 301 + except Exception as e: 302 + 
print(f" ✗ serve runner test failed: {e}") 303 + results.append(("serve_runner", False)) 304 + 305 + # test 3: serve with schedule 306 + try: 307 + results.append(("serve_with_schedule", test_serve_with_schedule())) 308 + except Exception as e: 309 + print(f" ✗ serve with schedule test failed: {e}") 310 + results.append(("serve_with_schedule", False)) 311 + 312 + # summary 313 + print("\n=== summary ===") 314 + passed = sum(1 for _, ok in results if ok) 315 + total = len(results) 316 + 317 + for name, ok in results: 318 + status = "✓" if ok else "✗" 319 + print(f" {status} {name}") 320 + 321 + print(f"\n{passed}/{total} tests passed") 322 + 323 + sys.exit(0 if passed == total else 1) 324 + 325 + 326 + if __name__ == "__main__": 327 + main()
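The 70-second timeout in `test_cron_scheduler` follows from the cron expression: `*/1 * * * *` fires at the top of every minute, so the worst-case wait from an arbitrary start is just under 60 seconds, and the extra 10 seconds is headroom for the server-side scheduler's own tick. A sketch of that worst-case computation (illustrative only, not code from the test):

```python
from datetime import datetime, timedelta, timezone

def next_minute_boundary(now: datetime) -> datetime:
    """Next fire time for a '*/1 * * * *' cron schedule: the top of the next minute."""
    truncated = now.replace(second=0, microsecond=0)
    return truncated + timedelta(minutes=1)

# worst case: we just missed a boundary, so the wait approaches a full 60 seconds
now = datetime(2026, 1, 22, 16, 40, 23, tzinfo=timezone.utc)
wait_seconds = (next_minute_boundary(now) - now).total_seconds()
```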
+21 -1
src/api/deployments.zig
··· 534 535 var scheduled_before: ?[]const u8 = null; 536 if (obj.get("scheduled_before")) |v| { 537 - if (v == .string) scheduled_before = v.string; 538 } 539 540 var limit: usize = 100; ··· 564 jw.endArray() catch {}; 565 566 json_util.send(r, output.toOwnedSlice() catch "[]"); 567 } 568 569 // JSON helpers ··· 755 try jw.write(run.state_name); 756 try jw.objectField("expected_start_time"); 757 try jw.write(run.expected_start_time); 758 try jw.objectField("start_time"); 759 try jw.write(run.start_time); 760 try jw.objectField("end_time");
··· 534 535 var scheduled_before: ?[]const u8 = null; 536 if (obj.get("scheduled_before")) |v| { 537 + if (v == .string) scheduled_before = normalizeTimestamp(alloc, v.string); 538 } 539 540 var limit: usize = 100; ··· 564 jw.endArray() catch {}; 565 566 json_util.send(r, output.toOwnedSlice() catch "[]"); 567 + } 568 + 569 + // Timestamp normalization - convert various timestamp formats to ISO8601 570 + // Client may send "2026-01-22 16:40:23.915842+00:00" but db stores "2026-01-22T16:40:23.915842Z" 571 + fn normalizeTimestamp(alloc: std.mem.Allocator, raw: []const u8) ?[]const u8 { 572 + // find space between date and time 573 + const space_idx = mem.indexOf(u8, raw, " ") orelse return raw; 574 + 575 + var normalized = alloc.alloc(u8, raw.len) catch return raw; 576 + @memcpy(normalized, raw); 577 + normalized[space_idx] = 'T'; 578 + 579 + // convert +00:00 to Z 580 + if (mem.endsWith(u8, normalized, "+00:00")) { 581 + normalized[normalized.len - 6] = 'Z'; 582 + return normalized[0 .. normalized.len - 5]; 583 + } 584 + return normalized; 585 } 586 587 // JSON helpers ··· 773 try jw.write(run.state_name); 774 try jw.objectField("expected_start_time"); 775 try jw.write(run.expected_start_time); 776 + try jw.objectField("next_scheduled_start_time"); 777 + try jw.write(run.next_scheduled_start_time); 778 try jw.objectField("start_time"); 779 try jw.write(run.start_time); 780 try jw.objectField("end_time");
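For cross-checking clients, the normalization `normalizeTimestamp` performs can be mirrored in Python. This sketch applies the same two rewrites (first space → `T`, trailing `+00:00` → `Z`) to the example input from the diff's comment:

```python
def normalize_timestamp(raw: str) -> str:
    """Mirror of normalizeTimestamp in deployments.zig (sketch, not the server code)."""
    if " " not in raw:
        return raw  # already ISO8601-with-T; pass through unchanged
    normalized = raw.replace(" ", "T", 1)
    if normalized.endswith("+00:00"):
        # "+00:00" is 6 chars; drop it and append the equivalent "Z" suffix
        normalized = normalized[:-6] + "Z"
    return normalized
```

As in the Zig version, non-UTC offsets (e.g. `+02:00`) are left intact after the space-to-`T` rewrite; only the UTC offset collapses to `Z`.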
+1 -1
src/api/events.zig
··· 4 const zap = @import("zap"); 5 const log = @import("../logging.zig"); 6 const messaging = @import("../utilities/messaging.zig"); // legacy fallback 7 - const broker = @import("../broker/mod.zig"); 8 const event_broadcaster = @import("../services/event_broadcaster.zig"); 9 10 /// Topic for Prefect events (matches Python: "events")
··· 4 const zap = @import("zap"); 5 const log = @import("../logging.zig"); 6 const messaging = @import("../utilities/messaging.zig"); // legacy fallback 7 + const broker = @import("../broker.zig"); 8 const event_broadcaster = @import("../services/event_broadcaster.zig"); 9 10 /// Topic for Prefect events (matches Python: "events")
+142 -12
src/api/flow_runs.zig
··· 8 const uuid_util = @import("../utilities/uuid.zig"); 9 const time_util = @import("../utilities/time.zig"); 10 const json_util = @import("../utilities/json.zig"); 11 - const orchestration = @import("../orchestration/orchestration.zig"); 12 13 // POST /flow_runs/ - create flow run 14 // GET /flow_runs/{id} - read flow run ··· 50 } 51 } 52 53 json_util.sendStatus(r, "{\"detail\":\"not found\"}", .not_found); 54 } 55 ··· 97 if (s.object.get("name")) |n| state_name = n.string; 98 } 99 100 var new_id_buf: [36]u8 = undefined; 101 const new_id = uuid_util.generate(&new_id_buf); 102 var ts_buf: [32]u8 = undefined; 103 const now = time_util.timestamp(&ts_buf); 104 105 - db.insertFlowRun(new_id, flow_id, name, state_type, state_name, now, .{}) catch { 106 json_util.sendStatus(r, "{\"detail\":\"insert failed\"}", .internal_server_error); 107 return; 108 }; ··· 123 .tags = "[]", 124 .run_count = 0, 125 .expected_start_time = null, 126 .start_time = null, 127 .end_time = null, 128 .total_run_time = 0.0, ··· 131 .work_queue_name = null, 132 .work_queue_id = null, 133 .auto_scheduled = false, 134 }; 135 136 const resp = writeFlowRun(alloc, run, state_id) catch { ··· 157 json_util.send(r, resp); 158 } 159 160 fn setState(r: zap.Request, id: []const u8) !void { 161 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); 162 defer arena.deinit(); ··· 187 // get current run state for orchestration 188 const current_run = db.getFlowRun(alloc, id) catch null; 189 190 - // apply orchestration bookkeeping transforms 191 - var ctx = orchestration.TransitionContext{ 192 - .current_state_type = if (current_run) |run| 193 - if (run.state_type.len > 0) orchestration.StateType.fromString(run.state_type) else null 194 else 195 null, 196 .current_state_timestamp = if (current_run) |run| 197 if (run.state_timestamp.len > 0) run.state_timestamp else null 198 else ··· 201 .end_time = if (current_run) |run| run.end_time else null, 202 .run_count = if (current_run) |run| run.run_count 
else 0, 203 .total_run_time = if (current_run) |run| run.total_run_time else 0.0, 204 - .proposed_state_type = orchestration.StateType.fromString(state_type), 205 .proposed_state_timestamp = now, 206 }; 207 - orchestration.applyBookkeeping(&ctx); 208 209 // atomic state transition with orchestration data 210 - db.setFlowRunState(id, state_id, state_type, state_name, now, ctx.new_start_time, ctx.new_end_time, ctx.new_run_count, ctx.new_total_run_time) catch { 211 json_util.sendStatus(r, "{\"detail\":\"update failed\"}", .internal_server_error); 212 return; 213 }; 214 215 - const resp = writeStateResponse(alloc, state_type, state_name, now, state_id) catch { 216 json_util.sendStatus(r, "{\"detail\":\"serialize error\"}", .internal_server_error); 217 return; 218 }; ··· 334 try jw.endObject(); 335 } 336 337 - fn writeStateResponse(alloc: std.mem.Allocator, state_type: []const u8, state_name: []const u8, timestamp: []const u8, state_id: []const u8) ![]const u8 { 338 var output: std.Io.Writer.Allocating = .init(alloc); 339 var jw: json.Stringify = .{ .writer = &output.writer }; 340 341 try jw.beginObject(); 342 343 try jw.objectField("status"); 344 - try jw.write("ACCEPT"); 345 346 try jw.objectField("details"); 347 try jw.beginObject(); 348 try jw.endObject(); 349 350 try jw.objectField("state");
··· 8 const uuid_util = @import("../utilities/uuid.zig"); 9 const time_util = @import("../utilities/time.zig"); 10 const json_util = @import("../utilities/json.zig"); 11 + const orchestration = @import("../orchestration.zig"); 12 13 // POST /flow_runs/ - create flow run 14 // GET /flow_runs/{id} - read flow run ··· 50 } 51 } 52 53 + // PATCH /flow_runs/{id} - update 54 + if (mem.eql(u8, method, "PATCH")) { 55 + const id = routing.extractIdAfter(target, "/flow_runs/") orelse 56 + routing.extractIdAfter(target, "/api/flow_runs/"); 57 + if (id) |flow_run_id| { 58 + try patch(r, flow_run_id); 59 + return; 60 + } 61 + } 62 + 63 json_util.sendStatus(r, "{\"detail\":\"not found\"}", .not_found); 64 } 65 ··· 107 if (s.object.get("name")) |n| state_name = n.string; 108 } 109 110 + // extract optional scheduling fields 111 + const next_scheduled_start_time: ?[]const u8 = if (obj.get("next_scheduled_start_time")) |v| switch (v) { 112 + .string => |s| s, 113 + else => null, 114 + } else null; 115 + 116 var new_id_buf: [36]u8 = undefined; 117 const new_id = uuid_util.generate(&new_id_buf); 118 var ts_buf: [32]u8 = undefined; 119 const now = time_util.timestamp(&ts_buf); 120 121 + db.insertFlowRun(new_id, flow_id, name, state_type, state_name, now, .{ 122 + .next_scheduled_start_time = next_scheduled_start_time, 123 + }) catch { 124 json_util.sendStatus(r, "{\"detail\":\"insert failed\"}", .internal_server_error); 125 return; 126 }; ··· 141 .tags = "[]", 142 .run_count = 0, 143 .expected_start_time = null, 144 + .next_scheduled_start_time = next_scheduled_start_time, 145 .start_time = null, 146 .end_time = null, 147 .total_run_time = 0.0, ··· 150 .work_queue_name = null, 151 .work_queue_id = null, 152 .auto_scheduled = false, 153 + .idempotency_key = null, 154 }; 155 156 const resp = writeFlowRun(alloc, run, state_id) catch { ··· 177 json_util.send(r, resp); 178 } 179 180 + fn patch(r: zap.Request, id: []const u8) !void { 181 + var arena = 
std.heap.ArenaAllocator.init(std.heap.page_allocator); 182 + defer arena.deinit(); 183 + const alloc = arena.allocator(); 184 + 185 + // verify run exists 186 + const run = db.getFlowRun(alloc, id) catch null orelse { 187 + json_util.sendStatus(r, "{\"detail\":\"flow run not found\"}", .not_found); 188 + return; 189 + }; 190 + 191 + const body = r.body orelse { 192 + json_util.sendStatus(r, "{\"detail\":\"failed to read body\"}", .bad_request); 193 + return; 194 + }; 195 + 196 + const parsed = json.parseFromSlice(json.Value, alloc, body, .{}) catch { 197 + json_util.sendStatus(r, "{\"detail\":\"invalid json\"}", .bad_request); 198 + return; 199 + }; 200 + 201 + const obj = parsed.value.object; 202 + 203 + // extract optional fields for update 204 + const infrastructure_pid = if (obj.get("infrastructure_pid")) |v| switch (v) { 205 + .string => |s| s, 206 + .integer => |i| std.fmt.allocPrint(alloc, "{d}", .{i}) catch null, 207 + else => null, 208 + } else null; 209 + 210 + // update flow run with patched fields 211 + db.flow_runs.patch(id, infrastructure_pid) catch { 212 + json_util.sendStatus(r, "{\"detail\":\"update failed\"}", .internal_server_error); 213 + return; 214 + }; 215 + 216 + // return updated run 217 + const updated_run = db.getFlowRun(alloc, id) catch null orelse run; 218 + const resp = writeFlowRun(alloc, updated_run, null) catch { 219 + json_util.sendStatus(r, "{\"detail\":\"serialize error\"}", .internal_server_error); 220 + return; 221 + }; 222 + json_util.send(r, resp); 223 + } 224 + 225 fn setState(r: zap.Request, id: []const u8) !void { 226 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); 227 defer arena.deinit(); ··· 252 // get current run state for orchestration 253 const current_run = db.getFlowRun(alloc, id) catch null; 254 255 + const initial_state_type: ?orchestration.StateType = if (current_run) |run| 256 + if (run.state_type.len > 0) orchestration.StateType.fromString(run.state_type) else null 257 + else 258 + null; 259 
+ 260 + const proposed_state_type = orchestration.StateType.fromString(state_type); 261 + 262 + // apply orchestration rules (policy) 263 + var rule_ctx = orchestration.RuleContext{ 264 + .initial_state = initial_state_type, 265 + .proposed_state = proposed_state_type, 266 + .initial_state_timestamp = if (current_run) |run| 267 + if (run.state_timestamp.len > 0) run.state_timestamp else null 268 else 269 null, 270 + .proposed_state_timestamp = now, 271 + // for CopyScheduledTime: pass scheduled_time from SCHEDULED state 272 + .initial_scheduled_time = if (current_run) |run| run.next_scheduled_start_time else null, 273 + .run_id = id, 274 + .flow_id = if (current_run) |run| run.flow_id else null, 275 + .deployment_id = if (current_run) |run| run.deployment_id else null, 276 + }; 277 + orchestration.applyPolicy(&orchestration.CoreFlowPolicy, &rule_ctx); 278 + 279 + // if rules rejected/waited/aborted, return without committing state 280 + if (!rule_ctx.isAccepted()) { 281 + const resp = writeStateResponse( 282 + alloc, 283 + rule_ctx.result.status, 284 + rule_ctx.result.details, 285 + // for REJECT, return current state; for WAIT/ABORT, return proposed 286 + if (rule_ctx.result.status == .REJECT and current_run != null) 287 + current_run.?.state_type 288 + else 289 + state_type, 290 + if (rule_ctx.result.status == .REJECT and current_run != null) 291 + current_run.?.state_name 292 + else 293 + state_name, 294 + if (rule_ctx.result.status == .REJECT and current_run != null) 295 + current_run.?.state_timestamp 296 + else 297 + now, 298 + state_id, 299 + ) catch { 300 + json_util.sendStatus(r, "{\"detail\":\"serialize error\"}", .internal_server_error); 301 + return; 302 + }; 303 + json_util.send(r, resp); 304 + return; 305 + } 306 + 307 + // apply orchestration bookkeeping transforms 308 + var bookkeeping_ctx = orchestration.TransitionContext{ 309 + .current_state_type = initial_state_type, 310 .current_state_timestamp = if (current_run) |run| 311 if 
(run.state_timestamp.len > 0) run.state_timestamp else null 312 else ··· 315 .end_time = if (current_run) |run| run.end_time else null, 316 .run_count = if (current_run) |run| run.run_count else 0, 317 .total_run_time = if (current_run) |run| run.total_run_time else 0.0, 318 + .proposed_state_type = proposed_state_type, 319 .proposed_state_timestamp = now, 320 }; 321 + orchestration.applyBookkeeping(&bookkeeping_ctx); 322 323 // atomic state transition with orchestration data 324 + db.setFlowRunState(id, state_id, state_type, state_name, now, bookkeeping_ctx.new_start_time, bookkeeping_ctx.new_end_time, bookkeeping_ctx.new_run_count, bookkeeping_ctx.new_total_run_time, rule_ctx.new_expected_start_time) catch { 325 json_util.sendStatus(r, "{\"detail\":\"update failed\"}", .internal_server_error); 326 return; 327 }; 328 329 + const resp = writeStateResponse(alloc, .ACCEPT, .{}, state_type, state_name, now, state_id) catch { 330 json_util.sendStatus(r, "{\"detail\":\"serialize error\"}", .internal_server_error); 331 return; 332 }; ··· 448 try jw.endObject(); 449 } 450 451 + fn writeStateResponse( 452 + alloc: std.mem.Allocator, 453 + status: orchestration.ResponseStatus, 454 + details: orchestration.ResponseDetails, 455 + state_type: []const u8, 456 + state_name: []const u8, 457 + timestamp: []const u8, 458 + state_id: []const u8, 459 + ) ![]const u8 { 460 var output: std.Io.Writer.Allocating = .init(alloc); 461 var jw: json.Stringify = .{ .writer = &output.writer }; 462 463 try jw.beginObject(); 464 465 try jw.objectField("status"); 466 + try jw.write(status.toString()); 467 468 try jw.objectField("details"); 469 try jw.beginObject(); 470 + if (details.reason) |reason| { 471 + try jw.objectField("reason"); 472 + try jw.write(reason); 473 + } 474 + if (details.retry_after) |retry_after| { 475 + try jw.objectField("retry_after_seconds"); 476 + try jw.write(retry_after); 477 + } 478 try jw.endObject(); 479 480 try jw.objectField("state");
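The setState handler above runs orchestration rules before committing a transition: the first rule that does not ACCEPT short-circuits the policy, and the handler returns the rule's status without writing state. A minimal Python sketch of that flow, assuming hypothetical names (`RuleContext`, `wait_for_scheduled_time`) that only mirror the Zig structures, not the actual API:

```python
from dataclasses import dataclass
from enum import Enum
from typing import Optional


class Status(Enum):
    ACCEPT = "ACCEPT"
    REJECT = "REJECT"
    WAIT = "WAIT"
    ABORT = "ABORT"


@dataclass
class RuleContext:
    initial_state: Optional[str]
    proposed_state: str
    scheduled_time: Optional[float]  # epoch seconds, from next_scheduled_start_time
    now: float
    status: Status = Status.ACCEPT
    retry_after: Optional[float] = None  # seconds, reported in the WAIT response


def wait_for_scheduled_time(ctx: RuleContext) -> None:
    """Delay transitions into PENDING that arrive before the scheduled time."""
    if ctx.proposed_state != "PENDING" or ctx.scheduled_time is None:
        return
    delay = ctx.scheduled_time - ctx.now
    if delay > 0:
        ctx.status = Status.WAIT
        ctx.retry_after = delay


def apply_policy(rules, ctx: RuleContext) -> None:
    for rule in rules:
        rule(ctx)
        if ctx.status != Status.ACCEPT:
            break  # first non-ACCEPT result wins; remaining rules are skipped


# Early transition: scheduled 100s in the future -> WAIT with retry_after
early = RuleContext(initial_state="SCHEDULED", proposed_state="PENDING",
                    scheduled_time=1000.0, now=900.0)
apply_policy([wait_for_scheduled_time], early)

# On-time transition: scheduled time has passed -> ACCEPT
on_time = RuleContext(initial_state="SCHEDULED", proposed_state="PENDING",
                      scheduled_time=1000.0, now=1100.0)
apply_policy([wait_for_scheduled_time], on_time)
```

Only on ACCEPT does the handler go on to apply the bookkeeping transforms and call `setFlowRunState`; the WAIT path returns `retry_after_seconds` in the response details instead.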
+2 -2
src/broker/mod.zig → src/broker.zig
··· 2 // Supports memory (in-process) and redis backends 3 // 4 // Usage: 5 - // const broker = @import("broker"); 6 // try broker.initBroker(allocator, .memory); 7 // defer broker.deinitBroker(); 8 // ··· 14 // const handle = try b.subscribe("events", myHandler); 15 // defer b.unsubscribe(handle); 16 17 - const core = @import("core.zig"); 18 19 // Re-export main types 20 pub const Broker = core.Broker;
··· 2 // Supports memory (in-process) and redis backends 3 // 4 // Usage: 5 + // const broker = @import("broker.zig"); 6 // try broker.initBroker(allocator, .memory); 7 // defer broker.deinitBroker(); 8 // ··· 14 // const handle = try b.subscribe("events", myHandler); 15 // defer b.unsubscribe(handle); 16 17 + const core = @import("broker/core.zig"); 18 19 // Re-export main types 20 pub const Broker = core.Broker;
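The module docstring above sketches the broker facade: init a backend, publish by topic, subscribe with a handle, unsubscribe on teardown. A toy in-memory analogue in Python — names are illustrative only, not the Zig API — showing the handle-based subscribe/unsubscribe contract the memory backend implies:

```python
class MemoryBroker:
    """In-process pub/sub: handlers are invoked synchronously on publish."""

    def __init__(self):
        self._subs = {}       # topic -> list of handlers
        self._handles = {}    # handle -> (topic, handler)
        self._next_handle = 0

    def subscribe(self, topic, handler):
        self._subs.setdefault(topic, []).append(handler)
        handle = self._next_handle
        self._next_handle += 1
        self._handles[handle] = (topic, handler)
        return handle

    def unsubscribe(self, handle):
        topic, handler = self._handles.pop(handle)
        self._subs[topic].remove(handler)

    def publish(self, topic, msg_id, data):
        for handler in list(self._subs.get(topic, [])):
            handler({"id": msg_id, "topic": topic, "data": data})


received = []
b = MemoryBroker()
h = b.subscribe("events", received.append)
b.publish("events", "e1", '{"kind":"flow_run.created"}')
b.unsubscribe(h)
b.publish("events", "e2", "not delivered")
```

The redis backend keeps the same surface but maps publish to XADD and each subscription to a consumer-group reader thread.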
+81 -711
src/broker/redis.zig
··· 1 const std = @import("std"); 2 - const net = std.net; 3 const Allocator = std.mem.Allocator; 4 const Thread = std.Thread; 5 6 const log = @import("../logging.zig"); 7 const broker = @import("core.zig"); 8 9 // ============================================================================ 10 - // Minimal RESP (Redis Serialization Protocol) Client 11 - // Only implements what we need for Redis Streams 12 - // ============================================================================ 13 - 14 - pub const RedisClient = struct { 15 - const Self = @This(); 16 - 17 - stream: net.Stream, 18 - alloc: Allocator, 19 - read_buf: []u8, 20 - buf_len: usize = 0, 21 - buf_pos: usize = 0, 22 - cmd_stack_buf: [16384]u8 = undefined, 23 - 24 - const DEFAULT_BUF_SIZE = 65536; 25 - 26 - pub fn connect(alloc: Allocator, host: []const u8, port: u16) !Self { 27 - // try numeric IP first, fall back to DNS resolution 28 - const address = net.Address.parseIp(host, port) catch blk: { 29 - const list = net.getAddressList(alloc, host, port) catch |err| { 30 - log.err("redis", "failed to resolve {s}: {}", .{ host, err }); 31 - return error.AddressResolutionFailed; 32 - }; 33 - defer list.deinit(); 34 - if (list.addrs.len == 0) { 35 - log.err("redis", "no addresses found for {s}", .{host}); 36 - return error.AddressResolutionFailed; 37 - } 38 - break :blk list.addrs[0]; 39 - }; 40 - const stream = try net.tcpConnectToAddress(address); 41 - const read_buf = try alloc.alloc(u8, DEFAULT_BUF_SIZE); 42 - return .{ 43 - .stream = stream, 44 - .alloc = alloc, 45 - .read_buf = read_buf, 46 - }; 47 - } 48 - 49 - pub fn close(self: *Self) void { 50 - self.alloc.free(self.read_buf); 51 - self.stream.close(); 52 - } 53 - 54 - /// Send a command and get raw response (handles large/fragmented responses) 55 - pub fn sendCommand(self: *Self, args: []const []const u8) ![]const u8 { 56 - // Calculate total size needed for RESP array 57 - var total_size: usize = 16; // header overhead 58 - for (args) |arg| { 59 
- total_size += 16 + arg.len; // $<len>\r\n<data>\r\n 60 - } 61 - 62 - // Use stack buffer for small commands, heap for large 63 - var heap_buf: ?[]u8 = null; 64 - defer if (heap_buf) |hb| self.alloc.free(hb); 65 - 66 - const cmd_buf = if (total_size <= 16384) blk: { 67 - break :blk @as([]u8, &self.cmd_stack_buf); 68 - } else blk: { 69 - heap_buf = try self.alloc.alloc(u8, total_size); 70 - break :blk heap_buf.?; 71 - }; 72 - 73 - var pos: usize = 0; 74 - 75 - // Array header: *<count>\r\n 76 - pos += (std.fmt.bufPrint(cmd_buf[pos..], "*{d}\r\n", .{args.len}) catch return error.BufferTooSmall).len; 77 - 78 - // Each argument as bulk string: $<len>\r\n<data>\r\n 79 - for (args) |arg| { 80 - pos += (std.fmt.bufPrint(cmd_buf[pos..], "${d}\r\n", .{arg.len}) catch return error.BufferTooSmall).len; 81 - if (pos + arg.len + 2 > cmd_buf.len) return error.BufferTooSmall; 82 - @memcpy(cmd_buf[pos..][0..arg.len], arg); 83 - pos += arg.len; 84 - cmd_buf[pos] = '\r'; 85 - cmd_buf[pos + 1] = '\n'; 86 - pos += 2; 87 - } 88 - 89 - // Send command 90 - _ = try self.stream.write(cmd_buf[0..pos]); 91 - 92 - // Reset buffer state for new response 93 - self.buf_len = 0; 94 - self.buf_pos = 0; 95 - 96 - // Read response (may need multiple reads for large responses) 97 - return self.readFullResponse(); 98 - } 99 - 100 - /// Read a complete RESP response, handling fragmentation 101 - fn readFullResponse(self: *Self) ![]const u8 { 102 - // Read initial chunk 103 - const n = try self.stream.read(self.read_buf); 104 - if (n == 0) return error.ConnectionClosed; 105 - self.buf_len = n; 106 - 107 - // For simple responses, check if complete 108 - const first_char = self.read_buf[0]; 109 - switch (first_char) { 110 - '+', '-', ':' => { 111 - // Simple string, error, or integer - read until \r\n 112 - return self.ensureLineComplete(); 113 - }, 114 - '$' => { 115 - // Bulk string - need to read the length then the data 116 - return self.ensureBulkStringComplete(); 117 - }, 118 - '*' => { 119 - // 
Array - more complex, need to track nested elements 120 - return self.ensureArrayComplete(); 121 - }, 122 - else => return self.read_buf[0..self.buf_len], 123 - } 124 - } 125 - 126 - fn ensureLineComplete(self: *Self) ![]const u8 { 127 - while (true) { 128 - // Look for \r\n 129 - if (std.mem.indexOf(u8, self.read_buf[0..self.buf_len], "\r\n")) |_| { 130 - return self.read_buf[0..self.buf_len]; 131 - } 132 - // Need more data 133 - try self.readMore(); 134 - } 135 - } 136 - 137 - fn ensureBulkStringComplete(self: *Self) ![]const u8 { 138 - // Find the length line 139 - const crlf_pos = std.mem.indexOf(u8, self.read_buf[0..self.buf_len], "\r\n") orelse { 140 - try self.readMore(); 141 - return self.ensureBulkStringComplete(); 142 - }; 143 - 144 - // Parse length 145 - const len_str = self.read_buf[1..crlf_pos]; 146 - const len = std.fmt.parseInt(i64, len_str, 10) catch return error.InvalidResponse; 147 - 148 - if (len == -1) { 149 - // Null bulk string 150 - return self.read_buf[0..self.buf_len]; 151 - } 152 - 153 - // Need: $<len>\r\n<data>\r\n 154 - const total_needed = crlf_pos + 2 + @as(usize, @intCast(len)) + 2; 155 - while (self.buf_len < total_needed) { 156 - try self.readMore(); 157 - } 158 - return self.read_buf[0..self.buf_len]; 159 - } 160 - 161 - fn ensureArrayComplete(self: *Self) ![]const u8 { 162 - // For now, use a heuristic: keep reading until we have a reasonable amount 163 - // and the response looks complete (ends with \r\n and balanced) 164 - // This is a simplified approach - production would track nested elements 165 - 166 - var attempts: usize = 0; 167 - const max_attempts = 100; 168 - 169 - while (attempts < max_attempts) { 170 - // Check if response looks complete (basic heuristic) 171 - if (self.buf_len >= 5) { 172 - // Check for null array 173 - if (std.mem.startsWith(u8, self.read_buf[0..self.buf_len], "*-1\r\n")) { 174 - return self.read_buf[0..self.buf_len]; 175 - } 176 - // Check if ends with \r\n and has reasonable structure 177 - if 
(self.read_buf[self.buf_len - 2] == '\r' and self.read_buf[self.buf_len - 1] == '\n') { 178 - // Try to verify completeness by counting array elements 179 - if (self.isArrayComplete()) { 180 - return self.read_buf[0..self.buf_len]; 181 - } 182 - } 183 - } 184 - 185 - // Try to read more 186 - const old_len = self.buf_len; 187 - self.readMore() catch |err| { 188 - if (err == error.WouldBlock) { 189 - // No more data available right now 190 - if (self.isArrayComplete()) { 191 - return self.read_buf[0..self.buf_len]; 192 - } 193 - } 194 - return err; 195 - }; 196 - 197 - // If no new data and looks complete, return what we have 198 - if (self.buf_len == old_len) { 199 - return self.read_buf[0..self.buf_len]; 200 - } 201 - attempts += 1; 202 - } 203 - 204 - return self.read_buf[0..self.buf_len]; 205 - } 206 - 207 - fn isArrayComplete(self: *Self) bool { 208 - // Simple heuristic: try to parse the array structure 209 - var pos: usize = 0; 210 - return self.parseElement(&pos); 211 - } 212 - 213 - fn parseElement(self: *Self, pos: *usize) bool { 214 - if (pos.* >= self.buf_len) return false; 215 - 216 - const c = self.read_buf[pos.*]; 217 - switch (c) { 218 - '*' => { 219 - // Array 220 - pos.* += 1; 221 - const count = self.parseInt(pos) orelse return false; 222 - if (!self.skipCrlf(pos)) return false; 223 - if (count < 0) return true; // null array 224 - var i: i64 = 0; 225 - while (i < count) : (i += 1) { 226 - if (!self.parseElement(pos)) return false; 227 - } 228 - return true; 229 - }, 230 - '$' => { 231 - // Bulk string 232 - pos.* += 1; 233 - const len = self.parseInt(pos) orelse return false; 234 - if (!self.skipCrlf(pos)) return false; 235 - if (len < 0) return true; // null string 236 - const ulen: usize = @intCast(len); 237 - if (pos.* + ulen + 2 > self.buf_len) return false; 238 - pos.* += ulen + 2; 239 - return true; 240 - }, 241 - '+', '-', ':' => { 242 - // Simple string, error, integer 243 - while (pos.* < self.buf_len) { 244 - if (self.read_buf[pos.*] == 
'\r') { 245 - if (!self.skipCrlf(pos)) return false; 246 - return true; 247 - } 248 - pos.* += 1; 249 - } 250 - return false; 251 - }, 252 - else => return false, 253 - } 254 - } 255 - 256 - fn parseInt(self: *Self, pos: *usize) ?i64 { 257 - const start = pos.*; 258 - while (pos.* < self.buf_len and self.read_buf[pos.*] != '\r') { 259 - pos.* += 1; 260 - } 261 - if (pos.* == start) return null; 262 - return std.fmt.parseInt(i64, self.read_buf[start..pos.*], 10) catch null; 263 - } 264 - 265 - fn skipCrlf(self: *Self, pos: *usize) bool { 266 - if (pos.* + 2 > self.buf_len) return false; 267 - if (self.read_buf[pos.*] == '\r' and self.read_buf[pos.* + 1] == '\n') { 268 - pos.* += 2; 269 - return true; 270 - } 271 - return false; 272 - } 273 - 274 - fn readMore(self: *Self) !void { 275 - if (self.buf_len >= self.read_buf.len) { 276 - // Buffer full, need to grow 277 - const new_buf = try self.alloc.realloc(self.read_buf, self.read_buf.len * 2); 278 - self.read_buf = new_buf; 279 - } 280 - const n = try self.stream.read(self.read_buf[self.buf_len..]); 281 - if (n == 0) return error.ConnectionClosed; 282 - self.buf_len += n; 283 - } 284 - 285 - /// XADD stream_key * field value ... 
286 - pub fn xadd(self: *Self, stream_key: []const u8, fields: []const [2][]const u8) ![]const u8 { 287 - var args: [32][]const u8 = undefined; 288 - var i: usize = 0; 289 - 290 - args[i] = "XADD"; 291 - i += 1; 292 - args[i] = stream_key; 293 - i += 1; 294 - args[i] = "*"; // auto-generate ID 295 - i += 1; 296 - 297 - for (fields) |field| { 298 - args[i] = field[0]; 299 - i += 1; 300 - args[i] = field[1]; 301 - i += 1; 302 - } 303 - 304 - const response = try self.sendCommand(args[0..i]); 305 - // Response should be bulk string with message ID 306 - if (response[0] != '$') { 307 - if (response[0] == '-') { 308 - log.err("redis", "XADD error: {s}", .{response}); 309 - return error.RedisError; 310 - } 311 - } 312 - return response; 313 - } 314 - 315 - /// XGROUP CREATE stream_key group_name $ MKSTREAM 316 - pub fn xgroupCreate(self: *Self, stream_key: []const u8, group_name: []const u8, start_id: []const u8) !void { 317 - const args = [_][]const u8{ "XGROUP", "CREATE", stream_key, group_name, start_id, "MKSTREAM" }; 318 - const response = try self.sendCommand(&args); 319 - 320 - // OK or BUSYGROUP error (group already exists) are both fine 321 - if (response[0] == '-') { 322 - if (std.mem.indexOf(u8, response, "BUSYGROUP")) |_| { 323 - // Group already exists, that's fine 324 - return; 325 - } 326 - log.err("redis", "XGROUP CREATE error: {s}", .{response}); 327 - return error.RedisError; 328 - } 329 - } 330 - 331 - /// XGROUP DESTROY stream_key group_name - delete a consumer group 332 - pub fn xgroupDestroy(self: *Self, stream_key: []const u8, group_name: []const u8) !void { 333 - const args = [_][]const u8{ "XGROUP", "DESTROY", stream_key, group_name }; 334 - const response = try self.sendCommand(&args); 335 - 336 - // Returns integer (number of destroyed entries) or error 337 - if (response[0] == '-') { 338 - // NOGROUP error means group doesn't exist, which is fine for cleanup 339 - if (std.mem.indexOf(u8, response, "NOGROUP")) |_| { 340 - return; 341 - } 342 - 
log.err("redis", "XGROUP DESTROY error: {s}", .{response}); 343 - return error.RedisError; 344 - } 345 - } 346 - 347 - /// XREADGROUP GROUP group consumer BLOCK ms COUNT n STREAMS stream > 348 - pub fn xreadgroup( 349 - self: *Self, 350 - group_name: []const u8, 351 - consumer_name: []const u8, 352 - stream_key: []const u8, 353 - block_ms: u32, 354 - count: u32, 355 - start_id: []const u8, // ">" for new, "0" for pending 356 - ) !?StreamEntry { 357 - var block_buf: [16]u8 = undefined; 358 - var count_buf: [16]u8 = undefined; 359 - const block_str = std.fmt.bufPrint(&block_buf, "{d}", .{block_ms}) catch unreachable; 360 - const count_str = std.fmt.bufPrint(&count_buf, "{d}", .{count}) catch unreachable; 361 - 362 - const args = [_][]const u8{ 363 - "XREADGROUP", "GROUP", group_name, consumer_name, 364 - "BLOCK", block_str, "COUNT", count_str, 365 - "STREAMS", stream_key, start_id, 366 - }; 367 - 368 - const response = try self.sendCommand(&args); 369 - 370 - // Parse response 371 - // Null bulk string (*-1\r\n) means no messages 372 - if (response.len >= 4 and std.mem.eql(u8, response[0..4], "*-1\r")) { 373 - return null; 374 - } 375 - // Null array response 376 - if (response.len >= 5 and std.mem.eql(u8, response[0..5], "$-1\r\n")) { 377 - return null; 378 - } 379 - 380 - // Parse the nested array response 381 - return self.parseStreamEntry(response); 382 - } 383 - 384 - /// XAUTOCLAIM stream group consumer min-idle-time start [COUNT count] 385 - pub fn xautoclaim( 386 - self: *Self, 387 - stream_key: []const u8, 388 - group_name: []const u8, 389 - consumer_name: []const u8, 390 - min_idle_ms: u32, 391 - start_id: []const u8, 392 - count: u32, 393 - ) !?AutoclaimResult { 394 - var idle_buf: [16]u8 = undefined; 395 - var count_buf: [16]u8 = undefined; 396 - const idle_str = std.fmt.bufPrint(&idle_buf, "{d}", .{min_idle_ms}) catch unreachable; 397 - const count_str = std.fmt.bufPrint(&count_buf, "{d}", .{count}) catch unreachable; 398 - 399 - const args = [_][]const 
u8{ 400 - "XAUTOCLAIM", stream_key, group_name, consumer_name, 401 - idle_str, start_id, "COUNT", count_str, 402 - }; 403 - 404 - const response = try self.sendCommand(&args); 405 - 406 - if (response[0] == '-') { 407 - log.err("redis", "XAUTOCLAIM error: {s}", .{response}); 408 - return error.RedisError; 409 - } 410 - 411 - return self.parseAutoclaimResult(response); 412 - } 413 - 414 - /// XACK stream_key group_name message_id 415 - pub fn xack(self: *Self, stream_key: []const u8, group_name: []const u8, message_id: []const u8) !void { 416 - const args = [_][]const u8{ "XACK", stream_key, group_name, message_id }; 417 - const response = try self.sendCommand(&args); 418 - 419 - if (response[0] == '-') { 420 - log.err("redis", "XACK error: {s}", .{response}); 421 - return error.RedisError; 422 - } 423 - } 424 - 425 - /// Parse stream entry from XREADGROUP response 426 - fn parseStreamEntry(self: *Self, response: []const u8) !?StreamEntry { 427 - _ = self; 428 - // RESP response format for XREADGROUP: 429 - // *1\r\n (array of 1 stream) 430 - // *2\r\n (stream name + entries) 431 - // $<n>\r\n<stream_name>\r\n 432 - // *1\r\n (array of entries) 433 - // *2\r\n (entry: id + fields) 434 - // $<n>\r\n<message_id>\r\n 435 - // *<n>\r\n (field-value pairs) 436 - // $<n>\r\n<field>\r\n 437 - // $<n>\r\n<value>\r\n 438 - // ... 
439 - 440 - var pos: usize = 0; 441 - 442 - // Skip outer arrays to get to message ID and data 443 - // This is a simplified parser - assumes single stream, single message 444 - var depth: usize = 0; 445 - var stream_entry_id: ?[]const u8 = null; 446 - var event_id: ?[]const u8 = null; 447 - var data: ?[]const u8 = null; 448 - var last_field: ?[]const u8 = null; 449 - 450 - while (pos < response.len) { 451 - const c = response[pos]; 452 - switch (c) { 453 - '*' => { 454 - // Array - skip the count 455 - pos += 1; 456 - while (pos < response.len and response[pos] != '\r') pos += 1; 457 - pos += 2; // skip \r\n 458 - depth += 1; 459 - }, 460 - '$' => { 461 - // Bulk string 462 - pos += 1; 463 - const len_start = pos; 464 - while (pos < response.len and response[pos] != '\r') pos += 1; 465 - const len = std.fmt.parseInt(usize, response[len_start..pos], 10) catch return null; 466 - pos += 2; // skip \r\n 467 - 468 - if (pos + len > response.len) return null; 469 - const value = response[pos..][0..len]; 470 - pos += len + 2; // skip value + \r\n 471 - 472 - // First bulk string at depth 4 that looks like a stream entry ID 473 - if (stream_entry_id == null and depth >= 3) { 474 - if (std.mem.indexOf(u8, value, "-")) |_| { 475 - stream_entry_id = value; 476 - continue; 477 - } 478 - } 479 - 480 - // Track field names and their values 481 - if (last_field == null) { 482 - last_field = value; 483 - } else { 484 - // This is a value for last_field 485 - if (std.mem.eql(u8, last_field.?, "id")) { 486 - event_id = value; 487 - } else if (std.mem.eql(u8, last_field.?, "data")) { 488 - data = value; 489 - } 490 - last_field = null; 491 - } 492 - }, 493 - else => pos += 1, 494 - } 495 - } 496 - 497 - if (stream_entry_id != null and data != null) { 498 - return StreamEntry{ 499 - .stream_id = stream_entry_id.?, 500 - .event_id = event_id, // may be null if not in message 501 - .data = data.?, 502 - }; 503 - } 504 - 505 - return null; 506 - } 507 - 508 - fn 
parseAutoclaimResult(self: *Self, response: []const u8) !?AutoclaimResult { 509 - _ = self; 510 - // XAUTOCLAIM returns: [next_start_id, [[id, [field, value, ...]], ...], [deleted_ids]] 511 - // We need the next_start_id and any claimed messages 512 - 513 - var pos: usize = 0; 514 - var next_id: ?[]const u8 = null; 515 - var entry: ?StreamEntry = null; 516 - 517 - // Skip outer array marker 518 - if (response[pos] != '*') return null; 519 - pos += 1; 520 - while (pos < response.len and response[pos] != '\r') pos += 1; 521 - pos += 2; 522 - 523 - // First element: next start ID (bulk string) 524 - if (pos < response.len and response[pos] == '$') { 525 - pos += 1; 526 - const len_start = pos; 527 - while (pos < response.len and response[pos] != '\r') pos += 1; 528 - const len = std.fmt.parseInt(usize, response[len_start..pos], 10) catch return null; 529 - pos += 2; 530 - if (pos + len <= response.len) { 531 - next_id = response[pos..][0..len]; 532 - pos += len + 2; 533 - } 534 - } 535 - 536 - // Second element: claimed messages array 537 - if (pos < response.len and response[pos] == '*') { 538 - pos += 1; 539 - const count_start = pos; 540 - while (pos < response.len and response[pos] != '\r') pos += 1; 541 - const count = std.fmt.parseInt(i64, response[count_start..pos], 10) catch 0; 542 - pos += 2; 543 - 544 - if (count > 0) { 545 - // Parse first message (simplified - just get first one) 546 - var stream_id: ?[]const u8 = null; 547 - var event_id: ?[]const u8 = null; 548 - var data: ?[]const u8 = null; 549 - var last_field: ?[]const u8 = null; 550 - var depth: usize = 0; 551 - 552 - while (pos < response.len) { 553 - const c = response[pos]; 554 - switch (c) { 555 - '*' => { 556 - pos += 1; 557 - while (pos < response.len and response[pos] != '\r') pos += 1; 558 - pos += 2; 559 - depth += 1; 560 - if (depth > 3) break; // Only parse first message 561 - }, 562 - '$' => { 563 - pos += 1; 564 - const len_start = pos; 565 - while (pos < response.len and response[pos] 
!= '\r') pos += 1; 566 - const len = std.fmt.parseInt(usize, response[len_start..pos], 10) catch break; 567 - pos += 2; 568 - if (pos + len > response.len) break; 569 - const value = response[pos..][0..len]; 570 - pos += len + 2; 571 - 572 - if (stream_id == null and std.mem.indexOf(u8, value, "-") != null) { 573 - stream_id = value; 574 - continue; 575 - } 576 - 577 - if (last_field == null) { 578 - last_field = value; 579 - } else { 580 - if (std.mem.eql(u8, last_field.?, "id")) { 581 - event_id = value; 582 - } else if (std.mem.eql(u8, last_field.?, "data")) { 583 - data = value; 584 - } 585 - last_field = null; 586 - } 587 - }, 588 - else => pos += 1, 589 - } 590 - } 591 - 592 - if (stream_id != null and data != null) { 593 - entry = StreamEntry{ 594 - .stream_id = stream_id.?, 595 - .event_id = event_id, 596 - .data = data.?, 597 - }; 598 - } 599 - } 600 - } 601 - 602 - return AutoclaimResult{ 603 - .next_id = next_id orelse "0-0", 604 - .entry = entry, 605 - }; 606 - } 607 - 608 - /// PING 609 - pub fn ping(self: *Self) !bool { 610 - const args = [_][]const u8{"PING"}; 611 - const response = try self.sendCommand(&args); 612 - return response.len >= 5 and std.mem.eql(u8, response[0..5], "+PONG"); 613 - } 614 - 615 - /// AUTH username password 616 - pub fn auth(self: *Self, username: []const u8, password: []const u8) !void { 617 - const args = [_][]const u8{ "AUTH", username, password }; 618 - const response = try self.sendCommand(&args); 619 - 620 - if (response[0] == '-') { 621 - log.err("redis", "AUTH error: {s}", .{response}); 622 - return error.AuthFailed; 623 - } 624 - } 625 - 626 - /// SELECT db 627 - pub fn selectDb(self: *Self, db_num: u8) !void { 628 - var db_buf: [4]u8 = undefined; 629 - const db_str = std.fmt.bufPrint(&db_buf, "{d}", .{db_num}) catch unreachable; 630 - const args = [_][]const u8{ "SELECT", db_str }; 631 - const response = try self.sendCommand(&args); 632 - 633 - if (response[0] == '-') { 634 - log.err("redis", "SELECT error: {s}", 
.{response}); 635 - return error.SelectFailed; 636 - } 637 - } 638 - }; 639 - 640 - pub const StreamEntry = struct { 641 - stream_id: []const u8, // Redis stream entry ID (e.g., "1234567890-0") 642 - event_id: ?[]const u8, // Original event ID from message data 643 - data: []const u8, 644 - }; 645 - 646 - pub const AutoclaimResult = struct { 647 - next_id: []const u8, 648 - entry: ?StreamEntry, 649 - }; 650 - 651 - // ============================================================================ 652 // Redis Broker Implementation 653 // ============================================================================ 654 ··· 682 }; 683 684 // Test connection 685 - var client = RedisClient.connect(alloc, config.host, config.port) catch |err| { 686 log.err("redis", "failed to connect to {s}:{d}: {}", .{ config.host, config.port, err }); 687 alloc.destroy(self); 688 return error.ConnectionFailed; 689 }; 690 defer client.close(); 691 692 - // Authenticate if password is set 693 - if (config.password.len > 0) { 694 - try client.auth(config.username, config.password); 695 - } 696 - 697 - // Select database if not 0 698 - if (config.db != 0) { 699 - try client.selectDb(config.db); 700 - } 701 - 702 if (!try client.ping()) { 703 log.err("redis", "ping failed", .{}); 704 alloc.destroy(self); ··· 731 self.alloc.destroy(self); 732 } 733 734 - fn connectAndAuth(self: *Self) !RedisClient { 735 - var client = try RedisClient.connect(self.alloc, self.config.host, self.config.port); 736 - errdefer client.close(); 737 - 738 - if (self.config.password.len > 0) { 739 - try client.auth(self.config.username, self.config.password); 740 - } 741 - if (self.config.db != 0) { 742 - try client.selectDb(self.config.db); 743 - } 744 - return client; 745 } 746 747 pub fn publish(self: *Self, topic: []const u8, id: []const u8, data: []const u8) !void { 748 - var client = try self.connectAndAuth(); 749 defer client.close(); 750 751 const fields = [_][2][]const u8{ ··· 753 .{ "data", data }, 754 }; 755 
756 - _ = try client.xadd(topic, &fields); 757 } 758 759 /// Subscribe to a topic with a specific consumer group (for fan-out) ··· 767 768 // Create consumer group if needed (use "0" to read from beginning for new groups) 769 { 770 - var client = try self.connectAndAuth(); 771 defer client.close(); 772 - try client.xgroupCreate(topic, group, "0"); 773 } 774 775 // Create running flag ··· 823 824 // Create consumer group with "$" (only new messages) 825 { 826 - var client = try self.connectAndAuth(); 827 defer client.close(); 828 - try client.xgroupCreate(topic, group, "$"); 829 } 830 831 // Create running flag ··· 876 877 // Destroy ephemeral groups to prevent accumulation 878 if (consumer.ephemeral) { 879 - var client = self.connectAndAuth() catch return; 880 defer client.close(); 881 - client.xgroupDestroy(consumer.topic, consumer.group) catch |err| { 882 log.warn("redis", "failed to destroy ephemeral group '{s}': {}", .{ consumer.group, err }); 883 }; 884 log.debug("redis", "destroyed ephemeral group '{s}'", .{consumer.group}); ··· 905 const MIN_IDLE_MS: u32 = 5000; // 5 seconds before claiming pending messages 906 907 while (running.*) { 908 - var client = self.connectAndAuth() catch |err| { 909 log.err("redis", "consumer connect error: {}", .{err}); 910 Thread.sleep(1 * std.time.ns_per_s); 911 continue; 912 }; 913 defer client.close(); 914 915 // First, try to claim any pending messages (for crash recovery) 916 var pending_start_id: [32]u8 = undefined; 917 @memcpy(pending_start_id[0..3], "0-0"); 918 var pending_id_len: usize = 3; 919 920 while (running.*) { 921 - const autoclaim_result = client.xautoclaim( 922 topic, 923 group, 924 consumer_name, ··· 931 }; 932 933 if (autoclaim_result) |result| { 934 - if (result.entry) |e| { 935 - // Process pending message 936 - const message = broker.Message{ 937 - .id = e.event_id orelse e.stream_id, 938 - .topic = topic, 939 - .data = e.data, 940 - .timestamp = std.time.milliTimestamp(), 941 - }; 942 943 - 
handler(&message) catch |err| { 944 - log.err("redis", "handler error (pending): {}", .{err}); 945 - // Don't ack - will be reclaimed later 946 - continue; 947 - }; 948 949 - // Acknowledge 950 - client.xack(topic, group, e.stream_id) catch |err| { 951 - log.err("redis", "xack error: {}", .{err}); 952 - }; 953 } 954 955 // Update start ID for next iteration ··· 966 } 967 968 // Now read new messages 969 - const entry = client.xreadgroup(group, consumer_name, topic, 1000, 1, ">") catch |err| { 970 log.err("redis", "xreadgroup error: {}", .{err}); 971 Thread.sleep(100 * std.time.ns_per_ms); 972 continue; 973 }; 974 975 if (entry) |e| { 976 - // Create message with the actual event ID, not stream entry ID 977 - const message = broker.Message{ 978 - .id = e.event_id orelse e.stream_id, 979 - .topic = topic, 980 - .data = e.data, 981 - .timestamp = std.time.milliTimestamp(), 982 - }; 983 984 - handler(&message) catch |err| { 985 - log.err("redis", "handler error: {}", .{err}); 986 - // Don't ack - will be reclaimed via XAUTOCLAIM later 987 - continue; 988 - }; 989 990 - // Acknowledge message 991 - client.xack(topic, group, e.stream_id) catch |err| { 992 - log.err("redis", "xack error: {}", .{err}); 993 - }; 994 } 995 } 996 ··· 1002 // Tests 1003 // ============================================================================ 1004 1005 - test "resp command building" { 1006 - // Just verify the client can be instantiated 1007 // Actual Redis tests require a running server 1008 }
··· 1 + //! Redis broker implementation using the external redis library 2 + //! 3 + //! Uses Redis Streams for message broker functionality: 4 + //! - XADD for publish 5 + //! - XREADGROUP for consume (with consumer groups) 6 + //! - XAUTOCLAIM for pending recovery on startup 7 + //! - XACK after successful processing 8 + 9 const std = @import("std"); 10 const Allocator = std.mem.Allocator; 11 const Thread = std.Thread; 12 13 + const redis = @import("redis"); 14 const log = @import("../logging.zig"); 15 const broker = @import("core.zig"); 16 17 // ============================================================================ 18 // Redis Broker Implementation 19 // ============================================================================ 20 ··· 48 }; 49 50 // Test connection 51 + var client = connectAndAuth(alloc, config) catch |err| { 52 log.err("redis", "failed to connect to {s}:{d}: {}", .{ config.host, config.port, err }); 53 alloc.destroy(self); 54 return error.ConnectionFailed; 55 }; 56 defer client.close(); 57 58 if (!try client.ping()) { 59 log.err("redis", "ping failed", .{}); 60 alloc.destroy(self); ··· 87 self.alloc.destroy(self); 88 } 89 90 + fn connectAndAuth(alloc: Allocator, config: broker.RedisConfig) !redis.Client { 91 + return redis.Client.connectWithConfig(alloc, .{ 92 + .host = config.host, 93 + .port = config.port, 94 + .username = config.username, 95 + .password = config.password, 96 + .db = config.db, 97 + }); 98 } 99 100 pub fn publish(self: *Self, topic: []const u8, id: []const u8, data: []const u8) !void { 101 + var client = try connectAndAuth(self.alloc, self.config); 102 defer client.close(); 103 104 const fields = [_][2][]const u8{ ··· 106 .{ "data", data }, 107 }; 108 109 + var streams = client.streams(); 110 + _ = try streams.xadd(topic, .auto, &fields); 111 } 112 113 /// Subscribe to a topic with a specific consumer group (for fan-out) ··· 121 122 // Create consumer group if needed (use "0" to read from beginning for new groups) 123 
{ 124 + var client = try connectAndAuth(self.alloc, self.config); 125 defer client.close(); 126 + var streams = client.streams(); 127 + try streams.xgroupCreate(topic, group, "0"); 128 } 129 130 // Create running flag ··· 178 179 // Create consumer group with "$" (only new messages) 180 { 181 + var client = try connectAndAuth(self.alloc, self.config); 182 defer client.close(); 183 + var streams = client.streams(); 184 + try streams.xgroupCreate(topic, group, "$"); 185 } 186 187 // Create running flag ··· 232 233 // Destroy ephemeral groups to prevent accumulation 234 if (consumer.ephemeral) { 235 + var client = connectAndAuth(self.alloc, self.config) catch return; 236 defer client.close(); 237 + var streams = client.streams(); 238 + _ = streams.xgroupDestroy(consumer.topic, consumer.group) catch |err| { 239 log.warn("redis", "failed to destroy ephemeral group '{s}': {}", .{ consumer.group, err }); 240 }; 241 log.debug("redis", "destroyed ephemeral group '{s}'", .{consumer.group}); ··· 262 const MIN_IDLE_MS: u32 = 5000; // 5 seconds before claiming pending messages 263 264 while (running.*) { 265 + var client = connectAndAuth(self.alloc, self.config) catch |err| { 266 log.err("redis", "consumer connect error: {}", .{err}); 267 Thread.sleep(1 * std.time.ns_per_s); 268 continue; 269 }; 270 defer client.close(); 271 272 + var streams = client.streams(); 273 + 274 // First, try to claim any pending messages (for crash recovery) 275 var pending_start_id: [32]u8 = undefined; 276 @memcpy(pending_start_id[0..3], "0-0"); 277 var pending_id_len: usize = 3; 278 279 while (running.*) { 280 + const autoclaim_result = streams.xautoclaim( 281 topic, 282 group, 283 consumer_name, ··· 290 }; 291 292 if (autoclaim_result) |result| { 293 + if (result.entries.len > 0) { 294 + const e = result.entries[0]; 295 + // Extract id and data from fields 296 + const event_id = e.get("id"); 297 + const data = e.get("data"); 298 299 + if (data) |d| { 300 + const message = broker.Message{ 301 + .id 
= event_id orelse e.id, 302 + .topic = topic, 303 + .data = d, 304 + .timestamp = std.time.milliTimestamp(), 305 + }; 306 + 307 + handler(&message) catch |err| { 308 + log.err("redis", "handler error (pending): {}", .{err}); 309 + // Don't ack - will be reclaimed later 310 + continue; 311 + }; 312 313 + // Acknowledge 314 + _ = streams.xack(topic, group, &.{e.id}) catch |err| { 315 + log.err("redis", "xack error: {}", .{err}); 316 + }; 317 + } 318 } 319 320 // Update start ID for next iteration ··· 331 } 332 333 // Now read new messages 334 + const entry = streams.xreadgroup(group, consumer_name, topic, 1000, 1, ">") catch |err| { 335 log.err("redis", "xreadgroup error: {}", .{err}); 336 Thread.sleep(100 * std.time.ns_per_ms); 337 continue; 338 }; 339 340 if (entry) |e| { 341 + // Extract id and data from fields 342 + const event_id = e.get("id"); 343 + const data = e.get("data"); 344 + 345 + if (data) |d| { 346 + const message = broker.Message{ 347 + .id = event_id orelse e.id, 348 + .topic = topic, 349 + .data = d, 350 + .timestamp = std.time.milliTimestamp(), 351 + }; 352 353 + handler(&message) catch |err| { 354 + log.err("redis", "handler error: {}", .{err}); 355 + // Don't ack - will be reclaimed via XAUTOCLAIM later 356 + continue; 357 + }; 358 359 + // Acknowledge message 360 + _ = streams.xack(topic, group, &.{e.id}) catch |err| { 361 + log.err("redis", "xack error: {}", .{err}); 362 + }; 363 + } 364 } 365 } 366 ··· 372 // Tests 373 // ============================================================================ 374 375 + test "redis broker compiles" { 376 + // Just verify the module compiles 377 // Actual Redis tests require a running server 378 }
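The module doc comment above names the four stream calls the broker is built on (XADD, XREADGROUP, XAUTOCLAIM, XACK). The delivery guarantee they combine into - an entry stays in the group's pending-entries list until acked, and an idle pending entry can be reclaimed by another consumer - can be sketched with a toy in-memory model. This is illustrative Python, not the Zig redis client API used in the diff; `ToyStream` and its single implicit consumer group are invented for the sketch.

```python
import time

class ToyStream:
    """Toy model of one Redis Stream plus one consumer group.

    XADD appends to the log, XREADGROUP delivers a new entry and records
    it in the pending-entries list (PEL), XAUTOCLAIM re-delivers entries
    that have been pending too long, XACK removes an entry from the PEL.
    """

    def __init__(self):
        self.entries = []         # (entry_id, data), append-only log
        self.next_id = 0
        self.last_delivered = -1  # the group's read cursor (">")
        self.pending = {}         # entry_id -> last delivery time

    def xadd(self, data):
        entry_id = self.next_id
        self.next_id += 1
        self.entries.append((entry_id, data))
        return entry_id

    def xreadgroup(self):
        for entry_id, data in self.entries:
            if entry_id > self.last_delivered:
                self.last_delivered = entry_id
                self.pending[entry_id] = time.monotonic()
                return entry_id, data
        return None

    def xautoclaim(self, min_idle_s):
        now = time.monotonic()
        for entry_id in sorted(self.pending):
            if now - self.pending[entry_id] >= min_idle_s:
                self.pending[entry_id] = now
                return entry_id, dict(self.entries)[entry_id]
        return None

    def xack(self, entry_id):
        self.pending.pop(entry_id, None)


stream = ToyStream()
stream.xadd("event: flow-run scheduled")
delivered = stream.xreadgroup()        # delivered, now sits in the PEL
# The consumer "crashes" before acking; once the entry has been idle
# past the claim window (MIN_IDLE_MS in the consumer loop), another
# consumer can reclaim and finish it - at-least-once delivery.
reclaimed = stream.xautoclaim(min_idle_s=0.0)
stream.xack(reclaimed[0])              # only ack after the handler succeeds
```

This is why the consumer loop deliberately skips `xack` on handler error: an unacked entry stays pending and is picked up again by the XAUTOCLAIM pass on the next iteration.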
+26 -52
src/db/block_documents.zig
··· 43 } 44 45 pub fn getById(alloc: Allocator, id: []const u8) !?BlockDocumentRow { 46 - var r = backend.db.row( 47 \\SELECT id, created, updated, name, data, is_anonymous, 48 \\ block_type_id, block_type_name, block_schema_id 49 \\FROM block_document WHERE id = ? 50 , .{id}) catch return null; 51 52 - if (r) |*row| { 53 - defer row.deinit(); 54 - return BlockDocumentRow{ 55 - .id = try alloc.dupe(u8, row.text(0)), 56 - .created = try alloc.dupe(u8, row.text(1)), 57 - .updated = try alloc.dupe(u8, row.text(2)), 58 - .name = if (row.text(3).len > 0) try alloc.dupe(u8, row.text(3)) else null, 59 - .data = try alloc.dupe(u8, row.text(4)), 60 - .is_anonymous = row.int(5) != 0, 61 - .block_type_id = try alloc.dupe(u8, row.text(6)), 62 - .block_type_name = if (row.text(7).len > 0) try alloc.dupe(u8, row.text(7)) else null, 63 - .block_schema_id = try alloc.dupe(u8, row.text(8)), 64 - }; 65 } 66 return null; 67 } ··· 71 block_type_slug: []const u8, 72 name: []const u8, 73 ) !?BlockDocumentRow { 74 - var r = backend.db.row( 75 \\SELECT bd.id, bd.created, bd.updated, bd.name, bd.data, bd.is_anonymous, 76 \\ bd.block_type_id, bd.block_type_name, bd.block_schema_id 77 \\FROM block_document bd 78 \\JOIN block_type bt ON bd.block_type_id = bt.id 79 \\WHERE bt.slug = ? AND bd.name = ? 
80 , .{ block_type_slug, name }) catch return null; 81 82 - if (r) |*row| { 83 - defer row.deinit(); 84 - return BlockDocumentRow{ 85 - .id = try alloc.dupe(u8, row.text(0)), 86 - .created = try alloc.dupe(u8, row.text(1)), 87 - .updated = try alloc.dupe(u8, row.text(2)), 88 - .name = if (row.text(3).len > 0) try alloc.dupe(u8, row.text(3)) else null, 89 - .data = try alloc.dupe(u8, row.text(4)), 90 - .is_anonymous = row.int(5) != 0, 91 - .block_type_id = try alloc.dupe(u8, row.text(6)), 92 - .block_type_name = if (row.text(7).len > 0) try alloc.dupe(u8, row.text(7)) else null, 93 - .block_schema_id = try alloc.dupe(u8, row.text(8)), 94 - }; 95 } 96 return null; 97 } ··· 144 }; 145 defer rows.deinit(); 146 147 - while (rows.next()) |r| { 148 - try results.append(alloc, .{ 149 - .id = try alloc.dupe(u8, r.text(0)), 150 - .created = try alloc.dupe(u8, r.text(1)), 151 - .updated = try alloc.dupe(u8, r.text(2)), 152 - .name = if (r.text(3).len > 0) try alloc.dupe(u8, r.text(3)) else null, 153 - .data = try alloc.dupe(u8, r.text(4)), 154 - .is_anonymous = r.int(5) != 0, 155 - .block_type_id = try alloc.dupe(u8, r.text(6)), 156 - .block_type_name = if (r.text(7).len > 0) try alloc.dupe(u8, r.text(7)) else null, 157 - .block_schema_id = try alloc.dupe(u8, r.text(8)), 158 - }); 159 } 160 161 return results.toOwnedSlice(alloc); ··· 189 }; 190 defer rows.deinit(); 191 192 - while (rows.next()) |r| { 193 - try results.append(alloc, .{ 194 - .id = try alloc.dupe(u8, r.text(0)), 195 - .created = try alloc.dupe(u8, r.text(1)), 196 - .updated = try alloc.dupe(u8, r.text(2)), 197 - .name = if (r.text(3).len > 0) try alloc.dupe(u8, r.text(3)) else null, 198 - .data = try alloc.dupe(u8, r.text(4)), 199 - .is_anonymous = r.int(5) != 0, 200 - .block_type_id = try alloc.dupe(u8, r.text(6)), 201 - .block_type_name = if (r.text(7).len > 0) try alloc.dupe(u8, r.text(7)) else null, 202 - .block_schema_id = try alloc.dupe(u8, r.text(8)), 203 - }); 204 } 205 206 return 
results.toOwnedSlice(alloc); 207 }
··· 43 } 44 45 pub fn getById(alloc: Allocator, id: []const u8) !?BlockDocumentRow { 46 + var rows = backend.db.query( 47 \\SELECT id, created, updated, name, data, is_anonymous, 48 \\ block_type_id, block_type_name, block_schema_id 49 \\FROM block_document WHERE id = ? 50 , .{id}) catch return null; 51 + defer rows.deinit(); 52 53 + if (rows.next()) |row| { 54 + return rowToBlockDocument(alloc, row); 55 } 56 return null; 57 } ··· 61 block_type_slug: []const u8, 62 name: []const u8, 63 ) !?BlockDocumentRow { 64 + var rows = backend.db.query( 65 \\SELECT bd.id, bd.created, bd.updated, bd.name, bd.data, bd.is_anonymous, 66 \\ bd.block_type_id, bd.block_type_name, bd.block_schema_id 67 \\FROM block_document bd 68 \\JOIN block_type bt ON bd.block_type_id = bt.id 69 \\WHERE bt.slug = ? AND bd.name = ? 70 , .{ block_type_slug, name }) catch return null; 71 + defer rows.deinit(); 72 73 + if (rows.next()) |row| { 74 + return rowToBlockDocument(alloc, row); 75 } 76 return null; 77 } ··· 124 }; 125 defer rows.deinit(); 126 127 + while (rows.next()) |row| { 128 + try results.append(alloc, rowToBlockDocument(alloc, row)); 129 } 130 131 return results.toOwnedSlice(alloc); ··· 159 }; 160 defer rows.deinit(); 161 162 + while (rows.next()) |row| { 163 + try results.append(alloc, rowToBlockDocument(alloc, row)); 164 } 165 166 return results.toOwnedSlice(alloc); 167 } 168 + 169 + fn rowToBlockDocument(alloc: Allocator, row: anytype) BlockDocumentRow { 170 + return .{ 171 + .id = alloc.dupe(u8, row.text(0)) catch "", 172 + .created = alloc.dupe(u8, row.text(1)) catch "", 173 + .updated = alloc.dupe(u8, row.text(2)) catch "", 174 + .name = if (row.text(3).len > 0) alloc.dupe(u8, row.text(3)) catch null else null, 175 + .data = alloc.dupe(u8, row.text(4)) catch "{}", 176 + .is_anonymous = row.int(5) != 0, 177 + .block_type_id = alloc.dupe(u8, row.text(6)) catch "", 178 + .block_type_name = if (row.text(7).len > 0) alloc.dupe(u8, row.text(7)) catch null else null, 179 + .block_schema_id 
= alloc.dupe(u8, row.text(8)) catch "", 180 + }; 181 + }
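The refactor above collapses four copies of the column-by-column mapping into one `rowToBlockDocument` helper, so the get-by-id, get-by-name, and both list paths can never drift out of sync on column order or on the empty-string-means-NULL convention. The same shape in miniature, using Python's stdlib `sqlite3` (the three-column schema here is a hypothetical stand-in for the real `block_document` table):

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE block_document (id TEXT, name TEXT, data TEXT)")
conn.execute("INSERT INTO block_document VALUES ('bd-1', '', '{}')")

def row_to_block_document(row):
    """Single place that maps a raw row to a record. Empty TEXT becomes
    None, mirroring the `if (row.text(n).len > 0) ... else null` checks
    in the Zig helper."""
    doc_id, name, data = row
    return {"id": doc_id, "name": name or None, "data": data}

# Every query path funnels through the one mapper.
docs = [row_to_block_document(r)
        for r in conn.execute("SELECT id, name, data FROM block_document")]
```

The trade-off made in the diff is that the helper swallows allocation failures (`catch ""`) instead of propagating them, matching the convention the file's other row mappers already use.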
+33 -41
src/db/block_schemas.zig
··· 37 checksum: []const u8, 38 version: ?[]const u8, 39 ) !?BlockSchemaRow { 40 - var r = if (version) |v| 41 - backend.db.row( 42 \\SELECT id, created, updated, checksum, fields, capabilities, version, block_type_id 43 \\FROM block_schema WHERE checksum = ? AND version = ? 44 - , .{ checksum, v }) catch return null 45 - else 46 - backend.db.row( 47 \\SELECT id, created, updated, checksum, fields, capabilities, version, block_type_id 48 \\FROM block_schema WHERE checksum = ? 49 , .{checksum}) catch return null; 50 51 - if (r) |*row| { 52 - defer row.deinit(); 53 - return BlockSchemaRow{ 54 - .id = try alloc.dupe(u8, row.text(0)), 55 - .created = try alloc.dupe(u8, row.text(1)), 56 - .updated = try alloc.dupe(u8, row.text(2)), 57 - .checksum = try alloc.dupe(u8, row.text(3)), 58 - .fields = try alloc.dupe(u8, row.text(4)), 59 - .capabilities = try alloc.dupe(u8, row.text(5)), 60 - .version = try alloc.dupe(u8, row.text(6)), 61 - .block_type_id = try alloc.dupe(u8, row.text(7)), 62 - }; 63 } 64 return null; 65 } 66 67 pub fn getById(alloc: Allocator, id: []const u8) !?BlockSchemaRow { 68 - var r = backend.db.row( 69 \\SELECT id, created, updated, checksum, fields, capabilities, version, block_type_id 70 \\FROM block_schema WHERE id = ? 
71 , .{id}) catch return null; 72 73 - if (r) |*row| { 74 - defer row.deinit(); 75 - return BlockSchemaRow{ 76 - .id = try alloc.dupe(u8, row.text(0)), 77 - .created = try alloc.dupe(u8, row.text(1)), 78 - .updated = try alloc.dupe(u8, row.text(2)), 79 - .checksum = try alloc.dupe(u8, row.text(3)), 80 - .fields = try alloc.dupe(u8, row.text(4)), 81 - .capabilities = try alloc.dupe(u8, row.text(5)), 82 - .version = try alloc.dupe(u8, row.text(6)), 83 - .block_type_id = try alloc.dupe(u8, row.text(7)), 84 - }; 85 } 86 return null; 87 } ··· 99 }; 100 defer rows.deinit(); 101 102 - while (rows.next()) |r| { 103 - try results.append(alloc, .{ 104 - .id = try alloc.dupe(u8, r.text(0)), 105 - .created = try alloc.dupe(u8, r.text(1)), 106 - .updated = try alloc.dupe(u8, r.text(2)), 107 - .checksum = try alloc.dupe(u8, r.text(3)), 108 - .fields = try alloc.dupe(u8, r.text(4)), 109 - .capabilities = try alloc.dupe(u8, r.text(5)), 110 - .version = try alloc.dupe(u8, r.text(6)), 111 - .block_type_id = try alloc.dupe(u8, r.text(7)), 112 - }); 113 } 114 115 return results.toOwnedSlice(alloc); 116 }
··· 37 checksum: []const u8, 38 version: ?[]const u8, 39 ) !?BlockSchemaRow { 40 + if (version) |v| { 41 + var rows = backend.db.query( 42 \\SELECT id, created, updated, checksum, fields, capabilities, version, block_type_id 43 \\FROM block_schema WHERE checksum = ? AND version = ? 44 + , .{ checksum, v }) catch return null; 45 + defer rows.deinit(); 46 + 47 + if (rows.next()) |row| { 48 + return rowToBlockSchema(alloc, row); 49 + } 50 + } else { 51 + var rows = backend.db.query( 52 \\SELECT id, created, updated, checksum, fields, capabilities, version, block_type_id 53 \\FROM block_schema WHERE checksum = ? 54 , .{checksum}) catch return null; 55 + defer rows.deinit(); 56 57 + if (rows.next()) |row| { 58 + return rowToBlockSchema(alloc, row); 59 + } 60 } 61 return null; 62 } 63 64 pub fn getById(alloc: Allocator, id: []const u8) !?BlockSchemaRow { 65 + var rows = backend.db.query( 66 \\SELECT id, created, updated, checksum, fields, capabilities, version, block_type_id 67 \\FROM block_schema WHERE id = ? 68 , .{id}) catch return null; 69 + defer rows.deinit(); 70 71 + if (rows.next()) |row| { 72 + return rowToBlockSchema(alloc, row); 73 } 74 return null; 75 } ··· 87 }; 88 defer rows.deinit(); 89 90 + while (rows.next()) |row| { 91 + try results.append(alloc, rowToBlockSchema(alloc, row)); 92 } 93 94 return results.toOwnedSlice(alloc); 95 } 96 + 97 + fn rowToBlockSchema(alloc: Allocator, row: anytype) BlockSchemaRow { 98 + return .{ 99 + .id = alloc.dupe(u8, row.text(0)) catch "", 100 + .created = alloc.dupe(u8, row.text(1)) catch "", 101 + .updated = alloc.dupe(u8, row.text(2)) catch "", 102 + .checksum = alloc.dupe(u8, row.text(3)) catch "", 103 + .fields = alloc.dupe(u8, row.text(4)) catch "{}", 104 + .capabilities = alloc.dupe(u8, row.text(5)) catch "[]", 105 + .version = alloc.dupe(u8, row.text(6)) catch "1", 106 + .block_type_id = alloc.dupe(u8, row.text(7)) catch "", 107 + }; 108 + }
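`getByChecksum` above branches into two separate statements depending on whether the optional `version` filter is present, rather than trying to encode the optionality in one SQL string. A minimal sketch of that branching lookup, again in stdlib `sqlite3` with an invented two-column schema:

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE block_schema (id TEXT, checksum TEXT, version TEXT)")
conn.executemany("INSERT INTO block_schema VALUES (?, ?, ?)",
                 [("s1", "abc", "1.0"), ("s2", "def", "2.0")])

def get_by_checksum(checksum, version=None):
    # Branch on the optional filter, mirroring the two prepared
    # statements in the diff (version given vs. not given).
    if version is not None:
        row = conn.execute(
            "SELECT id FROM block_schema WHERE checksum = ? AND version = ?",
            (checksum, version)).fetchone()
    else:
        row = conn.execute(
            "SELECT id FROM block_schema WHERE checksum = ?",
            (checksum,)).fetchone()
    return row[0] if row else None
```

Keeping two plain statements costs a little duplication but keeps each query index-friendly and trivially readable; the shared row mapping is what the `rowToBlockSchema` helper deduplicates.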
+25 -43
src/db/block_types.zig
··· 46 } 47 48 pub fn getBySlug(alloc: Allocator, slug: []const u8) !?BlockTypeRow { 49 - var r = backend.db.row( 50 \\SELECT id, created, updated, name, slug, logo_url, documentation_url, 51 \\ description, code_example, is_protected 52 \\FROM block_type WHERE slug = ? 53 , .{slug}) catch return null; 54 55 - if (r) |*row| { 56 - defer row.deinit(); 57 - return BlockTypeRow{ 58 - .id = try alloc.dupe(u8, row.text(0)), 59 - .created = try alloc.dupe(u8, row.text(1)), 60 - .updated = try alloc.dupe(u8, row.text(2)), 61 - .name = try alloc.dupe(u8, row.text(3)), 62 - .slug = try alloc.dupe(u8, row.text(4)), 63 - .logo_url = if (row.text(5).len > 0) try alloc.dupe(u8, row.text(5)) else null, 64 - .documentation_url = if (row.text(6).len > 0) try alloc.dupe(u8, row.text(6)) else null, 65 - .description = if (row.text(7).len > 0) try alloc.dupe(u8, row.text(7)) else null, 66 - .code_example = if (row.text(8).len > 0) try alloc.dupe(u8, row.text(8)) else null, 67 - .is_protected = row.int(9) != 0, 68 - }; 69 } 70 return null; 71 } 72 73 pub fn getById(alloc: Allocator, id: []const u8) !?BlockTypeRow { 74 - var r = backend.db.row( 75 \\SELECT id, created, updated, name, slug, logo_url, documentation_url, 76 \\ description, code_example, is_protected 77 \\FROM block_type WHERE id = ? 
78 , .{id}) catch return null; 79 80 - if (r) |*row| { 81 - defer row.deinit(); 82 - return BlockTypeRow{ 83 - .id = try alloc.dupe(u8, row.text(0)), 84 - .created = try alloc.dupe(u8, row.text(1)), 85 - .updated = try alloc.dupe(u8, row.text(2)), 86 - .name = try alloc.dupe(u8, row.text(3)), 87 - .slug = try alloc.dupe(u8, row.text(4)), 88 - .logo_url = if (row.text(5).len > 0) try alloc.dupe(u8, row.text(5)) else null, 89 - .documentation_url = if (row.text(6).len > 0) try alloc.dupe(u8, row.text(6)) else null, 90 - .description = if (row.text(7).len > 0) try alloc.dupe(u8, row.text(7)) else null, 91 - .code_example = if (row.text(8).len > 0) try alloc.dupe(u8, row.text(8)) else null, 92 - .is_protected = row.int(9) != 0, 93 - }; 94 } 95 return null; 96 } ··· 132 }; 133 defer rows.deinit(); 134 135 - while (rows.next()) |r| { 136 - try results.append(alloc, .{ 137 - .id = try alloc.dupe(u8, r.text(0)), 138 - .created = try alloc.dupe(u8, r.text(1)), 139 - .updated = try alloc.dupe(u8, r.text(2)), 140 - .name = try alloc.dupe(u8, r.text(3)), 141 - .slug = try alloc.dupe(u8, r.text(4)), 142 - .logo_url = if (r.text(5).len > 0) try alloc.dupe(u8, r.text(5)) else null, 143 - .documentation_url = if (r.text(6).len > 0) try alloc.dupe(u8, r.text(6)) else null, 144 - .description = if (r.text(7).len > 0) try alloc.dupe(u8, r.text(7)) else null, 145 - .code_example = if (r.text(8).len > 0) try alloc.dupe(u8, r.text(8)) else null, 146 - .is_protected = r.int(9) != 0, 147 - }); 148 } 149 150 return results.toOwnedSlice(alloc); 151 }
··· 46 } 47 48 pub fn getBySlug(alloc: Allocator, slug: []const u8) !?BlockTypeRow { 49 + var rows = backend.db.query( 50 \\SELECT id, created, updated, name, slug, logo_url, documentation_url, 51 \\ description, code_example, is_protected 52 \\FROM block_type WHERE slug = ? 53 , .{slug}) catch return null; 54 + defer rows.deinit(); 55 56 + if (rows.next()) |row| { 57 + return rowToBlockType(alloc, row); 58 } 59 return null; 60 } 61 62 pub fn getById(alloc: Allocator, id: []const u8) !?BlockTypeRow { 63 + var rows = backend.db.query( 64 \\SELECT id, created, updated, name, slug, logo_url, documentation_url, 65 \\ description, code_example, is_protected 66 \\FROM block_type WHERE id = ? 67 , .{id}) catch return null; 68 + defer rows.deinit(); 69 70 + if (rows.next()) |row| { 71 + return rowToBlockType(alloc, row); 72 } 73 return null; 74 } ··· 110 }; 111 defer rows.deinit(); 112 113 + while (rows.next()) |row| { 114 + try results.append(alloc, rowToBlockType(alloc, row)); 115 } 116 117 return results.toOwnedSlice(alloc); 118 } 119 + 120 + fn rowToBlockType(alloc: Allocator, row: anytype) BlockTypeRow { 121 + return .{ 122 + .id = alloc.dupe(u8, row.text(0)) catch "", 123 + .created = alloc.dupe(u8, row.text(1)) catch "", 124 + .updated = alloc.dupe(u8, row.text(2)) catch "", 125 + .name = alloc.dupe(u8, row.text(3)) catch "", 126 + .slug = alloc.dupe(u8, row.text(4)) catch "", 127 + .logo_url = if (row.text(5).len > 0) alloc.dupe(u8, row.text(5)) catch null else null, 128 + .documentation_url = if (row.text(6).len > 0) alloc.dupe(u8, row.text(6)) catch null else null, 129 + .description = if (row.text(7).len > 0) alloc.dupe(u8, row.text(7)) catch null else null, 130 + .code_example = if (row.text(8).len > 0) alloc.dupe(u8, row.text(8)) catch null else null, 131 + .is_protected = row.int(9) != 0, 132 + }; 133 + }
+1 -1
src/db/deployments.zig
··· 352 var r = backend.db.row("SELECT COUNT(*) FROM deployment", .{}) catch return 0; 353 if (r) |*row| { 354 defer row.deinit(); 355 - return @intCast(row.int(0)); 356 } 357 return 0; 358 }
··· 352 var r = backend.db.row("SELECT COUNT(*) FROM deployment", .{}) catch return 0; 353 if (r) |*row| { 354 defer row.deinit(); 355 + return @intCast(row.bigint(0)); 356 } 357 return 0; 358 }
+1 -1
src/db/events.zig
··· 64 var r = backend.db.row("SELECT COUNT(*) FROM events", .{}) catch return 0; 65 if (r) |*row| { 66 defer row.deinit(); 67 - return @intCast(row.int(0)); 68 } 69 return 0; 70 }
··· 64 var r = backend.db.row("SELECT COUNT(*) FROM events", .{}) catch return 0; 65 if (r) |*row| { 66 defer row.deinit(); 67 + return @intCast(row.bigint(0)); 68 } 69 return 0; 70 }
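The two one-line fixes above (`row.int(0)` to `row.bigint(0)` in both `deployments.zig` and `events.zig`) matter because `COUNT(*)` is a 64-bit integer: reading it through a 32-bit accessor silently truncates once the count passes 2^31 - 1. The exact failure mode of the Zig `row.int` helper depends on the backend binding, but the arithmetic it risks is easy to show:

```python
import ctypes

count = 5_000_000_000                   # a COUNT(*) larger than 2**31 - 1
as_i32 = ctypes.c_int32(count).value    # what a 32-bit read would keep
as_i64 = ctypes.c_int64(count).value    # what a bigint read preserves
# 5_000_000_000 mod 2**32 wraps to 705_032_704 in 32 bits
```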
+156 -120
src/db/flow_runs.zig
··· 3 4 const backend = @import("backend.zig"); 5 const log = @import("../logging.zig"); 6 7 pub const FlowRunRow = struct { 8 id: []const u8, ··· 17 tags: []const u8, 18 run_count: i64, 19 expected_start_time: ?[]const u8, 20 start_time: ?[]const u8, 21 end_time: ?[]const u8, 22 total_run_time: f64, ··· 26 work_queue_name: ?[]const u8, 27 work_queue_id: ?[]const u8, 28 auto_scheduled: bool, 29 }; 30 31 pub const InsertParams = struct { ··· 35 work_queue_id: ?[]const u8 = null, 36 auto_scheduled: bool = false, 37 expected_start_time: ?[]const u8 = null, 38 }; 39 40 pub fn insert( ··· 48 ) !void { 49 backend.db.exec( 50 \\INSERT INTO flow_run (id, flow_id, name, state_type, state_name, state_timestamp, 51 - \\ deployment_id, deployment_version, work_queue_name, work_queue_id, auto_scheduled, expected_start_time) 52 - \\VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 53 , .{ 54 id, 55 flow_id, ··· 63 params.work_queue_id, 64 @as(i64, if (params.auto_scheduled) 1 else 0), 65 params.expected_start_time, 66 }) catch |err| { 67 log.err("database", "insert flow_run error: {}", .{err}); 68 return err; 69 }; 70 } 71 72 pub fn get(alloc: Allocator, id: []const u8) !?FlowRunRow { 73 - var r = backend.db.row( 74 \\SELECT id, created, updated, flow_id, name, state_type, state_name, state_timestamp, 75 - \\ parameters, tags, run_count, expected_start_time, start_time, end_time, total_run_time, 76 - \\ deployment_id, deployment_version, work_queue_name, work_queue_id, auto_scheduled 77 \\FROM flow_run WHERE id = ? 
78 , .{id}) catch return null; 79 80 - if (r) |*row| { 81 - defer row.deinit(); 82 - return FlowRunRow{ 83 - .id = try alloc.dupe(u8, row.text(0)), 84 - .created = try alloc.dupe(u8, row.text(1)), 85 - .updated = try alloc.dupe(u8, row.text(2)), 86 - .flow_id = try alloc.dupe(u8, row.text(3)), 87 - .name = try alloc.dupe(u8, row.text(4)), 88 - .state_type = try alloc.dupe(u8, row.text(5)), 89 - .state_name = try alloc.dupe(u8, row.text(6)), 90 - .state_timestamp = try alloc.dupe(u8, row.text(7)), 91 - .parameters = try alloc.dupe(u8, row.text(8)), 92 - .tags = try alloc.dupe(u8, row.text(9)), 93 - .run_count = row.bigint(10), 94 - .expected_start_time = if (row.text(11).len > 0) try alloc.dupe(u8, row.text(11)) else null, 95 - .start_time = if (row.text(12).len > 0) try alloc.dupe(u8, row.text(12)) else null, 96 - .end_time = if (row.text(13).len > 0) try alloc.dupe(u8, row.text(13)) else null, 97 - .total_run_time = row.float(14), 98 - .deployment_id = if (row.text(15).len > 0) try alloc.dupe(u8, row.text(15)) else null, 99 - .deployment_version = if (row.text(16).len > 0) try alloc.dupe(u8, row.text(16)) else null, 100 - .work_queue_name = if (row.text(17).len > 0) try alloc.dupe(u8, row.text(17)) else null, 101 - .work_queue_id = if (row.text(18).len > 0) try alloc.dupe(u8, row.text(18)) else null, 102 - .auto_scheduled = row.bigint(19) != 0, 103 - }; 104 } 105 return null; 106 } ··· 115 end_time: ?[]const u8, 116 run_count: i64, 117 total_run_time: f64, 118 ) !void { 119 // Lock mutex only for SQLite (postgres pool handles concurrency) 120 if (backend.db.dialect == .sqlite) { ··· 132 errdefer txn.rollback(); 133 134 // Execute within transaction (uses same connection for postgres) 135 - txn.exec( 136 - \\UPDATE flow_run SET state_id = ?, state_type = ?, state_name = ?, state_timestamp = ?, updated = ?, 137 - \\ start_time = ?, end_time = ?, run_count = ?, total_run_time = ? 138 - \\WHERE id = ? 
139 - , .{ 140 - state_id, state_type, state_name, timestamp, timestamp, 141 - start_time, end_time, run_count, total_run_time, run_id, 142 - }) catch |err| { 143 - log.err("database", "update flow_run error: {}", .{err}); 144 - return err; 145 - }; 146 147 txn.exec( 148 \\INSERT INTO flow_run_state (id, flow_run_id, type, name, timestamp) ··· 164 165 var rows = backend.db.query( 166 \\SELECT id, created, updated, flow_id, name, state_type, state_name, state_timestamp, 167 - \\ parameters, tags, run_count, expected_start_time, start_time, end_time, total_run_time, 168 - \\ deployment_id, deployment_version, work_queue_name, work_queue_id, auto_scheduled 169 \\FROM flow_run ORDER BY created DESC LIMIT ? 170 , .{@as(i64, @intCast(limit))}) catch |err| { 171 log.err("database", "list flow_runs error: {}", .{err}); ··· 174 defer rows.deinit(); 175 176 while (rows.next()) |r| { 177 - try results.append(alloc, .{ 178 - .id = try alloc.dupe(u8, r.text(0)), 179 - .created = try alloc.dupe(u8, r.text(1)), 180 - .updated = try alloc.dupe(u8, r.text(2)), 181 - .flow_id = try alloc.dupe(u8, r.text(3)), 182 - .name = try alloc.dupe(u8, r.text(4)), 183 - .state_type = try alloc.dupe(u8, r.text(5)), 184 - .state_name = try alloc.dupe(u8, r.text(6)), 185 - .state_timestamp = try alloc.dupe(u8, r.text(7)), 186 - .parameters = try alloc.dupe(u8, r.text(8)), 187 - .tags = try alloc.dupe(u8, r.text(9)), 188 - .run_count = r.bigint(10), 189 - .expected_start_time = if (r.text(11).len > 0) try alloc.dupe(u8, r.text(11)) else null, 190 - .start_time = if (r.text(12).len > 0) try alloc.dupe(u8, r.text(12)) else null, 191 - .end_time = if (r.text(13).len > 0) try alloc.dupe(u8, r.text(13)) else null, 192 - .total_run_time = r.float(14), 193 - .deployment_id = if (r.text(15).len > 0) try alloc.dupe(u8, r.text(15)) else null, 194 - .deployment_version = if (r.text(16).len > 0) try alloc.dupe(u8, r.text(16)) else null, 195 - .work_queue_name = if (r.text(17).len > 0) try alloc.dupe(u8, 
r.text(17)) else null, 196 - .work_queue_id = if (r.text(18).len > 0) try alloc.dupe(u8, r.text(18)) else null, 197 - .auto_scheduled = r.bigint(19) != 0, 198 - }); 199 } 200 201 return results.toOwnedSlice(alloc); ··· 208 209 var rows = backend.db.query( 210 \\SELECT id, created, updated, flow_id, name, state_type, state_name, state_timestamp, 211 - \\ parameters, tags, run_count, expected_start_time, start_time, end_time, total_run_time, 212 - \\ deployment_id, deployment_version, work_queue_name, work_queue_id, auto_scheduled 213 \\FROM flow_run WHERE deployment_id = ? AND state_type = 'SCHEDULED' 214 \\ORDER BY expected_start_time ASC LIMIT ? 215 , .{ deployment_id, @as(i64, @intCast(limit)) }) catch |err| { ··· 219 defer rows.deinit(); 220 221 while (rows.next()) |r| { 222 - try results.append(alloc, .{ 223 - .id = try alloc.dupe(u8, r.text(0)), 224 - .created = try alloc.dupe(u8, r.text(1)), 225 - .updated = try alloc.dupe(u8, r.text(2)), 226 - .flow_id = try alloc.dupe(u8, r.text(3)), 227 - .name = try alloc.dupe(u8, r.text(4)), 228 - .state_type = try alloc.dupe(u8, r.text(5)), 229 - .state_name = try alloc.dupe(u8, r.text(6)), 230 - .state_timestamp = try alloc.dupe(u8, r.text(7)), 231 - .parameters = try alloc.dupe(u8, r.text(8)), 232 - .tags = try alloc.dupe(u8, r.text(9)), 233 - .run_count = r.bigint(10), 234 - .expected_start_time = if (r.text(11).len > 0) try alloc.dupe(u8, r.text(11)) else null, 235 - .start_time = if (r.text(12).len > 0) try alloc.dupe(u8, r.text(12)) else null, 236 - .end_time = if (r.text(13).len > 0) try alloc.dupe(u8, r.text(13)) else null, 237 - .total_run_time = r.float(14), 238 - .deployment_id = if (r.text(15).len > 0) try alloc.dupe(u8, r.text(15)) else null, 239 - .deployment_version = if (r.text(16).len > 0) try alloc.dupe(u8, r.text(16)) else null, 240 - .work_queue_name = if (r.text(17).len > 0) try alloc.dupe(u8, r.text(17)) else null, 241 - .work_queue_id = if (r.text(18).len > 0) try alloc.dupe(u8, r.text(18)) else 
null, 242 - .auto_scheduled = r.bigint(19) != 0, 243 - }); 244 } 245 246 return results.toOwnedSlice(alloc); ··· 266 if (results.items.len >= limit) break; 267 } 268 269 - // Sort by expected_start_time 270 const items = results.items; 271 std.mem.sort(FlowRunRow, items, {}, struct { 272 fn lessThan(_: void, a: FlowRunRow, b: FlowRunRow) bool { 273 - const a_time = a.expected_start_time orelse ""; 274 - const b_time = b.expected_start_time orelse ""; 275 - return std.mem.lessThan(u8, a_time, b_time); 276 } 277 }.lessThan); 278 ··· 284 return results.toOwnedSlice(alloc); 285 } 286 287 - /// Get scheduled flow runs for a deployment with optional time filter 288 fn getScheduledByDeploymentBefore( 289 alloc: Allocator, 290 deployment_id: []const u8, ··· 297 if (scheduled_before) |before| { 298 var rows = backend.db.query( 299 \\SELECT id, created, updated, flow_id, name, state_type, state_name, state_timestamp, 300 - \\ parameters, tags, run_count, expected_start_time, start_time, end_time, total_run_time, 301 - \\ deployment_id, deployment_version, work_queue_name, work_queue_id, auto_scheduled 302 \\FROM flow_run WHERE deployment_id = ? AND state_type = 'SCHEDULED' 303 - \\ AND (expected_start_time IS NULL OR expected_start_time <= ?) 304 - \\ORDER BY expected_start_time ASC LIMIT ? 305 , .{ deployment_id, before, @as(i64, @intCast(limit)) }) catch |err| { 306 log.err("database", "get scheduled flow_runs error: {}", .{err}); 307 return err; ··· 314 } else { 315 var rows = backend.db.query( 316 \\SELECT id, created, updated, flow_id, name, state_type, state_name, state_timestamp, 317 - \\ parameters, tags, run_count, expected_start_time, start_time, end_time, total_run_time, 318 - \\ deployment_id, deployment_version, work_queue_name, work_queue_id, auto_scheduled 319 \\FROM flow_run WHERE deployment_id = ? AND state_type = 'SCHEDULED' 320 - \\ORDER BY expected_start_time ASC LIMIT ? 
321 , .{ deployment_id, @as(i64, @intCast(limit)) }) catch |err| { 322 log.err("database", "get scheduled flow_runs error: {}", .{err}); 323 return err; ··· 332 return results.toOwnedSlice(alloc); 333 } 334 335 - /// Get scheduled flow runs for a work queue with optional time filter 336 pub fn getScheduledByWorkQueue( 337 alloc: Allocator, 338 work_queue_id: []const u8, ··· 345 if (scheduled_before) |before| { 346 var rows = backend.db.query( 347 \\SELECT id, created, updated, flow_id, name, state_type, state_name, state_timestamp, 348 - \\ parameters, tags, run_count, expected_start_time, start_time, end_time, total_run_time, 349 - \\ deployment_id, deployment_version, work_queue_name, work_queue_id, auto_scheduled 350 \\FROM flow_run WHERE work_queue_id = ? AND state_type = 'SCHEDULED' 351 - \\ AND (expected_start_time IS NULL OR expected_start_time <= ?) 352 - \\ORDER BY expected_start_time ASC LIMIT ? 353 , .{ work_queue_id, before, @as(i64, @intCast(limit)) }) catch |err| { 354 log.err("database", "get scheduled flow_runs by queue error: {}", .{err}); 355 return err; ··· 362 } else { 363 var rows = backend.db.query( 364 \\SELECT id, created, updated, flow_id, name, state_type, state_name, state_timestamp, 365 - \\ parameters, tags, run_count, expected_start_time, start_time, end_time, total_run_time, 366 - \\ deployment_id, deployment_version, work_queue_name, work_queue_id, auto_scheduled 367 \\FROM flow_run WHERE work_queue_id = ? AND state_type = 'SCHEDULED' 368 - \\ORDER BY expected_start_time ASC LIMIT ? 
369 , .{ work_queue_id, @as(i64, @intCast(limit)) }) catch |err| { 370 log.err("database", "get scheduled flow_runs by queue error: {}", .{err}); 371 return err; ··· 400 if (results.items.len >= limit) break; 401 } 402 403 - // Sort by expected_start_time 404 const items = results.items; 405 std.mem.sort(FlowRunRow, items, {}, struct { 406 fn lessThan(_: void, a: FlowRunRow, b: FlowRunRow) bool { 407 - const a_time = a.expected_start_time orelse ""; 408 - const b_time = b.expected_start_time orelse ""; 409 - return std.mem.lessThan(u8, a_time, b_time); 410 } 411 }.lessThan); 412 ··· 432 .tags = alloc.dupe(u8, r.text(9)) catch "[]", 433 .run_count = r.bigint(10), 434 .expected_start_time = if (r.text(11).len > 0) alloc.dupe(u8, r.text(11)) catch null else null, 435 - .start_time = if (r.text(12).len > 0) alloc.dupe(u8, r.text(12)) catch null else null, 436 - .end_time = if (r.text(13).len > 0) alloc.dupe(u8, r.text(13)) catch null else null, 437 - .total_run_time = r.float(14), 438 - .deployment_id = if (r.text(15).len > 0) alloc.dupe(u8, r.text(15)) catch null else null, 439 - .deployment_version = if (r.text(16).len > 0) alloc.dupe(u8, r.text(16)) catch null else null, 440 - .work_queue_name = if (r.text(17).len > 0) alloc.dupe(u8, r.text(17)) catch null else null, 441 - .work_queue_id = if (r.text(18).len > 0) alloc.dupe(u8, r.text(18)) catch null else null, 442 - .auto_scheduled = r.bigint(19) != 0, 443 }; 444 }
··· 3 4 const backend = @import("backend.zig"); 5 const log = @import("../logging.zig"); 6 + const time_util = @import("../utilities/time.zig"); 7 8 pub const FlowRunRow = struct { 9 id: []const u8, ··· 18 tags: []const u8, 19 run_count: i64, 20 expected_start_time: ?[]const u8, 21 + next_scheduled_start_time: ?[]const u8, 22 start_time: ?[]const u8, 23 end_time: ?[]const u8, 24 total_run_time: f64, ··· 28 work_queue_name: ?[]const u8, 29 work_queue_id: ?[]const u8, 30 auto_scheduled: bool, 31 + idempotency_key: ?[]const u8, 32 }; 33 34 pub const InsertParams = struct { ··· 38 work_queue_id: ?[]const u8 = null, 39 auto_scheduled: bool = false, 40 expected_start_time: ?[]const u8 = null, 41 + next_scheduled_start_time: ?[]const u8 = null, 42 + idempotency_key: ?[]const u8 = null, 43 + parameters: ?[]const u8 = null, 44 }; 45 46 pub fn insert( ··· 54 ) !void { 55 backend.db.exec( 56 \\INSERT INTO flow_run (id, flow_id, name, state_type, state_name, state_timestamp, 57 + \\ deployment_id, deployment_version, work_queue_name, work_queue_id, auto_scheduled, 58 + \\ expected_start_time, next_scheduled_start_time, idempotency_key, parameters) 59 + \\VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 60 , .{ 61 id, 62 flow_id, ··· 70 params.work_queue_id, 71 @as(i64, if (params.auto_scheduled) 1 else 0), 72 params.expected_start_time, 73 + params.next_scheduled_start_time, 74 + params.idempotency_key, 75 + params.parameters orelse "{}", 76 }) catch |err| { 77 log.err("database", "insert flow_run error: {}", .{err}); 78 return err; 79 }; 80 } 81 82 + /// Insert a flow run idempotently - silently ignores duplicates based on (flow_id, idempotency_key). 83 + /// Used by the scheduler to safely create runs without duplicates. 
84 + pub fn insertOrIgnore( 85 + id: []const u8, 86 + flow_id: []const u8, 87 + name: []const u8, 88 + state_type: []const u8, 89 + state_name: []const u8, 90 + timestamp: []const u8, 91 + params: InsertParams, 92 + ) !bool { 93 + // Use INSERT OR IGNORE for SQLite, INSERT ... ON CONFLICT DO NOTHING for PostgreSQL 94 + const sql = if (backend.db.dialect == .sqlite) 95 + \\INSERT OR IGNORE INTO flow_run (id, flow_id, name, state_type, state_name, state_timestamp, 96 + \\ deployment_id, deployment_version, work_queue_name, work_queue_id, auto_scheduled, 97 + \\ expected_start_time, next_scheduled_start_time, idempotency_key, parameters) 98 + \\VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 99 + else 100 + \\INSERT INTO flow_run (id, flow_id, name, state_type, state_name, state_timestamp, 101 + \\ deployment_id, deployment_version, work_queue_name, work_queue_id, auto_scheduled, 102 + \\ expected_start_time, next_scheduled_start_time, idempotency_key, parameters) 103 + \\VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
104 + \\ON CONFLICT (flow_id, idempotency_key) WHERE idempotency_key IS NOT NULL DO NOTHING 
105 + ; 
106 + 
107 + backend.db.exec(sql, .{ 
108 + id, 
109 + flow_id, 
110 + name, 
111 + state_type, 
112 + state_name, 
113 + timestamp, 
114 + params.deployment_id, 
115 + params.deployment_version, 
116 + params.work_queue_name, 
117 + params.work_queue_id, 
118 + @as(i64, if (params.auto_scheduled) 1 else 0), 
119 + params.expected_start_time, 
120 + params.next_scheduled_start_time, 
121 + params.idempotency_key, 
122 + params.parameters orelse "{}", 
123 + }) catch |err| { 
124 + log.err("database", "insertOrIgnore flow_run error: {}", .{err}); 
125 + return err; 
126 + }; 
127 + 
128 + // TODO: report whether a row was actually inserted (sqlite changes(), 
129 + // postgres affected-row count); until then this always claims success. 
130 + return true; 
131 + } 
132 + 
133 pub fn get(alloc: Allocator, id: []const u8) !?FlowRunRow { 
134 + var rows = backend.db.query( 
135 \\SELECT id, created, updated, flow_id, name, state_type, state_name, state_timestamp, 
136 + \\ parameters, tags, run_count, expected_start_time, next_scheduled_start_time, start_time, end_time, total_run_time, 
137 + \\ deployment_id, deployment_version, work_queue_name, work_queue_id, auto_scheduled, idempotency_key 
138 \\FROM flow_run WHERE id = ? 
139 , .{id}) catch return null; 140 + defer rows.deinit(); 141 142 + if (rows.next()) |row| { 143 + return rowToFlowRun(alloc, row); 144 } 145 return null; 146 } ··· 155 end_time: ?[]const u8, 156 run_count: i64, 157 total_run_time: f64, 158 + expected_start_time: ?[]const u8, 159 ) !void { 160 // Lock mutex only for SQLite (postgres pool handles concurrency) 161 if (backend.db.dialect == .sqlite) { ··· 173 errdefer txn.rollback(); 174 175 // Execute within transaction (uses same connection for postgres) 176 + // Only update expected_start_time if provided (from CopyScheduledTime rule) 177 + if (expected_start_time) |est| { 178 + txn.exec( 179 + \\UPDATE flow_run SET state_id = ?, state_type = ?, state_name = ?, state_timestamp = ?, updated = ?, 180 + \\ start_time = ?, end_time = ?, run_count = ?, total_run_time = ?, expected_start_time = ? 181 + \\WHERE id = ? 182 + , .{ 183 + state_id, state_type, state_name, timestamp, timestamp, 184 + start_time, end_time, run_count, total_run_time, est, 185 + run_id, 186 + }) catch |err| { 187 + log.err("database", "update flow_run error: {}", .{err}); 188 + return err; 189 + }; 190 + } else { 191 + txn.exec( 192 + \\UPDATE flow_run SET state_id = ?, state_type = ?, state_name = ?, state_timestamp = ?, updated = ?, 193 + \\ start_time = ?, end_time = ?, run_count = ?, total_run_time = ? 194 + \\WHERE id = ? 
195 + , .{ 196 + state_id, state_type, state_name, timestamp, timestamp, 197 + start_time, end_time, run_count, total_run_time, run_id, 198 + }) catch |err| { 199 + log.err("database", "update flow_run error: {}", .{err}); 200 + return err; 201 + }; 202 + } 203 204 txn.exec( 205 \\INSERT INTO flow_run_state (id, flow_run_id, type, name, timestamp) ··· 221 222 var rows = backend.db.query( 223 \\SELECT id, created, updated, flow_id, name, state_type, state_name, state_timestamp, 224 + \\ parameters, tags, run_count, expected_start_time, next_scheduled_start_time, start_time, end_time, total_run_time, 225 + \\ deployment_id, deployment_version, work_queue_name, work_queue_id, auto_scheduled, idempotency_key 226 \\FROM flow_run ORDER BY created DESC LIMIT ? 227 , .{@as(i64, @intCast(limit))}) catch |err| { 228 log.err("database", "list flow_runs error: {}", .{err}); ··· 231 defer rows.deinit(); 232 233 while (rows.next()) |r| { 234 + try results.append(alloc, rowToFlowRun(alloc, r)); 235 } 236 237 return results.toOwnedSlice(alloc); ··· 244 245 var rows = backend.db.query( 246 \\SELECT id, created, updated, flow_id, name, state_type, state_name, state_timestamp, 247 + \\ parameters, tags, run_count, expected_start_time, next_scheduled_start_time, start_time, end_time, total_run_time, 248 + \\ deployment_id, deployment_version, work_queue_name, work_queue_id, auto_scheduled, idempotency_key 249 \\FROM flow_run WHERE deployment_id = ? AND state_type = 'SCHEDULED' 250 \\ORDER BY expected_start_time ASC LIMIT ? 
251 , .{ deployment_id, @as(i64, @intCast(limit)) }) catch |err| { ··· 255 defer rows.deinit(); 256 257 while (rows.next()) |r| { 258 + try results.append(alloc, rowToFlowRun(alloc, r)); 259 } 260 261 return results.toOwnedSlice(alloc); ··· 281 if (results.items.len >= limit) break; 282 } 283 284 + // Sort by next_scheduled_start_time using proper timestamp parsing 285 const items = results.items; 286 std.mem.sort(FlowRunRow, items, {}, struct { 287 fn lessThan(_: void, a: FlowRunRow, b: FlowRunRow) bool { 288 + const a_time = a.next_scheduled_start_time orelse a.expected_start_time orelse ""; 289 + const b_time = b.next_scheduled_start_time orelse b.expected_start_time orelse ""; 290 + // Parse to microseconds for proper comparison 291 + const a_us = time_util.parse(a_time) orelse 0; 292 + const b_us = time_util.parse(b_time) orelse 0; 293 + return a_us < b_us; 294 } 295 }.lessThan); 296 ··· 302 return results.toOwnedSlice(alloc); 303 } 304 305 + /// Get scheduled flow runs for a deployment with optional time filter. 306 + /// Filters on next_scheduled_start_time to match Python Prefect behavior. 307 fn getScheduledByDeploymentBefore( 308 alloc: Allocator, 309 deployment_id: []const u8, ··· 316 if (scheduled_before) |before| { 317 var rows = backend.db.query( 318 \\SELECT id, created, updated, flow_id, name, state_type, state_name, state_timestamp, 319 + \\ parameters, tags, run_count, expected_start_time, next_scheduled_start_time, start_time, end_time, total_run_time, 320 + \\ deployment_id, deployment_version, work_queue_name, work_queue_id, auto_scheduled, idempotency_key 321 \\FROM flow_run WHERE deployment_id = ? AND state_type = 'SCHEDULED' 322 + \\ AND (next_scheduled_start_time IS NULL OR next_scheduled_start_time <= ?) 323 + \\ORDER BY next_scheduled_start_time ASC LIMIT ? 
324 , .{ deployment_id, before, @as(i64, @intCast(limit)) }) catch |err| { 325 log.err("database", "get scheduled flow_runs error: {}", .{err}); 326 return err; ··· 333 } else { 334 var rows = backend.db.query( 335 \\SELECT id, created, updated, flow_id, name, state_type, state_name, state_timestamp, 336 + \\ parameters, tags, run_count, expected_start_time, next_scheduled_start_time, start_time, end_time, total_run_time, 337 + \\ deployment_id, deployment_version, work_queue_name, work_queue_id, auto_scheduled, idempotency_key 338 \\FROM flow_run WHERE deployment_id = ? AND state_type = 'SCHEDULED' 339 + \\ORDER BY next_scheduled_start_time ASC LIMIT ? 340 , .{ deployment_id, @as(i64, @intCast(limit)) }) catch |err| { 341 log.err("database", "get scheduled flow_runs error: {}", .{err}); 342 return err; ··· 351 return results.toOwnedSlice(alloc); 352 } 353 354 + /// Get scheduled flow runs for a work queue with optional time filter. 355 + /// Filters on next_scheduled_start_time to match Python Prefect behavior. 356 pub fn getScheduledByWorkQueue( 357 alloc: Allocator, 358 work_queue_id: []const u8, ··· 365 if (scheduled_before) |before| { 366 var rows = backend.db.query( 367 \\SELECT id, created, updated, flow_id, name, state_type, state_name, state_timestamp, 368 + \\ parameters, tags, run_count, expected_start_time, next_scheduled_start_time, start_time, end_time, total_run_time, 369 + \\ deployment_id, deployment_version, work_queue_name, work_queue_id, auto_scheduled, idempotency_key 370 \\FROM flow_run WHERE work_queue_id = ? AND state_type = 'SCHEDULED' 371 + \\ AND (next_scheduled_start_time IS NULL OR next_scheduled_start_time <= ?) 372 + \\ORDER BY next_scheduled_start_time ASC LIMIT ? 
373 , .{ work_queue_id, before, @as(i64, @intCast(limit)) }) catch |err| { 
374 log.err("database", "get scheduled flow_runs by queue error: {}", .{err}); 
375 return err; 
··· 
382 } else { 
383 var rows = backend.db.query( 
384 \\SELECT id, created, updated, flow_id, name, state_type, state_name, state_timestamp, 
385 + \\ parameters, tags, run_count, expected_start_time, next_scheduled_start_time, start_time, end_time, total_run_time, 
386 + \\ deployment_id, deployment_version, work_queue_name, work_queue_id, auto_scheduled, idempotency_key 
387 \\FROM flow_run WHERE work_queue_id = ? AND state_type = 'SCHEDULED' 
388 + \\ORDER BY next_scheduled_start_time ASC LIMIT ? 
389 , .{ work_queue_id, @as(i64, @intCast(limit)) }) catch |err| { 
390 log.err("database", "get scheduled flow_runs by queue error: {}", .{err}); 
391 return err; 
··· 
420 if (results.items.len >= limit) break; 
421 } 
422 
423 + // Sort by next_scheduled_start_time using proper timestamp parsing 
424 const items = results.items; 
425 std.mem.sort(FlowRunRow, items, {}, struct { 
426 fn lessThan(_: void, a: FlowRunRow, b: FlowRunRow) bool { 
427 + const a_time = a.next_scheduled_start_time orelse a.expected_start_time orelse ""; 
428 + const b_time = b.next_scheduled_start_time orelse b.expected_start_time orelse ""; 
429 + // Parse to microseconds for proper comparison 
430 + const a_us = time_util.parse(a_time) orelse 0; 
431 + const b_us = time_util.parse(b_time) orelse 0; 
432 + return a_us < b_us; 
433 } 
434 }.lessThan); 
435 
··· 
455 .tags = alloc.dupe(u8, r.text(9)) catch "[]", 
456 .run_count = r.bigint(10), 
457 .expected_start_time = if (r.text(11).len > 0) alloc.dupe(u8, r.text(11)) catch null else null, 
458 + .next_scheduled_start_time = if (r.text(12).len > 0) alloc.dupe(u8, r.text(12)) catch null else null, 
459 + .start_time = if (r.text(13).len > 0) alloc.dupe(u8, r.text(13)) catch null else null, 
460 + .end_time = if (r.text(14).len > 0) alloc.dupe(u8, r.text(14)) catch null else null, 
461 + .total_run_time = r.float(15), 
462 + .deployment_id = if (r.text(16).len > 0) alloc.dupe(u8, r.text(16)) catch null else null, 
463 + .deployment_version = if (r.text(17).len > 0) alloc.dupe(u8, r.text(17)) catch null else null, 
464 + .work_queue_name = if (r.text(18).len > 0) alloc.dupe(u8, r.text(18)) catch null else null, 
465 + .work_queue_id = if (r.text(19).len > 0) alloc.dupe(u8, r.text(19)) catch null else null, 
466 + .auto_scheduled = r.int(20) != 0, 
467 + .idempotency_key = if (r.text(21).len > 0) alloc.dupe(u8, r.text(21)) catch null else null, 
468 }; 
469 } 
470 + 
471 + /// Patch a flow run with optional fields. 
472 + /// Currently accepts infrastructure_pid but does not persist it. 
473 + pub fn patch(id: []const u8, infrastructure_pid: ?[]const u8) !void { 
474 + // For now, just accept the PATCH request without actually updating. 
475 + // The runner uses PATCH to set infrastructure metadata, but we can 
476 + // skip this for basic functionality - the state transitions are what matter. 
477 + _ = id; 
478 + _ = infrastructure_pid; 
479 + // TODO: add infrastructure_pid column and update it 
480 + }
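editor's note: the dialect split in `insertOrIgnore` above (`INSERT OR IGNORE` for sqlite, `INSERT ... ON CONFLICT ... DO NOTHING` for postgres) can be sanity-checked against stock SQLite. the snippet below is an illustrative python sketch, not the zig API — table and index names mirror this commit's schema, and `rowcount` plays the role of sqlite's `changes()`:

```python
import sqlite3

# in-memory db with the same partial unique index this commit creates
con = sqlite3.connect(":memory:")
con.execute("CREATE TABLE flow_run (id TEXT PRIMARY KEY, flow_id TEXT, idempotency_key TEXT)")
con.execute(
    "CREATE UNIQUE INDEX uq_flow_run__flow_id_idempotency_key "
    "ON flow_run(flow_id, idempotency_key) WHERE idempotency_key IS NOT NULL"
)

def insert_or_ignore(run_id, flow_id, key):
    # INSERT OR IGNORE drops the row silently when the partial index conflicts
    cur = con.execute(
        "INSERT OR IGNORE INTO flow_run (id, flow_id, idempotency_key) VALUES (?, ?, ?)",
        (run_id, flow_id, key),
    )
    return cur.rowcount == 1  # 1 if inserted, 0 if ignored

assert insert_or_ignore("r1", "f1", "sched-2024-01-01T00:00:00")
assert not insert_or_ignore("r2", "f1", "sched-2024-01-01T00:00:00")  # duplicate key, ignored
assert insert_or_ignore("r3", "f1", None)  # NULL keys are exempt from the partial index
assert insert_or_ignore("r4", "f1", None)
```

the NULL-key case is why the index carries `WHERE idempotency_key IS NOT NULL`: manually created runs without an idempotency key never conflict with each other.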
+20 -28
src/db/flows.zig
··· 13 }; 14 15 pub fn getByName(alloc: Allocator, name: []const u8) !?FlowRow { 16 - var r = backend.db.row( 17 "SELECT id, created, updated, name, tags FROM flow WHERE name = ?", 18 .{name}, 19 ) catch return null; 20 21 - if (r) |*row| { 22 - defer row.deinit(); 23 - return FlowRow{ 24 - .id = try alloc.dupe(u8, row.text(0)), 25 - .created = try alloc.dupe(u8, row.text(1)), 26 - .updated = try alloc.dupe(u8, row.text(2)), 27 - .name = try alloc.dupe(u8, row.text(3)), 28 - .tags = try alloc.dupe(u8, row.text(4)), 29 - }; 30 } 31 return null; 32 } 33 34 pub fn getById(alloc: Allocator, id: []const u8) !?FlowRow { 35 - var r = backend.db.row( 36 "SELECT id, created, updated, name, tags FROM flow WHERE id = ?", 37 .{id}, 38 ) catch return null; 39 40 - if (r) |*row| { 41 - defer row.deinit(); 42 - return FlowRow{ 43 - .id = try alloc.dupe(u8, row.text(0)), 44 - .created = try alloc.dupe(u8, row.text(1)), 45 - .updated = try alloc.dupe(u8, row.text(2)), 46 - .name = try alloc.dupe(u8, row.text(3)), 47 - .tags = try alloc.dupe(u8, row.text(4)), 48 - }; 49 } 50 return null; 51 } ··· 73 }; 74 defer rows.deinit(); 75 76 - while (rows.next()) |r| { 77 - try results.append(alloc, .{ 78 - .id = try alloc.dupe(u8, r.text(0)), 79 - .created = try alloc.dupe(u8, r.text(1)), 80 - .updated = try alloc.dupe(u8, r.text(2)), 81 - .name = try alloc.dupe(u8, r.text(3)), 82 - .tags = try alloc.dupe(u8, r.text(4)), 83 - }); 84 } 85 86 return results.toOwnedSlice(alloc); 87 }
··· 13 }; 14 15 pub fn getByName(alloc: Allocator, name: []const u8) !?FlowRow { 16 + var rows = backend.db.query( 17 "SELECT id, created, updated, name, tags FROM flow WHERE name = ?", 18 .{name}, 19 ) catch return null; 20 + defer rows.deinit(); 21 22 + if (rows.next()) |row| { 23 + return rowToFlow(alloc, row); 24 } 25 return null; 26 } 27 28 pub fn getById(alloc: Allocator, id: []const u8) !?FlowRow { 29 + var rows = backend.db.query( 30 "SELECT id, created, updated, name, tags FROM flow WHERE id = ?", 31 .{id}, 32 ) catch return null; 33 + defer rows.deinit(); 34 35 + if (rows.next()) |row| { 36 + return rowToFlow(alloc, row); 37 } 38 return null; 39 } ··· 61 }; 62 defer rows.deinit(); 63 64 + while (rows.next()) |row| { 65 + try results.append(alloc, rowToFlow(alloc, row)); 66 } 67 68 return results.toOwnedSlice(alloc); 69 } 70 + 71 + fn rowToFlow(alloc: Allocator, row: anytype) FlowRow { 72 + return .{ 73 + .id = alloc.dupe(u8, row.text(0)) catch "", 74 + .created = alloc.dupe(u8, row.text(1)) catch "", 75 + .updated = alloc.dupe(u8, row.text(2)) catch "", 76 + .name = alloc.dupe(u8, row.text(3)) catch "", 77 + .tags = alloc.dupe(u8, row.text(4)) catch "[]", 78 + }; 79 + }
+5 -1
src/db/schema/postgres.zig
··· 34 \\ state_timestamp TEXT, 35 \\ run_count BIGINT DEFAULT 0, 36 \\ expected_start_time TEXT, 37 \\ start_time TEXT, 38 \\ end_time TEXT, 39 \\ total_run_time DOUBLE PRECISION DEFAULT 0.0, ··· 41 \\ deployment_version TEXT, 42 \\ work_queue_name TEXT, 43 \\ work_queue_id TEXT, 44 - \\ auto_scheduled INTEGER DEFAULT 0 45 \\) 46 , .{}); 47 ··· 264 // indexes 265 try backend.db.exec("CREATE INDEX IF NOT EXISTS ix_flow_run__state_type ON flow_run(state_type)", .{}); 266 try backend.db.exec("CREATE INDEX IF NOT EXISTS ix_flow_run__flow_id ON flow_run(flow_id)", .{}); 267 try backend.db.exec("CREATE INDEX IF NOT EXISTS ix_flow_run_state__flow_run_id ON flow_run_state(flow_run_id)", .{}); 268 try backend.db.exec("CREATE INDEX IF NOT EXISTS ix_task_run__flow_run_id ON task_run(flow_run_id)", .{}); 269 try backend.db.exec("CREATE INDEX IF NOT EXISTS ix_task_run__task_key_dynamic_key ON task_run(task_key, dynamic_key)", .{});
··· 34 \\ state_timestamp TEXT, 35 \\ run_count BIGINT DEFAULT 0, 36 \\ expected_start_time TEXT, 37 + \\ next_scheduled_start_time TEXT, 38 \\ start_time TEXT, 39 \\ end_time TEXT, 40 \\ total_run_time DOUBLE PRECISION DEFAULT 0.0, ··· 42 \\ deployment_version TEXT, 43 \\ work_queue_name TEXT, 44 \\ work_queue_id TEXT, 45 + \\ auto_scheduled INTEGER DEFAULT 0, 46 + \\ idempotency_key TEXT 47 \\) 48 , .{}); 49 ··· 266 // indexes 267 try backend.db.exec("CREATE INDEX IF NOT EXISTS ix_flow_run__state_type ON flow_run(state_type)", .{}); 268 try backend.db.exec("CREATE INDEX IF NOT EXISTS ix_flow_run__flow_id ON flow_run(flow_id)", .{}); 269 + try backend.db.exec("CREATE INDEX IF NOT EXISTS ix_flow_run__next_scheduled_start_time ON flow_run(next_scheduled_start_time)", .{}); 270 + try backend.db.exec("CREATE UNIQUE INDEX IF NOT EXISTS uq_flow_run__flow_id_idempotency_key ON flow_run(flow_id, idempotency_key) WHERE idempotency_key IS NOT NULL", .{}); 271 try backend.db.exec("CREATE INDEX IF NOT EXISTS ix_flow_run_state__flow_run_id ON flow_run_state(flow_run_id)", .{}); 272 try backend.db.exec("CREATE INDEX IF NOT EXISTS ix_task_run__flow_run_id ON task_run(flow_run_id)", .{}); 273 try backend.db.exec("CREATE INDEX IF NOT EXISTS ix_task_run__task_key_dynamic_key ON task_run(task_key, dynamic_key)", .{});
+5 -1
src/db/schema/sqlite.zig
··· 31 \\ state_timestamp TEXT, 32 \\ run_count INTEGER DEFAULT 0, 33 \\ expected_start_time TEXT, 34 \\ start_time TEXT, 35 \\ end_time TEXT, 36 \\ total_run_time REAL DEFAULT 0.0, ··· 38 \\ deployment_version TEXT, 39 \\ work_queue_name TEXT, 40 \\ work_queue_id TEXT, 41 - \\ auto_scheduled INTEGER DEFAULT 0 42 \\) 43 , .{}); 44 ··· 257 // indexes 258 try backend.db.exec("CREATE INDEX IF NOT EXISTS ix_flow_run__state_type ON flow_run(state_type)", .{}); 259 try backend.db.exec("CREATE INDEX IF NOT EXISTS ix_flow_run__flow_id ON flow_run(flow_id)", .{}); 260 try backend.db.exec("CREATE INDEX IF NOT EXISTS ix_flow_run_state__flow_run_id ON flow_run_state(flow_run_id)", .{}); 261 try backend.db.exec("CREATE INDEX IF NOT EXISTS ix_task_run__flow_run_id ON task_run(flow_run_id)", .{}); 262 try backend.db.exec("CREATE INDEX IF NOT EXISTS ix_task_run__task_key_dynamic_key ON task_run(task_key, dynamic_key)", .{});
··· 31 \\ state_timestamp TEXT, 32 \\ run_count INTEGER DEFAULT 0, 33 \\ expected_start_time TEXT, 34 + \\ next_scheduled_start_time TEXT, 35 \\ start_time TEXT, 36 \\ end_time TEXT, 37 \\ total_run_time REAL DEFAULT 0.0, ··· 39 \\ deployment_version TEXT, 40 \\ work_queue_name TEXT, 41 \\ work_queue_id TEXT, 42 + \\ auto_scheduled INTEGER DEFAULT 0, 43 + \\ idempotency_key TEXT 44 \\) 45 , .{}); 46 ··· 259 // indexes 260 try backend.db.exec("CREATE INDEX IF NOT EXISTS ix_flow_run__state_type ON flow_run(state_type)", .{}); 261 try backend.db.exec("CREATE INDEX IF NOT EXISTS ix_flow_run__flow_id ON flow_run(flow_id)", .{}); 262 + try backend.db.exec("CREATE INDEX IF NOT EXISTS ix_flow_run__next_scheduled_start_time ON flow_run(next_scheduled_start_time)", .{}); 263 + try backend.db.exec("CREATE UNIQUE INDEX IF NOT EXISTS uq_flow_run__flow_id_idempotency_key ON flow_run(flow_id, idempotency_key) WHERE idempotency_key IS NOT NULL", .{}); 264 try backend.db.exec("CREATE INDEX IF NOT EXISTS ix_flow_run_state__flow_run_id ON flow_run_state(flow_run_id)", .{}); 265 try backend.db.exec("CREATE INDEX IF NOT EXISTS ix_task_run__flow_run_id ON task_run(flow_run_id)", .{}); 266 try backend.db.exec("CREATE INDEX IF NOT EXISTS ix_task_run__task_key_dynamic_key ON task_run(task_key, dynamic_key)", .{});
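editor's note: both schemas store timestamps as TEXT, so the `next_scheduled_start_time <= ?` filter and the new index rely on lexicographic order matching chronological order — which holds for fixed-format ISO-8601 UTC strings. a quick illustrative check (python, hypothetical rows):

```python
import sqlite3

# TEXT timestamps compare correctly as long as the format is fixed-width UTC
con = sqlite3.connect(":memory:")
con.execute("CREATE TABLE flow_run (id TEXT, next_scheduled_start_time TEXT)")
con.executemany("INSERT INTO flow_run VALUES (?, ?)", [
    ("a", "2024-01-02T00:00:00+00:00"),
    ("b", "2024-01-01T23:59:59+00:00"),
    ("c", "2024-01-10T00:00:00+00:00"),
])
due = con.execute(
    "SELECT id FROM flow_run WHERE next_scheduled_start_time <= ? "
    "ORDER BY next_scheduled_start_time ASC",
    ("2024-01-02T00:00:00+00:00",),
).fetchall()
assert [r[0] for r in due] == ["b", "a"]  # "c" is still in the future
```

mixing offsets or naive timestamps would break this ordering, so writers must normalize before insert.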
+37 -53
src/db/task_runs.zig
··· 41 } 42 43 pub fn get(alloc: Allocator, id: []const u8) !?TaskRunRow { 44 - var r = backend.db.row( 45 \\SELECT id, created, updated, flow_run_id, name, task_key, dynamic_key, 46 \\ state_type, state_name, state_timestamp, tags, run_count 47 \\FROM task_run WHERE id = ? 48 , .{id}) catch return null; 49 50 - if (r) |*row| { 51 - defer row.deinit(); 52 - return TaskRunRow{ 53 - .id = try alloc.dupe(u8, row.text(0)), 54 - .created = try alloc.dupe(u8, row.text(1)), 55 - .updated = try alloc.dupe(u8, row.text(2)), 56 - .flow_run_id = try alloc.dupe(u8, row.text(3)), 57 - .name = try alloc.dupe(u8, row.text(4)), 58 - .task_key = try alloc.dupe(u8, row.text(5)), 59 - .dynamic_key = try alloc.dupe(u8, row.text(6)), 60 - .state_type = try alloc.dupe(u8, row.text(7)), 61 - .state_name = try alloc.dupe(u8, row.text(8)), 62 - .state_timestamp = try alloc.dupe(u8, row.text(9)), 63 - .tags = try alloc.dupe(u8, row.text(10)), 64 - .run_count = row.bigint(11), 65 - }; 66 } 67 return null; 68 } ··· 73 task_key: []const u8, 74 dynamic_key: []const u8, 75 ) !?TaskRunRow { 76 - var r = if (flow_run_id) |frid| 77 - backend.db.row( 78 \\SELECT id, created, updated, flow_run_id, name, task_key, dynamic_key, 79 \\ state_type, state_name, state_timestamp, tags, run_count 80 \\FROM task_run WHERE flow_run_id = ? AND task_key = ? AND dynamic_key = ? 81 - , .{ frid, task_key, dynamic_key }) catch return null 82 - else 83 - backend.db.row( 84 \\SELECT id, created, updated, flow_run_id, name, task_key, dynamic_key, 85 \\ state_type, state_name, state_timestamp, tags, run_count 86 \\FROM task_run WHERE flow_run_id IS NULL AND task_key = ? AND dynamic_key = ? 
87 , .{ task_key, dynamic_key }) catch return null; 88 89 - if (r) |*row| { 90 - defer row.deinit(); 91 - return TaskRunRow{ 92 - .id = try alloc.dupe(u8, row.text(0)), 93 - .created = try alloc.dupe(u8, row.text(1)), 94 - .updated = try alloc.dupe(u8, row.text(2)), 95 - .flow_run_id = try alloc.dupe(u8, row.text(3)), 96 - .name = try alloc.dupe(u8, row.text(4)), 97 - .task_key = try alloc.dupe(u8, row.text(5)), 98 - .dynamic_key = try alloc.dupe(u8, row.text(6)), 99 - .state_type = try alloc.dupe(u8, row.text(7)), 100 - .state_name = try alloc.dupe(u8, row.text(8)), 101 - .state_timestamp = try alloc.dupe(u8, row.text(9)), 102 - .tags = try alloc.dupe(u8, row.text(10)), 103 - .run_count = row.bigint(11), 104 - }; 105 } 106 return null; 107 } ··· 136 }; 137 defer rows.deinit(); 138 139 - while (rows.next()) |r| { 140 - try results.append(alloc, .{ 141 - .id = try alloc.dupe(u8, r.text(0)), 142 - .created = try alloc.dupe(u8, r.text(1)), 143 - .updated = try alloc.dupe(u8, r.text(2)), 144 - .flow_run_id = try alloc.dupe(u8, r.text(3)), 145 - .name = try alloc.dupe(u8, r.text(4)), 146 - .task_key = try alloc.dupe(u8, r.text(5)), 147 - .dynamic_key = try alloc.dupe(u8, r.text(6)), 148 - .state_type = try alloc.dupe(u8, r.text(7)), 149 - .state_name = try alloc.dupe(u8, r.text(8)), 150 - .state_timestamp = try alloc.dupe(u8, r.text(9)), 151 - .tags = try alloc.dupe(u8, r.text(10)), 152 - .run_count = r.bigint(11), 153 - }); 154 } 155 156 return results.toOwnedSlice(alloc); 157 }
··· 41 } 42 43 pub fn get(alloc: Allocator, id: []const u8) !?TaskRunRow { 44 + var rows = backend.db.query( 45 \\SELECT id, created, updated, flow_run_id, name, task_key, dynamic_key, 46 \\ state_type, state_name, state_timestamp, tags, run_count 47 \\FROM task_run WHERE id = ? 48 , .{id}) catch return null; 49 + defer rows.deinit(); 50 51 + if (rows.next()) |row| { 52 + return rowToTaskRun(alloc, row); 53 } 54 return null; 55 } ··· 60 task_key: []const u8, 61 dynamic_key: []const u8, 62 ) !?TaskRunRow { 63 + if (flow_run_id) |frid| { 64 + var rows = backend.db.query( 65 \\SELECT id, created, updated, flow_run_id, name, task_key, dynamic_key, 66 \\ state_type, state_name, state_timestamp, tags, run_count 67 \\FROM task_run WHERE flow_run_id = ? AND task_key = ? AND dynamic_key = ? 68 + , .{ frid, task_key, dynamic_key }) catch return null; 69 + defer rows.deinit(); 70 + 71 + if (rows.next()) |row| { 72 + return rowToTaskRun(alloc, row); 73 + } 74 + } else { 75 + var rows = backend.db.query( 76 \\SELECT id, created, updated, flow_run_id, name, task_key, dynamic_key, 77 \\ state_type, state_name, state_timestamp, tags, run_count 78 \\FROM task_run WHERE flow_run_id IS NULL AND task_key = ? AND dynamic_key = ? 
79 , .{ task_key, dynamic_key }) catch return null; 80 + defer rows.deinit(); 81 82 + if (rows.next()) |row| { 83 + return rowToTaskRun(alloc, row); 84 + } 85 } 86 return null; 87 } ··· 116 }; 117 defer rows.deinit(); 118 119 + while (rows.next()) |row| { 120 + try results.append(alloc, rowToTaskRun(alloc, row)); 121 } 122 123 return results.toOwnedSlice(alloc); 124 } 125 + 126 + fn rowToTaskRun(alloc: Allocator, row: anytype) TaskRunRow { 127 + return .{ 128 + .id = alloc.dupe(u8, row.text(0)) catch "", 129 + .created = alloc.dupe(u8, row.text(1)) catch "", 130 + .updated = alloc.dupe(u8, row.text(2)) catch "", 131 + .flow_run_id = alloc.dupe(u8, row.text(3)) catch "", 132 + .name = alloc.dupe(u8, row.text(4)) catch "", 133 + .task_key = alloc.dupe(u8, row.text(5)) catch "", 134 + .dynamic_key = alloc.dupe(u8, row.text(6)) catch "", 135 + .state_type = alloc.dupe(u8, row.text(7)) catch "", 136 + .state_name = alloc.dupe(u8, row.text(8)) catch "", 137 + .state_timestamp = alloc.dupe(u8, row.text(9)) catch "", 138 + .tags = alloc.dupe(u8, row.text(10)) catch "[]", 139 + .run_count = row.bigint(11), 140 + }; 141 + }
+1 -1
src/db/variables.zig
··· 139 var r = backend.db.row("SELECT COUNT(*) FROM variable", .{}) catch return 0; 140 if (r) |*row| { 141 defer row.deinit(); 142 - return @intCast(row.int(0)); 143 } 144 return 0; 145 }
··· 139 var r = backend.db.row("SELECT COUNT(*) FROM variable", .{}) catch return 0; 140 if (r) |*row| { 141 defer row.deinit(); 142 + return @intCast(row.bigint(0)); 143 } 144 return 0; 145 }
+2 -2
src/db/work_pools.zig
··· 218 var r = backend.db.row("SELECT COUNT(*) FROM work_pool", .{}) catch return 0; 219 if (r) |*row| { 220 defer row.deinit(); 221 - return @intCast(row.int(0)); 222 } 223 return 0; 224 } ··· 231 ) catch return false; 232 if (r) |*row| { 233 defer row.deinit(); 234 - return row.int(0) > 0; 235 } 236 return false; 237 }
··· 218 var r = backend.db.row("SELECT COUNT(*) FROM work_pool", .{}) catch return 0; 219 if (r) |*row| { 220 defer row.deinit(); 221 + return @intCast(row.bigint(0)); 222 } 223 return 0; 224 } ··· 231 ) catch return false; 232 if (r) |*row| { 233 defer row.deinit(); 234 + return row.bigint(0) > 0; 235 } 236 return false; 237 }
+1 -1
src/db/work_queues.zig
··· 208 var r = backend.db.row("SELECT COUNT(*) FROM work_queue WHERE work_pool_id = ?", .{pool_id}) catch return 0; 209 if (r) |*row| { 210 defer row.deinit(); 211 - return @intCast(row.int(0)); 212 } 213 return 0; 214 }
··· 208 var r = backend.db.row("SELECT COUNT(*) FROM work_queue WHERE work_pool_id = ?", .{pool_id}) catch return 0; 209 if (r) |*row| { 210 defer row.deinit(); 211 + return @intCast(row.bigint(0)); 212 } 213 return 0; 214 }
+2 -2
src/db/workers.zig
··· 176 var r = backend.db.row("SELECT COUNT(*) FROM worker WHERE work_pool_id = ?", .{pool_id}) catch return 0; 177 if (r) |*row| { 178 defer row.deinit(); 179 - return @intCast(row.int(0)); 180 } 181 return 0; 182 } ··· 188 ) catch return 0; 189 if (r) |*row| { 190 defer row.deinit(); 191 - return @intCast(row.int(0)); 192 } 193 return 0; 194 }
··· 176 var r = backend.db.row("SELECT COUNT(*) FROM worker WHERE work_pool_id = ?", .{pool_id}) catch return 0; 177 if (r) |*row| { 178 defer row.deinit(); 179 + return @intCast(row.bigint(0)); 180 } 181 return 0; 182 } ··· 188 ) catch return 0; 189 if (r) |*row| { 190 defer row.deinit(); 191 + return @intCast(row.bigint(0)); 192 } 193 return 0; 194 }
+6 -2
src/main.zig
··· 4 5 const db = @import("db/sqlite.zig"); 6 const backend = @import("db/backend.zig"); 7 - const broker = @import("broker/mod.zig"); 8 const routes = @import("api/routes.zig"); 9 const events = @import("api/events.zig"); 10 const log = @import("logging.zig"); 11 - const services = @import("services/mod.zig"); 12 13 fn onRequest(r: zap.Request) !void { 14 const method = r.method orelse "?"; ··· 81 _ = @import("db/backend.zig"); 82 _ = @import("db/dialect.zig"); 83 _ = @import("utilities/hashing.zig"); 84 }
··· 4 5 const db = @import("db/sqlite.zig"); 6 const backend = @import("db/backend.zig"); 7 + const broker = @import("broker.zig"); 8 const routes = @import("api/routes.zig"); 9 const events = @import("api/events.zig"); 10 const log = @import("logging.zig"); 11 + const services = @import("services.zig"); 12 13 fn onRequest(r: zap.Request) !void { 14 const method = r.method orelse "?"; ··· 81 _ = @import("db/backend.zig"); 82 _ = @import("db/dialect.zig"); 83 _ = @import("utilities/hashing.zig"); 84 + _ = @import("utilities/time.zig"); 85 + _ = @import("orchestration/types.zig"); 86 + _ = @import("orchestration/flow_rules.zig"); 87 + _ = @import("orchestration/transforms.zig"); 88 }
+31
src/orchestration.zig
···
··· 1 + // orchestration.zig - state transition orchestration 2 + // 3 + // re-exports from submodules for convenient access. 4 + // 5 + // structure: 6 + // - orchestration/types.zig: core types (StateType, ResponseStatus, etc.) 7 + // - orchestration/rules.zig: rule abstraction (OrchestrationRule, RuleContext, applyPolicy) 8 + // - orchestration/flow_rules.zig: flow run rules (PreventPendingTransitions, CopyScheduledTime, WaitForScheduledTime, CoreFlowPolicy) 9 + // - orchestration/transforms.zig: bookkeeping transforms (TransitionContext, applyBookkeeping) 10 + 11 + // types 12 + pub const StateType = @import("orchestration/types.zig").StateType; 13 + pub const StateTypeSet = @import("orchestration/types.zig").StateTypeSet; 14 + pub const ResponseStatus = @import("orchestration/types.zig").ResponseStatus; 15 + pub const ResponseDetails = @import("orchestration/types.zig").ResponseDetails; 16 + pub const OrchestrationResult = @import("orchestration/types.zig").OrchestrationResult; 17 + 18 + // rules 19 + pub const OrchestrationRule = @import("orchestration/rules.zig").OrchestrationRule; 20 + pub const RuleContext = @import("orchestration/rules.zig").RuleContext; 21 + pub const applyPolicy = @import("orchestration/rules.zig").applyPolicy; 22 + 23 + // flow rules 24 + pub const PreventPendingTransitions = @import("orchestration/flow_rules.zig").PreventPendingTransitions; 25 + pub const CopyScheduledTime = @import("orchestration/flow_rules.zig").CopyScheduledTime; 26 + pub const WaitForScheduledTime = @import("orchestration/flow_rules.zig").WaitForScheduledTime; 27 + pub const CoreFlowPolicy = @import("orchestration/flow_rules.zig").CoreFlowPolicy; 28 + 29 + // transforms 30 + pub const TransitionContext = @import("orchestration/transforms.zig").TransitionContext; 31 + pub const applyBookkeeping = @import("orchestration/transforms.zig").applyBookkeeping;
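editor's note: the rules re-exported above match transitions against a `StateTypeSet` bitfield (see `StateTypeSet.init(&.{...})` in flow_rules.zig). a rough python sketch of the matching idea — names here are hypothetical, not the actual zig API:

```python
from enum import Flag, auto

# one bit per state type, so a rule's from/to sets are cheap bitmasks
class StateType(Flag):
    SCHEDULED = auto()
    PENDING = auto()
    RUNNING = auto()
    COMPLETED = auto()
    FAILED = auto()
    CANCELLING = auto()
    CANCELLED = auto()

def rule_matches(from_states, to_states, initial, proposed):
    # a rule fires only when both endpoints of the transition are in its sets
    return bool(from_states & initial) and bool(to_states & proposed)

prevent_pending_from = (
    StateType.PENDING | StateType.RUNNING | StateType.CANCELLING | StateType.CANCELLED
)
# RUNNING -> PENDING matches PreventPendingTransitions (would be rejected)
assert rule_matches(prevent_pending_from, StateType.PENDING,
                    StateType.RUNNING, StateType.PENDING)
# SCHEDULED -> PENDING does not match (normal scheduled run start, allowed)
assert not rule_matches(prevent_pending_from, StateType.PENDING,
                        StateType.SCHEDULED, StateType.PENDING)
```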
+390
src/orchestration/flow_rules.zig
···
··· 1 + // flow_rules.zig - orchestration rules for flow runs 2 + // 3 + // rules that apply to flow run state transitions 4 + 5 + const std = @import("std"); 6 + const rules = @import("rules.zig"); 7 + const time_util = @import("../utilities/time.zig"); 8 + 9 + const OrchestrationRule = rules.OrchestrationRule; 10 + const RuleContext = rules.RuleContext; 11 + const StateTypeSet = rules.StateTypeSet; 12 + 13 + // ============================================================================ 14 + // PreventPendingTransitions 15 + // ============================================================================ 16 + 17 + /// PreventPendingTransitions: prevent invalid transitions to PENDING state 18 + /// 19 + /// this rule prevents race conditions by rejecting transitions from 20 + /// PENDING, RUNNING, CANCELLING, or CANCELLED to PENDING. 21 + /// 22 + /// allowed: SCHEDULED → PENDING (normal scheduled run start) 23 + /// allowed: null → PENDING (initial run creation) 24 + /// rejected: PENDING → PENDING, RUNNING → PENDING, etc. 
25 + pub const PreventPendingTransitions = OrchestrationRule{ 26 + .name = "PreventPendingTransitions", 27 + .from_states = StateTypeSet.init(&.{ .PENDING, .RUNNING, .CANCELLING, .CANCELLED }), 28 + .to_states = StateTypeSet.init(&.{.PENDING}), 29 + .before_transition = preventPendingTransitionsFn, 30 + }; 31 + 32 + fn preventPendingTransitionsFn(ctx: *RuleContext) void { 33 + // if we get here, the transition matches FROM_STATES and TO_STATES 34 + // so we should reject it 35 + ctx.reject("Cannot transition to PENDING from current state"); 36 + } 37 + 38 + // ============================================================================ 39 + // CopyScheduledTime 40 + // ============================================================================ 41 + 42 + /// CopyScheduledTime: copy scheduled_time from SCHEDULED state to flow run 43 + /// 44 + /// when transitioning SCHEDULED → PENDING, this rule copies the scheduled_time 45 + /// from the SCHEDULED state to expected_start_time on the flow run. 46 + /// 47 + /// if the proposed PENDING state already has a scheduled_time, it is preserved 48 + /// (allows overriding the scheduled time during transition). 
49 + pub const CopyScheduledTime = OrchestrationRule{ 50 + .name = "CopyScheduledTime", 51 + .from_states = StateTypeSet.init(&.{.SCHEDULED}), 52 + .to_states = StateTypeSet.init(&.{.PENDING}), 53 + .before_transition = copyScheduledTimeFn, 54 + }; 55 + 56 + fn copyScheduledTimeFn(ctx: *RuleContext) void { 57 + // the proposed state's scheduled_time takes precedence over the initial one 58 + if (ctx.proposed_scheduled_time != null) { 59 + ctx.new_expected_start_time = ctx.proposed_scheduled_time; 60 + return; 61 + } 62 + 63 + // otherwise copy from the initial SCHEDULED state's scheduled_time 64 + if (ctx.initial_scheduled_time) |scheduled_time| { 65 + ctx.new_expected_start_time = scheduled_time; 66 + } 67 + } 68 + 69 + // ============================================================================ 70 + // WaitForScheduledTime 71 + // ============================================================================ 72 + 73 + /// WaitForScheduledTime: delay transitions to RUNNING if scheduled_time is in the future 74 + /// 75 + /// this rule prevents runs from starting before their scheduled time. 76 + /// when transitioning SCHEDULED/PENDING → RUNNING, if initial_scheduled_time 77 + /// is in the future, the transition is delayed with WAIT status and retry_after. 78 + /// 79 + /// the client should retry the transition after the delay. 
80 + pub const WaitForScheduledTime = OrchestrationRule{ 81 + .name = "WaitForScheduledTime", 82 + .from_states = StateTypeSet.init(&.{ .SCHEDULED, .PENDING }), 83 + .to_states = StateTypeSet.init(&.{.RUNNING}), 84 + .before_transition = waitForScheduledTimeFn, 85 + }; 86 + 87 + fn waitForScheduledTimeFn(ctx: *RuleContext) void { 88 + const scheduled_time = ctx.initial_scheduled_time orelse return; 89 + 90 + // parse scheduled_time to microseconds since epoch 91 + const scheduled_us = time_util.parse(scheduled_time) orelse return; 92 + 93 + // get current time in microseconds since epoch 94 + const now_us = time_util.nowMicros(); 95 + 96 + // if scheduled_time is in the future, delay the transition 97 + if (scheduled_us > now_us) { 98 + const delay_us = scheduled_us - now_us; 99 + const delay_seconds = @as(f64, @floatFromInt(delay_us)) / 1_000_000.0; 100 + ctx.wait("Scheduled time is in the future", delay_seconds); 101 + } 102 + } 103 + 104 + // ============================================================================ 105 + // CoreFlowPolicy 106 + // ============================================================================ 107 + 108 + /// CoreFlowPolicy: ordered list of rules for flow run state transitions 109 + pub const CoreFlowPolicy = [_]OrchestrationRule{ 110 + PreventPendingTransitions, 111 + CopyScheduledTime, 112 + WaitForScheduledTime, 113 + // future rules will be added here in priority order: 114 + // PreventDuplicateTransitions, 115 + // HandleFlowTerminalStateTransitions, 116 + // RetryFailedFlows, 117 + // etc. 
118 + }; 119 + 120 + // ============================================================================ 121 + // Tests 122 + // ============================================================================ 123 + 124 + test "OrchestrationRule.appliesTo" { 125 + const testing = std.testing; 126 + 127 + // PreventPendingTransitions applies to PENDING/RUNNING/CANCELLING/CANCELLED → PENDING 128 + try testing.expect(PreventPendingTransitions.appliesTo(.PENDING, .PENDING)); 129 + try testing.expect(PreventPendingTransitions.appliesTo(.RUNNING, .PENDING)); 130 + try testing.expect(PreventPendingTransitions.appliesTo(.CANCELLING, .PENDING)); 131 + try testing.expect(PreventPendingTransitions.appliesTo(.CANCELLED, .PENDING)); 132 + 133 + // does NOT apply to SCHEDULED → PENDING (allowed transition) 134 + try testing.expect(!PreventPendingTransitions.appliesTo(.SCHEDULED, .PENDING)); 135 + 136 + // does NOT apply to null → PENDING (initial creation) 137 + try testing.expect(!PreventPendingTransitions.appliesTo(null, .PENDING)); 138 + 139 + // does NOT apply when going to non-PENDING states 140 + try testing.expect(!PreventPendingTransitions.appliesTo(.PENDING, .RUNNING)); 141 + try testing.expect(!PreventPendingTransitions.appliesTo(.RUNNING, .COMPLETED)); 142 + } 143 + 144 + test "PreventPendingTransitions rejects PENDING → PENDING" { 145 + const testing = std.testing; 146 + 147 + var ctx = RuleContext{ 148 + .initial_state = .PENDING, 149 + .proposed_state = .PENDING, 150 + .initial_state_timestamp = "2024-01-19T16:30:00Z", 151 + .proposed_state_timestamp = "2024-01-19T16:30:01Z", 152 + .run_id = "test-run-id", 153 + }; 154 + 155 + rules.applyPolicy(&CoreFlowPolicy, &ctx); 156 + 157 + try testing.expectEqual(rules.ResponseStatus.REJECT, ctx.result.status); 158 + try testing.expect(ctx.result.details.reason != null); 159 + } 160 + 161 + test "PreventPendingTransitions rejects RUNNING → PENDING" { 162 + const testing = std.testing; 163 + 164 + var ctx = RuleContext{ 165 + 
.initial_state = .RUNNING, 166 + .proposed_state = .PENDING, 167 + .initial_state_timestamp = "2024-01-19T16:30:00Z", 168 + .proposed_state_timestamp = "2024-01-19T16:30:01Z", 169 + .run_id = "test-run-id", 170 + }; 171 + 172 + rules.applyPolicy(&CoreFlowPolicy, &ctx); 173 + 174 + try testing.expectEqual(rules.ResponseStatus.REJECT, ctx.result.status); 175 + } 176 + 177 + test "PreventPendingTransitions allows SCHEDULED → PENDING" { 178 + const testing = std.testing; 179 + 180 + var ctx = RuleContext{ 181 + .initial_state = .SCHEDULED, 182 + .proposed_state = .PENDING, 183 + .initial_state_timestamp = "2024-01-19T16:30:00Z", 184 + .proposed_state_timestamp = "2024-01-19T16:30:01Z", 185 + .run_id = "test-run-id", 186 + }; 187 + 188 + rules.applyPolicy(&CoreFlowPolicy, &ctx); 189 + 190 + try testing.expectEqual(rules.ResponseStatus.ACCEPT, ctx.result.status); 191 + } 192 + 193 + test "PreventPendingTransitions allows null → PENDING (initial creation)" { 194 + const testing = std.testing; 195 + 196 + var ctx = RuleContext{ 197 + .initial_state = null, 198 + .proposed_state = .PENDING, 199 + .initial_state_timestamp = null, 200 + .proposed_state_timestamp = "2024-01-19T16:30:01Z", 201 + .run_id = "test-run-id", 202 + }; 203 + 204 + rules.applyPolicy(&CoreFlowPolicy, &ctx); 205 + 206 + try testing.expectEqual(rules.ResponseStatus.ACCEPT, ctx.result.status); 207 + } 208 + 209 + test "PreventPendingTransitions allows PENDING → RUNNING" { 210 + const testing = std.testing; 211 + 212 + var ctx = RuleContext{ 213 + .initial_state = .PENDING, 214 + .proposed_state = .RUNNING, 215 + .initial_state_timestamp = "2024-01-19T16:30:00Z", 216 + .proposed_state_timestamp = "2024-01-19T16:30:01Z", 217 + .run_id = "test-run-id", 218 + }; 219 + 220 + rules.applyPolicy(&CoreFlowPolicy, &ctx); 221 + 222 + try testing.expectEqual(rules.ResponseStatus.ACCEPT, ctx.result.status); 223 + } 224 + 225 + // ============================================================================ 226 + // 
CopyScheduledTime Tests 227 + // ============================================================================ 228 + 229 + test "CopyScheduledTime.appliesTo" { 230 + const testing = std.testing; 231 + 232 + // applies to SCHEDULED → PENDING 233 + try testing.expect(CopyScheduledTime.appliesTo(.SCHEDULED, .PENDING)); 234 + 235 + // does NOT apply to other transitions 236 + try testing.expect(!CopyScheduledTime.appliesTo(.PENDING, .RUNNING)); 237 + try testing.expect(!CopyScheduledTime.appliesTo(.SCHEDULED, .RUNNING)); 238 + try testing.expect(!CopyScheduledTime.appliesTo(null, .PENDING)); 239 + } 240 + 241 + test "CopyScheduledTime copies initial_scheduled_time to new_expected_start_time" { 242 + const testing = std.testing; 243 + 244 + var ctx = RuleContext{ 245 + .initial_state = .SCHEDULED, 246 + .proposed_state = .PENDING, 247 + .initial_state_timestamp = "2024-01-19T16:30:00Z", 248 + .proposed_state_timestamp = "2024-01-19T16:30:01Z", 249 + .initial_scheduled_time = "2024-01-20T10:00:00Z", 250 + .run_id = "test-run-id", 251 + }; 252 + 253 + rules.applyPolicy(&CoreFlowPolicy, &ctx); 254 + 255 + try testing.expectEqual(rules.ResponseStatus.ACCEPT, ctx.result.status); 256 + try testing.expectEqualStrings("2024-01-20T10:00:00Z", ctx.new_expected_start_time.?); 257 + } 258 + 259 + test "CopyScheduledTime preserves proposed_scheduled_time if set" { 260 + const testing = std.testing; 261 + 262 + var ctx = RuleContext{ 263 + .initial_state = .SCHEDULED, 264 + .proposed_state = .PENDING, 265 + .initial_state_timestamp = "2024-01-19T16:30:00Z", 266 + .proposed_state_timestamp = "2024-01-19T16:30:01Z", 267 + .initial_scheduled_time = "2024-01-20T10:00:00Z", 268 + .proposed_scheduled_time = "2024-01-20T12:00:00Z", // override 269 + .run_id = "test-run-id", 270 + }; 271 + 272 + rules.applyPolicy(&CoreFlowPolicy, &ctx); 273 + 274 + try testing.expectEqual(rules.ResponseStatus.ACCEPT, ctx.result.status); 275 + // should use proposed_scheduled_time, not initial 276 + try 
testing.expectEqualStrings("2024-01-20T12:00:00Z", ctx.new_expected_start_time.?); 277 + } 278 + 279 + test "CopyScheduledTime handles missing scheduled_time gracefully" { 280 + const testing = std.testing; 281 + 282 + var ctx = RuleContext{ 283 + .initial_state = .SCHEDULED, 284 + .proposed_state = .PENDING, 285 + .initial_state_timestamp = "2024-01-19T16:30:00Z", 286 + .proposed_state_timestamp = "2024-01-19T16:30:01Z", 287 + // no scheduled_time on either state 288 + .run_id = "test-run-id", 289 + }; 290 + 291 + rules.applyPolicy(&CoreFlowPolicy, &ctx); 292 + 293 + try testing.expectEqual(rules.ResponseStatus.ACCEPT, ctx.result.status); 294 + try testing.expect(ctx.new_expected_start_time == null); 295 + } 296 + 297 + // ============================================================================ 298 + // WaitForScheduledTime Tests 299 + // ============================================================================ 300 + 301 + test "WaitForScheduledTime.appliesTo" { 302 + const testing = std.testing; 303 + 304 + // applies to SCHEDULED → RUNNING 305 + try testing.expect(WaitForScheduledTime.appliesTo(.SCHEDULED, .RUNNING)); 306 + 307 + // applies to PENDING → RUNNING 308 + try testing.expect(WaitForScheduledTime.appliesTo(.PENDING, .RUNNING)); 309 + 310 + // does NOT apply to other transitions 311 + try testing.expect(!WaitForScheduledTime.appliesTo(.SCHEDULED, .PENDING)); 312 + try testing.expect(!WaitForScheduledTime.appliesTo(.PENDING, .COMPLETED)); 313 + try testing.expect(!WaitForScheduledTime.appliesTo(.RUNNING, .COMPLETED)); 314 + try testing.expect(!WaitForScheduledTime.appliesTo(null, .RUNNING)); 315 + } 316 + 317 + test "WaitForScheduledTime allows transition when scheduled_time is in past" { 318 + const testing = std.testing; 319 + 320 + // use a time clearly in the past 321 + var ctx = RuleContext{ 322 + .initial_state = .PENDING, 323 + .proposed_state = .RUNNING, 324 + .initial_state_timestamp = "2020-01-01T00:00:00Z", 325 + 
.proposed_state_timestamp = "2020-01-01T00:00:01Z", 326 + .initial_scheduled_time = "2020-01-01T00:00:00Z", // past 327 + .run_id = "test-run-id", 328 + }; 329 + 330 + rules.applyPolicy(&CoreFlowPolicy, &ctx); 331 + 332 + try testing.expectEqual(rules.ResponseStatus.ACCEPT, ctx.result.status); 333 + } 334 + 335 + test "WaitForScheduledTime delays transition when scheduled_time is in future" { 336 + const testing = std.testing; 337 + 338 + // use a time clearly in the future (year 2099) 339 + var ctx = RuleContext{ 340 + .initial_state = .PENDING, 341 + .proposed_state = .RUNNING, 342 + .initial_state_timestamp = "2024-01-19T16:30:00Z", 343 + .proposed_state_timestamp = "2024-01-19T16:30:01Z", 344 + .initial_scheduled_time = "2099-01-01T00:00:00Z", // far future 345 + .run_id = "test-run-id", 346 + }; 347 + 348 + rules.applyPolicy(&CoreFlowPolicy, &ctx); 349 + 350 + try testing.expectEqual(rules.ResponseStatus.WAIT, ctx.result.status); 351 + try testing.expect(ctx.result.details.reason != null); 352 + try testing.expect(ctx.result.details.retry_after != null); 353 + // retry_after should be positive (many seconds in the future) 354 + try testing.expect(ctx.result.details.retry_after.? 
> 0); 355 + } 356 + 357 + test "WaitForScheduledTime allows transition when no scheduled_time" { 358 + const testing = std.testing; 359 + 360 + var ctx = RuleContext{ 361 + .initial_state = .PENDING, 362 + .proposed_state = .RUNNING, 363 + .initial_state_timestamp = "2024-01-19T16:30:00Z", 364 + .proposed_state_timestamp = "2024-01-19T16:30:01Z", 365 + // no scheduled_time 366 + .run_id = "test-run-id", 367 + }; 368 + 369 + rules.applyPolicy(&CoreFlowPolicy, &ctx); 370 + 371 + try testing.expectEqual(rules.ResponseStatus.ACCEPT, ctx.result.status); 372 + } 373 + 374 + test "WaitForScheduledTime works with SCHEDULED → RUNNING transition" { 375 + const testing = std.testing; 376 + 377 + // future time should delay 378 + var ctx = RuleContext{ 379 + .initial_state = .SCHEDULED, 380 + .proposed_state = .RUNNING, 381 + .initial_state_timestamp = "2024-01-19T16:30:00Z", 382 + .proposed_state_timestamp = "2024-01-19T16:30:01Z", 383 + .initial_scheduled_time = "2099-06-15T12:00:00Z", // future 384 + .run_id = "test-run-id", 385 + }; 386 + 387 + rules.applyPolicy(&CoreFlowPolicy, &ctx); 388 + 389 + try testing.expectEqual(rules.ResponseStatus.WAIT, ctx.result.status); 390 + }
-218
src/orchestration/orchestration.zig
··· 1 - // orchestration.zig - bookkeeping transforms for state transitions 2 - // 3 - // implements the global bookkeeping logic from prefect server: 4 - // - SetStartTime: set start_time when first entering RUNNING 5 - // - SetEndTime: set end_time when entering terminal state 6 - // - IncrementRunTime: accumulate total_run_time when exiting RUNNING 7 - // - IncrementRunCount: increment run_count when entering RUNNING 8 - 9 - const std = @import("std"); 10 - const log = @import("../logging.zig"); 11 - 12 - pub const StateType = enum { 13 - PENDING, 14 - RUNNING, 15 - COMPLETED, 16 - FAILED, 17 - CANCELLED, 18 - CRASHED, 19 - PAUSED, 20 - CANCELLING, 21 - SCHEDULED, 22 - 23 - pub fn fromString(s: []const u8) StateType { 24 - if (std.mem.eql(u8, s, "RUNNING")) return .RUNNING; 25 - if (std.mem.eql(u8, s, "COMPLETED")) return .COMPLETED; 26 - if (std.mem.eql(u8, s, "FAILED")) return .FAILED; 27 - if (std.mem.eql(u8, s, "CANCELLED")) return .CANCELLED; 28 - if (std.mem.eql(u8, s, "CRASHED")) return .CRASHED; 29 - if (std.mem.eql(u8, s, "PAUSED")) return .PAUSED; 30 - if (std.mem.eql(u8, s, "CANCELLING")) return .CANCELLING; 31 - if (std.mem.eql(u8, s, "SCHEDULED")) return .SCHEDULED; 32 - return .PENDING; 33 - } 34 - 35 - pub fn isRunning(self: StateType) bool { 36 - return self == .RUNNING; 37 - } 38 - 39 - pub fn isFinal(self: StateType) bool { 40 - return switch (self) { 41 - .COMPLETED, .FAILED, .CANCELLED, .CRASHED => true, 42 - else => false, 43 - }; 44 - } 45 - }; 46 - 47 - /// context for a state transition, holding info needed for bookkeeping 48 - pub const TransitionContext = struct { 49 - // current run state (from db) 50 - current_state_type: ?StateType, 51 - current_state_timestamp: ?[]const u8, 52 - start_time: ?[]const u8, 53 - end_time: ?[]const u8, 54 - run_count: i64, 55 - total_run_time: f64, 56 - 57 - // proposed new state 58 - proposed_state_type: StateType, 59 - proposed_state_timestamp: []const u8, 60 - 61 - // output: updated values to write 
to db 62 - new_start_time: ?[]const u8 = null, 63 - new_end_time: ?[]const u8 = null, 64 - new_run_count: i64 = 0, 65 - new_total_run_time: f64 = 0.0, 66 - }; 67 - 68 - /// apply all bookkeeping transforms to a state transition 69 - pub fn applyBookkeeping(ctx: *TransitionContext) void { 70 - // copy current values as baseline 71 - ctx.new_start_time = ctx.start_time; 72 - ctx.new_end_time = ctx.end_time; 73 - ctx.new_run_count = ctx.run_count; 74 - ctx.new_total_run_time = ctx.total_run_time; 75 - 76 - // SetStartTime: record when first entering RUNNING 77 - if (ctx.proposed_state_type.isRunning() and ctx.start_time == null) { 78 - ctx.new_start_time = ctx.proposed_state_timestamp; 79 - log.debug("orchestration", "setting start_time to {s}", .{ctx.proposed_state_timestamp}); 80 - } 81 - 82 - // SetEndTime: record when entering terminal state 83 - if (ctx.proposed_state_type.isFinal()) { 84 - if (ctx.start_time != null and ctx.end_time == null) { 85 - ctx.new_end_time = ctx.proposed_state_timestamp; 86 - log.debug("orchestration", "setting end_time to {s}", .{ctx.proposed_state_timestamp}); 87 - } 88 - } 89 - // clear end_time if exiting final state for non-final state 90 - if (ctx.current_state_type) |current| { 91 - if (current.isFinal() and !ctx.proposed_state_type.isFinal()) { 92 - ctx.new_end_time = null; 93 - log.debug("orchestration", "clearing end_time (exiting terminal state)", .{}); 94 - } 95 - } 96 - 97 - // IncrementRunTime: accumulate time spent in RUNNING 98 - if (ctx.current_state_type) |current| { 99 - if (current.isRunning()) { 100 - if (ctx.current_state_timestamp) |start_ts| { 101 - const duration = computeDuration(start_ts, ctx.proposed_state_timestamp); 102 - ctx.new_total_run_time = ctx.total_run_time + duration; 103 - log.debug("orchestration", "adding {d:.3}s to total_run_time (now {d:.3}s)", .{ duration, ctx.new_total_run_time }); 104 - } 105 - } 106 - } 107 - 108 - // IncrementRunCount: bump count when entering RUNNING 109 - if 
(ctx.proposed_state_type.isRunning()) { 110 - ctx.new_run_count = ctx.run_count + 1; 111 - log.debug("orchestration", "incrementing run_count to {d}", .{ctx.new_run_count}); 112 - } 113 - } 114 - 115 - /// compute duration in seconds between two ISO8601 timestamps 116 - fn computeDuration(start: []const u8, end: []const u8) f64 { 117 - const start_epoch = parseTimestamp(start) orelse return 0.0; 118 - const end_epoch = parseTimestamp(end) orelse return 0.0; 119 - 120 - if (end_epoch >= start_epoch) { 121 - return @as(f64, @floatFromInt(end_epoch - start_epoch)) / 1_000_000_000.0; 122 - } 123 - return 0.0; 124 - } 125 - 126 - /// parse ISO8601 timestamp to nanoseconds since epoch 127 - /// supports: 2024-01-19T16:30:00.123456Z or 2024-01-19T16:30:00Z 128 - fn parseTimestamp(ts: []const u8) ?i128 { 129 - // minimal parsing for ISO8601 - extract components 130 - if (ts.len < 19) return null; 131 - 132 - const year = std.fmt.parseInt(i32, ts[0..4], 10) catch return null; 133 - const month = std.fmt.parseInt(u8, ts[5..7], 10) catch return null; 134 - const day = std.fmt.parseInt(u8, ts[8..10], 10) catch return null; 135 - const hour = std.fmt.parseInt(u8, ts[11..13], 10) catch return null; 136 - const minute = std.fmt.parseInt(u8, ts[14..16], 10) catch return null; 137 - const second = std.fmt.parseInt(u8, ts[17..19], 10) catch return null; 138 - 139 - // parse fractional seconds if present 140 - var nanos: i64 = 0; 141 - if (ts.len > 20 and ts[19] == '.') { 142 - var end: usize = 20; 143 - while (end < ts.len and ts[end] >= '0' and ts[end] <= '9') : (end += 1) {} 144 - const frac_str = ts[20..end]; 145 - if (frac_str.len > 0) { 146 - const frac = std.fmt.parseInt(i64, frac_str, 10) catch 0; 147 - // scale to nanoseconds (pad or truncate to 9 digits) 148 - const digits = frac_str.len; 149 - if (digits < 9) { 150 - var scale: i64 = 1; 151 - for (0..(9 - digits)) |_| scale *= 10; 152 - nanos = frac * scale; 153 - } else { 154 - nanos = frac; 155 - } 156 - } 157 - } 158 - 
159 - // convert to epoch nanoseconds (simplified - ignores leap seconds) 160 - // days since epoch (1970-01-01) 161 - var days: i64 = 0; 162 - 163 - // years 164 - var y: i32 = 1970; 165 - while (y < year) : (y += 1) { 166 - days += if (isLeapYear(y)) 366 else 365; 167 - } 168 - 169 - // months 170 - const days_in_month = [_]u8{ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; 171 - var m: u8 = 1; 172 - while (m < month) : (m += 1) { 173 - days += days_in_month[m - 1]; 174 - if (m == 2 and isLeapYear(year)) days += 1; 175 - } 176 - 177 - // days 178 - days += day - 1; 179 - 180 - // total nanoseconds 181 - const secs: i128 = @as(i128, days) * 86400 + @as(i128, hour) * 3600 + @as(i128, minute) * 60 + @as(i128, second); 182 - return secs * 1_000_000_000 + nanos; 183 - } 184 - 185 - fn isLeapYear(year: i32) bool { 186 - if (@mod(year, 400) == 0) return true; 187 - if (@mod(year, 100) == 0) return false; 188 - if (@mod(year, 4) == 0) return true; 189 - return false; 190 - } 191 - 192 - // tests 193 - test "StateType.fromString" { 194 - const testing = std.testing; 195 - try testing.expectEqual(StateType.RUNNING, StateType.fromString("RUNNING")); 196 - try testing.expectEqual(StateType.COMPLETED, StateType.fromString("COMPLETED")); 197 - try testing.expectEqual(StateType.PENDING, StateType.fromString("UNKNOWN")); 198 - } 199 - 200 - test "StateType.isFinal" { 201 - const testing = std.testing; 202 - try testing.expect(StateType.COMPLETED.isFinal()); 203 - try testing.expect(StateType.FAILED.isFinal()); 204 - try testing.expect(!StateType.RUNNING.isFinal()); 205 - try testing.expect(!StateType.PENDING.isFinal()); 206 - } 207 - 208 - test "computeDuration" { 209 - const testing = std.testing; 210 - const duration = computeDuration("2024-01-19T16:30:00Z", "2024-01-19T16:30:05Z"); 211 - try testing.expectApproxEqAbs(@as(f64, 5.0), duration, 0.001); 212 - } 213 - 214 - test "computeDuration with fractional seconds" { 215 - const testing = std.testing; 216 - const duration 
= computeDuration("2024-01-19T16:30:00.000000Z", "2024-01-19T16:30:01.500000Z"); 217 - try testing.expectApproxEqAbs(@as(f64, 1.5), duration, 0.001); 218 - }
···
+114
src/orchestration/rules.zig
···
··· 1 + // rules.zig - orchestration rule abstraction 2 + // 3 + // defines the rule interface and policy application logic 4 + 5 + const std = @import("std"); 6 + const log = @import("../logging.zig"); 7 + const types = @import("types.zig"); 8 + 9 + pub const StateType = types.StateType; 10 + pub const StateTypeSet = types.StateTypeSet; 11 + pub const ResponseStatus = types.ResponseStatus; 12 + pub const ResponseDetails = types.ResponseDetails; 13 + pub const OrchestrationResult = types.OrchestrationResult; 14 + 15 + /// context passed to orchestration rules during state transition 16 + pub const RuleContext = struct { 17 + // transition info 18 + initial_state: ?StateType, 19 + proposed_state: StateType, 20 + initial_state_timestamp: ?[]const u8, 21 + proposed_state_timestamp: []const u8, 22 + 23 + // scheduling info (for CopyScheduledTime, WaitForScheduledTime) 24 + initial_scheduled_time: ?[]const u8 = null, // next_scheduled_start_time from SCHEDULED state 25 + proposed_scheduled_time: ?[]const u8 = null, // scheduled_time from proposed state (if any) 26 + 27 + // run metadata (for rules that need it) 28 + run_id: []const u8, 29 + flow_id: ?[]const u8 = null, 30 + deployment_id: ?[]const u8 = null, 31 + 32 + // orchestration result (modified by rules) 33 + result: OrchestrationResult = .{}, 34 + 35 + // output: values to write back to db (set by rules) 36 + new_expected_start_time: ?[]const u8 = null, 37 + 38 + /// reject the transition with a reason 39 + pub fn reject(self: *RuleContext, reason: []const u8) void { 40 + self.result.status = .REJECT; 41 + self.result.details.reason = reason; 42 + log.debug("orchestration", "rule rejected transition: {s}", .{reason}); 43 + } 44 + 45 + /// delay the transition (client should retry) 46 + pub fn wait(self: *RuleContext, reason: []const u8, retry_after: f64) void { 47 + self.result.status = .WAIT; 48 + self.result.details.reason = reason; 49 + self.result.details.retry_after = retry_after; 50 + 
log.debug("orchestration", "rule delayed transition: {s} (retry after {d}s)", .{ reason, retry_after }); 51 + } 52 + 53 + /// abort the transition completely 54 + pub fn abort(self: *RuleContext, reason: []const u8) void { 55 + self.result.status = .ABORT; 56 + self.result.details.reason = reason; 57 + log.debug("orchestration", "rule aborted transition: {s}", .{reason}); 58 + } 59 + 60 + /// check if transition is still accepted (not yet rejected/waited/aborted) 61 + pub fn isAccepted(self: *const RuleContext) bool { 62 + return self.result.status == .ACCEPT; 63 + } 64 + }; 65 + 66 + /// an orchestration rule that can modify or reject state transitions 67 + pub const OrchestrationRule = struct { 68 + /// rule name for logging/debugging 69 + name: []const u8, 70 + /// which initial states this rule applies to 71 + from_states: StateTypeSet, 72 + /// which proposed states this rule applies to 73 + to_states: StateTypeSet, 74 + /// the rule implementation - called before state is committed 75 + before_transition: *const fn (*RuleContext) void, 76 + 77 + /// check if this rule applies to the given transition 78 + pub fn appliesTo(self: OrchestrationRule, initial: ?StateType, proposed: StateType) bool { 79 + const from_matches = if (initial) |s| 80 + self.from_states.contains(s) 81 + else 82 + self.from_states.containsNull(); 83 + 84 + const to_matches = self.to_states.contains(proposed); 85 + return from_matches and to_matches; 86 + } 87 + }; 88 + 89 + /// apply all applicable rules from a policy to a transition 90 + pub fn applyPolicy( 91 + policy: []const OrchestrationRule, 92 + ctx: *RuleContext, 93 + ) void { 94 + for (policy) |rule| { 95 + // skip rules that don't apply to this transition 96 + if (!rule.appliesTo(ctx.initial_state, ctx.proposed_state)) { 97 + continue; 98 + } 99 + 100 + log.debug("orchestration", "applying rule: {s}", .{rule.name}); 101 + 102 + // apply the rule 103 + rule.before_transition(ctx); 104 + 105 + // if rule rejected/waited/aborted, 
stop processing 106 + if (!ctx.isAccepted()) { 107 + log.debug("orchestration", "rule {s} stopped transition with status {s}", .{ 108 + rule.name, 109 + ctx.result.status.toString(), 110 + }); 111 + return; 112 + } 113 + } 114 + }
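A new rule is just a struct literal plus a function pointer, so extending the policy needs no changes to `applyPolicy`. A hypothetical example (this rule is illustrative only and not part of `CoreFlowPolicy`):

```zig
const std = @import("std");
const rules = @import("rules.zig");

// hypothetical rule: paused runs must resume before reaching a terminal state
const PreventLeavingPaused = rules.OrchestrationRule{
    .name = "PreventLeavingPaused",
    .from_states = rules.StateTypeSet.init(&.{.PAUSED}),
    .to_states = rules.StateTypeSet.init(&.{ .COMPLETED, .FAILED }),
    .before_transition = rejectLeavingPausedFn,
};

fn rejectLeavingPausedFn(ctx: *rules.RuleContext) void {
    ctx.reject("Paused runs must resume before finishing");
}

test "custom rule matches only its from/to sets" {
    try std.testing.expect(PreventLeavingPaused.appliesTo(.PAUSED, .FAILED));
    try std.testing.expect(!PreventLeavingPaused.appliesTo(.PAUSED, .RUNNING));
    try std.testing.expect(!PreventLeavingPaused.appliesTo(.RUNNING, .FAILED));
}
```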
+109
src/orchestration/transforms.zig
···
··· 1 + // transforms.zig - global bookkeeping transforms 2 + // 3 + // universal transforms that run on every state transition: 4 + // - SetStartTime: set start_time when first entering RUNNING 5 + // - SetEndTime: set end_time when entering terminal state 6 + // - IncrementRunTime: accumulate total_run_time when exiting RUNNING 7 + // - IncrementRunCount: increment run_count when entering RUNNING 8 + 9 + const std = @import("std"); 10 + const log = @import("../logging.zig"); 11 + const time_util = @import("../utilities/time.zig"); 12 + const types = @import("types.zig"); 13 + 14 + const StateType = types.StateType; 15 + 16 + /// context for a state transition, holding info needed for bookkeeping 17 + pub const TransitionContext = struct { 18 + // current run state (from db) 19 + current_state_type: ?StateType, 20 + current_state_timestamp: ?[]const u8, 21 + start_time: ?[]const u8, 22 + end_time: ?[]const u8, 23 + run_count: i64, 24 + total_run_time: f64, 25 + 26 + // proposed new state 27 + proposed_state_type: StateType, 28 + proposed_state_timestamp: []const u8, 29 + 30 + // output: updated values to write to db 31 + new_start_time: ?[]const u8 = null, 32 + new_end_time: ?[]const u8 = null, 33 + new_run_count: i64 = 0, 34 + new_total_run_time: f64 = 0.0, 35 + }; 36 + 37 + /// apply all bookkeeping transforms to a state transition 38 + pub fn applyBookkeeping(ctx: *TransitionContext) void { 39 + // copy current values as baseline 40 + ctx.new_start_time = ctx.start_time; 41 + ctx.new_end_time = ctx.end_time; 42 + ctx.new_run_count = ctx.run_count; 43 + ctx.new_total_run_time = ctx.total_run_time; 44 + 45 + // SetStartTime: record when first entering RUNNING 46 + if (ctx.proposed_state_type.isRunning() and ctx.start_time == null) { 47 + ctx.new_start_time = ctx.proposed_state_timestamp; 48 + log.debug("orchestration", "setting start_time to {s}", .{ctx.proposed_state_timestamp}); 49 + } 50 + 51 + // SetEndTime: record when entering terminal state 52 + if 
(ctx.proposed_state_type.isFinal()) { 53 + if (ctx.start_time != null and ctx.end_time == null) { 54 + ctx.new_end_time = ctx.proposed_state_timestamp; 55 + log.debug("orchestration", "setting end_time to {s}", .{ctx.proposed_state_timestamp}); 56 + } 57 + } 58 + // clear end_time if exiting final state for non-final state 59 + if (ctx.current_state_type) |current| { 60 + if (current.isFinal() and !ctx.proposed_state_type.isFinal()) { 61 + ctx.new_end_time = null; 62 + log.debug("orchestration", "clearing end_time (exiting terminal state)", .{}); 63 + } 64 + } 65 + 66 + // IncrementRunTime: accumulate time spent in RUNNING 67 + if (ctx.current_state_type) |current| { 68 + if (current.isRunning()) { 69 + if (ctx.current_state_timestamp) |start_ts| { 70 + const duration = computeDuration(start_ts, ctx.proposed_state_timestamp); 71 + ctx.new_total_run_time = ctx.total_run_time + duration; 72 + log.debug("orchestration", "adding {d:.3}s to total_run_time (now {d:.3}s)", .{ duration, ctx.new_total_run_time }); 73 + } 74 + } 75 + } 76 + 77 + // IncrementRunCount: bump count when entering RUNNING 78 + if (ctx.proposed_state_type.isRunning()) { 79 + ctx.new_run_count = ctx.run_count + 1; 80 + log.debug("orchestration", "incrementing run_count to {d}", .{ctx.new_run_count}); 81 + } 82 + } 83 + 84 + /// compute duration in seconds between two ISO8601 timestamps 85 + fn computeDuration(start: []const u8, end: []const u8) f64 { 86 + const start_us = time_util.parse(start) orelse return 0.0; 87 + const end_us = time_util.parse(end) orelse return 0.0; 88 + 89 + if (end_us >= start_us) { 90 + return @as(f64, @floatFromInt(end_us - start_us)) / 1_000_000.0; 91 + } 92 + return 0.0; 93 + } 94 + 95 + // ============================================================================ 96 + // Tests 97 + // ============================================================================ 98 + 99 + test "computeDuration" { 100 + const testing = std.testing; 101 + const duration = 
computeDuration("2024-01-19T16:30:00Z", "2024-01-19T16:30:05Z"); 102 + try testing.expectApproxEqAbs(@as(f64, 5.0), duration, 0.001); 103 + } 104 + 105 + test "computeDuration with fractional seconds" { 106 + const testing = std.testing; 107 + const duration = computeDuration("2024-01-19T16:30:00.000000Z", "2024-01-19T16:30:01.500000Z"); 108 + try testing.expectApproxEqAbs(@as(f64, 1.5), duration, 0.001); 109 + }
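As a sanity check on the transform semantics above, a RUNNING → COMPLETED transition should set `end_time`, accumulate run time, and leave `run_count` untouched (a sketch; the timestamps and counts are illustrative):

```zig
const std = @import("std");
const transforms = @import("transforms.zig");

test "RUNNING -> COMPLETED sets end_time and accumulates run time" {
    var ctx = transforms.TransitionContext{
        .current_state_type = .RUNNING,
        .current_state_timestamp = "2024-01-19T16:30:00Z",
        .start_time = "2024-01-19T16:30:00Z",
        .end_time = null,
        .run_count = 1,
        .total_run_time = 0.0,
        .proposed_state_type = .COMPLETED,
        .proposed_state_timestamp = "2024-01-19T16:30:05Z",
    };
    transforms.applyBookkeeping(&ctx);

    // SetEndTime: terminal state with start_time set and end_time unset
    try std.testing.expectEqualStrings("2024-01-19T16:30:05Z", ctx.new_end_time.?);
    // IncrementRunTime: 5 seconds spent in RUNNING
    try std.testing.expectApproxEqAbs(@as(f64, 5.0), ctx.new_total_run_time, 0.001);
    // IncrementRunCount only fires when *entering* RUNNING
    try std.testing.expectEqual(@as(i64, 1), ctx.new_run_count);
}
```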
+172
src/orchestration/types.zig
···
··· 1 + // types.zig - core orchestration types 2 + // 3 + // shared types used across the orchestration system 4 + 5 + const std = @import("std"); 6 + 7 + // ============================================================================ 8 + // Response Types 9 + // ============================================================================ 10 + 11 + /// orchestration response status - determines what happens to the state transition 12 + pub const ResponseStatus = enum { 13 + /// transition is accepted, proposed state will be committed 14 + ACCEPT, 15 + /// transition is rejected, current state is returned with reason 16 + REJECT, 17 + /// transition is delayed, client should retry after specified time 18 + WAIT, 19 + /// transition is aborted, no state change occurs 20 + ABORT, 21 + 22 + pub fn toString(self: ResponseStatus) []const u8 { 23 + return switch (self) { 24 + .ACCEPT => "ACCEPT", 25 + .REJECT => "REJECT", 26 + .WAIT => "WAIT", 27 + .ABORT => "ABORT", 28 + }; 29 + } 30 + }; 31 + 32 + /// details about why a transition was rejected/delayed/aborted 33 + pub const ResponseDetails = struct { 34 + reason: ?[]const u8 = null, 35 + /// for WAIT responses: seconds to wait before retry 36 + retry_after: ?f64 = null, 37 + }; 38 + 39 + /// result of applying orchestration rules to a state transition 40 + pub const OrchestrationResult = struct { 41 + status: ResponseStatus = .ACCEPT, 42 + details: ResponseDetails = .{}, 43 + /// the state to return (may be modified from proposed) 44 + state_type: ?[]const u8 = null, 45 + state_name: ?[]const u8 = null, 46 + }; 47 + 48 + // ============================================================================ 49 + // State Types 50 + // ============================================================================ 51 + 52 + pub const StateType = enum { 53 + PENDING, 54 + RUNNING, 55 + COMPLETED, 56 + FAILED, 57 + CANCELLED, 58 + CRASHED, 59 + PAUSED, 60 + CANCELLING, 61 + SCHEDULED, 62 + 63 + pub fn fromString(s: []const u8) 
StateType { 64 + if (std.mem.eql(u8, s, "RUNNING")) return .RUNNING; 65 + if (std.mem.eql(u8, s, "COMPLETED")) return .COMPLETED; 66 + if (std.mem.eql(u8, s, "FAILED")) return .FAILED; 67 + if (std.mem.eql(u8, s, "CANCELLED")) return .CANCELLED; 68 + if (std.mem.eql(u8, s, "CRASHED")) return .CRASHED; 69 + if (std.mem.eql(u8, s, "PAUSED")) return .PAUSED; 70 + if (std.mem.eql(u8, s, "CANCELLING")) return .CANCELLING; 71 + if (std.mem.eql(u8, s, "SCHEDULED")) return .SCHEDULED; 72 + return .PENDING; 73 + } 74 + 75 + pub fn isRunning(self: StateType) bool { 76 + return self == .RUNNING; 77 + } 78 + 79 + pub fn isFinal(self: StateType) bool { 80 + return switch (self) { 81 + .COMPLETED, .FAILED, .CANCELLED, .CRASHED => true, 82 + else => false, 83 + }; 84 + } 85 + }; 86 + 87 + /// set of state types for filtering when rules apply 88 + pub const StateTypeSet = struct { 89 + bits: u16 = 0, 90 + 91 + pub const ALL = StateTypeSet{ .bits = 0x1FF }; // all 9 states 92 + pub const NONE = StateTypeSet{ .bits = 0 }; 93 + 94 + pub fn init(states: []const StateType) StateTypeSet { 95 + var set = StateTypeSet{}; 96 + for (states) |s| { 97 + set.bits |= @as(u16, 1) << @intFromEnum(s); 98 + } 99 + return set; 100 + } 101 + 102 + pub fn contains(self: StateTypeSet, state: ?StateType) bool { 103 + if (state) |s| { 104 + return (self.bits & (@as(u16, 1) << @intFromEnum(s))) != 0; 105 + } 106 + return false; 107 + } 108 + 109 + pub fn containsNull(self: StateTypeSet) bool { 110 + // bit 9 represents null state (no current state) 111 + return (self.bits & 0x200) != 0; 112 + } 113 + 114 + pub fn withNull(self: StateTypeSet) StateTypeSet { 115 + return StateTypeSet{ .bits = self.bits | 0x200 }; 116 + } 117 + }; 118 + 119 + // ============================================================================ 120 + // Tests 121 + // ============================================================================ 122 + 123 + test "StateType.fromString" { 124 + const testing = std.testing; 125 + try 
testing.expectEqual(StateType.RUNNING, StateType.fromString("RUNNING")); 126 + try testing.expectEqual(StateType.COMPLETED, StateType.fromString("COMPLETED")); 127 + try testing.expectEqual(StateType.PENDING, StateType.fromString("UNKNOWN")); 128 + } 129 + 130 + test "StateType.isFinal" { 131 + const testing = std.testing; 132 + try testing.expect(StateType.COMPLETED.isFinal()); 133 + try testing.expect(StateType.FAILED.isFinal()); 134 + try testing.expect(!StateType.RUNNING.isFinal()); 135 + try testing.expect(!StateType.PENDING.isFinal()); 136 + } 137 + 138 + test "StateTypeSet.contains" { 139 + const testing = std.testing; 140 + 141 + const pending_running = StateTypeSet.init(&.{ .PENDING, .RUNNING }); 142 + try testing.expect(pending_running.contains(.PENDING)); 143 + try testing.expect(pending_running.contains(.RUNNING)); 144 + try testing.expect(!pending_running.contains(.COMPLETED)); 145 + try testing.expect(!pending_running.contains(.SCHEDULED)); 146 + try testing.expect(!pending_running.contains(null)); 147 + } 148 + 149 + test "StateTypeSet.ALL contains all states" { 150 + const testing = std.testing; 151 + 152 + try testing.expect(StateTypeSet.ALL.contains(.PENDING)); 153 + try testing.expect(StateTypeSet.ALL.contains(.RUNNING)); 154 + try testing.expect(StateTypeSet.ALL.contains(.COMPLETED)); 155 + try testing.expect(StateTypeSet.ALL.contains(.FAILED)); 156 + try testing.expect(StateTypeSet.ALL.contains(.CANCELLED)); 157 + try testing.expect(StateTypeSet.ALL.contains(.CRASHED)); 158 + try testing.expect(StateTypeSet.ALL.contains(.PAUSED)); 159 + try testing.expect(StateTypeSet.ALL.contains(.CANCELLING)); 160 + try testing.expect(StateTypeSet.ALL.contains(.SCHEDULED)); 161 + } 162 + 163 + test "StateTypeSet.withNull" { 164 + const testing = std.testing; 165 + 166 + const pending = StateTypeSet.init(&.{.PENDING}); 167 + try testing.expect(!pending.containsNull()); 168 + 169 + const pending_or_null = pending.withNull(); 170 + try 
testing.expect(pending_or_null.containsNull()); 171 + try testing.expect(pending_or_null.contains(.PENDING)); 172 + }
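
The `StateTypeSet` above is the building block for deciding when an orchestration rule fires. A minimal sketch of how a rule might declare its applicable transitions, assuming a `FROM_STATES`/`TO_STATES` convention like Python Prefect's rules (the rule shape and names here are illustrative, not the actual rules-module API):

```zig
// Hypothetical precondition for a rule such as WaitForScheduledTime:
// applies when moving out of SCHEDULED (or from no state at all)
// into PENDING or RUNNING. Uses only init/contains/containsNull/withNull
// from the StateTypeSet defined above.
const FROM_STATES = StateTypeSet.init(&.{.SCHEDULED}).withNull();
const TO_STATES = StateTypeSet.init(&.{ .PENDING, .RUNNING });

fn ruleApplies(from: ?StateType, to: StateType) bool {
    const from_ok = if (from) |f| FROM_STATES.contains(f) else FROM_STATES.containsNull();
    return from_ok and TO_STATES.contains(to);
}
```

Note that `contains(null)` deliberately returns `false`; the null case is carried in a separate bit (0x200) so "no current state" can be matched explicitly via `withNull()`/`containsNull()` without widening the enum.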
+1 -1
src/services/CLAUDE.md
··· 1 # services 2 3 - background workers managed by mod.zig (startAll/stopAll). 4 5 ## event_persister 6
··· 1 # services 2 3 + background workers managed by services.zig (startAll/stopAll). 4 5 ## event_persister 6
+1 -1
src/services/event_broadcaster.zig
··· 5 const Thread = std.Thread; 6 const Mutex = Thread.Mutex; 7 const log = @import("../logging.zig"); 8 - const broker = @import("../broker/mod.zig"); 9 const events_api = @import("../api/events.zig"); 10 const zap = @import("zap"); 11
··· 5 const Thread = std.Thread; 6 const Mutex = Thread.Mutex; 7 const log = @import("../logging.zig"); 8 + const broker = @import("../broker.zig"); 9 const events_api = @import("../api/events.zig"); 10 const zap = @import("zap"); 11
+1 -1
src/services/event_persister.zig
··· 2 const Thread = std.Thread; 3 const log = @import("../logging.zig"); 4 const messaging = @import("../utilities/messaging.zig"); 5 - const broker = @import("../broker/mod.zig"); 6 const db = @import("../db/sqlite.zig"); 7 const time_util = @import("../utilities/time.zig"); 8 const events_api = @import("../api/events.zig");
··· 2 const Thread = std.Thread; 3 const log = @import("../logging.zig"); 4 const messaging = @import("../utilities/messaging.zig"); 5 + const broker = @import("../broker.zig"); 6 const db = @import("../db/sqlite.zig"); 7 const time_util = @import("../utilities/time.zig"); 8 const events_api = @import("../api/events.zig");
+4 -4
src/services/mod.zig → src/services.zig
··· 1 const std = @import("std"); 2 - const log = @import("../logging.zig"); 3 4 - pub const event_persister = @import("event_persister.zig"); 5 - pub const event_broadcaster = @import("event_broadcaster.zig"); 6 - pub const scheduler = @import("scheduler.zig"); 7 8 pub const Service = struct { 9 name: []const u8,
··· 1 const std = @import("std"); 2 + const log = @import("logging.zig"); 3 4 + pub const event_persister = @import("services/event_persister.zig"); 5 + pub const event_broadcaster = @import("services/event_broadcaster.zig"); 6 + pub const scheduler = @import("services/scheduler.zig"); 7 8 pub const Service = struct { 9 name: []const u8,
+177 -7
src/services/scheduler.zig
··· 1 const std = @import("std"); 2 const Thread = std.Thread; 3 const json = std.json; 4 const log = @import("../logging.zig"); 5 const db = @import("../db/sqlite.zig"); 6 const time_util = @import("../utilities/time.zig"); 7 const uuid_util = @import("../utilities/uuid.zig"); 8 9 - // Configuration 10 - const SCHEDULER_INTERVAL_MS: u64 = 30_000; // 30 seconds 11 const MAX_SCHEDULED_RUNS: usize = 50; // Per schedule per tick 12 const BATCH_SIZE: usize = 100; // Max schedules to evaluate per tick 13 14 var scheduler_thread: ?Thread = null; 15 var running: bool = false; 16 var mutex: Thread.Mutex = .{}; ··· 22 if (running) return; 23 running = true; 24 25 - log.info("scheduler", "starting (interval: {}ms, max_runs: {})", .{ SCHEDULER_INTERVAL_MS, MAX_SCHEDULED_RUNS }); 26 scheduler_thread = try Thread.spawn(.{}, schedulerLoop, .{}); 27 } 28 ··· 43 } 44 45 fn schedulerLoop() void { 46 while (running) { 47 evaluateSchedules() catch |err| { 48 log.err("scheduler", "error evaluating schedules: {}", .{err}); 49 }; 50 - Thread.sleep(SCHEDULER_INTERVAL_MS * std.time.ns_per_ms); 51 } 52 } 53 ··· 90 if (obj.get("interval")) |interval_val| { 91 return evaluateIntervalSchedule(alloc, schedule, deployment, interval_val, now, now_micros); 92 } 93 - // TODO: cron and rrule schedules 94 return 0; 95 } 96 97 fn evaluateIntervalSchedule( 98 alloc: std.mem.Allocator, 99 schedule: db.deployment_schedules.DeploymentScheduleRow, ··· 119 const max_runs: usize = if (schedule.max_scheduled_runs) |m| @intCast(m) else MAX_SCHEDULED_RUNS; 120 if (existing_count >= max_runs) return 0; 121 122 // Find latest scheduled time 123 var latest_scheduled_micros: i64 = now_micros; 124 for (existing_runs) |run| { ··· 137 const max_future = now_micros + (interval_micros * @as(i64, @intCast(max_runs))); 138 139 while (runs_created < max_runs - existing_count and next_time <= max_future) { 140 - // Create the scheduled flow run 141 var id_buf: [36]u8 = undefined; 142 const run_id = 
uuid_util.generate(&id_buf); 143 ··· 150 var scheduled_time_buf: [32]u8 = undefined; 151 const scheduled_time = time_util.formatMicros(&scheduled_time_buf, next_time); 152 153 - db.flow_runs.insert(run_id, deployment.flow_id, run_name, "SCHEDULED", "Scheduled", now, .{ 154 .deployment_id = deployment.id, 155 .deployment_version = deployment.version, 156 .work_queue_name = deployment.work_queue_name, 157 .work_queue_id = deployment.work_queue_id, 158 .auto_scheduled = true, 159 .expected_start_time = scheduled_time, 160 }) catch { 161 break; 162 }; ··· 167 168 return runs_created; 169 }
··· 1 const std = @import("std"); 2 const Thread = std.Thread; 3 const json = std.json; 4 + const cron = @import("cron"); 5 const log = @import("../logging.zig"); 6 const db = @import("../db/sqlite.zig"); 7 const time_util = @import("../utilities/time.zig"); 8 const uuid_util = @import("../utilities/uuid.zig"); 9 10 + // Configuration - can be overridden via env 11 const MAX_SCHEDULED_RUNS: usize = 50; // Per schedule per tick 12 const BATCH_SIZE: usize = 100; // Max schedules to evaluate per tick 13 14 + fn getSchedulerIntervalMs() u64 { 15 + const env_val = std.posix.getenv("PREFECT_SERVER_SERVICES_SCHEDULER_LOOP_SECONDS"); 16 + if (env_val) |val| { 17 + const seconds = std.fmt.parseInt(u64, val, 10) catch return 5_000; 18 + return seconds * 1000; 19 + } 20 + return 5_000; // Default 5 seconds (fast for dev, configurable for prod) 21 + } 22 + 23 var scheduler_thread: ?Thread = null; 24 var running: bool = false; 25 var mutex: Thread.Mutex = .{}; ··· 31 if (running) return; 32 running = true; 33 34 + const interval_ms = getSchedulerIntervalMs(); 35 + log.info("scheduler", "starting (interval: {}ms, max_runs: {})", .{ interval_ms, MAX_SCHEDULED_RUNS }); 36 scheduler_thread = try Thread.spawn(.{}, schedulerLoop, .{}); 37 } 38 ··· 53 } 54 55 fn schedulerLoop() void { 56 + const interval_ms = getSchedulerIntervalMs(); 57 while (running) { 58 evaluateSchedules() catch |err| { 59 log.err("scheduler", "error evaluating schedules: {}", .{err}); 60 }; 61 + Thread.sleep(interval_ms * std.time.ns_per_ms); 62 } 63 } 64 ··· 101 if (obj.get("interval")) |interval_val| { 102 return evaluateIntervalSchedule(alloc, schedule, deployment, interval_val, now, now_micros); 103 } 104 + if (obj.get("cron")) |cron_val| { 105 + return evaluateCronSchedule(alloc, schedule, deployment, cron_val, obj, now, now_micros); 106 + } 107 + // TODO: rrule schedules 108 return 0; 109 } 110 111 + /// Merge schedule parameters on top of deployment parameters. 
112 + /// Schedule params override deployment params for matching keys. 113 + fn mergeParameters(alloc: std.mem.Allocator, deployment_params: []const u8, schedule_params: ?[]const u8) ![]const u8 { 114 + // If no schedule params, return deployment params as-is 115 + const sched_params = schedule_params orelse return deployment_params; 116 + if (sched_params.len == 0 or std.mem.eql(u8, sched_params, "{}")) { 117 + return deployment_params; 118 + } 119 + 120 + // Parse both as JSON objects 121 + const dep_parsed = json.parseFromSlice(json.Value, alloc, deployment_params, .{}) catch { 122 + return sched_params; // If deployment params invalid, use schedule params 123 + }; 124 + const sched_parsed = json.parseFromSlice(json.Value, alloc, sched_params, .{}) catch { 125 + return deployment_params; // If schedule params invalid, use deployment params 126 + }; 127 + 128 + // Both must be objects 129 + if (dep_parsed.value != .object or sched_parsed.value != .object) { 130 + return deployment_params; 131 + } 132 + 133 + // Create merged object: start with deployment, overlay schedule 134 + var merged = json.ObjectMap.init(alloc); 135 + 136 + // Copy deployment params 137 + var dep_iter = dep_parsed.value.object.iterator(); 138 + while (dep_iter.next()) |entry| { 139 + try merged.put(entry.key_ptr.*, entry.value_ptr.*); 140 + } 141 + 142 + // Overlay schedule params (overwriting any matching keys) 143 + var sched_iter = sched_parsed.value.object.iterator(); 144 + while (sched_iter.next()) |entry| { 145 + try merged.put(entry.key_ptr.*, entry.value_ptr.*); 146 + } 147 + 148 + // Stringify the merged object 149 + return std.fmt.allocPrint(alloc, "{f}", .{json.fmt(json.Value{ .object = merged }, .{})}) catch deployment_params; 150 + } 151 + 152 fn evaluateIntervalSchedule( 153 alloc: std.mem.Allocator, 154 schedule: db.deployment_schedules.DeploymentScheduleRow, ··· 174 const max_runs: usize = if (schedule.max_scheduled_runs) |m| @intCast(m) else MAX_SCHEDULED_RUNS; 175 if 
(existing_count >= max_runs) return 0; 176 177 + // Merge parameters: schedule params override deployment params 178 + const merged_params = try mergeParameters(alloc, deployment.parameters, schedule.parameters); 179 + 180 // Find latest scheduled time 181 var latest_scheduled_micros: i64 = now_micros; 182 for (existing_runs) |run| { ··· 195 const max_future = now_micros + (interval_micros * @as(i64, @intCast(max_runs))); 196 197 while (runs_created < max_runs - existing_count and next_time <= max_future) { 198 var id_buf: [36]u8 = undefined; 199 const run_id = uuid_util.generate(&id_buf); 200 ··· 207 var scheduled_time_buf: [32]u8 = undefined; 208 const scheduled_time = time_util.formatMicros(&scheduled_time_buf, next_time); 209 210 + // Generate idempotency key: "scheduled {deployment_id} {schedule_id} {timestamp}" 211 + // This matches Python's format exactly for consistency 212 + var idem_key_buf: [256]u8 = undefined; 213 + const idempotency_key = std.fmt.bufPrint(&idem_key_buf, "scheduled {s} {s} {s}", .{ 214 + deployment.id, 215 + schedule.id, 216 + scheduled_time, 217 + }) catch continue; 218 + 219 + // Use idempotent insert - silently ignores duplicates 220 + _ = db.flow_runs.insertOrIgnore(run_id, deployment.flow_id, run_name, "SCHEDULED", "Scheduled", now, .{ 221 .deployment_id = deployment.id, 222 .deployment_version = deployment.version, 223 .work_queue_name = deployment.work_queue_name, 224 .work_queue_id = deployment.work_queue_id, 225 .auto_scheduled = true, 226 .expected_start_time = scheduled_time, 227 + .next_scheduled_start_time = scheduled_time, 228 + .idempotency_key = idempotency_key, 229 + .parameters = merged_params, 230 }) catch { 231 break; 232 }; ··· 237 238 return runs_created; 239 } 240 + 241 + fn evaluateCronSchedule( 242 + alloc: std.mem.Allocator, 243 + schedule: db.deployment_schedules.DeploymentScheduleRow, 244 + deployment: db.deployments.DeploymentRow, 245 + cron_val: json.Value, 246 + obj: json.ObjectMap, 247 + now: []const u8, 
248 + now_micros: i64, 249 + ) !usize { 250 + const cron_expr = switch (cron_val) { 251 + .string => |s| s, 252 + else => return 0, 253 + }; 254 + 255 + // Parse cron expression with optional day_or setting 256 + const day_or = if (obj.get("day_or")) |v| switch (v) { 257 + .bool => |b| b, 258 + else => true, 259 + } else true; 260 + 261 + const parsed_cron = cron.Cron.parseWithOptions(cron_expr, .{ .day_or = day_or }) catch |err| { 262 + log.err("scheduler", "invalid cron expression '{s}': {}", .{ cron_expr, err }); 263 + return 0; 264 + }; 265 + 266 + // Get existing scheduled runs for this deployment 267 + const existing_runs = db.flow_runs.getScheduledByDeployment(alloc, deployment.id, MAX_SCHEDULED_RUNS) catch return 0; 268 + const existing_count = existing_runs.len; 269 + 270 + // Respect max_scheduled_runs limit 271 + const max_runs: usize = if (schedule.max_scheduled_runs) |m| @intCast(m) else MAX_SCHEDULED_RUNS; 272 + if (existing_count >= max_runs) return 0; 273 + 274 + // Merge parameters: schedule params override deployment params 275 + const merged_params = try mergeParameters(alloc, deployment.parameters, schedule.parameters); 276 + 277 + // Find latest scheduled time 278 + var latest_scheduled_micros: i64 = now_micros; 279 + for (existing_runs) |run| { 280 + if (run.expected_start_time) |est| { 281 + if (time_util.parse(est)) |ts| { 282 + if (ts > latest_scheduled_micros) { 283 + latest_scheduled_micros = ts; 284 + } 285 + } 286 + } 287 + } 288 + 289 + // Use cron iterator to find next occurrences 290 + var iter = parsed_cron.iter(latest_scheduled_micros); 291 + var runs_created: usize = 0; 292 + 293 + // Limit how far into the future we schedule (24 hours for cron) 294 + const max_future = now_micros + (24 * 60 * 60 * 1_000_000); 295 + 296 + while (runs_created < max_runs - existing_count) { 297 + const next_time = iter.next() orelse break; 298 + if (next_time > max_future) break; 299 + 300 + var id_buf: [36]u8 = undefined; 301 + const run_id = 
uuid_util.generate(&id_buf); 302 + 303 + var name_buf: [64]u8 = undefined; 304 + const run_name = std.fmt.bufPrint(&name_buf, "{s}-{s}", .{ 305 + deployment.name[0..@min(deployment.name.len, 20)], 306 + run_id[0..8], 307 + }) catch "scheduled-run"; 308 + 309 + var scheduled_time_buf: [32]u8 = undefined; 310 + const scheduled_time = time_util.formatMicros(&scheduled_time_buf, next_time); 311 + 312 + // Generate idempotency key: "scheduled {deployment_id} {schedule_id} {timestamp}" 313 + var idem_key_buf: [256]u8 = undefined; 314 + const idempotency_key = std.fmt.bufPrint(&idem_key_buf, "scheduled {s} {s} {s}", .{ 315 + deployment.id, 316 + schedule.id, 317 + scheduled_time, 318 + }) catch continue; 319 + 320 + // Use idempotent insert - silently ignores duplicates 321 + _ = db.flow_runs.insertOrIgnore(run_id, deployment.flow_id, run_name, "SCHEDULED", "Scheduled", now, .{ 322 + .deployment_id = deployment.id, 323 + .deployment_version = deployment.version, 324 + .work_queue_name = deployment.work_queue_name, 325 + .work_queue_id = deployment.work_queue_id, 326 + .auto_scheduled = true, 327 + .expected_start_time = scheduled_time, 328 + .next_scheduled_start_time = scheduled_time, 329 + .idempotency_key = idempotency_key, 330 + .parameters = merged_params, 331 + }) catch { 332 + break; 333 + }; 334 + 335 + runs_created += 1; 336 + } 337 + 338 + return runs_created; 339 + }
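
For reference, the two schedule shapes the evaluator now dispatches on (an interval, or a cron expression with the optional `day_or` flag; rrule remains a TODO). Each line below is a separate example of the schedule JSON object; the field names come from the `obj.get(...)` branches above, the values are illustrative:

```json
{"interval": 3600.0}
{"cron": "0 9 * * 1-5", "day_or": true}
```

When `day_or` is true (the default taken by `evaluateCronSchedule`), day-of-month and day-of-week restrictions are OR-ed together, matching croniter/Python Prefect semantics; setting it false AND-s them.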