···4141- **[`hydrant::backfill`]**: A dedicated worker that fetches full repository CAR files. Uses LIFO prioritization and adaptive concurrency to manage backfill load efficiently.
4242- **[`hydrant::api`]**: An Axum-based XRPC server implementing repository read methods (`getRecord`, `listRecords`) and system stats. It also provides a WebSocket event stream and management APIs:
4343 - `/filter` (`GET`/`PATCH`): Configure indexing mode, signals, and collection patterns.
4444- - `/repos` (`GET`/`PUT`/`DELETE`): Bulk repository management using NDJSON or JSON arrays.
4444+ - `/repos` (`GET`/`PUT`/`DELETE`): Bulk repository management using NDJSON or JSON arrays. Supports pagination and partitioning via query parameters.
4545- Persistence worker (in `src/main.rs`): Manages periodic background flushes of the LSM-tree and cursor state.
46464747### Lazy event inflation
+4-1
README.md
···94949595### repository management
96969797-- `GET /repos`: get an NDJSON stream of all repositories and their sync status.
9797+- `GET /repos`: get an NDJSON stream of repositories and their sync status. Supports pagination and filtering:
9898+ - `limit`: max results (default 100, max 1000)
9999+ - `cursor`: resume after this position (exclusive); a DID for the `all` and `resync` partitions, or a u64 index ID for the `pending` partition
100100+ - `partition`: `all` (default), `pending` (backfill queue), or `resync` (retries)
98101- `PUT /repos`: explicitly track repositories. accepts an NDJSON body of `{"did": "..."}` (or JSON array of the same).
99102- `DELETE /repos`: untrack repositories. accepts an NDJSON body of `{"did": "..."}` (or JSON array of the same). optionally include `"deleteData": true` to also purge the repository from the database.
100103
+137-14
src/api/repos.rs
···4747 pub delete_data: bool,
4848}
/// Query-string parameters read by `handle_get_repos` to page through repositories.
49495050+#[derive(Deserialize)]
5151+pub struct GetReposParams {
    /// Maximum number of entries to scan; the handler falls back to 100 when absent and caps the value at 1000.
5252+ pub limit: Option<usize>,
    /// Exclusive start position. The handler parses it as a DID for the `all` and `resync`
    /// partitions and as a `u64` for the `pending` partition; an unparsable value yields 400.
5353+ pub cursor: Option<String>,
    /// Which keyspace to list: `all`, `resync`, or `pending`. The handler treats a missing
    /// value as `all` and rejects any other string with 400 "invalid partition".
5454+ pub partition: Option<String>,
5555+}
5656+5057pub async fn handle_get_repos(
5158 State(state): State<Arc<AppState>>,
5959+ Query(params): Query<GetReposParams>,
5260) -> Result<Response, (StatusCode, String)> {
5353- let repos_ks = state.db.repos.clone();
6161+ let limit = params.limit.unwrap_or(100).min(1000);
6262+ let partition = params.partition.unwrap_or_else(|| "all".to_string());
6363+6464+ let items = tokio::task::spawn_blocking(move || {
6565+ let db = &state.db;
6666+6767+ let results = match partition.as_str() {
6868+ "all" => {
6969+ let start_bound = if let Some(cursor) = params.cursor {
7070+ let did = jacquard::types::did::Did::new_owned(&cursor)
7171+ .map_err(|_| (StatusCode::BAD_REQUEST, "invalid cursor DID".to_string()))?;
7272+ let did_key = keys::repo_key(&did);
7373+ std::ops::Bound::Excluded(did_key)
7474+ } else {
7575+ std::ops::Bound::Unbounded
7676+ };
7777+7878+ let mut items = Vec::new();
7979+ for item in db
8080+ .repos
8181+ .range((start_bound, std::ops::Bound::Unbounded))
8282+ .take(limit)
8383+ {
8484+ let (k, v) = item
8585+ .into_inner()
8686+ .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
8787+ let repo_state = crate::db::deser_repo_state(&v)
8888+ .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
8989+ let did = crate::db::types::TrimmedDid::try_from(k.as_ref())
9090+ .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?
9191+ .to_did();
9292+9393+ items.push(RepoResponse {
9494+ did: did.to_string(),
9595+ status: repo_state.status.to_string(),
9696+ tracked: repo_state.tracked,
9797+ rev: repo_state.rev.as_ref().map(|r| r.to_string()),
9898+ last_updated_at: repo_state.last_updated_at,
9999+ });
100100+ }
101101+ Ok::<_, (StatusCode, String)>(items)
102102+ }
103103+ "resync" => {
104104+ let start_bound = if let Some(cursor) = params.cursor {
105105+ let did = jacquard::types::did::Did::new_owned(&cursor)
106106+ .map_err(|_| (StatusCode::BAD_REQUEST, "invalid cursor DID".to_string()))?;
107107+ let did_key = keys::repo_key(&did);
108108+ std::ops::Bound::Excluded(did_key)
109109+ } else {
110110+ std::ops::Bound::Unbounded
111111+ };
112112+113113+ let mut items = Vec::new();
114114+ for item in db
115115+ .resync
116116+ .range((start_bound, std::ops::Bound::Unbounded))
117117+ .take(limit)
118118+ {
119119+ let (k, _) = item
120120+ .into_inner()
121121+ .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
122122+123123+ if let Ok(Some(v)) = db.repos.get(&k) {
124124+ let repo_state = crate::db::deser_repo_state(&v)
125125+ .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
126126+ let did = crate::db::types::TrimmedDid::try_from(k.as_ref())
127127+ .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?
128128+ .to_did();
129129+130130+ items.push(RepoResponse {
131131+ did: did.to_string(),
132132+ status: repo_state.status.to_string(),
133133+ tracked: repo_state.tracked,
134134+ rev: repo_state.rev.as_ref().map(|r| r.to_string()),
135135+ last_updated_at: repo_state.last_updated_at,
136136+ });
137137+ }
138138+ }
139139+ Ok(items)
140140+ }
141141+ "pending" => {
142142+ let start_bound = if let Some(cursor) = params.cursor {
143143+ let id = cursor
144144+ .parse::<u64>()
145145+ .map_err(|_| (StatusCode::BAD_REQUEST, "invalid cursor id".to_string()))?;
146146+ std::ops::Bound::Excluded(id.to_be_bytes().to_vec())
147147+ } else {
148148+ std::ops::Bound::Unbounded
149149+ };
150150+151151+ let mut items = Vec::new();
152152+ for item in db
153153+ .pending
154154+ .range((start_bound, std::ops::Bound::Unbounded))
155155+ .take(limit)
156156+ {
157157+ let (_, did_key) = item
158158+ .into_inner()
159159+ .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
160160+161161+ if let Ok(Some(v)) = db.repos.get(&did_key) {
162162+ let repo_state = crate::db::deser_repo_state(&v)
163163+ .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
164164+ let did = crate::db::types::TrimmedDid::try_from(did_key.as_ref())
165165+ .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?
166166+ .to_did();
167167+168168+ items.push(RepoResponse {
169169+ did: did.to_string(),
170170+ status: repo_state.status.to_string(),
171171+ tracked: repo_state.tracked,
172172+ rev: repo_state.rev.as_ref().map(|r| r.to_string()),
173173+ last_updated_at: repo_state.last_updated_at,
174174+ });
175175+ }
176176+ }
177177+ Ok(items)
178178+ }
179179+ _ => Err((StatusCode::BAD_REQUEST, "invalid partition".to_string())),
180180+ }?;
541815555- let stream = futures::stream::iter(repos_ks.prefix(&[]).filter_map(|item| {
5656- let (k, v) = item.into_inner().ok()?;
5757- let did_str = std::str::from_utf8(&k[2..]).ok()?;
5858- let repo_state = crate::db::deser_repo_state(&v).ok()?;
182182+ Ok::<_, (StatusCode, String)>(results)
183183+ })
184184+ .await
185185+ .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))??;
591866060- let response = RepoResponse {
6161- did: did_str.to_string(),
6262- status: repo_state.status.to_string(),
6363- tracked: repo_state.tracked,
6464- rev: repo_state.rev.as_ref().map(|r| r.to_string()),
6565- last_updated_at: repo_state.last_updated_at,
6666- };
187187+ use futures::StreamExt;
671886868- let json = serde_json::to_string(&response).ok()?;
189189+ let stream = futures::stream::iter(items.into_iter().map(|item| {
190190+ let json = serde_json::to_string(&item).ok()?;
69191 Some(Ok::<_, std::io::Error>(format!("{json}\n")))
7070- }));
192192+ }))
193193+ .filter_map(|x| futures::future::ready(x));
7119472195 let body = Body::from_stream(stream);
73196