···3939tokio = { version = "1.47.1", features = ["full"] }
4040tokio-postgres = { version = "0.7.13", features = ["with-chrono-0_4", "with-serde_json-1"] }
4141tokio-stream = { version = "0.1.17", features = ["io-util"] }
4242+tokio-tungstenite = { version = "0.26", features = ["native-tls"] }
4243tokio-util = { version = "0.7.16", features = ["compat"] }
4344tracing = "0.1.41"
4445tracing-opentelemetry = "0.31.0"
···5354p256 = "0.13.2"
5455k256 = "0.13.4"
5556serde_ipld_dagcbor = "0.6.4"
5757+ordered-varint = "2.0.0"
5658
+2-2
readme.md
···1515 --wrap-pg "postgresql://user:pass@pg-host:5432/plc-db"
1616 ```
17171818-- Run a fully self-contained mirror using an embedded fjall database (no postgres needed):
1818+- Run a fully self-contained mirror using an embedded fjall database (no postgres or local plc server needed):
19192020 ```bash
2121 # backfill first
2222- allegedly backfill --to-fjall ./plc-data
2222+ allegedly backfill --no-bulk --to-fjall ./plc-data
23232424 # then run the mirror
2525 allegedly mirror --wrap-fjall ./plc-data
+16-33
src/bin/backfill.rs
···11use allegedly::{
22- Db, Dt, ExportPage, FjallDb, FolderSource, HttpSource, backfill, backfill_to_fjall,
33- backfill_to_pg,
22+ Db, Dt, ExportPage, FjallDb, FolderSource, HttpSource, SeqPage, backfill, backfill_to_pg,
43 bin::{GlobalArgs, bin_init},
55- full_pages, logo, pages_to_fjall, pages_to_pg, pages_to_stdout, poll_upstream,
44+ full_pages, full_pages_seq, logo, pages_to_pg, pages_to_stdout, poll_upstream,
55+ poll_upstream_seq, seq_pages_to_fjall,
66};
77use clap::Parser;
88use reqwest::Url;
···2323 /// Local folder to fetch bundles from (overrides `http`)
2424 #[arg(long)]
2525 dir: Option<PathBuf>,
2626- /// Local fjall database to fetch raw ops from (overrides `http` and `dir`)
2727- #[arg(long, conflicts_with_all = ["dir"])]
2828- from_fjall: Option<PathBuf>,
2926 /// Don't do weekly bulk-loading at all.
3027 ///
3128 /// overrides `http` and `dir`, makes catch_up redundant
···4946 /// only used if `--to-postgres` or `--to-fjall` is present
5047 #[arg(long, action)]
5148 reset: bool,
5252- /// Bulk load into a local fjall embedded database
4949+ /// Load into a local fjall embedded database
5050+ /// (bulk loading isn't supported for fjall yet; use `--no-bulk`)
5351 ///
5452 /// Pass a directory path for the fjall database
5553 #[arg(long, conflicts_with_all = ["to_postgres", "postgres_cert"])]
···7068 Args {
7169 http,
7270 dir,
7373- from_fjall,
7471 no_bulk,
7572 source_workers,
7673 to_postgres,
···9592 };
96939794 let (poll_tx, poll_out) = mpsc::channel::<ExportPage>(128); // normal/small pages
9898- let (full_tx, full_out) = mpsc::channel(1); // don't need to buffer at this filter
9595+ let (full_tx, full_out) = mpsc::channel::<ExportPage>(1); // don't need to buffer at this filter
999610097 // set up sources
10198 if no_bulk {
···114111 let mut upstream = upstream;
115112 upstream.set_path("/export");
116113 let throttle = Duration::from_millis(upstream_throttle_ms);
117117- tasks.spawn(poll_upstream(None, upstream, throttle, poll_tx));
118118- tasks.spawn(full_pages(poll_out, full_tx));
119114 if let Some(fjall_path) = to_fjall {
120115 log::trace!("opening fjall db at {fjall_path:?}...");
121116 let db = FjallDb::open(&fjall_path)?;
122117 log::trace!("opened fjall db");
123118124124- tasks.spawn(pages_to_fjall(db, full_out));
119119+ let (poll_tx, poll_out) = mpsc::channel::<SeqPage>(128); // normal/small pages
120120+ let (full_tx, full_out) = mpsc::channel::<SeqPage>(1); // don't need to buffer at this filter
121121+122122+ tasks.spawn(poll_upstream_seq(None, upstream, throttle, poll_tx));
123123+ tasks.spawn(full_pages_seq(poll_out, full_tx));
124124+ tasks.spawn(seq_pages_to_fjall(db, full_out));
125125 } else {
126126+ tasks.spawn(poll_upstream(None, upstream, throttle, poll_tx));
127127+ tasks.spawn(full_pages(poll_out, full_tx));
126128 tasks.spawn(pages_to_stdout(full_out, None));
127129 }
128130 } else {
129131 // fun mode
130132131133 // set up bulk sources
132132- if let Some(fjall_path) = from_fjall {
133133- log::trace!("opening source fjall db at {fjall_path:?}...");
134134- let db = FjallDb::open(&fjall_path)?;
135135- log::trace!("opened source fjall db");
136136- tasks.spawn(backfill(db, bulk_tx, source_workers.unwrap_or(4), until));
137137- } else if let Some(dir) = dir {
134134+ if let Some(dir) = dir {
138135 if http != DEFAULT_HTTP.parse()? {
139136 anyhow::bail!(
140137 "non-default bulk http setting can't be used with bulk dir setting ({dir:?})"
···167164 }
168165169166 // set up sinks
170170- if let Some(fjall_path) = to_fjall {
171171- log::trace!("opening fjall db at {fjall_path:?}...");
172172- let db = FjallDb::open(&fjall_path)?;
173173- log::trace!("opened fjall db");
174174-175175- tasks.spawn(backfill_to_fjall(
176176- db.clone(),
177177- reset,
178178- bulk_out,
179179- found_last_tx,
180180- ));
181181- if catch_up {
182182- tasks.spawn(pages_to_fjall(db, full_out));
183183- }
184184- } else if let Some(pg_url) = to_postgres {
167167+ if let Some(pg_url) = to_postgres {
185168 log::trace!("connecting to postgres...");
186169 let db = Db::new(pg_url.as_str(), postgres_cert).await?;
187170 log::trace!("connected to postgres");
+85-10
src/bin/mirror.rs
···11use allegedly::{
22 Db, ExperimentalConf, FjallDb, ListenConf,
33 bin::{GlobalArgs, InstrumentationArgs, bin_init},
44- logo, pages_to_fjall, pages_to_pg, poll_upstream, serve, serve_fjall,
44+ logo, pages_to_pg, poll_upstream, poll_upstream_seq, seq_pages_to_fjall, serve, serve_fjall,
55+ tail_upstream_stream,
56};
67use clap::Parser;
78use reqwest::Url;
···6970 /// accept writes! by forwarding them upstream
7071 #[arg(long, action, env = "ALLEGEDLY_EXPERIMENTAL_WRITE_UPSTREAM")]
7172 experimental_write_upstream: bool,
7373+ /// switch from polling to /export/stream once the latest op is within
7474+ /// this many days of now (plc.directory only supports ~1 week of backfill)
7575+ #[arg(long, env = "ALLEGEDLY_STREAM_CUTOVER_DAYS", default_value = "5")]
7676+ stream_cutover_days: u32,
7277}
73787479pub async fn run(
···8994 acme_ipv6,
9095 experimental_acme_domain,
9196 experimental_write_upstream,
9797+ stream_cutover_days,
9298 }: Args,
9399 sync: bool,
94100) -> anyhow::Result<()> {
···122128 let db = FjallDb::open(&fjall_path)?;
123129 if compact_fjall {
124130 log::info!("compacting fjall...");
125125- db.compact()?; // blocking here is fine, we didn't start anything yet
131131+ db.compact()?;
126132 }
127133128128- log::debug!("getting the latest op from fjall...");
129129- let latest = db
134134+ log::debug!("getting the latest seq from fjall...");
135135+ let latest_seq = db
130136 .get_latest()?
137137+ .map(|(seq, _)| seq)
131138 .expect("there to be at least one op in the db. did you backfill?");
132132- log::info!("starting polling from {latest}...");
139139+ log::info!("starting seq polling from seq {latest_seq}...");
133140134134- let (send_page, recv_page) = mpsc::channel(8);
141141+ let (send_page, recv_page) = mpsc::channel::<allegedly::SeqPage>(8);
135142136136- let mut poll_url = upstream.clone();
137137- poll_url.set_path("/export");
143143+ let mut export_url = upstream.clone();
144144+ export_url.set_path("/export");
145145+ let mut stream_url = upstream.clone();
146146+ stream_url.set_path("/export/stream");
138147 let throttle = Duration::from_millis(upstream_throttle_ms);
148148+ let cutover_age = Duration::from_secs(stream_cutover_days as u64 * 86_400);
149149+150150+ // the poll -> stream task: poll until we're caught up, then switch to stream.
151151+ // on stream disconnect, fall back to polling to resync.
152152+ let send_page_bg = send_page.clone();
153153+ tasks.spawn(async move {
154154+ let mut current_seq = latest_seq;
155155+ loop {
156156+ log::info!("seq polling from seq {current_seq}");
157157+ let (inner_tx, mut inner_rx) = mpsc::channel::<allegedly::SeqPage>(8);
158158+159159+ // run poller; it ends only when the channel closes
160160+ let poll_url = export_url.clone();
161161+ let poll_task = tokio::spawn(poll_upstream_seq(
162162+ Some(current_seq),
163163+ poll_url,
164164+ throttle,
165165+ inner_tx,
166166+ ));
167167+168168+ // drain pages from poller until the last op is within cutover_age of now,
169169+ // meaning we're close enough to the tip that the stream can cover the rest
170170+ let mut last_seq_from_poll = current_seq;
139171140140- tasks.spawn(poll_upstream(Some(latest), poll_url, throttle, send_page));
141141- tasks.spawn(pages_to_fjall(db.clone(), recv_page));
172172+ while let Some(page) = inner_rx.recv().await {
173173+ let near_tip = page.ops.last().map_or(false, |op| {
174174+ let age = chrono::Utc::now().signed_duration_since(op.created_at);
175175+ age.to_std().map_or(false, |d| d <= cutover_age)
176176+ });
177177+ if let Some(last) = page.ops.last() {
178178+ last_seq_from_poll = last.seq;
179179+ }
180180+ let _ = send_page_bg.send(page).await;
181181+ if near_tip {
182182+ break;
183183+ }
184184+ }
185185+186186+ poll_task.abort();
187187+ current_seq = last_seq_from_poll;
188188+189189+ // switch to streaming
190190+ log::info!("caught up at seq {current_seq}, switching to /export/stream");
191191+ let (stream_inner_tx, mut stream_inner_rx) = mpsc::channel::<allegedly::SeqPage>(8);
192192+ let stream_task = tokio::spawn(tail_upstream_stream(
193193+ Some(current_seq),
194194+ stream_url.clone(),
195195+ stream_inner_tx,
196196+ ));
197197+198198+ while let Some(page) = stream_inner_rx.recv().await {
199199+ if let Some(last) = page.ops.last() {
200200+ current_seq = last.seq;
201201+ }
202202+ if send_page_bg.send(page).await.is_err() {
203203+ stream_task.abort();
204204+ return anyhow::Ok("fjall-poll-stream (dest closed)");
205205+ }
206206+ }
207207+208208+ // stream ended/errored — loop back to polling to resync
209209+ match stream_task.await {
210210+ Ok(Ok(())) => log::info!("stream closed cleanly, resyncing via poll"),
211211+ Ok(Err(e)) => log::warn!("stream error: {e}, resyncing via poll"),
212212+ Err(e) => log::warn!("stream task join error: {e}"),
213213+ }
214214+ }
215215+ });
142216217217+ tasks.spawn(seq_pages_to_fjall(db.clone(), recv_page));
143218 tasks.spawn(serve_fjall(upstream, listen_conf, experimental_conf, db));
144219 } else {
145220 let wrap = wrap.ok_or(anyhow::anyhow!(
+72-3
src/lib.rs
···55mod backfill;
66mod cached_value;
77mod client;
88-mod crypto;
88+pub mod crypto;
99pub mod doc;
1010mod mirror;
1111mod plc_fjall;
···2121pub use client::{CLIENT, UA};
2222pub use mirror::{ExperimentalConf, ListenConf, serve, serve_fjall};
2323pub use plc_fjall::{
2424- FjallDb, audit as audit_fjall, backfill_to_fjall, fix_ops as fix_ops_fjall, pages_to_fjall,
2424+ FjallDb, audit as audit_fjall, backfill_to_fjall, fix_ops as fix_ops_fjall, seq_pages_to_fjall,
2525};
2626pub use plc_pg::{Db, backfill_to_pg, pages_to_pg};
2727-pub use poll::{PageBoundaryState, get_page, poll_upstream};
2727+pub use poll::{
2828+ PageBoundaryState, get_page, poll_upstream, poll_upstream_seq, tail_upstream_stream,
2929+};
2830pub use ratelimit::{CreatePlcOpLimiter, GovernorMiddleware, IpLimiters};
2931pub use weekly::{BundleSource, FolderSource, HttpSource, Week, pages_to_weeks, week_to_pages};
3032···8587 }
8688}
87899090+/// A PLC op from `/export?after=<seq>` or `/export/stream`
9191+///
9292+/// Both endpoints return the `seq` field per op, which is a globally monotonic
9393+/// unsigned integer assigned by the PLC directory.
9494+#[derive(Debug, Clone, Deserialize)]
9595+#[serde(rename_all = "camelCase")]
9696+pub struct SeqOp {
9797+ pub seq: u64,
9898+ pub did: String,
9999+ pub cid: String,
100100+ pub created_at: Dt,
101101+ #[serde(default)]
102102+ pub nullified: bool,
103103+ pub operation: Box<serde_json::value::RawValue>,
104104+}
105105+106106+impl From<SeqOp> for Op {
107107+ fn from(s: SeqOp) -> Self {
108108+ Op {
109109+ did: s.did,
110110+ cid: s.cid,
111111+ created_at: s.created_at,
112112+ nullified: s.nullified,
113113+ operation: s.operation,
114114+ }
115115+ }
116116+}
117117+118118+/// A page of sequenced ops from `/export?after=<seq>`
119119+#[derive(Debug)]
120120+pub struct SeqPage {
121121+ pub ops: Vec<SeqOp>,
122122+}
123123+124124+impl SeqPage {
125125+ pub fn is_empty(&self) -> bool {
126126+ self.ops.is_empty()
127127+ }
128128+}
129129+88130/// page forwarder who drops its channels on receipt of a small page
89131///
90132/// PLC will return up to 1000 ops on a page, and returns full pages until it
···92134pub async fn full_pages(
93135 mut rx: mpsc::Receiver<ExportPage>,
94136 tx: mpsc::Sender<ExportPage>,
137137+) -> anyhow::Result<&'static str> {
138138+ while let Some(page) = rx.recv().await {
139139+ let n = page.ops.len();
140140+ if n < 900 {
141141+ let last_age = page.ops.last().map(|op| chrono::Utc::now() - op.created_at);
142142+ let Some(age) = last_age else {
143143+ log::info!("full_pages done, empty final page");
144144+ return Ok("full pages (hmm)");
145145+ };
146146+ if age <= chrono::TimeDelta::hours(6) {
147147+ log::info!("full_pages done, final page of {n} ops");
148148+ } else {
149149+ log::warn!("full_pages finished with small page of {n} ops, but it's {age} old");
150150+ }
151151+ return Ok("full pages (cool)");
152152+ }
153153+ log::trace!("full_pages: continuing with page of {n} ops");
154154+ tx.send(page).await?;
155155+ }
156156+ Err(anyhow::anyhow!(
157157+ "full_pages ran out of source material, sender closed"
158158+ ))
159159+}
160160+161161+pub async fn full_pages_seq(
162162+ mut rx: mpsc::Receiver<SeqPage>,
163163+ tx: mpsc::Sender<SeqPage>,
95164) -> anyhow::Result<&'static str> {
96165 while let Some(page) = rx.recv().await {
97166 let n = page.ops.len();
+15-24
src/mirror/fjall.rs
···14141515#[derive(Clone)]
1616struct FjallSyncInfo {
1717- latest_at: CachedValue<Dt, GetFjallLatestAt>,
1717+ latest: CachedValue<(u64, Dt), GetFjallLatest>,
1818 upstream_status: CachedValue<PlcStatus, CheckUpstream>,
1919}
20202121#[derive(Clone)]
2222-struct GetFjallLatestAt(FjallDb);
2323-impl Fetcher<Dt> for GetFjallLatestAt {
2424- async fn fetch(&self) -> Result<Dt, Box<dyn std::error::Error>> {
2222+struct GetFjallLatest(FjallDb);
2323+impl Fetcher<(u64, Dt)> for GetFjallLatest {
2424+ async fn fetch(&self) -> Result<(u64, Dt), Box<dyn std::error::Error>> {
2525 let db = self.0.clone();
2626- let now = tokio::task::spawn_blocking(move || db.get_latest())
2626+ tokio::task::spawn_blocking(move || db.get_latest())
2727 .await??
2828- .ok_or(anyhow::anyhow!(
2929- "expected to find at least one thing in the db"
3030- ))?;
3131- Ok(now)
2828+ .ok_or_else(|| anyhow::anyhow!("db is empty").into())
3229 }
3330}
3431···116113 if !ok {
117114 overall_status = StatusCode::BAD_GATEWAY;
118115 }
119119- let latest = sync_info.latest_at.get().await.ok();
116116+ let latest = sync_info.latest.get().await.ok();
117117+ let latest_at = latest.map(|(_, dt)| dt);
118118+ let latest_seq = latest.map(|(seq, _)| seq);
120119121120 (
122121 overall_status,
···124123 "server": "allegedly (mirror/fjall)",
125124 "version": env!("CARGO_PKG_VERSION"),
126125 "upstream_plc": upstream_status,
127127- "latest_at": latest,
126126+ "latest_at": latest_at,
127127+ "latest_seq": latest_seq,
128128 })),
129129 )
130130}
···250250251251#[derive(Deserialize)]
252252struct ExportQuery {
253253- after: Option<String>,
253253+ after: Option<u64>,
254254 #[allow(dead_code)] // we just cap at 1000 for now, matching reference impl
255255 count: Option<usize>,
256256}
···261261 Query(query): Query<ExportQuery>,
262262 Data(FjallState { fjall, .. }): Data<&FjallState>,
263263) -> Result<Body> {
264264- let after = if let Some(a) = query.after {
265265- Some(
266266- chrono::DateTime::parse_from_rfc3339(&a)
267267- .map_err(|e| Error::from_string(e.to_string(), StatusCode::BAD_REQUEST))?
268268- .with_timezone(&chrono::Utc),
269269- )
270270- } else {
271271- None
272272- };
273273-264264+ let after = query.after.unwrap_or(0);
274265 let limit = 1000;
275266 let db = fjall.clone();
276267277268 let ops = tokio::task::spawn_blocking(move || {
278278- let iter = db.export_ops(after.unwrap_or(Dt::UNIX_EPOCH)..)?;
269269+ let iter = db.export_ops(after..)?;
279270 iter.take(limit).collect::<anyhow::Result<Vec<_>>>()
280271 })
281272 .await
···324315 .expect("reqwest client to build");
325316326317 let sync_info = FjallSyncInfo {
327327- latest_at: CachedValue::new(GetFjallLatestAt(fjall.clone()), Duration::from_secs(2)),
318318+ latest: CachedValue::new(GetFjallLatest(fjall.clone()), Duration::from_secs(2)),
328319 upstream_status: CachedValue::new(
329320 CheckUpstream(upstream.clone(), client.clone()),
330321 Duration::from_secs(6),
+151-178
src/plc_fjall.rs
···11use crate::{
22- BundleSource, Dt, ExportPage, InvalidOp, Op as CommonOp, PageBoundaryState, Week,
22+ Dt, InvalidOp, Op as CommonOp,
33 crypto::{AssuranceResults, DidKey, Signature, assure_valid_sig},
44};
55use anyhow::Context;
66use data_encoding::BASE32_NOPAD;
77use fjall::{Database, Keyspace, KeyspaceCreateOptions, PersistMode, config::BlockSizePolicy};
88-use futures::Future;
88+use ordered_varint::Variable;
99use serde::{Deserialize, Serialize};
1010use std::collections::BTreeMap;
1111use std::fmt;
1212use std::path::Path;
1313use std::sync::Arc;
1414use std::time::Instant;
1515-use tokio::io::{AsyncRead, AsyncWriteExt};
1615use tokio::sync::{mpsc, oneshot};
17161817const SEP: u8 = 0;
1818+1919+fn seq_key(seq: u64) -> Vec<u8> {
2020+ seq.to_variable_vec().expect("that seq number encodes")
2121+}
2222+2323+fn decode_seq_key(key: &[u8]) -> anyhow::Result<u64> {
2424+ u64::decode_variable(key).context("failed to decode seq key")
2525+}
19262027type IpldCid = cid::CidGeneric<64>;
2128···5461 format!("did:plc:{decoded}")
5562}
56635757-fn op_key(created_at: &Dt, cid_suffix: &[u8]) -> Vec<u8> {
5858- let micros = created_at.timestamp_micros() as u64;
5959- let mut key = Vec::with_capacity(8 + 1 + cid_suffix.len());
6060- key.extend_from_slice(µs.to_be_bytes());
6161- key.push(SEP);
6262- key.extend_from_slice(cid_suffix);
6363- key
6464-}
6565-6664fn by_did_prefix(did: &str) -> anyhow::Result<Vec<u8>> {
6765 let mut p = Vec::with_capacity(BASE32_NOPAD.decode_len(did.len())? + 1);
6866 encode_did(&mut p, did)?;
···7068 Ok(p)
7169}
72707373-fn by_did_key(did: &str, created_at: &Dt, cid_suffix: &[u8]) -> anyhow::Result<Vec<u8>> {
7171+/// by_did key: [15 bytes encoded did][SEP][seq varint]
7272+fn by_did_key(did: &str, seq: u64) -> anyhow::Result<Vec<u8>> {
7473 let mut key = by_did_prefix(did)?;
7575- let micros = created_at.timestamp_micros() as u64;
7676- key.extend_from_slice(µs.to_be_bytes());
7777- key.push(SEP);
7878- key.extend_from_slice(cid_suffix);
7474+ seq.encode_variable(&mut key)?;
7975 Ok(key)
8080-}
8181-8282-fn decode_timestamp(key: &[u8]) -> anyhow::Result<Dt> {
8383- let micros = u64::from_be_bytes(
8484- key.try_into()
8585- .map_err(|e| anyhow::anyhow!("invalid timestamp key {key:?}: {e}"))?,
8686- );
8787- Dt::from_timestamp_micros(micros as i64)
8888- .ok_or_else(|| anyhow::anyhow!("invalid timestamp {micros}"))
8976}
90779178/// CID string → binary CID bytes
···827814 Ok(results)
828815}
829816830830-// this is basically Op, but without the cid and created_at fields
831831-// since we have them in the key already
817817+// stored alongside the seq key in the ops keyspace
818818+// cid and created_at are in the value (not the key) in the new layout
832819#[derive(Debug, Deserialize, Serialize)]
833820#[serde(rename_all = "camelCase")]
834821struct DbOp {
835822 #[serde(with = "serde_bytes")]
836823 pub did: Vec<u8>,
837824 #[serde(with = "serde_bytes")]
838838- pub cid_prefix: Vec<u8>,
825825+ pub cid: Vec<u8>,
826826+ pub created_at: u64,
839827 pub nullified: bool,
840828 pub operation: StoredOp,
841829}
···857845858846struct FjallInner {
859847 db: Database,
848848+ /// primary keyspace: seq (varint) -> DbOp
860849 ops: Keyspace,
850850+ /// secondary index: [encoded_did][SEP][seq_varint] -> []
861851 by_did: Keyspace,
862852}
863853···915905 Ok(())
916906 }
917907918918- pub fn get_latest(&self) -> anyhow::Result<Option<Dt>> {
908908+ /// Returns `(seq, created_at)` for the last stored op, or `None` if empty.
909909+ pub fn get_latest(&self) -> anyhow::Result<Option<(u64, Dt)>> {
919910 let Some(guard) = self.inner.ops.last_key_value() else {
920911 return Ok(None);
921912 };
922922- let key = guard
923923- .key()
924924- .map_err(|e| anyhow::anyhow!("fjall key error: {e}"))?;
925925-926926- key.get(..8)
927927- .ok_or_else(|| anyhow::anyhow!("invalid timestamp key {key:?}"))
928928- .map(decode_timestamp)
929929- .flatten()
930930- .map(Some)
913913+ let (key, value) = guard
914914+ .into_inner()
915915+ .map_err(|e| anyhow::anyhow!("fjall read error: {e}"))?;
916916+ let seq = decode_seq_key(&key)?;
917917+ let db_op: DbOp = rmp_serde::from_slice(&value)?;
918918+ let dt = Dt::from_timestamp_micros(db_op.created_at as i64)
919919+ .ok_or_else(|| anyhow::anyhow!("invalid created_at in last op"))?;
920920+ Ok(Some((seq, dt)))
931921 }
932922933933- pub fn insert_op<const VERIFY: bool>(&self, op: &CommonOp) -> anyhow::Result<usize> {
923923+ pub fn insert_op<const VERIFY: bool>(&self, op: &CommonOp, seq: u64) -> anyhow::Result<usize> {
934924 let cid_bytes = decode_cid_str(&op.cid)?;
935935- let cid_prefix = cid_bytes
936936- .get(..30)
937937- .ok_or_else(|| anyhow::anyhow!("invalid cid length (prefix): {}", op.cid))?
938938- .to_vec();
939939- let cid_suffix = cid_bytes
940940- .get(30..)
941941- .ok_or_else(|| anyhow::anyhow!("invalid cid length (suffix): {}", op.cid))?;
942925943926 let op_json: serde_json::Value = serde_json::from_str(op.operation.get())?;
944927 let (stored, mut errors) = StoredOp::from_json_value(op_json);
···960943 .prev
961944 .as_ref()
962945 .map(|prev_cid| {
946946+ // TODO: we should have a cid -> seq lookup eventually maybe?
947947+ // this is probably fine though we will only iter over like 2 ops at most
948948+ // or so, its there to handle nullified...
949949+ // but a cid lookup would also help us avoid duplicate ops!
963950 self._ops_for_did(&op.did)
964951 .map(|ops| {
965952 ops.rev()
···1000987 encode_did(&mut encoded_did, &op.did)?;
1001988 encoded_did
1002989 },
10031003- cid_prefix,
990990+ cid: cid_bytes,
991991+ created_at: op.created_at.timestamp_micros() as u64,
1004992 nullified: op.nullified,
1005993 operation,
1006994 };
1007995996996+ let seq_val = rmp_serde::to_vec(&db_op)?;
997997+ let seq_key_bytes = seq_key(seq);
998998+ let by_did_key_bytes = by_did_key(&op.did, seq)?;
999999+10081000 let mut batch = self.inner.db.batch();
10091009- batch.insert(
10101010- &self.inner.ops,
10111011- op_key(&op.created_at, cid_suffix),
10121012- rmp_serde::to_vec(&db_op)?,
10131013- );
10141014- batch.insert(
10151015- &self.inner.by_did,
10161016- by_did_key(&op.did, &op.created_at, cid_suffix)?,
10171017- &[],
10181018- );
10011001+ batch.insert(&self.inner.ops, seq_key_bytes, seq_val);
10021002+ batch.insert(&self.inner.by_did, by_did_key_bytes, &[]);
10191003 batch.commit()?;
1020100410211005 Ok(1)
10221006 }
10071007+}
1023100810091009+impl FjallDb {
10101010+ /// Decode a `by_did` entry: extract the seq from the key suffix, then
10111011+ /// look up the full `DbOp` in the `ops` keyspace.
10241012 fn decode_by_did_entry(
10251013 &self,
10261026- by_did_key: &[u8],
10141014+ by_did_key_bytes: &[u8],
10271015 prefix_len: usize,
10281016 ) -> anyhow::Result<(Dt, PlcCid, DbOp)> {
10291029- let key_rest = by_did_key
10171017+ let key_suffix = by_did_key_bytes
10301018 .get(prefix_len..)
10311031- .ok_or_else(|| anyhow::anyhow!("invalid by_did key {by_did_key:?}"))?;
10321032-10331033- let ts_bytes = key_rest
10341034- .get(..8)
10351035- .ok_or_else(|| anyhow::anyhow!("invalid length: {key_rest:?}"))?;
10361036- let cid_suffix = key_rest
10371037- .get(9..)
10381038- .ok_or_else(|| anyhow::anyhow!("invalid length: {key_rest:?}"))?;
10191019+ .ok_or_else(|| anyhow::anyhow!("invalid by_did key {by_did_key_bytes:?}"))?;
1039102010401040- let op_key = [ts_bytes, &[SEP][..], cid_suffix].concat();
10411041- let ts = decode_timestamp(ts_bytes)?;
10211021+ let seq =
10221022+ u64::decode_variable(key_suffix).context("failed to decode seq from by_did key")?;
1042102310431024 let value = self
10441025 .inner
10451026 .ops
10461046- .get(&op_key)?
10471047- .ok_or_else(|| anyhow::anyhow!("op not found: {op_key:?}"))?;
10271027+ .get(seq_key(seq))?
10281028+ .ok_or_else(|| anyhow::anyhow!("op not found for seq {seq}"))?;
1048102910491030 let op: DbOp = rmp_serde::from_slice(&value)?;
10501050- let mut full_cid = op.cid_prefix.clone();
10511051- full_cid.extend_from_slice(cid_suffix);
10311031+ let ts = Dt::from_timestamp_micros(op.created_at as i64)
10321032+ .ok_or_else(|| anyhow::anyhow!("invalid created_at_micros {}", op.created_at))?;
10331033+ let cid = PlcCid(op.cid.clone());
1052103410531053- Ok((ts, PlcCid(full_cid), op))
10351035+ Ok((ts, cid, op))
10541036 }
1055103710561038 fn _ops_for_did(
···10741056 ) -> anyhow::Result<impl DoubleEndedIterator<Item = anyhow::Result<Op>> + '_> {
10751057 Ok(self._ops_for_did(did)?.map(|res| {
10761058 let (ts, cid, op) = res?;
10771077-10781059 let cid = decode_cid(&cid.0)?;
10791060 let did = decode_did(&op.did);
10801080-10811061 Ok(Op {
10821062 did,
10831063 cid,
···1090107010911071 pub fn export_ops(
10921072 &self,
10931093- range: impl std::ops::RangeBounds<Dt>,
10731073+ range: impl std::ops::RangeBounds<u64>,
10941074 ) -> anyhow::Result<impl Iterator<Item = anyhow::Result<Op>> + '_> {
10951075 use std::ops::Bound;
10961096- let map_bound = |b: Bound<&Dt>| -> Bound<[u8; 8]> {
10761076+ let map_bound = |b: Bound<&u64>| -> Bound<Vec<u8>> {
10971077 match b {
10981098- Bound::Included(dt) => Bound::Included(dt.timestamp_micros().to_be_bytes()),
10991099- Bound::Excluded(dt) => Bound::Excluded(dt.timestamp_micros().to_be_bytes()),
10781078+ Bound::Included(seq) => Bound::Included(seq_key(*seq)),
10791079+ Bound::Excluded(seq) => Bound::Excluded(seq_key(*seq)),
11001080 Bound::Unbounded => Bound::Unbounded,
11011081 }
11021082 };
11031083 let range = (map_bound(range.start_bound()), map_bound(range.end_bound()));
1104108411051105- let iter = self.inner.ops.range(range);
10851085+ Ok(self
10861086+ .inner
10871087+ .ops
10881088+ .range(range)
10891089+ .map(|item| -> anyhow::Result<Op> {
10901090+ let (_, value) = item
10911091+ .into_inner()
10921092+ .map_err(|e: fjall::Error| anyhow::anyhow!("fjall read error: {e}"))?;
10931093+ let db_op: DbOp = rmp_serde::from_slice(&value)?;
10941094+ let created_at =
10951095+ Dt::from_timestamp_micros(db_op.created_at as i64).ok_or_else(|| {
10961096+ anyhow::anyhow!("invalid created_at_micros {}", db_op.created_at)
10971097+ })?;
10981098+ let cid = decode_cid(&db_op.cid)?;
10991099+ let did = decode_did(&db_op.did);
11001100+ Ok(Op {
11011101+ did,
11021102+ cid,
11031103+ created_at,
11041104+ nullified: db_op.nullified,
11051105+ operation: db_op.operation.to_json_value(),
11061106+ })
11071107+ }))
11081108+ }
1106110911071107- Ok(iter.map(|item| {
11081108- let (key, value) = item
11101110+ pub fn drop_op(&self, did_str: &str, _created_at: &Dt, _cid: &str) -> anyhow::Result<()> {
11111111+ // scan the by_did index for this DID and find the op that matches
11121112+ // (in practice drop_op is rare so a scan is fine)
11131113+ let prefix = by_did_prefix(did_str)?;
11141114+ let mut found_seq: Option<u64> = None;
11151115+ let mut found_by_did_key: Option<Vec<u8>> = None;
11161116+11171117+ for guard in self.inner.by_did.prefix(&prefix) {
11181118+ let (key, _) = guard
11091119 .into_inner()
11101120 .map_err(|e| anyhow::anyhow!("fjall read error: {e}"))?;
11111111- let db_op: DbOp = rmp_serde::from_slice(&value)?;
11121112- let created_at = decode_timestamp(
11131113- key.get(..8)
11141114- .ok_or_else(|| anyhow::anyhow!("invalid op key {key:?}"))?,
11151115- )?;
11161116- let cid_suffix = key
11171117- .get(9..)
11181118- .ok_or_else(|| anyhow::anyhow!("invalid op key {key:?}"))?;
11211121+ let suffix = &key[prefix.len()..];
11221122+ let seq = u64::decode_variable(suffix).context("decode seq in drop_op")?;
11231123+ found_seq = Some(seq);
11241124+ found_by_did_key = Some(key.to_vec());
11251125+ // if there were multiple ops for this DID we'd need to match by cid,
11261126+ // but for now take the last matched (they're in seq order)
11271127+ }
1119112811201120- let mut full_cid_bytes = db_op.cid_prefix.clone();
11211121- full_cid_bytes.extend_from_slice(cid_suffix);
11221122-11231123- let cid = decode_cid(&full_cid_bytes)?;
11241124- let did = decode_did(&db_op.did);
11251125-11261126- Ok(Op {
11271127- did,
11281128- cid,
11291129- created_at,
11301130- nullified: db_op.nullified,
11311131- operation: db_op.operation.to_json_value(),
11321132- })
11331133- }))
11341134- }
11351135-11361136- pub fn drop_op(&self, did_str: &str, created_at: &Dt, cid: &str) -> anyhow::Result<()> {
11371137- let cid = decode_cid_str(cid)?;
11381138- let cid_suffix = &cid[30..];
11391139-11401140- let op_key = op_key(created_at, cid_suffix);
11411141- let by_did_key = by_did_key(did_str, created_at, cid_suffix)?;
11291129+ let (seq, by_did_key_bytes) = match (found_seq, found_by_did_key) {
11301130+ (Some(s), Some(k)) => (s, k),
11311131+ _ => {
11321132+ log::warn!("drop_op: by_did entry not found for {did_str}");
11331133+ return Ok(());
11341134+ }
11351135+ };
1142113611431137 let mut batch = self.inner.db.batch();
11441144- batch.remove(&self.inner.ops, op_key);
11451145- batch.remove(&self.inner.by_did, by_did_key);
11381138+ batch.remove(&self.inner.ops, seq_key(seq));
11391139+ batch.remove(&self.inner.by_did, by_did_key_bytes);
11461140 batch.commit()?;
1147114111481142 Ok(())
···13021296 }
13031297}
1304129813051305-impl BundleSource for FjallDb {
13061306- fn reader_for(
13071307- &self,
13081308- week: Week,
13091309- ) -> impl Future<Output = anyhow::Result<impl AsyncRead + Send>> + Send {
13101310- let db = self.clone();
13111311-13121312- async move {
13131313- let (mut tx, rx) = tokio::io::duplex(1024 * 1024 * 16);
13141314-13151315- tokio::task::spawn_blocking(move || -> anyhow::Result<()> {
13161316- let after: Dt = week.into();
13171317- let before: Dt = week.next().into();
13181318-13191319- let iter = db.export_ops(after..before)?;
13201320-13211321- let rt = tokio::runtime::Handle::current();
13221322-13231323- for op_res in iter {
13241324- let op = op_res?;
13251325- let operation_str = serde_json::to_string(&op.operation)?;
13261326- let common_op = crate::Op {
13271327- did: op.did,
13281328- cid: op.cid,
13291329- created_at: op.created_at,
13301330- nullified: op.nullified,
13311331- operation: serde_json::value::RawValue::from_string(operation_str)?,
13321332- };
13331333-13341334- let mut json_bytes = serde_json::to_vec(&common_op)?;
13351335- json_bytes.push(b'\n');
13361336-13371337- if rt.block_on(tx.write_all(&json_bytes)).is_err() {
13381338- break;
13391339- }
13401340- }
13411341-13421342- Ok(())
13431343- });
13441344-13451345- Ok(rx)
13461346- }
13471347- }
13481348-}
13491349-13501299pub async fn backfill_to_fjall(
13511300 db: FjallDb,
13521301 reset: bool,
13531353- mut pages: mpsc::Receiver<ExportPage>,
13021302+ mut pages: mpsc::Receiver<crate::SeqPage>,
13541303 notify_last_at: Option<oneshot::Sender<Option<Dt>>>,
13551304) -> anyhow::Result<&'static str> {
13561305 let t0 = Instant::now();
···13751324 page = pages.recv(), if !pages_finished => {
13761325 let Some(page) = page else { continue; };
13771326 if notify_last_at.is_some() {
13781378- if let Some(s) = PageBoundaryState::new(&page) {
13791379- last_at = last_at.filter(|&l| l >= s.last_at).or(Some(s.last_at));
13271327+ // SeqPage ops are always in order, so we can just grab the last one
13281328+ if let Some(last_op) = page.ops.last() {
13291329+ last_at = last_at.filter(|&l| l >= last_op.created_at).or(Some(last_op.created_at));
13801330 }
13811331 }
13321332+13821333 let db = db.clone();
13341334+13831335 // we don't have to wait for inserts to finish, because insert_op
13841336 // without verification does not read anything from the db
13851337 insert_tasks.spawn_blocking(move || {
13861338 let mut count: usize = 0;
13871387- for op in &page.ops {
13881388- // we don't verify sigs for bulk, since pages might be out of order
13891389- count += db.insert_op::<false>(op)?;
13391339+ for seq_op in &page.ops {
13401340+ let op = CommonOp {
13411341+ did: seq_op.did.clone(),
13421342+ cid: seq_op.cid.clone(),
13431343+ created_at: seq_op.created_at,
13441344+ nullified: seq_op.nullified,
13451345+ operation: seq_op.operation.clone(),
13461346+ };
13471347+ // we don't verify sigs for bulk, since pages might be out of order (and we trust for backfills)
13481348+ count += db.insert_op::<false>(&op, seq_op.seq)?;
13901349 }
13911350 db.persist(PersistMode::Buffer)?;
13921351 Ok(count)
···14211380 Ok("backfill_to_fjall")
14221381}
1423138214241424-pub async fn pages_to_fjall(
13831383+/// Write sequenced ops (with PLC seq numbers) into fjall.
13841384+pub async fn seq_pages_to_fjall(
14251385 db: FjallDb,
14261426- mut pages: mpsc::Receiver<ExportPage>,
13861386+ mut pages: mpsc::Receiver<crate::SeqPage>,
14271387) -> anyhow::Result<&'static str> {
14281428- log::info!("starting pages_to_fjall writer...");
13881388+ log::info!("starting seq_pages_to_fjall writer...");
1429138914301390 let t0 = Instant::now();
14311391 let mut ops_inserted: usize = 0;
1432139214331393 while let Some(page) = pages.recv().await {
14341434- log::trace!("writing page with {} ops", page.ops.len());
13941394+ log::trace!("writing seq page with {} ops", page.ops.len());
14351395 let db = db.clone();
14361396 let count = tokio::task::spawn_blocking(move || -> anyhow::Result<usize> {
14371397 let mut count: usize = 0;
14381438- for op in &page.ops {
14391439- count += db.insert_op::<true>(op)?;
13981398+ for seq_op in &page.ops {
13991399+ let common_op = CommonOp {
14001400+ did: seq_op.did.clone(),
14011401+ cid: seq_op.cid.clone(),
14021402+ created_at: seq_op.created_at,
14031403+ nullified: seq_op.nullified,
14041404+ operation: seq_op.operation.clone(),
14051405+ };
14061406+ count += db.insert_op::<true>(&common_op, seq_op.seq)?;
14401407 }
14411408 db.persist(PersistMode::Buffer)?;
14421409 Ok(count)
···14461413 }
1447141414481415 log::info!(
14491449- "no more pages. inserted {ops_inserted} ops in {:?}",
14161416+ "no more seq pages. inserted {ops_inserted} ops in {:?}",
14501417 t0.elapsed()
14511418 );
14521452- Ok("pages_to_fjall")
14191419+ Ok("seq_pages_to_fjall")
14531420}
1454142114551422pub async fn audit(
···14641431 t0.elapsed()
14651432 );
14661433 if failed > 0 {
14671467- anyhow::bail!("audit found {failed} invalid operations");
14341434+ log::error!("audit found {failed} invalid operations");
14681435 }
14691436 Ok("audit_fjall")
14701437}
···1481144814821449 let latest_at = db
14831450 .get_latest()?
14841484- .ok_or_else(|| anyhow::anyhow!("db not backfilled? expected at least one op"))?;
14511451+ .ok_or_else(|| anyhow::anyhow!("db not backfilled? expected at least one op"))
14521452+ .map(|(_, dt)| dt)?;
14531453+14541454+ // local seq counter for newly fetched ops
14551455+ let mut next_seq = db.get_latest()?.map(|(s, _)| s).unwrap_or(0) + 1;
1485145614861457 while let Some(op) = invalid_ops_rx.recv().await {
14871458 let InvalidOp { did, at, cid, .. } = op;
···15441515 continue;
15451516 }
1546151715471547- count += db.insert_op::<true>(&op)?;
15181518+ let seq = next_seq;
15191519+ next_seq += 1;
15201520+ count += db.insert_op::<true>(&op, seq)?;
15481521 }
1549152215501523 db.persist(PersistMode::Buffer)?;
+159-1
src/poll.rs
···11-use crate::{CLIENT, Dt, ExportPage, Op, OpKey};
11+use crate::{CLIENT, Dt, ExportPage, Op, OpKey, SeqOp, SeqPage};
22use reqwest::Url;
33use std::time::Duration;
44use thiserror::Error;
···255255256256 prev_last = next_last.or(prev_last);
257257 }
258258+}
259259+260260+/// Fetch one page of seq-based export from `/export?after=<seq>`
261261+async fn get_seq_page(url: Url) -> Result<SeqPage, GetPageError> {
262262+ use futures::TryStreamExt;
263263+ use tokio::io::{AsyncBufReadExt, BufReader};
264264+ use tokio_util::compat::FuturesAsyncReadCompatExt;
265265+266266+ log::trace!("getting seq page: {url}");
267267+268268+ let res = CLIENT.get(url).send().await?.error_for_status()?;
269269+ let stream = Box::pin(
270270+ res.bytes_stream()
271271+ .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))
272272+ .into_async_read()
273273+ .compat(),
274274+ );
275275+276276+ let mut lines = BufReader::new(stream).lines();
277277+ let mut ops = Vec::new();
278278+279279+ loop {
280280+ match lines.next_line().await {
281281+ Ok(Some(line)) => {
282282+ let line = line.trim();
283283+ if line.is_empty() {
284284+ continue;
285285+ }
286286+ match serde_json::from_str::<SeqOp>(line) {
287287+ Ok(op) => ops.push(op),
288288+ Err(e) => log::warn!("failed to parse seq op: {e} ({line})"),
289289+ }
290290+ }
291291+ Ok(None) => break,
292292+ Err(e) => {
293293+ log::warn!(
294294+ "transport error mid-seq-page: {}; returning partial page",
295295+ e
296296+ );
297297+ break;
298298+ }
299299+ }
300300+ }
301301+302302+ Ok(SeqPage { ops })
303303+}
304304+305305+/// Poll an upstream PLC server using seq-number-based cursoring
306306+///
307307+/// Uses `/export?after=<seq>` — each op from the server carries a `seq` field
308308+/// which is a globally monotonic unsigned integer. Because seq is unique per op
309309+/// there is no need for page-boundary deduplication.
310310+///
311311+/// Pages are sent to `dest`. Returns when the channel closes.
312312+pub async fn poll_upstream_seq(
313313+ after: Option<u64>,
314314+ base: Url,
315315+ throttle: Duration,
316316+ dest: mpsc::Sender<SeqPage>,
317317+) -> anyhow::Result<&'static str> {
318318+ log::info!("starting seq upstream poller at {base} after {after:?}");
319319+ let mut tick = tokio::time::interval(throttle);
320320+ let mut last_seq: u64 = after.unwrap_or(0);
321321+322322+ loop {
323323+ tick.tick().await;
324324+325325+ let mut url = base.clone();
326326+ url.query_pairs_mut()
327327+ .append_pair("after", &last_seq.to_string());
328328+329329+ let page = match get_seq_page(url).await {
330330+ Ok(p) => p,
331331+ Err(e) => {
332332+ log::warn!("error polling upstream (seq): {e}");
333333+ continue;
334334+ }
335335+ };
336336+337337+ if let Some(last) = page.ops.last() {
338338+ last_seq = last.seq;
339339+ }
340340+341341+ if !page.is_empty() {
342342+ match dest.try_send(page) {
343343+ Ok(()) => {}
344344+ Err(mpsc::error::TrySendError::Full(page)) => {
345345+ log::warn!("seq poll: destination channel full, awaiting...");
346346+ dest.send(page).await?;
347347+ }
348348+ e => e?,
349349+ };
350350+ }
351351+ }
352352+}
353353+354354+/// Tail the upstream PLC `/export/stream` WebSocket endpoint
355355+///
356356+/// `cursor` is a seq number to resume from. The server only supports backfill
357357+/// of up to ~1 week (server-configurable), so this cannot replay from seq 0.
358358+/// Use `poll_upstream_seq` to catch up first, then hand off to this function.
359359+///
360360+/// Messages arrive as single-op `SeqPage`s sent to `dest`. Returns on
361361+/// disconnect so the caller can reconnect or fall back to polling.
362362+pub async fn tail_upstream_stream(
363363+ cursor: Option<u64>,
364364+ base: Url,
365365+ dest: mpsc::Sender<SeqPage>,
366366+) -> anyhow::Result<()> {
367367+ use futures::StreamExt;
368368+ use tokio_tungstenite::{connect_async, tungstenite::Message};
369369+370370+ let mut url = base.clone();
371371+ // convert ws(s):// scheme if needed; some callers pass http(s)://
372372+ let ws_scheme = match url.scheme() {
373373+ "https" => "wss",
374374+ "http" => "ws",
375375+ _ => "ws",
376376+ }
377377+ .to_owned();
378378+ url.set_scheme(&ws_scheme)
379379+ .map_err(|_| anyhow::anyhow!("failed to set websocket scheme"))?;
380380+ if let Some(seq) = cursor {
381381+ url.query_pairs_mut()
382382+ .append_pair("cursor", &seq.to_string());
383383+ }
384384+385385+ log::info!("connecting to stream: {url}");
386386+ let (mut ws, _) = connect_async(url.as_str()).await?;
387387+ log::info!("stream connected");
388388+389389+ while let Some(msg) = ws.next().await {
390390+ let msg = msg?;
391391+ let text = match msg {
392392+ Message::Text(t) => t,
393393+ Message::Close(_) => {
394394+ log::info!("stream closed by server");
395395+ break;
396396+ }
397397+ _ => continue,
398398+ };
399399+400400+ let op: SeqOp = match serde_json::from_str(&text) {
401401+ Ok(op) => op,
402402+ Err(e) => {
403403+ log::warn!("failed to parse stream event: {e} ({text})");
404404+ continue;
405405+ }
406406+ };
407407+408408+ let page = SeqPage { ops: vec![op] };
409409+ if dest.send(page).await.is_err() {
410410+ log::info!("stream dest channel closed, stopping");
411411+ break;
412412+ }
413413+ }
414414+415415+ Ok(())
258416}
259417260418#[cfg(test)]
-119
tests/fjall_mirror_test.rs
···11-use allegedly::{
22- ExperimentalConf, FjallDb, ListenConf, backfill_to_fjall, bin::bin_init, poll_upstream,
33- serve_fjall,
44-};
55-use futures::TryFutureExt;
66-use reqwest::{StatusCode, Url};
77-use std::time::Duration;
88-use tokio::sync::mpsc;
99-1010-#[tokio::test]
1111-async fn test_fjall_mirror_mode() -> anyhow::Result<()> {
1212- bin_init(false);
1313- let temp_dir = tempfile::tempdir()?;
1414- let db_path = temp_dir.path().join("fjall.db");
1515- let db = FjallDb::open(&db_path)?;
1616-1717- // backfill (limited to 1 page)
1818- let (backfill_tx, backfill_rx) = mpsc::channel(1);
1919- let (upstream_tx, mut upstream_rx) = mpsc::channel(1);
2020-2121- let upstream_url: Url = "https://plc.directory".parse()?;
2222-2323- // spawn upstream poller
2424- tokio::spawn({
2525- let mut base = upstream_url.clone();
2626- base.set_path("/export");
2727- async move {
2828- // poll fresh data so our data matches the upstream
2929- let start_at = chrono::Utc::now() - chrono::Duration::try_minutes(5).unwrap();
3030- let _ = poll_upstream(
3131- Some(start_at),
3232- base,
3333- Duration::from_millis(100),
3434- upstream_tx,
3535- )
3636- .inspect_err(|err| log::error!("failed to poll upstream: {err}"))
3737- .await;
3838- }
3939- });
4040-4141- log::info!("waiting for page from upstream...");
4242- let page = upstream_rx
4343- .recv()
4444- .await
4545- .expect("to receive page from upstream");
4646- log::info!("received page with {} ops", page.ops.len());
4747- let sample_did = page.ops.last().unwrap().did.clone();
4848- println!("will check did {sample_did}");
4949-5050- backfill_tx.send(page).await?;
5151- let backfill_handle = tokio::spawn(backfill_to_fjall(db.clone(), false, backfill_rx, None));
5252- // since we are using a channel with 1 capacity, we can wait that the backfill task received
5353- // the page by reserving on the channel, and then drop the sender to signal the backfill task to finish
5454- let _ = backfill_tx.reserve().await;
5555- drop(backfill_tx);
5656- backfill_handle.await??;
5757-5858- // todo: should probably use a random port here but shrug
5959- let listener = std::net::TcpListener::bind("127.0.0.1:17548")?;
6060- let port = listener.local_addr()?.port();
6161- drop(listener);
6262-6363- let listen_conf = ListenConf::Bind(([127, 0, 0, 1], port).into());
6464- let exp_conf = ExperimentalConf {
6565- acme_domain: None,
6666- write_upstream: false,
6767- };
6868-6969- let server_handle = tokio::spawn({
7070- let db = db.clone();
7171- let upstream = upstream_url.clone();
7272- serve_fjall(upstream, listen_conf, exp_conf, db)
7373- .inspect_err(|err| log::error!("failed to serve: {err}"))
7474- });
7575- let base_url = format!("http://127.0.0.1:{}", port);
7676-7777- // wait for server to be ready
7878- let client = reqwest::Client::new();
7979- let health_url = format!("{base_url}/_health");
8080- let mut ready = None;
8181- for _ in 0..50 {
8282- let resp = match client.get(&health_url).send().await {
8383- Ok(resp) => resp,
8484- Err(err) => {
8585- log::warn!("failed to get health: {err}");
8686- continue;
8787- }
8888- };
8989- if resp.status().is_success() {
9090- let json: serde_json::Value = resp.json().await?;
9191- ready = Some(json);
9292- break;
9393- }
9494- tokio::time::sleep(Duration::from_millis(100)).await;
9595- }
9696- assert!(ready.is_some(), "server failed to start");
9797- assert_eq!(ready.unwrap()["server"], "allegedly (mirror/fjall)");
9898-9999- // verify did resolution against upstream
100100- let mut doc_url = upstream_url.clone();
101101- doc_url.set_path(&format!("/{sample_did}"));
102102- let upstream_resp = client.get(doc_url).send().await?;
103103- assert_eq!(upstream_resp.status(), StatusCode::OK);
104104- let upstream_doc: serde_json::Value = upstream_resp.json().await?;
105105-106106- let local_doc_url = format!("{base_url}/{sample_did}");
107107- let resp = client.get(local_doc_url).send().await?;
108108- assert_eq!(resp.status(), StatusCode::OK);
109109- let doc: serde_json::Value = resp.json().await?;
110110-111111- assert_eq!(
112112- doc, upstream_doc,
113113- "local doc != upstream doc.\nlocal: {:#?}\nupstream: {:#?}",
114114- doc, upstream_doc
115115- );
116116-117117- server_handle.abort();
118118- Ok(())
119119-}