···39tokio = { version = "1.47.1", features = ["full"] }
40tokio-postgres = { version = "0.7.13", features = ["with-chrono-0_4", "with-serde_json-1"] }
41tokio-stream = { version = "0.1.17", features = ["io-util"] }
042tokio-util = { version = "0.7.16", features = ["compat"] }
43tracing = "0.1.41"
44tracing-opentelemetry = "0.31.0"
···53p256 = "0.13.2"
54k256 = "0.13.4"
55serde_ipld_dagcbor = "0.6.4"
056
···39tokio = { version = "1.47.1", features = ["full"] }
40tokio-postgres = { version = "0.7.13", features = ["with-chrono-0_4", "with-serde_json-1"] }
41tokio-stream = { version = "0.1.17", features = ["io-util"] }
42+tokio-tungstenite = { version = "0.26", features = ["native-tls"] }
43tokio-util = { version = "0.7.16", features = ["compat"] }
44tracing = "0.1.41"
45tracing-opentelemetry = "0.31.0"
···54p256 = "0.13.2"
55k256 = "0.13.4"
56serde_ipld_dagcbor = "0.6.4"
57+ordered-varint = "2.0.0"
58
+2-2
readme.md
···15 --wrap-pg "postgresql://user:pass@pg-host:5432/plc-db"
16 ```
1718-- Run a fully self-contained mirror using an embedded fjall database (no postgres needed):
1920 ```bash
21 # backfill first
22- allegedly backfill --to-fjall ./plc-data
2324 # then run the mirror
25 allegedly mirror --wrap-fjall ./plc-data
···15 --wrap-pg "postgresql://user:pass@pg-host:5432/plc-db"
16 ```
1718+- Run a fully self-contained mirror using an embedded fjall database (no postgres or local plc server needed):
1920 ```bash
21 # backfill first
22+ allegedly backfill --no-bulk --to-fjall ./plc-data
2324 # then run the mirror
25 allegedly mirror --wrap-fjall ./plc-data
+16-33
src/bin/backfill.rs
···1use allegedly::{
2- Db, Dt, ExportPage, FjallDb, FolderSource, HttpSource, backfill, backfill_to_fjall,
3- backfill_to_pg,
4 bin::{GlobalArgs, bin_init},
5- full_pages, logo, pages_to_fjall, pages_to_pg, pages_to_stdout, poll_upstream,
06};
7use clap::Parser;
8use reqwest::Url;
···23 /// Local folder to fetch bundles from (overrides `http`)
24 #[arg(long)]
25 dir: Option<PathBuf>,
26- /// Local fjall database to fetch raw ops from (overrides `http` and `dir`)
27- #[arg(long, conflicts_with_all = ["dir"])]
28- from_fjall: Option<PathBuf>,
29 /// Don't do weekly bulk-loading at all.
30 ///
31 /// overrides `http` and `dir`, makes catch_up redundant
···49 /// only used if `--to-postgres` or `--to-fjall` is present
50 #[arg(long, action)]
51 reset: bool,
52- /// Bulk load into a local fjall embedded database
053 ///
54 /// Pass a directory path for the fjall database
55 #[arg(long, conflicts_with_all = ["to_postgres", "postgres_cert"])]
···70 Args {
71 http,
72 dir,
73- from_fjall,
74 no_bulk,
75 source_workers,
76 to_postgres,
···95 };
9697 let (poll_tx, poll_out) = mpsc::channel::<ExportPage>(128); // normal/small pages
98- let (full_tx, full_out) = mpsc::channel(1); // don't need to buffer at this filter
99100 // set up sources
101 if no_bulk {
···114 let mut upstream = upstream;
115 upstream.set_path("/export");
116 let throttle = Duration::from_millis(upstream_throttle_ms);
117- tasks.spawn(poll_upstream(None, upstream, throttle, poll_tx));
118- tasks.spawn(full_pages(poll_out, full_tx));
119 if let Some(fjall_path) = to_fjall {
120 log::trace!("opening fjall db at {fjall_path:?}...");
121 let db = FjallDb::open(&fjall_path)?;
122 log::trace!("opened fjall db");
123124- tasks.spawn(pages_to_fjall(db, full_out));
00000125 } else {
00126 tasks.spawn(pages_to_stdout(full_out, None));
127 }
128 } else {
129 // fun mode
130131 // set up bulk sources
132- if let Some(fjall_path) = from_fjall {
133- log::trace!("opening source fjall db at {fjall_path:?}...");
134- let db = FjallDb::open(&fjall_path)?;
135- log::trace!("opened source fjall db");
136- tasks.spawn(backfill(db, bulk_tx, source_workers.unwrap_or(4), until));
137- } else if let Some(dir) = dir {
138 if http != DEFAULT_HTTP.parse()? {
139 anyhow::bail!(
140 "non-default bulk http setting can't be used with bulk dir setting ({dir:?})"
···167 }
168169 // set up sinks
170- if let Some(fjall_path) = to_fjall {
171- log::trace!("opening fjall db at {fjall_path:?}...");
172- let db = FjallDb::open(&fjall_path)?;
173- log::trace!("opened fjall db");
174-175- tasks.spawn(backfill_to_fjall(
176- db.clone(),
177- reset,
178- bulk_out,
179- found_last_tx,
180- ));
181- if catch_up {
182- tasks.spawn(pages_to_fjall(db, full_out));
183- }
184- } else if let Some(pg_url) = to_postgres {
185 log::trace!("connecting to postgres...");
186 let db = Db::new(pg_url.as_str(), postgres_cert).await?;
187 log::trace!("connected to postgres");
···1use allegedly::{
2+ Db, Dt, ExportPage, FjallDb, FolderSource, HttpSource, SeqPage, backfill, backfill_to_pg,
03 bin::{GlobalArgs, bin_init},
4+ full_pages, full_pages_seq, logo, pages_to_pg, pages_to_stdout, poll_upstream,
5+ poll_upstream_seq, seq_pages_to_fjall,
6};
7use clap::Parser;
8use reqwest::Url;
···23 /// Local folder to fetch bundles from (overrides `http`)
24 #[arg(long)]
25 dir: Option<PathBuf>,
00026 /// Don't do weekly bulk-loading at all.
27 ///
28 /// overrides `http` and `dir`, makes catch_up redundant
···46 /// only used if `--to-postgres` or `--to-fjall` is present
47 #[arg(long, action)]
48 reset: bool,
49+ /// Load into a local fjall embedded database
50+ /// (doesnt support bulk yet unless loading from another fjall db)
51 ///
52 /// Pass a directory path for the fjall database
53 #[arg(long, conflicts_with_all = ["to_postgres", "postgres_cert"])]
···68 Args {
69 http,
70 dir,
071 no_bulk,
72 source_workers,
73 to_postgres,
···92 };
9394 let (poll_tx, poll_out) = mpsc::channel::<ExportPage>(128); // normal/small pages
95+ let (full_tx, full_out) = mpsc::channel::<ExportPage>(1); // don't need to buffer at this filter
9697 // set up sources
98 if no_bulk {
···111 let mut upstream = upstream;
112 upstream.set_path("/export");
113 let throttle = Duration::from_millis(upstream_throttle_ms);
00114 if let Some(fjall_path) = to_fjall {
115 log::trace!("opening fjall db at {fjall_path:?}...");
116 let db = FjallDb::open(&fjall_path)?;
117 log::trace!("opened fjall db");
118119+ let (poll_tx, poll_out) = mpsc::channel::<SeqPage>(128); // normal/small pages
120+ let (full_tx, full_out) = mpsc::channel::<SeqPage>(1); // don't need to buffer at this filter
121+122+ tasks.spawn(poll_upstream_seq(None, upstream, throttle, poll_tx));
123+ tasks.spawn(full_pages_seq(poll_out, full_tx));
124+ tasks.spawn(seq_pages_to_fjall(db, full_out));
125 } else {
126+ tasks.spawn(poll_upstream(None, upstream, throttle, poll_tx));
127+ tasks.spawn(full_pages(poll_out, full_tx));
128 tasks.spawn(pages_to_stdout(full_out, None));
129 }
130 } else {
131 // fun mode
132133 // set up bulk sources
134+ if let Some(dir) = dir {
00000135 if http != DEFAULT_HTTP.parse()? {
136 anyhow::bail!(
137 "non-default bulk http setting can't be used with bulk dir setting ({dir:?})"
···164 }
165166 // set up sinks
167+ if let Some(pg_url) = to_postgres {
00000000000000168 log::trace!("connecting to postgres...");
169 let db = Db::new(pg_url.as_str(), postgres_cert).await?;
170 log::trace!("connected to postgres");
+85-10
src/bin/mirror.rs
···1use allegedly::{
2 Db, ExperimentalConf, FjallDb, ListenConf,
3 bin::{GlobalArgs, InstrumentationArgs, bin_init},
4- logo, pages_to_fjall, pages_to_pg, poll_upstream, serve, serve_fjall,
05};
6use clap::Parser;
7use reqwest::Url;
···69 /// accept writes! by forwarding them upstream
70 #[arg(long, action, env = "ALLEGEDLY_EXPERIMENTAL_WRITE_UPSTREAM")]
71 experimental_write_upstream: bool,
000072}
7374pub async fn run(
···89 acme_ipv6,
90 experimental_acme_domain,
91 experimental_write_upstream,
092 }: Args,
93 sync: bool,
94) -> anyhow::Result<()> {
···122 let db = FjallDb::open(&fjall_path)?;
123 if compact_fjall {
124 log::info!("compacting fjall...");
125- db.compact()?; // blocking here is fine, we didn't start anything yet
126 }
127128- log::debug!("getting the latest op from fjall...");
129- let latest = db
130 .get_latest()?
0131 .expect("there to be at least one op in the db. did you backfill?");
132- log::info!("starting polling from {latest}...");
133134- let (send_page, recv_page) = mpsc::channel(8);
135136- let mut poll_url = upstream.clone();
137- poll_url.set_path("/export");
00138 let throttle = Duration::from_millis(upstream_throttle_ms);
00000000000000000000000139140- tasks.spawn(poll_upstream(Some(latest), poll_url, throttle, send_page));
141- tasks.spawn(pages_to_fjall(db.clone(), recv_page));
0000000000000000000000000000000000000000001420143 tasks.spawn(serve_fjall(upstream, listen_conf, experimental_conf, db));
144 } else {
145 let wrap = wrap.ok_or(anyhow::anyhow!(
···1use allegedly::{
2 Db, ExperimentalConf, FjallDb, ListenConf,
3 bin::{GlobalArgs, InstrumentationArgs, bin_init},
4+ logo, pages_to_pg, poll_upstream, poll_upstream_seq, seq_pages_to_fjall, serve, serve_fjall,
5+ tail_upstream_stream,
6};
7use clap::Parser;
8use reqwest::Url;
···70 /// accept writes! by forwarding them upstream
71 #[arg(long, action, env = "ALLEGEDLY_EXPERIMENTAL_WRITE_UPSTREAM")]
72 experimental_write_upstream: bool,
73+ /// switch from polling to /export/stream once the latest op is within
74+ /// this many days of now (plc.directory only supports ~1 week of backfill)
75+ #[arg(long, env = "ALLEGEDLY_STREAM_CUTOVER_DAYS", default_value = "5")]
76+ stream_cutover_days: u32,
77}
7879pub async fn run(
···94 acme_ipv6,
95 experimental_acme_domain,
96 experimental_write_upstream,
97+ stream_cutover_days,
98 }: Args,
99 sync: bool,
100) -> anyhow::Result<()> {
···128 let db = FjallDb::open(&fjall_path)?;
129 if compact_fjall {
130 log::info!("compacting fjall...");
131+ db.compact()?;
132 }
133134+ log::debug!("getting the latest seq from fjall...");
135+ let latest_seq = db
136 .get_latest()?
137+ .map(|(seq, _)| seq)
138 .expect("there to be at least one op in the db. did you backfill?");
139+ log::info!("starting seq polling from seq {latest_seq}...");
140141+ let (send_page, recv_page) = mpsc::channel::<allegedly::SeqPage>(8);
142143+ let mut export_url = upstream.clone();
144+ export_url.set_path("/export");
145+ let mut stream_url = upstream.clone();
146+ stream_url.set_path("/export/stream");
147 let throttle = Duration::from_millis(upstream_throttle_ms);
148+ let cutover_age = Duration::from_secs(stream_cutover_days as u64 * 86_400);
149+150+ // the poll -> stream task: poll until we're caught up, then switch to stream.
151+ // on stream disconnect, fall back to polling to resync.
152+ let send_page_bg = send_page.clone();
153+ tasks.spawn(async move {
154+ let mut current_seq = latest_seq;
155+ loop {
156+ log::info!("seq polling from seq {current_seq}");
157+ let (inner_tx, mut inner_rx) = mpsc::channel::<allegedly::SeqPage>(8);
158+159+ // run poller; it ends only when the channel closes
160+ let poll_url = export_url.clone();
161+ let poll_task = tokio::spawn(poll_upstream_seq(
162+ Some(current_seq),
163+ poll_url,
164+ throttle,
165+ inner_tx,
166+ ));
167+168+ // drain pages from poller until the last op is within cutover_age of now,
169+ // meaning we're close enough to the tip that the stream can cover the rest
170+ let mut last_seq_from_poll = current_seq;
171172+ while let Some(page) = inner_rx.recv().await {
173+ let near_tip = page.ops.last().map_or(false, |op| {
174+ let age = chrono::Utc::now().signed_duration_since(op.created_at);
175+ age.to_std().map_or(false, |d| d <= cutover_age)
176+ });
177+ if let Some(last) = page.ops.last() {
178+ last_seq_from_poll = last.seq;
179+ }
180+ let _ = send_page_bg.send(page).await;
181+ if near_tip {
182+ break;
183+ }
184+ }
185+186+ poll_task.abort();
187+ current_seq = last_seq_from_poll;
188+189+ // switch to streaming
190+ log::info!("caught up at seq {current_seq}, switching to /export/stream");
191+ let (stream_inner_tx, mut stream_inner_rx) = mpsc::channel::<allegedly::SeqPage>(8);
192+ let stream_task = tokio::spawn(tail_upstream_stream(
193+ Some(current_seq),
194+ stream_url.clone(),
195+ stream_inner_tx,
196+ ));
197+198+ while let Some(page) = stream_inner_rx.recv().await {
199+ if let Some(last) = page.ops.last() {
200+ current_seq = last.seq;
201+ }
202+ if send_page_bg.send(page).await.is_err() {
203+ stream_task.abort();
204+ return anyhow::Ok("fjall-poll-stream (dest closed)");
205+ }
206+ }
207+208+ // stream ended/errored — loop back to polling to resync
209+ match stream_task.await {
210+ Ok(Ok(())) => log::info!("stream closed cleanly, resyncing via poll"),
211+ Ok(Err(e)) => log::warn!("stream error: {e}, resyncing via poll"),
212+ Err(e) => log::warn!("stream task join error: {e}"),
213+ }
214+ }
215+ });
216217+ tasks.spawn(seq_pages_to_fjall(db.clone(), recv_page));
218 tasks.spawn(serve_fjall(upstream, listen_conf, experimental_conf, db));
219 } else {
220 let wrap = wrap.ok_or(anyhow::anyhow!(
+72-3
src/lib.rs
···5mod backfill;
6mod cached_value;
7mod client;
8-mod crypto;
9pub mod doc;
10mod mirror;
11mod plc_fjall;
···21pub use client::{CLIENT, UA};
22pub use mirror::{ExperimentalConf, ListenConf, serve, serve_fjall};
23pub use plc_fjall::{
24- FjallDb, audit as audit_fjall, backfill_to_fjall, fix_ops as fix_ops_fjall, pages_to_fjall,
25};
26pub use plc_pg::{Db, backfill_to_pg, pages_to_pg};
27-pub use poll::{PageBoundaryState, get_page, poll_upstream};
0028pub use ratelimit::{CreatePlcOpLimiter, GovernorMiddleware, IpLimiters};
29pub use weekly::{BundleSource, FolderSource, HttpSource, Week, pages_to_weeks, week_to_pages};
30···85 }
86}
87000000000000000000000000000000000000000088/// page forwarder who drops its channels on receipt of a small page
89///
90/// PLC will return up to 1000 ops on a page, and returns full pages until it
···92pub async fn full_pages(
93 mut rx: mpsc::Receiver<ExportPage>,
94 tx: mpsc::Sender<ExportPage>,
00000000000000000000000000095) -> anyhow::Result<&'static str> {
96 while let Some(page) = rx.recv().await {
97 let n = page.ops.len();
···5mod backfill;
6mod cached_value;
7mod client;
8+pub mod crypto;
9pub mod doc;
10mod mirror;
11mod plc_fjall;
···21pub use client::{CLIENT, UA};
22pub use mirror::{ExperimentalConf, ListenConf, serve, serve_fjall};
23pub use plc_fjall::{
24+ FjallDb, audit as audit_fjall, backfill_to_fjall, fix_ops as fix_ops_fjall, seq_pages_to_fjall,
25};
26pub use plc_pg::{Db, backfill_to_pg, pages_to_pg};
27+pub use poll::{
28+ PageBoundaryState, get_page, poll_upstream, poll_upstream_seq, tail_upstream_stream,
29+};
30pub use ratelimit::{CreatePlcOpLimiter, GovernorMiddleware, IpLimiters};
31pub use weekly::{BundleSource, FolderSource, HttpSource, Week, pages_to_weeks, week_to_pages};
32···87 }
88}
8990+/// A PLC op from `/export?after=<seq>` or `/export/stream`
91+///
92+/// Both endpoints return the `seq` field per op, which is a globally monotonic
93+/// unsigned integer assigned by the PLC directory.
94+#[derive(Debug, Clone, Deserialize)]
95+#[serde(rename_all = "camelCase")]
96+pub struct SeqOp {
97+ pub seq: u64,
98+ pub did: String,
99+ pub cid: String,
100+ pub created_at: Dt,
101+ #[serde(default)]
102+ pub nullified: bool,
103+ pub operation: Box<serde_json::value::RawValue>,
104+}
105+106+impl From<SeqOp> for Op {
107+ fn from(s: SeqOp) -> Self {
108+ Op {
109+ did: s.did,
110+ cid: s.cid,
111+ created_at: s.created_at,
112+ nullified: s.nullified,
113+ operation: s.operation,
114+ }
115+ }
116+}
117+118+/// A page of sequenced ops from `/export?after=<seq>`
119+#[derive(Debug)]
120+pub struct SeqPage {
121+ pub ops: Vec<SeqOp>,
122+}
123+124+impl SeqPage {
125+ pub fn is_empty(&self) -> bool {
126+ self.ops.is_empty()
127+ }
128+}
129+130/// page forwarder who drops its channels on receipt of a small page
131///
132/// PLC will return up to 1000 ops on a page, and returns full pages until it
···134pub async fn full_pages(
135 mut rx: mpsc::Receiver<ExportPage>,
136 tx: mpsc::Sender<ExportPage>,
137+) -> anyhow::Result<&'static str> {
138+ while let Some(page) = rx.recv().await {
139+ let n = page.ops.len();
140+ if n < 900 {
141+ let last_age = page.ops.last().map(|op| chrono::Utc::now() - op.created_at);
142+ let Some(age) = last_age else {
143+ log::info!("full_pages done, empty final page");
144+ return Ok("full pages (hmm)");
145+ };
146+ if age <= chrono::TimeDelta::hours(6) {
147+ log::info!("full_pages done, final page of {n} ops");
148+ } else {
149+ log::warn!("full_pages finished with small page of {n} ops, but it's {age} old");
150+ }
151+ return Ok("full pages (cool)");
152+ }
153+ log::trace!("full_pages: continuing with page of {n} ops");
154+ tx.send(page).await?;
155+ }
156+ Err(anyhow::anyhow!(
157+ "full_pages ran out of source material, sender closed"
158+ ))
159+}
160+161+pub async fn full_pages_seq(
162+ mut rx: mpsc::Receiver<SeqPage>,
163+ tx: mpsc::Sender<SeqPage>,
164) -> anyhow::Result<&'static str> {
165 while let Some(page) = rx.recv().await {
166 let n = page.ops.len();
+15-24
src/mirror/fjall.rs
···1415#[derive(Clone)]
16struct FjallSyncInfo {
17- latest_at: CachedValue<Dt, GetFjallLatestAt>,
18 upstream_status: CachedValue<PlcStatus, CheckUpstream>,
19}
2021#[derive(Clone)]
22-struct GetFjallLatestAt(FjallDb);
23-impl Fetcher<Dt> for GetFjallLatestAt {
24- async fn fetch(&self) -> Result<Dt, Box<dyn std::error::Error>> {
25 let db = self.0.clone();
26- let now = tokio::task::spawn_blocking(move || db.get_latest())
27 .await??
28- .ok_or(anyhow::anyhow!(
29- "expected to find at least one thing in the db"
30- ))?;
31- Ok(now)
32 }
33}
34···116 if !ok {
117 overall_status = StatusCode::BAD_GATEWAY;
118 }
119- let latest = sync_info.latest_at.get().await.ok();
00120121 (
122 overall_status,
···124 "server": "allegedly (mirror/fjall)",
125 "version": env!("CARGO_PKG_VERSION"),
126 "upstream_plc": upstream_status,
127- "latest_at": latest,
0128 })),
129 )
130}
···250251#[derive(Deserialize)]
252struct ExportQuery {
253- after: Option<String>,
254 #[allow(dead_code)] // we just cap at 1000 for now, matching reference impl
255 count: Option<usize>,
256}
···261 Query(query): Query<ExportQuery>,
262 Data(FjallState { fjall, .. }): Data<&FjallState>,
263) -> Result<Body> {
264- let after = if let Some(a) = query.after {
265- Some(
266- chrono::DateTime::parse_from_rfc3339(&a)
267- .map_err(|e| Error::from_string(e.to_string(), StatusCode::BAD_REQUEST))?
268- .with_timezone(&chrono::Utc),
269- )
270- } else {
271- None
272- };
273-274 let limit = 1000;
275 let db = fjall.clone();
276277 let ops = tokio::task::spawn_blocking(move || {
278- let iter = db.export_ops(after.unwrap_or(Dt::UNIX_EPOCH)..)?;
279 iter.take(limit).collect::<anyhow::Result<Vec<_>>>()
280 })
281 .await
···324 .expect("reqwest client to build");
325326 let sync_info = FjallSyncInfo {
327- latest_at: CachedValue::new(GetFjallLatestAt(fjall.clone()), Duration::from_secs(2)),
328 upstream_status: CachedValue::new(
329 CheckUpstream(upstream.clone(), client.clone()),
330 Duration::from_secs(6),
···1415#[derive(Clone)]
16struct FjallSyncInfo {
17+ latest: CachedValue<(u64, Dt), GetFjallLatest>,
18 upstream_status: CachedValue<PlcStatus, CheckUpstream>,
19}
2021#[derive(Clone)]
22+struct GetFjallLatest(FjallDb);
23+impl Fetcher<(u64, Dt)> for GetFjallLatest {
24+ async fn fetch(&self) -> Result<(u64, Dt), Box<dyn std::error::Error>> {
25 let db = self.0.clone();
26+ tokio::task::spawn_blocking(move || db.get_latest())
27 .await??
28+ .ok_or_else(|| anyhow::anyhow!("db is empty").into())
00029 }
30}
31···113 if !ok {
114 overall_status = StatusCode::BAD_GATEWAY;
115 }
116+ let latest = sync_info.latest.get().await.ok();
117+ let latest_at = latest.map(|(_, dt)| dt);
118+ let latest_seq = latest.map(|(seq, _)| seq);
119120 (
121 overall_status,
···123 "server": "allegedly (mirror/fjall)",
124 "version": env!("CARGO_PKG_VERSION"),
125 "upstream_plc": upstream_status,
126+ "latest_at": latest_at,
127+ "latest_seq": latest_seq,
128 })),
129 )
130}
···250251#[derive(Deserialize)]
252struct ExportQuery {
253+ after: Option<u64>,
254 #[allow(dead_code)] // we just cap at 1000 for now, matching reference impl
255 count: Option<usize>,
256}
···261 Query(query): Query<ExportQuery>,
262 Data(FjallState { fjall, .. }): Data<&FjallState>,
263) -> Result<Body> {
264+ let after = query.after.unwrap_or(0);
000000000265 let limit = 1000;
266 let db = fjall.clone();
267268 let ops = tokio::task::spawn_blocking(move || {
269+ let iter = db.export_ops(after..)?;
270 iter.take(limit).collect::<anyhow::Result<Vec<_>>>()
271 })
272 .await
···315 .expect("reqwest client to build");
316317 let sync_info = FjallSyncInfo {
318+ latest: CachedValue::new(GetFjallLatest(fjall.clone()), Duration::from_secs(2)),
319 upstream_status: CachedValue::new(
320 CheckUpstream(upstream.clone(), client.clone()),
321 Duration::from_secs(6),
+151-178
src/plc_fjall.rs
···1use crate::{
2- BundleSource, Dt, ExportPage, InvalidOp, Op as CommonOp, PageBoundaryState, Week,
3 crypto::{AssuranceResults, DidKey, Signature, assure_valid_sig},
4};
5use anyhow::Context;
6use data_encoding::BASE32_NOPAD;
7use fjall::{Database, Keyspace, KeyspaceCreateOptions, PersistMode, config::BlockSizePolicy};
8-use futures::Future;
9use serde::{Deserialize, Serialize};
10use std::collections::BTreeMap;
11use std::fmt;
12use std::path::Path;
13use std::sync::Arc;
14use std::time::Instant;
15-use tokio::io::{AsyncRead, AsyncWriteExt};
16use tokio::sync::{mpsc, oneshot};
1718const SEP: u8 = 0;
000000001920type IpldCid = cid::CidGeneric<64>;
21···54 format!("did:plc:{decoded}")
55}
5657-fn op_key(created_at: &Dt, cid_suffix: &[u8]) -> Vec<u8> {
58- let micros = created_at.timestamp_micros() as u64;
59- let mut key = Vec::with_capacity(8 + 1 + cid_suffix.len());
60- key.extend_from_slice(µs.to_be_bytes());
61- key.push(SEP);
62- key.extend_from_slice(cid_suffix);
63- key
64-}
65-66fn by_did_prefix(did: &str) -> anyhow::Result<Vec<u8>> {
67 let mut p = Vec::with_capacity(BASE32_NOPAD.decode_len(did.len())? + 1);
68 encode_did(&mut p, did)?;
···70 Ok(p)
71}
7273-fn by_did_key(did: &str, created_at: &Dt, cid_suffix: &[u8]) -> anyhow::Result<Vec<u8>> {
074 let mut key = by_did_prefix(did)?;
75- let micros = created_at.timestamp_micros() as u64;
76- key.extend_from_slice(µs.to_be_bytes());
77- key.push(SEP);
78- key.extend_from_slice(cid_suffix);
79 Ok(key)
80-}
81-82-fn decode_timestamp(key: &[u8]) -> anyhow::Result<Dt> {
83- let micros = u64::from_be_bytes(
84- key.try_into()
85- .map_err(|e| anyhow::anyhow!("invalid timestamp key {key:?}: {e}"))?,
86- );
87- Dt::from_timestamp_micros(micros as i64)
88- .ok_or_else(|| anyhow::anyhow!("invalid timestamp {micros}"))
89}
9091/// CID string → binary CID bytes
···827 Ok(results)
828}
829830-// this is basically Op, but without the cid and created_at fields
831-// since we have them in the key already
832#[derive(Debug, Deserialize, Serialize)]
833#[serde(rename_all = "camelCase")]
834struct DbOp {
835 #[serde(with = "serde_bytes")]
836 pub did: Vec<u8>,
837 #[serde(with = "serde_bytes")]
838- pub cid_prefix: Vec<u8>,
0839 pub nullified: bool,
840 pub operation: StoredOp,
841}
···857858struct FjallInner {
859 db: Database,
0860 ops: Keyspace,
0861 by_did: Keyspace,
862}
863···915 Ok(())
916 }
917918- pub fn get_latest(&self) -> anyhow::Result<Option<Dt>> {
0919 let Some(guard) = self.inner.ops.last_key_value() else {
920 return Ok(None);
921 };
922- let key = guard
923- .key()
924- .map_err(|e| anyhow::anyhow!("fjall key error: {e}"))?;
925-926- key.get(..8)
927- .ok_or_else(|| anyhow::anyhow!("invalid timestamp key {key:?}"))
928- .map(decode_timestamp)
929- .flatten()
930- .map(Some)
931 }
932933- pub fn insert_op<const VERIFY: bool>(&self, op: &CommonOp) -> anyhow::Result<usize> {
934 let cid_bytes = decode_cid_str(&op.cid)?;
935- let cid_prefix = cid_bytes
936- .get(..30)
937- .ok_or_else(|| anyhow::anyhow!("invalid cid length (prefix): {}", op.cid))?
938- .to_vec();
939- let cid_suffix = cid_bytes
940- .get(30..)
941- .ok_or_else(|| anyhow::anyhow!("invalid cid length (suffix): {}", op.cid))?;
942943 let op_json: serde_json::Value = serde_json::from_str(op.operation.get())?;
944 let (stored, mut errors) = StoredOp::from_json_value(op_json);
···960 .prev
961 .as_ref()
962 .map(|prev_cid| {
0000963 self._ops_for_did(&op.did)
964 .map(|ops| {
965 ops.rev()
···1000 encode_did(&mut encoded_did, &op.did)?;
1001 encoded_did
1002 },
1003- cid_prefix,
01004 nullified: op.nullified,
1005 operation,
1006 };
100700001008 let mut batch = self.inner.db.batch();
1009- batch.insert(
1010- &self.inner.ops,
1011- op_key(&op.created_at, cid_suffix),
1012- rmp_serde::to_vec(&db_op)?,
1013- );
1014- batch.insert(
1015- &self.inner.by_did,
1016- by_did_key(&op.did, &op.created_at, cid_suffix)?,
1017- &[],
1018- );
1019 batch.commit()?;
10201021 Ok(1)
1022 }
010230001024 fn decode_by_did_entry(
1025 &self,
1026- by_did_key: &[u8],
1027 prefix_len: usize,
1028 ) -> anyhow::Result<(Dt, PlcCid, DbOp)> {
1029- let key_rest = by_did_key
1030 .get(prefix_len..)
1031- .ok_or_else(|| anyhow::anyhow!("invalid by_did key {by_did_key:?}"))?;
1032-1033- let ts_bytes = key_rest
1034- .get(..8)
1035- .ok_or_else(|| anyhow::anyhow!("invalid length: {key_rest:?}"))?;
1036- let cid_suffix = key_rest
1037- .get(9..)
1038- .ok_or_else(|| anyhow::anyhow!("invalid length: {key_rest:?}"))?;
10391040- let op_key = [ts_bytes, &[SEP][..], cid_suffix].concat();
1041- let ts = decode_timestamp(ts_bytes)?;
10421043 let value = self
1044 .inner
1045 .ops
1046- .get(&op_key)?
1047- .ok_or_else(|| anyhow::anyhow!("op not found: {op_key:?}"))?;
10481049 let op: DbOp = rmp_serde::from_slice(&value)?;
1050- let mut full_cid = op.cid_prefix.clone();
1051- full_cid.extend_from_slice(cid_suffix);
010521053- Ok((ts, PlcCid(full_cid), op))
1054 }
10551056 fn _ops_for_did(
···1074 ) -> anyhow::Result<impl DoubleEndedIterator<Item = anyhow::Result<Op>> + '_> {
1075 Ok(self._ops_for_did(did)?.map(|res| {
1076 let (ts, cid, op) = res?;
1077-1078 let cid = decode_cid(&cid.0)?;
1079 let did = decode_did(&op.did);
1080-1081 Ok(Op {
1082 did,
1083 cid,
···10901091 pub fn export_ops(
1092 &self,
1093- range: impl std::ops::RangeBounds<Dt>,
1094 ) -> anyhow::Result<impl Iterator<Item = anyhow::Result<Op>> + '_> {
1095 use std::ops::Bound;
1096- let map_bound = |b: Bound<&Dt>| -> Bound<[u8; 8]> {
1097 match b {
1098- Bound::Included(dt) => Bound::Included(dt.timestamp_micros().to_be_bytes()),
1099- Bound::Excluded(dt) => Bound::Excluded(dt.timestamp_micros().to_be_bytes()),
1100 Bound::Unbounded => Bound::Unbounded,
1101 }
1102 };
1103 let range = (map_bound(range.start_bound()), map_bound(range.end_bound()));
11041105- let iter = self.inner.ops.range(range);
0000000000000000000000011061107- Ok(iter.map(|item| {
1108- let (key, value) = item
00000001109 .into_inner()
1110 .map_err(|e| anyhow::anyhow!("fjall read error: {e}"))?;
1111- let db_op: DbOp = rmp_serde::from_slice(&value)?;
1112- let created_at = decode_timestamp(
1113- key.get(..8)
1114- .ok_or_else(|| anyhow::anyhow!("invalid op key {key:?}"))?,
1115- )?;
1116- let cid_suffix = key
1117- .get(9..)
1118- .ok_or_else(|| anyhow::anyhow!("invalid op key {key:?}"))?;
11191120- let mut full_cid_bytes = db_op.cid_prefix.clone();
1121- full_cid_bytes.extend_from_slice(cid_suffix);
1122-1123- let cid = decode_cid(&full_cid_bytes)?;
1124- let did = decode_did(&db_op.did);
1125-1126- Ok(Op {
1127- did,
1128- cid,
1129- created_at,
1130- nullified: db_op.nullified,
1131- operation: db_op.operation.to_json_value(),
1132- })
1133- }))
1134- }
1135-1136- pub fn drop_op(&self, did_str: &str, created_at: &Dt, cid: &str) -> anyhow::Result<()> {
1137- let cid = decode_cid_str(cid)?;
1138- let cid_suffix = &cid[30..];
1139-1140- let op_key = op_key(created_at, cid_suffix);
1141- let by_did_key = by_did_key(did_str, created_at, cid_suffix)?;
11421143 let mut batch = self.inner.db.batch();
1144- batch.remove(&self.inner.ops, op_key);
1145- batch.remove(&self.inner.by_did, by_did_key);
1146 batch.commit()?;
11471148 Ok(())
···1302 }
1303}
13041305-impl BundleSource for FjallDb {
1306- fn reader_for(
1307- &self,
1308- week: Week,
1309- ) -> impl Future<Output = anyhow::Result<impl AsyncRead + Send>> + Send {
1310- let db = self.clone();
1311-1312- async move {
1313- let (mut tx, rx) = tokio::io::duplex(1024 * 1024 * 16);
1314-1315- tokio::task::spawn_blocking(move || -> anyhow::Result<()> {
1316- let after: Dt = week.into();
1317- let before: Dt = week.next().into();
1318-1319- let iter = db.export_ops(after..before)?;
1320-1321- let rt = tokio::runtime::Handle::current();
1322-1323- for op_res in iter {
1324- let op = op_res?;
1325- let operation_str = serde_json::to_string(&op.operation)?;
1326- let common_op = crate::Op {
1327- did: op.did,
1328- cid: op.cid,
1329- created_at: op.created_at,
1330- nullified: op.nullified,
1331- operation: serde_json::value::RawValue::from_string(operation_str)?,
1332- };
1333-1334- let mut json_bytes = serde_json::to_vec(&common_op)?;
1335- json_bytes.push(b'\n');
1336-1337- if rt.block_on(tx.write_all(&json_bytes)).is_err() {
1338- break;
1339- }
1340- }
1341-1342- Ok(())
1343- });
1344-1345- Ok(rx)
1346- }
1347- }
1348-}
1349-1350pub async fn backfill_to_fjall(
1351 db: FjallDb,
1352 reset: bool,
1353- mut pages: mpsc::Receiver<ExportPage>,
1354 notify_last_at: Option<oneshot::Sender<Option<Dt>>>,
1355) -> anyhow::Result<&'static str> {
1356 let t0 = Instant::now();
···1375 page = pages.recv(), if !pages_finished => {
1376 let Some(page) = page else { continue; };
1377 if notify_last_at.is_some() {
1378- if let Some(s) = PageBoundaryState::new(&page) {
1379- last_at = last_at.filter(|&l| l >= s.last_at).or(Some(s.last_at));
01380 }
1381 }
01382 let db = db.clone();
01383 // we don't have to wait for inserts to finish, because insert_op
1384 // without verification does not read anything from the db
1385 insert_tasks.spawn_blocking(move || {
1386 let mut count: usize = 0;
1387- for op in &page.ops {
1388- // we don't verify sigs for bulk, since pages might be out of order
1389- count += db.insert_op::<false>(op)?;
00000001390 }
1391 db.persist(PersistMode::Buffer)?;
1392 Ok(count)
···1421 Ok("backfill_to_fjall")
1422}
14231424-pub async fn pages_to_fjall(
01425 db: FjallDb,
1426- mut pages: mpsc::Receiver<ExportPage>,
1427) -> anyhow::Result<&'static str> {
1428- log::info!("starting pages_to_fjall writer...");
14291430 let t0 = Instant::now();
1431 let mut ops_inserted: usize = 0;
14321433 while let Some(page) = pages.recv().await {
1434- log::trace!("writing page with {} ops", page.ops.len());
1435 let db = db.clone();
1436 let count = tokio::task::spawn_blocking(move || -> anyhow::Result<usize> {
1437 let mut count: usize = 0;
1438- for op in &page.ops {
1439- count += db.insert_op::<true>(op)?;
00000001440 }
1441 db.persist(PersistMode::Buffer)?;
1442 Ok(count)
···1446 }
14471448 log::info!(
1449- "no more pages. inserted {ops_inserted} ops in {:?}",
1450 t0.elapsed()
1451 );
1452- Ok("pages_to_fjall")
1453}
14541455pub async fn audit(
···1464 t0.elapsed()
1465 );
1466 if failed > 0 {
1467- anyhow::bail!("audit found {failed} invalid operations");
1468 }
1469 Ok("audit_fjall")
1470}
···14811482 let latest_at = db
1483 .get_latest()?
1484- .ok_or_else(|| anyhow::anyhow!("db not backfilled? expected at least one op"))?;
000014851486 while let Some(op) = invalid_ops_rx.recv().await {
1487 let InvalidOp { did, at, cid, .. } = op;
···1544 continue;
1545 }
15461547- count += db.insert_op::<true>(&op)?;
001548 }
15491550 db.persist(PersistMode::Buffer)?;
···1use crate::{
2+ Dt, InvalidOp, Op as CommonOp,
3 crypto::{AssuranceResults, DidKey, Signature, assure_valid_sig},
4};
5use anyhow::Context;
6use data_encoding::BASE32_NOPAD;
7use fjall::{Database, Keyspace, KeyspaceCreateOptions, PersistMode, config::BlockSizePolicy};
8+use ordered_varint::Variable;
9use serde::{Deserialize, Serialize};
10use std::collections::BTreeMap;
11use std::fmt;
12use std::path::Path;
13use std::sync::Arc;
14use std::time::Instant;
015use tokio::sync::{mpsc, oneshot};
1617const SEP: u8 = 0;
18+19+fn seq_key(seq: u64) -> Vec<u8> {
20+ seq.to_variable_vec().expect("that seq number encodes")
21+}
22+23+fn decode_seq_key(key: &[u8]) -> anyhow::Result<u64> {
24+ u64::decode_variable(key).context("failed to decode seq key")
25+}
2627type IpldCid = cid::CidGeneric<64>;
28···61 format!("did:plc:{decoded}")
62}
6300000000064fn by_did_prefix(did: &str) -> anyhow::Result<Vec<u8>> {
65 let mut p = Vec::with_capacity(BASE32_NOPAD.decode_len(did.len())? + 1);
66 encode_did(&mut p, did)?;
···68 Ok(p)
69}
7071+/// by_did key: [15 bytes encoded did][SEP][seq varint]
72+fn by_did_key(did: &str, seq: u64) -> anyhow::Result<Vec<u8>> {
73 let mut key = by_did_prefix(did)?;
74+ seq.encode_variable(&mut key)?;
00075 Ok(key)
00000000076}
7778/// CID string → binary CID bytes
···814 Ok(results)
815}
/// Value stored under the seq key in the `ops` keyspace.
/// In this layout the cid and created_at live in the value, not the key.
#[derive(Debug, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
struct DbOp {
    // DID in its compact binary form (see `encode_did`)
    #[serde(with = "serde_bytes")]
    pub did: Vec<u8>,
    // binary CID bytes (as produced by `decode_cid_str`)
    #[serde(with = "serde_bytes")]
    pub cid: Vec<u8>,
    // creation time as microseconds since epoch — written by `insert_op`
    // via `created_at.timestamp_micros() as u64`
    pub created_at: u64,
    pub nullified: bool,
    pub operation: StoredOp,
}
/// Inner state behind `FjallDb` (accessed as `self.inner`): the database
/// handle plus its two keyspaces.
struct FjallInner {
    db: Database,
    /// primary keyspace: seq (varint) -> DbOp
    ops: Keyspace,
    /// secondary index: [encoded_did][SEP][seq_varint] -> []
    by_did: Keyspace,
}
853···905 Ok(())
906 }
907908+ /// Returns `(seq, created_at)` for the last stored op, or `None` if empty.
909+ pub fn get_latest(&self) -> anyhow::Result<Option<(u64, Dt)>> {
910 let Some(guard) = self.inner.ops.last_key_value() else {
911 return Ok(None);
912 };
913+ let (key, value) = guard
914+ .into_inner()
915+ .map_err(|e| anyhow::anyhow!("fjall read error: {e}"))?;
916+ let seq = decode_seq_key(&key)?;
917+ let db_op: DbOp = rmp_serde::from_slice(&value)?;
918+ let dt = Dt::from_timestamp_micros(db_op.created_at as i64)
919+ .ok_or_else(|| anyhow::anyhow!("invalid created_at in last op"))?;
920+ Ok(Some((seq, dt)))
0921 }
922923+ pub fn insert_op<const VERIFY: bool>(&self, op: &CommonOp, seq: u64) -> anyhow::Result<usize> {
924 let cid_bytes = decode_cid_str(&op.cid)?;
0000000925926 let op_json: serde_json::Value = serde_json::from_str(op.operation.get())?;
927 let (stored, mut errors) = StoredOp::from_json_value(op_json);
···943 .prev
944 .as_ref()
945 .map(|prev_cid| {
946+ // TODO: we should have a cid -> seq lookup eventually maybe?
947+ // this is probably fine though we will only iter over like 2 ops at most
948+ // or so, its there to handle nullified...
949+ // but a cid lookup would also help us avoid duplicate ops!
950 self._ops_for_did(&op.did)
951 .map(|ops| {
952 ops.rev()
···987 encode_did(&mut encoded_did, &op.did)?;
988 encoded_did
989 },
990+ cid: cid_bytes,
991+ created_at: op.created_at.timestamp_micros() as u64,
992 nullified: op.nullified,
993 operation,
994 };
995996+ let seq_val = rmp_serde::to_vec(&db_op)?;
997+ let seq_key_bytes = seq_key(seq);
998+ let by_did_key_bytes = by_did_key(&op.did, seq)?;
999+1000 let mut batch = self.inner.db.batch();
1001+ batch.insert(&self.inner.ops, seq_key_bytes, seq_val);
1002+ batch.insert(&self.inner.by_did, by_did_key_bytes, &[]);
000000001003 batch.commit()?;
10041005 Ok(1)
1006 }
1007+}
10081009+impl FjallDb {
1010+ /// Decode a `by_did` entry: extract the seq from the key suffix, then
1011+ /// look up the full `DbOp` in the `ops` keyspace.
1012 fn decode_by_did_entry(
1013 &self,
1014+ by_did_key_bytes: &[u8],
1015 prefix_len: usize,
1016 ) -> anyhow::Result<(Dt, PlcCid, DbOp)> {
1017+ let key_suffix = by_did_key_bytes
1018 .get(prefix_len..)
1019+ .ok_or_else(|| anyhow::anyhow!("invalid by_did key {by_did_key_bytes:?}"))?;
000000010201021+ let seq =
1022+ u64::decode_variable(key_suffix).context("failed to decode seq from by_did key")?;
10231024 let value = self
1025 .inner
1026 .ops
1027+ .get(seq_key(seq))?
1028+ .ok_or_else(|| anyhow::anyhow!("op not found for seq {seq}"))?;
10291030 let op: DbOp = rmp_serde::from_slice(&value)?;
1031+ let ts = Dt::from_timestamp_micros(op.created_at as i64)
1032+ .ok_or_else(|| anyhow::anyhow!("invalid created_at_micros {}", op.created_at))?;
1033+ let cid = PlcCid(op.cid.clone());
10341035+ Ok((ts, cid, op))
1036 }
10371038 fn _ops_for_did(
···1056 ) -> anyhow::Result<impl DoubleEndedIterator<Item = anyhow::Result<Op>> + '_> {
1057 Ok(self._ops_for_did(did)?.map(|res| {
1058 let (ts, cid, op) = res?;
01059 let cid = decode_cid(&cid.0)?;
1060 let did = decode_did(&op.did);
01061 Ok(Op {
1062 did,
1063 cid,
···10701071 pub fn export_ops(
1072 &self,
1073+ range: impl std::ops::RangeBounds<u64>,
1074 ) -> anyhow::Result<impl Iterator<Item = anyhow::Result<Op>> + '_> {
1075 use std::ops::Bound;
1076+ let map_bound = |b: Bound<&u64>| -> Bound<Vec<u8>> {
1077 match b {
1078+ Bound::Included(seq) => Bound::Included(seq_key(*seq)),
1079+ Bound::Excluded(seq) => Bound::Excluded(seq_key(*seq)),
1080 Bound::Unbounded => Bound::Unbounded,
1081 }
1082 };
1083 let range = (map_bound(range.start_bound()), map_bound(range.end_bound()));
10841085+ Ok(self
1086+ .inner
1087+ .ops
1088+ .range(range)
1089+ .map(|item| -> anyhow::Result<Op> {
1090+ let (_, value) = item
1091+ .into_inner()
1092+ .map_err(|e: fjall::Error| anyhow::anyhow!("fjall read error: {e}"))?;
1093+ let db_op: DbOp = rmp_serde::from_slice(&value)?;
1094+ let created_at =
1095+ Dt::from_timestamp_micros(db_op.created_at as i64).ok_or_else(|| {
1096+ anyhow::anyhow!("invalid created_at_micros {}", db_op.created_at)
1097+ })?;
1098+ let cid = decode_cid(&db_op.cid)?;
1099+ let did = decode_did(&db_op.did);
1100+ Ok(Op {
1101+ did,
1102+ cid,
1103+ created_at,
1104+ nullified: db_op.nullified,
1105+ operation: db_op.operation.to_json_value(),
1106+ })
1107+ }))
1108+ }
11091110+ pub fn drop_op(&self, did_str: &str, _created_at: &Dt, _cid: &str) -> anyhow::Result<()> {
1111+ // scan the by_did index for this DID and find the op that matches
1112+ // (in practice drop_op is rare so a scan is fine)
1113+ let prefix = by_did_prefix(did_str)?;
1114+ let mut found_seq: Option<u64> = None;
1115+ let mut found_by_did_key: Option<Vec<u8>> = None;
1116+1117+ for guard in self.inner.by_did.prefix(&prefix) {
1118+ let (key, _) = guard
1119 .into_inner()
1120 .map_err(|e| anyhow::anyhow!("fjall read error: {e}"))?;
1121+ let suffix = &key[prefix.len()..];
1122+ let seq = u64::decode_variable(suffix).context("decode seq in drop_op")?;
1123+ found_seq = Some(seq);
1124+ found_by_did_key = Some(key.to_vec());
1125+ // if there were multiple ops for this DID we'd need to match by cid,
1126+ // but for now take the last matched (they're in seq order)
1127+ }
011281129+ let (seq, by_did_key_bytes) = match (found_seq, found_by_did_key) {
1130+ (Some(s), Some(k)) => (s, k),
1131+ _ => {
1132+ log::warn!("drop_op: by_did entry not found for {did_str}");
1133+ return Ok(());
1134+ }
1135+ };
00000000000000011361137 let mut batch = self.inner.db.batch();
1138+ batch.remove(&self.inner.ops, seq_key(seq));
1139+ batch.remove(&self.inner.by_did, by_did_key_bytes);
1140 batch.commit()?;
11411142 Ok(())
···1296 }
1297}
12980000000000000000000000000000000000000000000001299pub async fn backfill_to_fjall(
1300 db: FjallDb,
1301 reset: bool,
1302+ mut pages: mpsc::Receiver<crate::SeqPage>,
1303 notify_last_at: Option<oneshot::Sender<Option<Dt>>>,
1304) -> anyhow::Result<&'static str> {
1305 let t0 = Instant::now();
···1324 page = pages.recv(), if !pages_finished => {
1325 let Some(page) = page else { continue; };
1326 if notify_last_at.is_some() {
1327+ // SeqPage ops are always in order, so we can just grab the last one
1328+ if let Some(last_op) = page.ops.last() {
1329+ last_at = last_at.filter(|&l| l >= last_op.created_at).or(Some(last_op.created_at));
1330 }
1331 }
1332+1333 let db = db.clone();
1334+1335 // we don't have to wait for inserts to finish, because insert_op
1336 // without verification does not read anything from the db
1337 insert_tasks.spawn_blocking(move || {
1338 let mut count: usize = 0;
1339+ for seq_op in &page.ops {
1340+ let op = CommonOp {
1341+ did: seq_op.did.clone(),
1342+ cid: seq_op.cid.clone(),
1343+ created_at: seq_op.created_at,
1344+ nullified: seq_op.nullified,
1345+ operation: seq_op.operation.clone(),
1346+ };
1347+ // we don't verify sigs for bulk, since pages might be out of order (and we trust for backfills)
1348+ count += db.insert_op::<false>(&op, seq_op.seq)?;
1349 }
1350 db.persist(PersistMode::Buffer)?;
1351 Ok(count)
···1380 Ok("backfill_to_fjall")
1381}
13821383+/// Write sequenced ops (with PLC seq numbers) into fjall.
1384+pub async fn seq_pages_to_fjall(
1385 db: FjallDb,
1386+ mut pages: mpsc::Receiver<crate::SeqPage>,
1387) -> anyhow::Result<&'static str> {
1388+ log::info!("starting seq_pages_to_fjall writer...");
13891390 let t0 = Instant::now();
1391 let mut ops_inserted: usize = 0;
13921393 while let Some(page) = pages.recv().await {
1394+ log::trace!("writing seq page with {} ops", page.ops.len());
1395 let db = db.clone();
1396 let count = tokio::task::spawn_blocking(move || -> anyhow::Result<usize> {
1397 let mut count: usize = 0;
1398+ for seq_op in &page.ops {
1399+ let common_op = CommonOp {
1400+ did: seq_op.did.clone(),
1401+ cid: seq_op.cid.clone(),
1402+ created_at: seq_op.created_at,
1403+ nullified: seq_op.nullified,
1404+ operation: seq_op.operation.clone(),
1405+ };
1406+ count += db.insert_op::<true>(&common_op, seq_op.seq)?;
1407 }
1408 db.persist(PersistMode::Buffer)?;
1409 Ok(count)
···1413 }
14141415 log::info!(
1416+ "no more seq pages. inserted {ops_inserted} ops in {:?}",
1417 t0.elapsed()
1418 );
1419+ Ok("seq_pages_to_fjall")
1420}
14211422pub async fn audit(
···1431 t0.elapsed()
1432 );
1433 if failed > 0 {
1434+ log::error!("audit found {failed} invalid operations");
1435 }
1436 Ok("audit_fjall")
1437}
···14481449 let latest_at = db
1450 .get_latest()?
1451+ .ok_or_else(|| anyhow::anyhow!("db not backfilled? expected at least one op"))
1452+ .map(|(_, dt)| dt)?;
1453+1454+ // local seq counter for newly fetched ops
1455+ let mut next_seq = db.get_latest()?.map(|(s, _)| s).unwrap_or(0) + 1;
14561457 while let Some(op) = invalid_ops_rx.recv().await {
1458 let InvalidOp { did, at, cid, .. } = op;
···1515 continue;
1516 }
15171518+ let seq = next_seq;
1519+ next_seq += 1;
1520+ count += db.insert_op::<true>(&op, seq)?;
1521 }
15221523 db.persist(PersistMode::Buffer)?;