···11# Allegedly
2233-A public ledger copy machine for [PLC](https://github.com/did-method-plc/did-method-plc) mirrors running the canonical typescript directory code.
33+Some [public ledger](https://github.com/did-method-plc/did-method-plc) tools and services
44+55+Allegedly can
66+77+- Tail PLC ops to stdout
88+- Copy ops to postgres for a mirror running the [reference typescript implementation](https://github.com/did-method-plc/did-method-plc)
+20-1
src/lib.rs
···16161717/// One page of PLC export
1818///
1919-/// Not limited, but expected to have up to about 1000 lines
1919+/// Expected to have up to around 1000 lines of raw json ops
2020#[derive(Debug)]
2121pub struct ExportPage {
2222 pub ops: Vec<String>,
···2828 }
2929}
30303131+/// A fully-deserialized plc operation
3232+///
3333+/// including the plc's wrapping with timestamp and nullified state
3134#[derive(Debug, Deserialize)]
3235#[serde(rename_all = "camelCase")]
3336pub struct Op<'a> {
···3740 pub nullified: bool,
3841 #[serde(borrow)]
3942 pub operation: &'a serde_json::value::RawValue,
4343+}

/// Database primary key for an op
///
/// An op is identified by the pair of its `did` and its `cid`. Both parts are
/// owned strings so a key can outlive the raw json line it was parsed from.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct OpKey {
    /// the `did` field of the op this key identifies
    pub did: String,
    /// the `cid` field of the op this key identifies
    pub cid: String,
}
5151+5252+impl From<&Op<'_>> for OpKey {
5353+ fn from(Op { did, cid, .. }: &Op<'_>) -> Self {
5454+ Self {
5555+ did: did.to_string(),
5656+ cid: cid.to_string(),
5757+ }
5858+ }
4059}
41604261pub fn bin_init(name: &str) {
+114-22
src/poll.rs
···11-use crate::{CLIENT, Dt, ExportPage, Op};
11+use crate::{CLIENT, Dt, ExportPage, Op, OpKey};
22use std::time::Duration;
33use thiserror::Error;
44use url::Url;
···4242 }
4343}
44444545-impl ExportPage {
4646- /// this is a (slightly flawed) op deduplicator
4747- fn only_after_last(&mut self, last_op: &LastOp) {
4848- loop {
4949- let Some(s) = self.ops.first().cloned() else {
5050- break;
5151- };
5252- let Ok(op) = serde_json::from_str::<Op>(&s) else {
5353- log::warn!(
5454- "deduplication failed op parsing ({s:?}), bailing for downstream to deal with."
5555- );
5656- break;
4545+/// PLC
4646+struct PageBoundaryState {
4747+ last_at: Dt,
4848+ keys_at: Vec<OpKey>, // expected to ~always be length one
4949+}
5050+5151+impl PageBoundaryState {
5252+ fn new(page: &mut ExportPage) -> Option<Self> {
5353+ // grab the very last op
5454+ let (last_at, last_key) = loop {
5555+ let Some(s) = page.ops.last().cloned() else {
5656+ // there are no ops left? oop. bail.
5757+ // last_at and existing keys remain in tact if there was no later op
5858+ return None;
5759 };
5858- if op.created_at > last_op.created_at {
5959- break;
6060+ if s.is_empty() {
6161+ // annoying: trim off any trailing blank lines
6262+ page.ops.pop();
6363+ continue;
6064 }
6161- log::trace!("dedup: dropping an op");
6262- self.ops.remove(0);
6363- if Into::<LastOp>::into(op) == *last_op {
6464- log::trace!("dedup: found exact op, keeping all after here");
6565- break;
6565+ let Ok(op) = serde_json::from_str::<Op>(&s)
6666+ .inspect_err(|e| log::warn!("deduplication failed last op parsing ({s:?}: {e}), ignoring for downstream to deal with."))
6767+ else {
6868+ // doubly annoying: skip over trailing garbage??
6969+ continue;
7070+ };
7171+ break (op.created_at, Into::<OpKey>::into(&op));
7272+ };
7373+7474+ // set initial state
7575+ let mut me = Self {
7676+ last_at,
7777+ keys_at: vec![last_key],
7878+ };
7979+8080+ // and make sure all keys at this time are captured from the back
8181+ page.ops
8282+ .iter()
8383+ .rev()
8484+ .skip(1) // we alredy added the very last one
8585+ .map(|s| serde_json::from_str::<Op>(s).inspect_err(|e|
8686+ log::warn!("deduplication failed op parsing ({s:?}: {e}), bailing for downstream to deal with.")))
8787+ .take_while(|opr| opr.as_ref().map(|op| op.created_at == last_at).unwrap_or(false))
8888+ .for_each(|opr| {
8989+ let op = &opr.expect("any Errs were filtered by take_while");
9090+ me.keys_at.push(op.into());
9191+ });
9292+9393+ Some(me)
9494+ }
9595+ fn apply_to_next(&mut self, page: &mut ExportPage) {
9696+ // walk ops forward, kicking previously-seen ops until created_at advances
9797+ let to_remove: Vec<usize> = page
9898+ .ops
9999+ .iter()
100100+ .map(|s| serde_json::from_str::<Op>(s).inspect_err(|e|
101101+ log::warn!("deduplication failed op parsing ({s:?}: {e}), bailing for downstream to deal with.")))
102102+ .enumerate()
103103+ .take_while(|(_, opr)| opr.as_ref().map(|op| op.created_at == self.last_at).unwrap_or(false))
104104+ .filter_map(|(i, opr)| {
105105+ if self.keys_at.contains(&(&opr.expect("any Errs were filtered by take_while")).into()) {
106106+ Some(i)
107107+ } else { None }
108108+ })
109109+ .collect();
110110+111111+ // actually remove them. last to first to indices don't shift
112112+ for dup_idx in to_remove.into_iter().rev() {
113113+ page.ops.remove(dup_idx);
114114+ }
115115+116116+ // grab the very last op
117117+ let (last_at, last_key) = loop {
118118+ let Some(s) = page.ops.last().cloned() else {
119119+ // there are no ops left? oop. bail.
120120+ // last_at and existing keys remain in tact if there was no later op
121121+ return;
122122+ };
123123+ if s.is_empty() {
124124+ // annoying: trim off any trailing blank lines
125125+ page.ops.pop();
126126+ continue;
66127 }
128128+ let Ok(op) = serde_json::from_str::<Op>(&s)
129129+ .inspect_err(|e| log::warn!("deduplication failed last op parsing ({s:?}: {e}), ignoring for downstream to deal with."))
130130+ else {
131131+ // doubly annoying: skip over trailing garbage??
132132+ continue;
133133+ };
134134+ break (op.created_at, Into::<OpKey>::into(&op));
135135+ };
136136+137137+ // reset state (as long as time actually moved forward on this page)
138138+ if last_at > self.last_at {
139139+ self.last_at = last_at;
140140+ self.keys_at = vec![last_key];
141141+ } else {
142142+ // weird cases: either time didn't move (fine...) or went backwards (not fine)
143143+ assert_eq!(last_at, self.last_at, "time moved backwards on a page");
67144 }
145145+ // and make sure all keys at this time are captured from the back
146146+ page.ops
147147+ .iter()
148148+ .rev()
149149+ .skip(1) // we alredy added the very last one
150150+ .map(|s| serde_json::from_str::<Op>(s).inspect_err(|e|
151151+ log::warn!("deduplication failed op parsing ({s:?}: {e}), bailing for downstream to deal with.")))
152152+ .take_while(|opr| opr.as_ref().map(|op| op.created_at == last_at).unwrap_or(false))
153153+ .for_each(|opr| {
154154+ let op = &opr.expect("any Errs were filtered by take_while");
155155+ self.keys_at.push(op.into());
156156+ });
68157 }
69158}
70159···105194) -> anyhow::Result<()> {
106195 let mut tick = tokio::time::interval(UPSTREAM_REQUEST_INTERVAL);
107196 let mut prev_last: Option<LastOp> = after.map(Into::into);
197197+ let mut boundary_state: Option<PageBoundaryState> = None;
108198 loop {
109199 tick.tick().await;
110200···115205 };
116206117207 let (mut page, next_last) = get_page(url).await?;
118118- if let Some(ref pl) = prev_last {
119119- page.only_after_last(pl);
208208+ if let Some(ref mut state) = boundary_state {
209209+ state.apply_to_next(&mut page);
210210+ } else {
211211+ boundary_state = PageBoundaryState::new(&mut page);
120212 }
121213 if !page.is_empty() {
122214 match dest.try_send(page) {