tangled
alpha
login
or
join now
ptr.pet
/
Allegedly
forked from
microcosm.blue/Allegedly
0
fork
atom
Server tools to backfill, tail, mirror, and verify PLC logs
0
fork
atom
overview
issues
pulls
pipelines
tail: deduplicate
bad-example.com
6 months ago
815336da
e32ad0df
+93
-15
2 changed files
expand all
collapse all
unified
split
src
lib.rs
poll.rs
+14
-2
src/lib.rs
···
13
13
/// One page of PLC export, held as raw (unparsed) op lines.
14
14
///
15
15
/// Not limited, but expected to have up to about 1000 lines
16
16
+
#[derive(Debug)]
16
17
pub struct ExportPage {
17
18
/// One entry per non-empty line of the export response; each line is expected
/// to be a JSON-encoded op (presumably JSON Lines -- confirm against upstream).
pub ops: Vec<String>,
18
19
}
19
20
20
20
-
#[derive(Deserialize)]
21
21
+
impl ExportPage {
22
22
+
pub fn is_empty(&self) -> bool {
23
23
+
self.ops.is_empty()
24
24
+
}
25
25
+
}
26
26
+
27
27
+
#[derive(Debug, Deserialize)]
21
28
#[serde(rename_all = "camelCase")]
22
22
-
pub struct OpPeek {
29
29
+
pub struct Op<'a> {
30
30
+
pub did: &'a str,
31
31
+
pub cid: &'a str,
23
32
pub created_at: Dt,
33
33
+
pub nullified: bool,
34
34
+
#[serde(borrow)]
35
35
+
pub operation: &'a serde_json::value::RawValue,
24
36
}
25
37
26
38
pub fn bin_init(name: &str) {
+79
-13
src/poll.rs
···
1
1
-
use crate::{CLIENT, Dt, ExportPage, OpPeek};
1
1
+
use crate::{CLIENT, Dt, ExportPage, Op};
2
2
use std::time::Duration;
3
3
use thiserror::Error;
4
4
use url::Url;
···
13
13
SerdeError(#[from] serde_json::Error),
14
14
}
15
15
16
16
-
pub async fn get_page(url: Url) -> Result<(ExportPage, Option<Dt>), GetPageError> {
16
16
+
/// Identifies the last op seen on a page, so the next page can be deduplicated.
///
/// Ops are primary-keyed by (did, cid).
17
17
+
/// PLC orders by `created_at` but does not guarantee distinct times per op.
18
18
+
/// We assume that the order will at least be deterministic: this may be unsound.
19
19
+
#[derive(Debug, PartialEq)]
20
20
+
pub struct LastOp {
21
21
+
created_at: Dt, // any op with a strictly greater timestamp is definitely not a duplicate
22
22
+
pk: (String, String), // (did, cid); both empty when only a timestamp is known
23
23
+
}
24
24
+
25
25
+
impl From<Op<'_>> for LastOp {
26
26
+
fn from(op: Op) -> Self {
27
27
+
Self {
28
28
+
created_at: op.created_at,
29
29
+
pk: (op.did.to_string(), op.cid.to_string()),
30
30
+
}
31
31
+
}
32
32
+
}
33
33
+
34
34
+
/// Build a timestamp-only cursor: the (did, cid) key is left as two empty
/// strings, so it will not compare equal to the key of any op with a
/// non-empty did or cid.
impl From<Dt> for LastOp {
    fn from(dt: Dt) -> Self {
        let no_pk = (String::new(), String::new());
        Self {
            created_at: dt,
            pk: no_pk,
        }
    }
}
42
42
+
43
43
+
impl ExportPage {
44
44
+
/// this is a (slightly flawed) op deduplicator
45
45
+
fn only_after_last(&mut self, last_op: &LastOp) {
46
46
+
loop {
47
47
+
let Some(s) = self.ops.first().cloned() else {
48
48
+
break;
49
49
+
};
50
50
+
let Ok(op) = serde_json::from_str::<Op>(&s) else {
51
51
+
log::warn!(
52
52
+
"deduplication failed op parsing ({s:?}), bailing for downstream to deal with."
53
53
+
);
54
54
+
break;
55
55
+
};
56
56
+
if op.created_at > last_op.created_at {
57
57
+
break;
58
58
+
}
59
59
+
log::trace!("dedup: dropping an op");
60
60
+
self.ops.remove(0);
61
61
+
if Into::<LastOp>::into(op) == *last_op {
62
62
+
log::trace!("dedup: found exact op, keeping all after here");
63
63
+
break;
64
64
+
}
65
65
+
}
66
66
+
}
67
67
+
}
68
68
+
69
69
+
pub async fn get_page(url: Url) -> Result<(ExportPage, Option<LastOp>), GetPageError> {
17
70
log::trace!("Getting page: {url}");
18
71
19
72
let ops: Vec<String> = CLIENT
···
25
78
.await?
26
79
.trim()
27
80
.split('\n')
81
81
+
.filter_map(|s| {
82
82
+
let s = s.trim();
83
83
+
if s.is_empty() { None } else { Some(s) }
84
84
+
})
28
85
.map(Into::into)
29
86
.collect();
30
87
31
31
-
let last_at = ops
88
88
+
let last_op = ops
32
89
.last()
33
90
.filter(|s| !s.is_empty())
34
34
-
.map(|s| serde_json::from_str::<OpPeek>(s))
91
91
+
.map(|s| serde_json::from_str::<Op>(s))
35
92
.transpose()?
36
36
-
.map(|o| o.created_at)
37
37
-
.inspect(|at| log::trace!("new last_at: {at}"));
93
93
+
.map(Into::into)
94
94
+
.inspect(|at| log::trace!("new last op: {at:?}"));
38
95
39
39
-
Ok((ExportPage { ops }, last_at))
96
96
+
Ok((ExportPage { ops }, last_op))
40
97
}
41
98
42
99
pub async fn poll_upstream(
···
45
102
dest: flume::Sender<ExportPage>,
46
103
) -> anyhow::Result<()> {
47
104
let mut tick = tokio::time::interval(UPSTREAM_REQUEST_INTERVAL);
48
48
-
let mut after = after;
105
105
+
let mut prev_last: Option<LastOp> = after.map(Into::into);
49
106
loop {
50
107
tick.tick().await;
108
108
+
51
109
let mut url = base.clone();
52
52
-
if let Some(a) = after {
53
53
-
url.query_pairs_mut().append_pair("after", &a.to_rfc3339());
110
110
+
if let Some(ref pl) = prev_last {
111
111
+
url.query_pairs_mut()
112
112
+
.append_pair("after", &pl.created_at.to_rfc3339());
54
113
};
55
55
-
let (page, next_after) = get_page(url).await?;
56
56
-
dest.send_async(page).await?;
57
57
-
after = next_after.or(after);
114
114
+
115
115
+
let (mut page, next_last) = get_page(url).await?;
116
116
+
if let Some(ref pl) = prev_last {
117
117
+
page.only_after_last(pl);
118
118
+
}
119
119
+
if !page.is_empty() {
120
120
+
dest.send_async(page).await?;
121
121
+
}
122
122
+
123
123
+
prev_last = next_last.or(prev_last);
58
124
}
59
125
}