tangled
alpha
login
or
join now
ptr.pet
/
Allegedly
forked from
microcosm.blue/Allegedly
0
fork
atom
Server tools to backfill, tail, mirror, and verify PLC logs
0
fork
atom
overview
issues
pulls
pipelines
tests for the horrible boundary dedup
bad-example.com
5 months ago
c8224075
1ed0de3f
+399
-10
1 changed file
expand all
collapse all
unified
split
src
poll.rs
+399
-10
src/poll.rs
···
43
43
}
44
44
45
45
/// PLC
46
46
+
#[derive(Debug, PartialEq)]
46
47
struct PageBoundaryState {
47
48
last_at: Dt,
48
49
keys_at: Vec<OpKey>, // expected to ~always be length one
···
64
65
// should unrefactor to make Op own its data again, parse (and deal with errors)
65
66
// upfront, and probably greatly simplify everything downstream. simple.
66
67
impl PageBoundaryState {
67
67
-
fn new(page: &mut ExportPage) -> Option<Self> {
68
68
+
fn new(page: &ExportPage) -> Option<Self> {
69
69
+
let mut skips = 0;
70
70
+
68
71
// grab the very last op
69
72
let (last_at, last_key) = loop {
70
70
-
let Some(s) = page.ops.last().cloned() else {
73
73
+
let Some(s) = page.ops.iter().rev().nth(skips).cloned() else {
71
74
// there are no ops left? oop. bail.
72
75
// last_at and existing keys remain in tact if there was no later op
73
76
return None;
74
77
};
75
78
if s.is_empty() {
76
76
-
// annoying: trim off any trailing blank lines
77
77
-
page.ops.pop();
79
79
+
// annoying: ignore any trailing blank lines
80
80
+
skips += 1;
78
81
continue;
79
82
}
80
83
let Ok(op) = serde_json::from_str::<Op>(&s)
81
84
.inspect_err(|e| log::warn!("deduplication failed last op parsing ({s:?}: {e}), ignoring for downstream to deal with."))
82
85
else {
83
86
// doubly annoying: skip over trailing garbage??
87
87
+
skips += 1;
84
88
continue;
85
89
};
86
90
break (op.created_at, Into::<OpKey>::into(&op));
···
93
97
};
94
98
95
99
// and make sure all keys at this time are captured from the back
96
96
-
me.capture_nth_last_at(page, last_at);
100
100
+
me.capture_nth_last_at(page, last_at, skips);
97
101
98
102
Some(me)
99
103
}
···
119
123
}
120
124
121
125
// grab the very last op
126
126
+
let mut skips = 0;
122
127
let (last_at, last_key) = loop {
123
123
-
let Some(s) = page.ops.last().cloned() else {
128
128
+
let Some(s) = page.ops.iter().rev().nth(skips).cloned() else {
124
129
// there are no ops left? oop. bail.
125
130
// last_at and existing keys remain in tact if there was no later op
126
131
return;
127
132
};
128
133
if s.is_empty() {
129
134
// annoying: trim off any trailing blank lines
130
130
-
page.ops.pop();
135
135
+
skips += 1;
131
136
continue;
132
137
}
133
138
let Ok(op) = serde_json::from_str::<Op>(&s)
134
139
.inspect_err(|e| log::warn!("deduplication failed last op parsing ({s:?}: {e}), ignoring for downstream to deal with."))
135
140
else {
136
141
// doubly annoying: skip over trailing garbage??
142
142
+
skips += 1;
137
143
continue;
138
144
};
139
145
break (op.created_at, Into::<OpKey>::into(&op));
···
146
152
} else {
147
153
// weird cases: either time didn't move (fine...) or went backwards (not fine)
148
154
assert_eq!(last_at, self.last_at, "time moved backwards on a page");
155
155
+
self.keys_at.push(last_key);
149
156
}
150
157
// and make sure all keys at this time are captured from the back
151
151
-
self.capture_nth_last_at(page, last_at);
158
158
+
self.capture_nth_last_at(page, last_at, skips);
152
159
}
153
160
154
161
/// walk backwards from 2nd last and collect keys until created_at changes
155
155
-
fn capture_nth_last_at(&mut self, page: &mut ExportPage, last_at: Dt) {
162
162
+
fn capture_nth_last_at(&mut self, page: &ExportPage, last_at: Dt, skips: usize) {
156
163
page.ops
157
164
.iter()
158
165
.rev()
166
166
+
.skip(skips)
159
167
.skip(1) // we alredy added the very last one
160
168
.map(|s| serde_json::from_str::<Op>(s).inspect_err(|e|
161
169
log::warn!("deduplication failed op parsing ({s:?}: {e}), bailing for downstream to deal with.")))
···
218
226
if let Some(ref mut state) = boundary_state {
219
227
state.apply_to_next(&mut page);
220
228
} else {
221
221
-
boundary_state = PageBoundaryState::new(&mut page);
229
229
+
boundary_state = PageBoundaryState::new(&page);
222
230
}
223
231
if !page.is_empty() {
224
232
match dest.try_send(page) {
···
234
242
prev_last = next_last.or(prev_last);
235
243
}
236
244
}
245
245
+
246
246
+
#[cfg(test)]
247
247
+
mod test {
248
248
+
use super::*;
249
249
+
250
250
+
const FIVES_TS: i64 = 1431648000;
251
251
+
const NEXT_TS: i64 = 1431648001;
252
252
+
253
253
+
fn valid_op() -> serde_json::Value {
254
254
+
serde_json::json!({
255
255
+
"did": "did",
256
256
+
"cid": "cid",
257
257
+
"createdAt": "2015-05-15T00:00:00Z",
258
258
+
"nullified": false,
259
259
+
"operation": {},
260
260
+
})
261
261
+
}
262
262
+
263
263
+
fn next_op() -> serde_json::Value {
264
264
+
serde_json::json!({
265
265
+
"did": "didnext",
266
266
+
"cid": "cidnext",
267
267
+
"createdAt": "2015-05-15T00:00:01Z",
268
268
+
"nullified": false,
269
269
+
"operation": {},
270
270
+
})
271
271
+
}
272
272
+
273
273
+
fn base_state() -> PageBoundaryState {
274
274
+
let page = ExportPage {
275
275
+
ops: vec![valid_op().to_string()],
276
276
+
};
277
277
+
PageBoundaryState::new(&page).unwrap()
278
278
+
}
279
279
+
280
280
+
#[test]
281
281
+
fn test_boundary_new_empty() {
282
282
+
let page = ExportPage { ops: vec![] };
283
283
+
let state = PageBoundaryState::new(&page);
284
284
+
assert!(state.is_none());
285
285
+
}
286
286
+
287
287
+
#[test]
288
288
+
fn test_boundary_new_empty_op() {
289
289
+
let page = ExportPage {
290
290
+
ops: vec!["".to_string()],
291
291
+
};
292
292
+
let state = PageBoundaryState::new(&page);
293
293
+
assert!(state.is_none());
294
294
+
}
295
295
+
296
296
+
#[test]
297
297
+
fn test_boundary_new_ignores_bad_op() {
298
298
+
let page = ExportPage {
299
299
+
ops: vec!["bad".to_string()],
300
300
+
};
301
301
+
let state = PageBoundaryState::new(&page);
302
302
+
assert!(state.is_none());
303
303
+
}
304
304
+
305
305
+
#[test]
306
306
+
fn test_boundary_new_multiple_bad_end() {
307
307
+
let page = ExportPage {
308
308
+
ops: vec![
309
309
+
"bad".to_string(),
310
310
+
"".to_string(),
311
311
+
"foo".to_string(),
312
312
+
"".to_string(),
313
313
+
],
314
314
+
};
315
315
+
let state = PageBoundaryState::new(&page);
316
316
+
assert!(state.is_none());
317
317
+
}
318
318
+
319
319
+
#[test]
320
320
+
fn test_boundary_new_one_op() {
321
321
+
let page = ExportPage {
322
322
+
ops: vec![valid_op().to_string()],
323
323
+
};
324
324
+
let state = PageBoundaryState::new(&page).unwrap();
325
325
+
assert_eq!(state.last_at, Dt::from_timestamp(FIVES_TS, 0).unwrap());
326
326
+
assert_eq!(
327
327
+
state.keys_at,
328
328
+
vec![OpKey {
329
329
+
cid: "cid".to_string(),
330
330
+
did: "did".to_string(),
331
331
+
}]
332
332
+
);
333
333
+
}
334
334
+
335
335
+
#[test]
336
336
+
fn test_boundary_new_one_op_with_stuff() {
337
337
+
let expect_same_state = |m, ops| {
338
338
+
let this_state = PageBoundaryState::new(&ExportPage { ops }).unwrap();
339
339
+
assert_eq!(this_state, base_state(), "{}", m);
340
340
+
};
341
341
+
342
342
+
expect_same_state("empty before", vec!["".to_string(), valid_op().to_string()]);
343
343
+
344
344
+
expect_same_state("empty after", vec![valid_op().to_string(), "".to_string()]);
345
345
+
346
346
+
expect_same_state(
347
347
+
"bad before, empty after",
348
348
+
vec!["bad".to_string(), valid_op().to_string(), "".to_string()],
349
349
+
);
350
350
+
351
351
+
expect_same_state(
352
352
+
"bad and empty before and after",
353
353
+
vec![
354
354
+
"".to_string(),
355
355
+
"bad".to_string(),
356
356
+
valid_op().to_string(),
357
357
+
"".to_string(),
358
358
+
"bad".to_string(),
359
359
+
],
360
360
+
);
361
361
+
}
362
362
+
363
363
+
#[test]
364
364
+
fn test_add_new_empty() {
365
365
+
let mut state = base_state();
366
366
+
state.apply_to_next(&mut ExportPage { ops: vec![] });
367
367
+
assert_eq!(state, base_state());
368
368
+
}
369
369
+
370
370
+
#[test]
371
371
+
fn test_add_new_empty_op() {
372
372
+
let mut state = base_state();
373
373
+
state.apply_to_next(&mut ExportPage {
374
374
+
ops: vec!["".to_string()],
375
375
+
});
376
376
+
assert_eq!(state, base_state());
377
377
+
}
378
378
+
379
379
+
#[test]
380
380
+
fn test_add_new_ignores_bad_op() {
381
381
+
let mut state = base_state();
382
382
+
state.apply_to_next(&mut ExportPage {
383
383
+
ops: vec!["bad".to_string()],
384
384
+
});
385
385
+
assert_eq!(state, base_state());
386
386
+
}
387
387
+
388
388
+
#[test]
389
389
+
fn test_add_new_multiple_bad() {
390
390
+
let mut page = ExportPage {
391
391
+
ops: vec![
392
392
+
"bad".to_string(),
393
393
+
"".to_string(),
394
394
+
"foo".to_string(),
395
395
+
"".to_string(),
396
396
+
],
397
397
+
};
398
398
+
399
399
+
let mut state = base_state();
400
400
+
state.apply_to_next(&mut page);
401
401
+
assert_eq!(state, base_state());
402
402
+
}
403
403
+
404
404
+
#[test]
405
405
+
fn test_add_new_same_op() {
406
406
+
let mut page = ExportPage {
407
407
+
ops: vec![valid_op().to_string()],
408
408
+
};
409
409
+
let mut state = base_state();
410
410
+
state.apply_to_next(&mut page);
411
411
+
assert_eq!(state, base_state());
412
412
+
}
413
413
+
414
414
+
#[test]
415
415
+
fn test_add_new_same_time() {
416
416
+
// make an op with a different OpKey
417
417
+
let mut op = valid_op();
418
418
+
op.as_object_mut()
419
419
+
.unwrap()
420
420
+
.insert("cid".to_string(), "cid2".into());
421
421
+
let mut page = ExportPage {
422
422
+
ops: vec![op.to_string()],
423
423
+
};
424
424
+
425
425
+
let mut state = base_state();
426
426
+
state.apply_to_next(&mut page);
427
427
+
assert_eq!(state.last_at, Dt::from_timestamp(FIVES_TS, 0).unwrap());
428
428
+
assert_eq!(
429
429
+
state.keys_at,
430
430
+
vec![
431
431
+
OpKey {
432
432
+
cid: "cid".to_string(),
433
433
+
did: "did".to_string(),
434
434
+
},
435
435
+
OpKey {
436
436
+
cid: "cid2".to_string(),
437
437
+
did: "did".to_string(),
438
438
+
},
439
439
+
]
440
440
+
);
441
441
+
}
442
442
+
443
443
+
#[test]
444
444
+
fn test_add_new_same_time_dup_before() {
445
445
+
// make an op with a different OpKey
446
446
+
let mut op = valid_op();
447
447
+
op.as_object_mut()
448
448
+
.unwrap()
449
449
+
.insert("cid".to_string(), "cid2".into());
450
450
+
let mut page = ExportPage {
451
451
+
ops: vec![valid_op().to_string(), op.to_string()],
452
452
+
};
453
453
+
454
454
+
let mut state = base_state();
455
455
+
state.apply_to_next(&mut page);
456
456
+
assert_eq!(state.last_at, Dt::from_timestamp(FIVES_TS, 0).unwrap());
457
457
+
assert_eq!(
458
458
+
state.keys_at,
459
459
+
vec![
460
460
+
OpKey {
461
461
+
cid: "cid".to_string(),
462
462
+
did: "did".to_string(),
463
463
+
},
464
464
+
OpKey {
465
465
+
cid: "cid2".to_string(),
466
466
+
did: "did".to_string(),
467
467
+
},
468
468
+
]
469
469
+
);
470
470
+
}
471
471
+
472
472
+
#[test]
473
473
+
fn test_add_new_same_time_dup_after() {
474
474
+
// make an op with a different OpKey
475
475
+
let mut op = valid_op();
476
476
+
op.as_object_mut()
477
477
+
.unwrap()
478
478
+
.insert("cid".to_string(), "cid2".into());
479
479
+
let mut page = ExportPage {
480
480
+
ops: vec![op.to_string(), valid_op().to_string()],
481
481
+
};
482
482
+
483
483
+
let mut state = base_state();
484
484
+
state.apply_to_next(&mut page);
485
485
+
assert_eq!(state.last_at, Dt::from_timestamp(FIVES_TS, 0).unwrap());
486
486
+
assert_eq!(
487
487
+
state.keys_at,
488
488
+
vec![
489
489
+
OpKey {
490
490
+
cid: "cid".to_string(),
491
491
+
did: "did".to_string(),
492
492
+
},
493
493
+
OpKey {
494
494
+
cid: "cid2".to_string(),
495
495
+
did: "did".to_string(),
496
496
+
},
497
497
+
]
498
498
+
);
499
499
+
}
500
500
+
501
501
+
#[test]
502
502
+
fn test_add_new_same_time_blank_after() {
503
503
+
// make an op with a different OpKey
504
504
+
let mut op = valid_op();
505
505
+
op.as_object_mut()
506
506
+
.unwrap()
507
507
+
.insert("cid".to_string(), "cid2".into());
508
508
+
let mut page = ExportPage {
509
509
+
ops: vec![op.to_string(), "".to_string()],
510
510
+
};
511
511
+
512
512
+
let mut state = base_state();
513
513
+
state.apply_to_next(&mut page);
514
514
+
assert_eq!(state.last_at, Dt::from_timestamp(FIVES_TS, 0).unwrap());
515
515
+
assert_eq!(
516
516
+
state.keys_at,
517
517
+
vec![
518
518
+
OpKey {
519
519
+
cid: "cid".to_string(),
520
520
+
did: "did".to_string(),
521
521
+
},
522
522
+
OpKey {
523
523
+
cid: "cid2".to_string(),
524
524
+
did: "did".to_string(),
525
525
+
},
526
526
+
]
527
527
+
);
528
528
+
}
529
529
+
530
530
+
#[test]
531
531
+
fn test_add_new_next_time() {
532
532
+
let mut page = ExportPage {
533
533
+
ops: vec![next_op().to_string()],
534
534
+
};
535
535
+
let mut state = base_state();
536
536
+
state.apply_to_next(&mut page);
537
537
+
assert_eq!(state.last_at, Dt::from_timestamp(NEXT_TS, 0).unwrap());
538
538
+
assert_eq!(
539
539
+
state.keys_at,
540
540
+
vec![OpKey {
541
541
+
cid: "cidnext".to_string(),
542
542
+
did: "didnext".to_string(),
543
543
+
},]
544
544
+
);
545
545
+
}
546
546
+
547
547
+
#[test]
548
548
+
fn test_add_new_next_time_with_dup() {
549
549
+
let mut page = ExportPage {
550
550
+
ops: vec![valid_op().to_string(), next_op().to_string()],
551
551
+
};
552
552
+
let mut state = base_state();
553
553
+
state.apply_to_next(&mut page);
554
554
+
assert_eq!(state.last_at, Dt::from_timestamp(NEXT_TS, 0).unwrap());
555
555
+
assert_eq!(
556
556
+
state.keys_at,
557
557
+
vec![OpKey {
558
558
+
cid: "cidnext".to_string(),
559
559
+
did: "didnext".to_string(),
560
560
+
},]
561
561
+
);
562
562
+
assert_eq!(page.ops.len(), 1);
563
563
+
assert_eq!(page.ops[0], next_op().to_string());
564
564
+
}
565
565
+
566
566
+
#[test]
567
567
+
fn test_add_new_next_time_with_dup_and_new_prev_same_time() {
568
568
+
// make an op with a different OpKey
569
569
+
let mut op = valid_op();
570
570
+
op.as_object_mut()
571
571
+
.unwrap()
572
572
+
.insert("cid".to_string(), "cid2".into());
573
573
+
574
574
+
let mut page = ExportPage {
575
575
+
ops: vec![
576
576
+
valid_op().to_string(), // should get dropped
577
577
+
op.to_string(), // should be kept
578
578
+
next_op().to_string(),
579
579
+
],
580
580
+
};
581
581
+
let mut state = base_state();
582
582
+
state.apply_to_next(&mut page);
583
583
+
assert_eq!(state.last_at, Dt::from_timestamp(NEXT_TS, 0).unwrap());
584
584
+
assert_eq!(
585
585
+
state.keys_at,
586
586
+
vec![OpKey {
587
587
+
cid: "cidnext".to_string(),
588
588
+
did: "didnext".to_string(),
589
589
+
},]
590
590
+
);
591
591
+
assert_eq!(page.ops.len(), 2);
592
592
+
assert_eq!(page.ops[0], op.to_string());
593
593
+
assert_eq!(page.ops[1], next_op().to_string());
594
594
+
}
595
595
+
596
596
+
#[test]
597
597
+
fn test_add_new_next_time_with_dup_later_and_new_prev_same_time() {
598
598
+
// make an op with a different OpKey
599
599
+
let mut op = valid_op();
600
600
+
op.as_object_mut()
601
601
+
.unwrap()
602
602
+
.insert("cid".to_string(), "cid2".into());
603
603
+
604
604
+
let mut page = ExportPage {
605
605
+
ops: vec![
606
606
+
op.to_string(), // should be kept
607
607
+
valid_op().to_string(), // should get dropped
608
608
+
next_op().to_string(),
609
609
+
],
610
610
+
};
611
611
+
let mut state = base_state();
612
612
+
state.apply_to_next(&mut page);
613
613
+
assert_eq!(state.last_at, Dt::from_timestamp(NEXT_TS, 0).unwrap());
614
614
+
assert_eq!(
615
615
+
state.keys_at,
616
616
+
vec![OpKey {
617
617
+
cid: "cidnext".to_string(),
618
618
+
did: "didnext".to_string(),
619
619
+
},]
620
620
+
);
621
621
+
assert_eq!(page.ops.len(), 2);
622
622
+
assert_eq!(page.ops[0], op.to_string());
623
623
+
assert_eq!(page.ops[1], next_op().to_string());
624
624
+
}
625
625
+
}