···4444 /// records with multiple links are single-counted.
4545 /// for LSM stores, deleted links don't decrement this, and updated records with any links will likely increment it.
4646 pub linking_records: u64,
4747+4848+ /// first jetstream cursor when this instance first started
4949+ pub started_at: Option<u64>,
5050+5151+ /// anything else we want to throw in
5252+ pub other_data: HashMap<String, u64>,
4753}
48544955pub trait LinkStorage: Send + Sync {
+162-3
constellation/src/storage/rocks_store.rs
···2323 Arc,
2424};
2525use std::thread;
2626-use std::time::{Duration, Instant};
2626+use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
2727use tokio_util::sync::CancellationToken;
28282929static DID_IDS_CF: &str = "did_ids";
···3232static LINK_TARGETS_CF: &str = "link_targets";
33333434static JETSTREAM_CURSOR_KEY: &str = "jetstream_cursor";
3535+static STARTED_AT_KEY: &str = "jetstream_first_cursor";
3636+// add reverse mappings for targets if this db was running before that was a thing
3737+static TARGET_ID_REPAIR_STATE_KEY: &str = "target_id_table_repair_state";
3838+3939+static COZY_FIRST_CURSOR: u64 = 1_738_083_600_000_000; // constellation.microcosm.blue started
4040+4141+#[derive(Debug, Clone, Serialize, Deserialize)]
4242+struct TargetIdRepairState {
4343+ /// start time for repair, microseconds timestamp
4444+ current_us_started_at: u64,
4545+ /// id table's latest id when repair started
4646+ id_when_started: u64,
4747+ /// id table id
4848+ latest_repaired_i: u64,
4949+}
5050+impl AsRocksValue for TargetIdRepairState {}
5151+impl ValueFromRocks for TargetIdRepairState {}
35523653// todo: actually understand and set these options probably better
3754fn rocks_opts_base() -> Options {
···139156 _key_marker: PhantomData,
140157 _val_marker: PhantomData,
141158 name: name.into(),
142142- id_seq: Arc::new(AtomicU64::new(0)), // zero is "uninint", first seq num will be 1
159159+ id_seq: Arc::new(AtomicU64::new(0)), // zero is "uninit", first seq num will be 1
143160 }
144161 }
145162 fn get_id_val(
···228245 }
229246}
/// Current wall-clock time in microseconds since the Unix epoch.
///
/// Panics if the system clock reports a time earlier than the epoch.
fn now() -> u64 {
    let since_epoch = UNIX_EPOCH.elapsed().unwrap();
    since_epoch.as_micros() as u64
}
254254+231255impl RocksStorage {
232256 pub fn new(path: impl AsRef<Path>) -> Result<Self> {
233257 Self::describe_metrics();
234234- RocksStorage::open_readmode(path, false)
258258+ let me = RocksStorage::open_readmode(path, false)?;
259259+ me.global_init()?;
260260+ Ok(me)
235261 }
236262237263 pub fn open_readonly(path: impl AsRef<Path>) -> Result<Self> {
···242268 let did_id_table = IdTable::setup(DID_IDS_CF);
243269 let target_id_table = IdTable::setup(TARGET_IDS_CF);
244270271271+ // note: global stuff like jetstream cursor goes in the default cf
272272+ // these are bonus extra cfs
245273 let cfs = vec![
246274 // id reference tables
247275 did_id_table.cf_descriptor(),
···275303 is_writer: !readonly,
276304 backup_task: None.into(),
277305 })
306306+ }
307307+308308+ fn global_init(&self) -> Result<()> {
309309+ let first_run = self.db.get(JETSTREAM_CURSOR_KEY)?.is_some();
310310+ if first_run {
311311+ self.db.put(STARTED_AT_KEY, _rv(now()))?;
312312+313313+ // hack / temporary: if we're a new db, put in a completed repair
314314+ // state so we don't run repairs (repairs are for old-code dbs)
315315+ let completed = TargetIdRepairState {
316316+ id_when_started: 0,
317317+ current_us_started_at: 0,
318318+ latest_repaired_i: 0,
319319+ };
320320+ self.db.put(TARGET_ID_REPAIR_STATE_KEY, _rv(completed))?;
321321+ }
322322+ Ok(())
323323+ }
324324+325325+ pub fn run_repair(&self, breather: Duration, stay_alive: CancellationToken) -> Result<bool> {
326326+ let mut state = match self
327327+ .db
328328+ .get(TARGET_ID_REPAIR_STATE_KEY)?
329329+ .map(|s| _vr(&s))
330330+ .transpose()?
331331+ {
332332+ Some(s) => s,
333333+ None => TargetIdRepairState {
334334+ id_when_started: self.did_id_table.priv_id_seq,
335335+ current_us_started_at: now(),
336336+ latest_repaired_i: 0,
337337+ },
338338+ };
339339+340340+ eprintln!("initial repair state: {state:?}");
341341+342342+ let cf = self.db.cf_handle(TARGET_IDS_CF).unwrap();
343343+344344+ let mut iter = self.db.raw_iterator_cf(&cf);
345345+ iter.seek_to_first();
346346+347347+ eprintln!("repair iterator sent to first key");
348348+349349+ // skip ahead if we're done some, or take a single first step
350350+ for _ in 0..state.latest_repaired_i {
351351+ iter.next();
352352+ }
353353+354354+ eprintln!(
355355+ "repair iterator skipped to {}th key",
356356+ state.latest_repaired_i
357357+ );
358358+359359+ let mut maybe_done = false;
360360+361361+ while !stay_alive.is_cancelled() && !maybe_done {
362362+ // let mut batch = WriteBatch::default();
363363+364364+ let mut any_written = false;
365365+366366+ for _ in 0..1000 {
367367+ if state.latest_repaired_i % 1_000_000 == 0 {
368368+ eprintln!("target iter at {}", state.latest_repaired_i);
369369+ }
370370+ state.latest_repaired_i += 1;
371371+372372+ if !iter.valid() {
373373+ eprintln!("invalid iter, are we done repairing?");
374374+ maybe_done = true;
375375+ break;
376376+ };
377377+378378+ // eprintln!("iterator seems to be valid! getting the key...");
379379+ let raw_key = iter.key().unwrap();
380380+ if raw_key.len() == 8 {
381381+ // eprintln!("found an 8-byte key, skipping it since it's probably an id...");
382382+ iter.next();
383383+ continue;
384384+ }
385385+ let target: TargetKey = _kr::<TargetKey>(raw_key)?;
386386+ let target_id: TargetId = _vr(iter.value().unwrap())?;
387387+388388+ self.db
389389+ .put_cf(&cf, target_id.id().to_be_bytes(), _rv(&target))?;
390390+ any_written = true;
391391+ iter.next();
392392+ }
393393+394394+ if any_written {
395395+ self.db
396396+ .put(TARGET_ID_REPAIR_STATE_KEY, _rv(state.clone()))?;
397397+ std::thread::sleep(breather);
398398+ }
399399+ }
400400+401401+ eprintln!("repair iterator done.");
402402+403403+ Ok(false)
278404 }
279405280406 pub fn start_backup(
···11791305 .map(|s| s.parse::<u64>())
11801306 .transpose()?
11811307 .unwrap_or(0);
13081308+ let started_at = self
13091309+ .db
13101310+ .get(STARTED_AT_KEY)?
13111311+ .map(|c| _vr(&c))
13121312+ .transpose()?
13131313+ .unwrap_or(COZY_FIRST_CURSOR);
13141314+13151315+ let other_data = self
13161316+ .db
13171317+ .get(TARGET_ID_REPAIR_STATE_KEY)?
13181318+ .map(|s| _vr(&s))
13191319+ .transpose()?
13201320+ .map(
13211321+ |TargetIdRepairState {
13221322+ current_us_started_at,
13231323+ id_when_started,
13241324+ latest_repaired_i,
13251325+ }| {
13261326+ HashMap::from([
13271327+ ("current_us_started_at".to_string(), current_us_started_at),
13281328+ ("id_when_started".to_string(), id_when_started),
13291329+ ("latest_repaired_i".to_string(), latest_repaired_i),
13301330+ ])
13311331+ },
13321332+ )
13331333+ .unwrap_or(HashMap::default());
13341334+11821335 Ok(StorageStats {
11831336 dids,
11841337 targetables,
11851338 linking_records,
13391339+ started_at: Some(started_at),
13401340+ other_data,
11861341 })
11871342 }
11881343}
···12081363impl AsRocksValue for &TargetId {}
12091364impl KeyFromRocks for TargetKey {}
12101365impl ValueFromRocks for TargetId {}
13661366+13671367+// temp?
13681368+impl KeyFromRocks for TargetId {}
13691369+impl AsRocksValue for &TargetKey {}
1211137012121371// target_links table
12131372impl AsRocksKey for &TargetId {}
+12-2
constellation/templates/hello.html.j2
···1919 <p>It works by recursively walking <em>all</em> records coming through the firehose, searching for anything that looks like a link. Links are indexed by the target they point at, the collection the record came from, and the JSON path to the link in that record.</p>
20202121 <p>
2222- This server has indexed <span class="stat">{{ stats.linking_records|human_number }}</span> links between <span class="stat">{{ stats.targetables|human_number }}</span> targets and sources from <span class="stat">{{ stats.dids|human_number }}</span> identities over <span class="stat">{{ days_indexed|human_number }}</span> days.<br/>
2222+ This server has indexed <span class="stat">{{ stats.linking_records|human_number }}</span> links between <span class="stat">{{ stats.targetables|human_number }}</span> targets and sources from <span class="stat">{{ stats.dids|human_number }}</span> identities over <span class="stat">
2323+ {%- if let Some(days) = days_indexed %}
2424+ {{ days|human_number }}
2525+ {% else %}
2626+ ???
2727+ {% endif -%}
2828+ </span> days.<br/>
2329 <small>(indexing new records in real time, backfill coming soon!)</small>
2430 </p>
25312626- <p>But feel free to use it! If you want to be nice, put your project name and bsky username (or email) in your user-agent header for api requests.</p>
3232+ {# {% for k, v in stats.other_data.iter() %}
3333+ <p><strong>{{ k }}</strong>: {{ v }}</p>
3434+ {% endfor %} #}
3535+3636+ <p>You're welcome to use this public instance! Please do not build the torment nexus. If you want to be nice, put your project name and bsky username (or email) in your user-agent header for api requests.</p>
273728382939 <h2>API Endpoints</h2>