···1+use std::{cmp::Ordering, str::FromStr};
2+3+use jacquard::{types::tid::Tid, url::Url};
4+use sqlx::{Pool, Postgres, query};
5+use thiserror::Error;
6+7+use crate::{
8+ backfill::{load_car::load_car, parse_car::parse_car},
9+ config,
10+};
11+12+pub mod load_car;
13+pub mod parse_car;
/// Per-statement budget used to size batched `INSERT`s into `records`.
///
/// Equal to `u16::MAX` (65535); presumably chosen to match the bind-parameter
/// limit of a single Postgres statement — confirm before raising it.
const DB_MAX_REQ: usize = u16::MAX as usize;
16+17+#[derive(Error, Debug)]
18+pub enum Error {
19+ #[error("Error parsing TID: {}", .0)]
20+ TidParse(#[from] jacquard::types::string::AtStrError),
21+ #[error("{}", .0)]
22+ GetCarError(#[from] crate::backfill::load_car::Error),
23+ #[error(
24+ "The database claims to be more up to date than the PDS.
25+Most likely either the PDS or repo is broken, or the database has been corrupted.
26+Check your PDS repo is working and/or drop the database."
27+ )]
28+ DbTidTooLow,
29+ #[error("Database error: {}", .0)]
30+ DbError(#[from] sqlx::Error),
31+ #[error("{}", .0)]
32+ ParseCarError(#[from] crate::backfill::parse_car::Error),
33+}
34+35+/// backfill works as follows (https://docs.bsky.app/docs/advanced-guides/backfill)
36+///
37+/// 1. resolve did -> pds
38+/// 2. stream com.atproto.sync.subscribeRepos to a buffer
39+/// 3. get a car file from com.atproto.sync.getRepo (diff if a rev is stored in database)
40+/// 4. apply car file diff to database (incl rev)
41+/// 5. start playing events from buffer
42+/// 1. drop all events from other users
43+/// 2. drop all events with a lower rev than current rev
44+/// 3. apply event & update rev
45+/// 4. (non blocking) get blobs if missing
46+/// 5. (non blocking) parse for strongref and store strongrefs
47+/// 6. (non blocking) trigger garbage collection of blobs and strongref
48+/// 6. once buffer is empty, parse events live
49+pub async fn backfill(pds: &str, conn: &Pool<Postgres>) -> Result<(), Error> {
50+ let db_rev = if let Some(rev) = query!(
51+ "SELECT (rev) FROM meta WHERE did = $1",
52+ config::USER.to_string()
53+ )
54+ .fetch_one(conn)
55+ .await
56+ .ok()
57+ .and_then(|x| x.rev)
58+ {
59+ Tid::from_str(&rev)?
60+ } else {
61+ Tid::from_time(0, 0)
62+ };
63+64+ let pds = Url::from_str(&format!("https://{pds}/")).unwrap();
65+ let car = load_car(config::USER.clone(), pds).await?;
66+67+ match car.partial_cmp(&db_rev) {
68+ Some(val) => match val {
69+ // car rev newer than db rev
70+ // continue on; every other branch diverges
71+ Ordering::Greater => {}
72+ // revisions are the same so we can skip backfill
73+ Ordering::Equal => return Ok(()),
74+ // db rev newer than car rev
75+ // this means the db or car file is borked
76+ // panic out and let the user deal with things
77+ Ordering::Less => return Err(Error::DbTidTooLow),
78+ // panic!(
79+ // r"The database claims to be more up to date than the PDS.
80+ // Most likely either the PDS or repo is broken, or the database has been corrupted.
81+ // Check your PDS repo is working and/or drop the database."
82+ // ),
83+ },
84+ // cant compare rev so assume all is ok and continue
85+ None => {}
86+ };
87+88+ // erase all old records and return if it fails
89+ // we dont use diffs bc theyre complex and the overhead is minimal rn
90+ // only real overhead is network latency which would be ~= anyway
91+ let _ = query!("DELETE FROM records").execute(conn).await?;
92+93+ let data = parse_car(&car).await?;
94+ let mut data = data.chunks(DB_MAX_REQ / 4);
95+96+ while let Some(data) = data.next() {
97+ let mut query = sqlx::QueryBuilder::new("INSERT INTO records(collection, rkey, record) ");
98+ query.push_values(
99+ data,
100+ |mut b: sqlx::query_builder::Separated<'_, '_, Postgres, &'static str>, data| {
101+ b.push_bind(data.0.0.clone())
102+ .push_bind(data.0.1.clone())
103+ .push_bind(data.1.clone());
104+ },
105+ );
106+107+ match query.build().execute(conn).await {
108+ Err(err) => {
109+ // couldnt backfill so go nuclear
110+ // this is program startup so its prolly safe lol
111+ println!("Got error \"{}\"\nDeleting records and exiting...", err);
112+ let _ = query!("DELETE FROM records").execute(conn).await?;
113+ panic!()
114+ }
115+ _ => {}
116+ };
117+ }
118+119+ match query!(
120+ "UPDATE meta SET rev = $1 WHERE did = $2",
121+ car.rev.to_string(),
122+ config::USER.to_string()
123+ )
124+ .execute(conn)
125+ .await
126+ {
127+ Err(err) => {
128+ // couldnt save tid so go nuclear
129+ // this is program startup so its prolly safe lol
130+ println!("Got error \"{}\"\nDeleting records and exiting...", err);
131+ let _ = query!("DELETE FROM records").execute(conn).await?;
132+ panic!()
133+ }
134+ _ => {}
135+ };
136+137+ Ok(())
138+}