···11+-- Add a NOT NULL cid column to records; pre-existing rows are backfilled with ''
22+33+ALTER TABLE records ADD cid TEXT;
44+55+UPDATE records SET cid = '' WHERE cid IS NULL;
66+77+ALTER TABLE records ALTER COLUMN cid SET NOT NULL;
+23-9
src/backfill/mod.rs
···8899use std::str::FromStr;
10101111+use ipld_core::cid::multibase::Base;
1112use jacquard::url::Url;
1213use sqlx::{Pool, Postgres, query};
1314use thiserror::Error;
···2021pub mod load_car;
2122pub mod parse_car;
22232323-const DB_MAX_REQ: usize = 65535;
2424-2524#[derive(Error, Debug)]
2625pub enum Error {
2726 #[error("Error parsing TID: {}", .0)]
···3231 Db(#[from] sqlx::Error),
3332 #[error("{}", .0)]
3433 ParseCar(#[from] crate::backfill::parse_car::Error),
3434+ #[error("Error processing cid: {}", .0)]
3535+ Cid(#[from] ipld_core::cid::Error),
3536}
36373738pub async fn backfill(
···5253 // only real overhead is network latency which would be ~= anyway
5354 let _ = query!("DELETE FROM records").execute(conn).await?;
54555555- let data = parse_car(&car).await?;
5656- let data = data.chunks(DB_MAX_REQ / 4);
5656+ let data = parse_car(&car)
5757+ .await?
5858+ .into_iter()
5959+ .map(|(collection, rkey, cid, value)| {
6060+ Ok::<_, Error>((
6161+ collection,
6262+ rkey,
6363+ cid.to_string_of_base(Base::Base32Lower)?,
6464+ value,
6565+ ))
6666+ })
6767+ .collect::<Result<Vec<_>, _>>()?;
6868+ let data = data.chunks(config::DB_MAX_REQ / 4);
57695870 if let Some(time) = time {
5971 println!("Parsed car file ({:?})", time.elapsed());
···6173 let time = time.map(|_| std::time::Instant::now());
62746375 for data in data {
6464- let mut query = sqlx::QueryBuilder::new("INSERT INTO records(collection, rkey, record) ");
7676+ let mut query =
7777+ sqlx::QueryBuilder::new("INSERT INTO records(collection, rkey, cid, record) ");
6578 query.push_values(
6666- data,
7979+ data.to_owned(),
6780 |mut b: sqlx::query_builder::Separated<'_, '_, Postgres, &'static str>, data| {
6868- b.push_bind(data.0.0.clone())
6969- .push_bind(data.0.1.clone())
7070- .push_bind(data.1.clone());
8181+ b.push_bind(data.0)
8282+ .push_bind(data.1)
8383+ .push_bind(data.2)
8484+ .push_bind(data.3);
7185 },
7286 );
7387
+15-10
src/backfill/parse_car.rs
···2020 IpldToJson(#[from] crate::utils::ipld_json::Error),
2121 #[error("Could not break {} into a collection and rkey", .0)]
2222 MalformedRecordKey(SmolStr),
2323+ #[error("Could not generate cid for commit: {}", .0)]
2424+ Cid(#[from] ipld_core::cid::Error),
2325}
24262525-pub type AccountData = Vec<((String, String), Value)>;
2727+pub type AccountData = Vec<(String, String, CidGeneric<64>, Value)>;
26282729pub async fn parse_car(car: &Car) -> Result<AccountData, Error> {
2828- let (keys, records): (Vec<SmolStr>, Vec<CidGeneric<64>>) =
3030+ let (keys, record_cids): (Vec<SmolStr>, Vec<CidGeneric<64>>) =
2931 car.mst.leaves().await?.into_iter().unzip();
30323133 // convert keys into (collection, rkey)
···4749 .collect::<Result<Vec<_>, _>>()?;
48504951 // convert records into Value
5050- let records = &records[..];
5252+ let record_cids = &record_cids[..];
5153 let records = car
5254 .storage
5353- .get_many(records)
5555+ .get_many(record_cids)
5456 .await?
5557 .into_iter()
5658 .collect::<Option<Vec<_>>>()
5757- .ok_or_else(|| Error::MissingCid)?
5858- .into_iter()
5959- .map(|x| {
6060- let data = serde_ipld_dagcbor::from_slice::<Ipld>(&x)?;
5959+ .ok_or_else(|| Error::MissingCid)?;
6060+6161+ let records = zip(records, record_cids)
6262+ .map(|(bytes, cid)| {
6363+ let data = serde_ipld_dagcbor::from_slice::<Ipld>(&bytes)?;
6164 let value = ipld_to_json_value(&data)?;
6262- Ok::<_, Error>(value)
6565+ Ok::<(CidGeneric<64>, Value), Error>((cid.to_owned(), value))
6366 })
6467 .collect::<Result<Vec<_>, _>>()?;
65686666- let data = zip(keys, records).collect::<Vec<((_, _), _)>>();
6969+ let data = zip(keys, records)
7070+ .map(|((collection, rkey), (cid, record))| (collection, rkey, cid, record))
7171+ .collect();
67726873 Ok(data)
6974}
+2
src/config.rs
···77use std::env;
88use std::sync::LazyLock;
991010+pub const DB_MAX_REQ: usize = 65535;
1111+1012// This should be loaded before the program starts any threads:
1113// if its initializer panics, the lazy cell is poisoned and every thread that accesses it will panic too.
1214pub static USER: LazyLock<Did<'static>> = LazyLock::new(|| {