···1+-- Add a NOT NULL `cid` text column to `records`; rows that already exist are backfilled with ''
2+3+ALTER TABLE records ADD cid TEXT;
4+5+UPDATE records SET cid = '' WHERE cid IS NULL;
6+7+ALTER TABLE records ALTER COLUMN cid SET NOT NULL;
+23-9
src/backfill/mod.rs
···89use std::str::FromStr;
10011use jacquard::url::Url;
12use sqlx::{Pool, Postgres, query};
13use thiserror::Error;
···20pub mod load_car;
21pub mod parse_car;
2223-const DB_MAX_REQ: usize = 65535;
24-25#[derive(Error, Debug)]
26pub enum Error {
27 #[error("Error parsing TID: {}", .0)]
···32 Db(#[from] sqlx::Error),
33 #[error("{}", .0)]
34 ParseCar(#[from] crate::backfill::parse_car::Error),
0035}
3637pub async fn backfill(
···52 // only real overhead is network latency which would be ~= anyway
53 let _ = query!("DELETE FROM records").execute(conn).await?;
5455- let data = parse_car(&car).await?;
56- let data = data.chunks(DB_MAX_REQ / 4);
000000000005758 if let Some(time) = time {
59 println!("Parsed car file ({:?})", time.elapsed());
···61 let time = time.map(|_| std::time::Instant::now());
6263 for data in data {
64- let mut query = sqlx::QueryBuilder::new("INSERT INTO records(collection, rkey, record) ");
065 query.push_values(
66- data,
67 |mut b: sqlx::query_builder::Separated<'_, '_, Postgres, &'static str>, data| {
68- b.push_bind(data.0.0.clone())
69- .push_bind(data.0.1.clone())
70- .push_bind(data.1.clone());
071 },
72 );
73
···89use std::str::FromStr;
1011+use ipld_core::cid::multibase::Base;
12use jacquard::url::Url;
13use sqlx::{Pool, Postgres, query};
14use thiserror::Error;
···21pub mod load_car;
22pub mod parse_car;
230024#[derive(Error, Debug)]
25pub enum Error {
26 #[error("Error parsing TID: {}", .0)]
···31 Db(#[from] sqlx::Error),
32 #[error("{}", .0)]
33 ParseCar(#[from] crate::backfill::parse_car::Error),
34+ #[error("Error processing cid: {}", .0)]
35+ Cid(#[from] ipld_core::cid::Error),
36}
3738pub async fn backfill(
···53 // only real overhead is network latency which would be ~= anyway
54 let _ = query!("DELETE FROM records").execute(conn).await?;
5556+ let data = parse_car(&car)
57+ .await?
58+ .into_iter()
59+ .map(|(collection, rkey, cid, value)| {
60+ Ok::<_, Error>((
61+ collection,
62+ rkey,
63+ cid.to_string_of_base(Base::Base32Lower)?,
64+ value,
65+ ))
66+ })
67+ .collect::<Result<Vec<_>, _>>()?;
68+ let data = data.chunks(config::DB_MAX_REQ / 4);
6970 if let Some(time) = time {
71 println!("Parsed car file ({:?})", time.elapsed());
···73 let time = time.map(|_| std::time::Instant::now());
7475 for data in data {
76+ let mut query =
77+ sqlx::QueryBuilder::new("INSERT INTO records(collection, rkey, cid, record) ");
78 query.push_values(
79+ data.to_owned(),
80 |mut b: sqlx::query_builder::Separated<'_, '_, Postgres, &'static str>, data| {
81+ b.push_bind(data.0)
82+ .push_bind(data.1)
83+ .push_bind(data.2)
84+ .push_bind(data.3);
85 },
86 );
87
+15-10
src/backfill/parse_car.rs
···20 IpldToJson(#[from] crate::utils::ipld_json::Error),
21 #[error("Could not break {} into a collection and rkey", .0)]
22 MalformedRecordKey(SmolStr),
0023}
2425-pub type AccountData = Vec<((String, String), Value)>;
2627pub async fn parse_car(car: &Car) -> Result<AccountData, Error> {
28- let (keys, records): (Vec<SmolStr>, Vec<CidGeneric<64>>) =
29 car.mst.leaves().await?.into_iter().unzip();
3031 // convert keys into (collection, rkey)
···47 .collect::<Result<Vec<_>, _>>()?;
4849 // convert records into Value
50- let records = &records[..];
51 let records = car
52 .storage
53- .get_many(records)
54 .await?
55 .into_iter()
56 .collect::<Option<Vec<_>>>()
57- .ok_or_else(|| Error::MissingCid)?
58- .into_iter()
59- .map(|x| {
60- let data = serde_ipld_dagcbor::from_slice::<Ipld>(&x)?;
061 let value = ipld_to_json_value(&data)?;
62- Ok::<_, Error>(value)
63 })
64 .collect::<Result<Vec<_>, _>>()?;
6566- let data = zip(keys, records).collect::<Vec<((_, _), _)>>();
006768 Ok(data)
69}
···20 IpldToJson(#[from] crate::utils::ipld_json::Error),
21 #[error("Could not break {} into a collection and rkey", .0)]
22 MalformedRecordKey(SmolStr),
23+ #[error("Could not generate cid for commit: {}", .0)]
24+ Cid(#[from] ipld_core::cid::Error),
25}
/// Flattened contents of one account's CAR file, one tuple per record, laid out as
/// `(collection, rkey, record cid, record value)`.
2627+pub type AccountData = Vec<(String, String, CidGeneric<64>, Value)>;
2829pub async fn parse_car(car: &Car) -> Result<AccountData, Error> {
30+ let (keys, record_cids): (Vec<SmolStr>, Vec<CidGeneric<64>>) =
31 car.mst.leaves().await?.into_iter().unzip();
3233 // convert keys into (collection, rkey)
···49 .collect::<Result<Vec<_>, _>>()?;
5051 // convert records into Value
52+ let record_cids = &record_cids[..];
53 let records = car
54 .storage
55+ .get_many(record_cids)
56 .await?
57 .into_iter()
58 .collect::<Option<Vec<_>>>()
59+ .ok_or_else(|| Error::MissingCid)?;
60+61+ let records = zip(records, record_cids)
62+ .map(|(bytes, cid)| {
63+ let data = serde_ipld_dagcbor::from_slice::<Ipld>(&bytes)?;
64 let value = ipld_to_json_value(&data)?;
65+ Ok::<(CidGeneric<64>, Value), Error>((cid.to_owned(), value))
66 })
67 .collect::<Result<Vec<_>, _>>()?;
6869+ let data = zip(keys, records)
70+ .map(|((collection, rkey), (cid, record))| (collection, rkey, cid, record))
71+ .collect();
7273 Ok(data)
74}
+2
src/config.rs
···7use std::env;
8use std::sync::LazyLock;
90010// this should be loaded before the program starts any threads
11// if this panics threads that access it will be poisoned
12pub static USER: LazyLock<Did<'static>> = LazyLock::new(|| {
···7use std::env;
8use std::sync::LazyLock;
/// Budget of bound values for a single database request. The backfill divides this by
/// four (it binds four values per inserted row) to pick how many rows go into each INSERT.
/// NOTE(review): 65535 presumably mirrors PostgreSQL's per-statement bind-parameter cap
/// (`u16::MAX`) — confirm before changing.
910+pub const DB_MAX_REQ: usize = 65535;
11+12// this should be loaded before the program starts any threads
13// if this panics threads that access it will be poisoned
14pub static USER: LazyLock<Did<'static>> = LazyLock::new(|| {