···220220 }
221221 };
222222223223- let car_bytes = match generate_full_backup(&state.block_store, &head_cid).await {
224224- Ok(bytes) => bytes,
225225- Err(e) => {
226226- error!("Failed to generate CAR: {:?}", e);
227227- return ApiError::InternalError(Some("Failed to generate backup".into()))
228228- .into_response();
229229- }
230230- };
223223+ let car_bytes =
224224+ match generate_full_backup(&state.db, &state.block_store, user.id, &head_cid).await {
225225+ Ok(bytes) => bytes,
226226+ Err(e) => {
227227+ error!("Failed to generate CAR: {:?}", e);
228228+ return ApiError::InternalError(Some("Failed to generate backup".into()))
229229+ .into_response();
230230+ }
231231+ };
231232232233 let block_count = crate::scheduled::count_car_blocks(&car_bytes);
233234 let size_bytes = car_bytes.len() as i64;
+34-16
src/api/repo/record/batch.rs
···388388 return ApiError::InternalError(Some("Failed to persist MST".into())).into_response();
389389 }
390390 };
391391- let mut relevant_blocks = std::collections::BTreeMap::new();
391391+ let mut new_mst_blocks = std::collections::BTreeMap::new();
392392+ let mut old_mst_blocks = std::collections::BTreeMap::new();
392393 for key in &modified_keys {
393393- if mst
394394- .blocks_for_path(key, &mut relevant_blocks)
395395- .await
396396- .is_err()
397397- {
394394+ if mst.blocks_for_path(key, &mut new_mst_blocks).await.is_err() {
398395 return ApiError::InternalError(Some("Failed to get new MST blocks for path".into()))
399396 .into_response();
400397 }
401398 if original_mst
402402- .blocks_for_path(key, &mut relevant_blocks)
399399+ .blocks_for_path(key, &mut old_mst_blocks)
403400 .await
404401 .is_err()
405402 {
···407404 .into_response();
408405 }
409406 }
410410- let mut written_cids = tracking_store.get_all_relevant_cids();
411411- for cid in relevant_blocks.keys() {
412412- if !written_cids.contains(cid) {
413413- written_cids.push(*cid);
407407+ let mut relevant_blocks = new_mst_blocks.clone();
408408+ relevant_blocks.extend(old_mst_blocks.iter().map(|(k, v)| (*k, v.clone())));
409409+ let written_cids: Vec<Cid> = tracking_store
410410+ .get_all_relevant_cids()
411411+ .into_iter()
412412+ .chain(relevant_blocks.keys().copied())
413413+ .collect::<std::collections::HashSet<_>>()
414414+ .into_iter()
415415+ .collect();
416416+ let written_cids_str: Vec<String> = written_cids.iter().map(|c| c.to_string()).collect();
417417+ let prev_record_cids = ops.iter().filter_map(|op| match op {
418418+ RecordOp::Update {
419419+ prev: Some(cid), ..
414420 }
415415- }
416416- let written_cids_str = written_cids
417417- .iter()
418418- .map(|c| c.to_string())
419419- .collect::<Vec<_>>();
421421+ | RecordOp::Delete {
422422+ prev: Some(cid), ..
423423+ } => Some(*cid),
424424+ _ => None,
425425+ });
426426+ let obsolete_cids: Vec<Cid> = std::iter::once(current_root_cid)
427427+ .chain(
428428+ old_mst_blocks
429429+ .keys()
430430+ .filter(|cid| !new_mst_blocks.contains_key(*cid))
431431+ .copied(),
432432+ )
433433+ .chain(prev_record_cids)
434434+ .collect::<std::collections::HashSet<_>>()
435435+ .into_iter()
436436+ .collect();
420437 let commit_res = match commit_and_log(
421438 &state,
422439 CommitParams {
···428445 ops,
429446 blocks_cids: &written_cids_str,
430447 blobs: &all_blob_cids,
448448+ obsolete_cids,
431449 },
432450 )
433451 .await
+24-13
src/api/repo/record/delete.rs
···129129 rkey: rkey_for_audit.clone(),
130130 prev: prev_record_cid,
131131 };
132132- let mut relevant_blocks = std::collections::BTreeMap::new();
132132+ let mut new_mst_blocks = std::collections::BTreeMap::new();
133133+ let mut old_mst_blocks = std::collections::BTreeMap::new();
133134 if new_mst
134134- .blocks_for_path(&key, &mut relevant_blocks)
135135+ .blocks_for_path(&key, &mut new_mst_blocks)
135136 .await
136137 .is_err()
137138 {
···139140 .into_response();
140141 }
141142 if mst
142142- .blocks_for_path(&key, &mut relevant_blocks)
143143+ .blocks_for_path(&key, &mut old_mst_blocks)
143144 .await
144145 .is_err()
145146 {
146147 return ApiError::InternalError(Some("Failed to get old MST blocks for path".into()))
147148 .into_response();
148149 }
149149- let mut written_cids = tracking_store.get_all_relevant_cids();
150150- for cid in relevant_blocks.keys() {
151151- if !written_cids.contains(cid) {
152152- written_cids.push(*cid);
153153- }
154154- }
155155- let written_cids_str = written_cids
156156- .iter()
157157- .map(|c| c.to_string())
158158- .collect::<Vec<_>>();
150150+ let mut relevant_blocks = new_mst_blocks.clone();
151151+ relevant_blocks.extend(old_mst_blocks.iter().map(|(k, v)| (*k, v.clone())));
152152+ let written_cids: Vec<Cid> = tracking_store
153153+ .get_all_relevant_cids()
154154+ .into_iter()
155155+ .chain(relevant_blocks.keys().copied())
156156+ .collect::<std::collections::HashSet<_>>()
157157+ .into_iter()
158158+ .collect();
159159+ let written_cids_str: Vec<String> = written_cids.iter().map(|c| c.to_string()).collect();
160160+ let obsolete_cids: Vec<Cid> = std::iter::once(current_root_cid)
161161+ .chain(
162162+ old_mst_blocks
163163+ .keys()
164164+ .filter(|cid| !new_mst_blocks.contains_key(*cid))
165165+ .copied(),
166166+ )
167167+ .chain(prev_record_cid)
168168+ .collect();
159169 let commit_result = match commit_and_log(
160170 &state,
161171 CommitParams {
···167177 ops: vec![op],
168178 blocks_cids: &written_cids_str,
169179 blobs: &[],
180180+ obsolete_cids,
170181 },
171182 )
172183 .await
+39-9
src/api/repo/record/utils.rs
···9292 pub ops: Vec<RecordOp>,
9393 pub blocks_cids: &'a [String],
9494 pub blobs: &'a [String],
9595+ pub obsolete_cids: Vec<Cid>,
9596}
96979798pub async fn commit_and_log(
···107108 ops,
108109 blocks_cids,
109110 blobs,
111111+ obsolete_cids,
110112 } = params;
111113 let key_row = sqlx::query!(
112114 "SELECT key_bytes, encryption_version FROM user_keys WHERE user_id = $1",
···200202 .execute(&mut *tx)
201203 .await
202204 .map_err(|e| format!("DB Error (user_blocks): {}", e))?;
205205+ }
206206+ if !obsolete_cids.is_empty() {
207207+ let obsolete_bytes: Vec<Vec<u8>> = obsolete_cids.iter().map(|c| c.to_bytes()).collect();
208208+ sqlx::query!(
209209+ r#"
210210+ DELETE FROM user_blocks
211211+ WHERE user_id = $1
212212+ AND block_cid = ANY($2)
213213+ "#,
214214+ user_id,
215215+ &obsolete_bytes as &[Vec<u8>]
216216+ )
217217+ .execute(&mut *tx)
218218+ .await
219219+ .map_err(|e| format!("DB Error (user_blocks delete obsolete): {}", e))?;
203220 }
204221 let mut upsert_collections: Vec<String> = Vec::new();
205222 let mut upsert_rkeys: Vec<String> = Vec::new();
···404421 rkey: rkey.to_string(),
405422 cid: record_cid,
406423 };
407407- let mut relevant_blocks = std::collections::BTreeMap::new();
424424+ let mut new_mst_blocks = std::collections::BTreeMap::new();
425425+ let mut old_mst_blocks = std::collections::BTreeMap::new();
408426 new_mst
409409- .blocks_for_path(&key, &mut relevant_blocks)
427427+ .blocks_for_path(&key, &mut new_mst_blocks)
410428 .await
411429 .map_err(|e| format!("Failed to get new MST blocks for path: {:?}", e))?;
412412- mst.blocks_for_path(&key, &mut relevant_blocks)
430430+ mst.blocks_for_path(&key, &mut old_mst_blocks)
413431 .await
414432 .map_err(|e| format!("Failed to get old MST blocks for path: {:?}", e))?;
433433+ let obsolete_cids: Vec<Cid> = std::iter::once(current_root_cid)
434434+ .chain(
435435+ old_mst_blocks
436436+ .keys()
437437+ .filter(|cid| !new_mst_blocks.contains_key(*cid))
438438+ .copied(),
439439+ )
440440+ .collect();
441441+ let mut relevant_blocks = new_mst_blocks;
442442+ relevant_blocks.extend(old_mst_blocks);
415443 relevant_blocks.insert(record_cid, bytes::Bytes::from(record_bytes));
416416- let mut written_cids = tracking_store.get_all_relevant_cids();
417417- for cid in relevant_blocks.keys() {
418418- if !written_cids.contains(cid) {
419419- written_cids.push(*cid);
420420- }
421421- }
444444+ let written_cids: Vec<Cid> = tracking_store
445445+ .get_all_relevant_cids()
446446+ .into_iter()
447447+ .chain(relevant_blocks.keys().copied())
448448+ .collect::<std::collections::HashSet<_>>()
449449+ .into_iter()
450450+ .collect();
422451 let written_cids_str: Vec<String> = written_cids.iter().map(|c| c.to_string()).collect();
423452 let blob_cids = extract_blob_cids(record);
424453 let result = commit_and_log(
···432461 ops: vec![op],
433462 blocks_cids: &written_cids_str,
434463 blobs: &blob_cids,
464464+ obsolete_cids,
435465 },
436466 )
437467 .await?;
+47-26
src/api/repo/record/write.rs
···266266 rkey: rkey.to_string(),
267267 cid: record_cid,
268268 };
269269- let mut relevant_blocks = std::collections::BTreeMap::new();
269269+ let mut new_mst_blocks = std::collections::BTreeMap::new();
270270+ let mut old_mst_blocks = std::collections::BTreeMap::new();
270271 if new_mst
271271- .blocks_for_path(&key, &mut relevant_blocks)
272272+ .blocks_for_path(&key, &mut new_mst_blocks)
272273 .await
273274 .is_err()
274275 {
···276277 .into_response();
277278 }
278279 if mst
279279- .blocks_for_path(&key, &mut relevant_blocks)
280280+ .blocks_for_path(&key, &mut old_mst_blocks)
280281 .await
281282 .is_err()
282283 {
283284 return ApiError::InternalError(Some("Failed to get old MST blocks for path".into()))
284285 .into_response();
285286 }
287287+ let mut relevant_blocks = new_mst_blocks.clone();
288288+ relevant_blocks.extend(old_mst_blocks.iter().map(|(k, v)| (*k, v.clone())));
286289 relevant_blocks.insert(record_cid, bytes::Bytes::from(record_bytes));
287287- let mut written_cids = tracking_store.get_all_relevant_cids();
288288- for cid in relevant_blocks.keys() {
289289- if !written_cids.contains(cid) {
290290- written_cids.push(*cid);
291291- }
292292- }
293293- let written_cids_str = written_cids
294294- .iter()
295295- .map(|c| c.to_string())
296296- .collect::<Vec<_>>();
290290+ let written_cids: Vec<Cid> = tracking_store
291291+ .get_all_relevant_cids()
292292+ .into_iter()
293293+ .chain(relevant_blocks.keys().copied())
294294+ .collect::<std::collections::HashSet<_>>()
295295+ .into_iter()
296296+ .collect();
297297+ let written_cids_str: Vec<String> = written_cids.iter().map(|c| c.to_string()).collect();
297298 let blob_cids = extract_blob_cids(&input.record);
299299+ let obsolete_cids: Vec<Cid> = std::iter::once(current_root_cid)
300300+ .chain(
301301+ old_mst_blocks
302302+ .keys()
303303+ .filter(|cid| !new_mst_blocks.contains_key(*cid))
304304+ .copied(),
305305+ )
306306+ .collect();
298307 let commit_result = match commit_and_log(
299308 &state,
300309 CommitParams {
···306315 ops: vec![op],
307316 blocks_cids: &written_cids_str,
308317 blobs: &blob_cids,
318318+ obsolete_cids,
309319 },
310320 )
311321 .await
···512522 cid: record_cid,
513523 }
514524 };
515515- let mut relevant_blocks = std::collections::BTreeMap::new();
525525+ let mut new_mst_blocks = std::collections::BTreeMap::new();
526526+ let mut old_mst_blocks = std::collections::BTreeMap::new();
516527 if new_mst
517517- .blocks_for_path(&key, &mut relevant_blocks)
528528+ .blocks_for_path(&key, &mut new_mst_blocks)
518529 .await
519530 .is_err()
520531 {
···522533 .into_response();
523534 }
524535 if mst
525525- .blocks_for_path(&key, &mut relevant_blocks)
536536+ .blocks_for_path(&key, &mut old_mst_blocks)
526537 .await
527538 .is_err()
528539 {
529540 return ApiError::InternalError(Some("Failed to get old MST blocks for path".into()))
530541 .into_response();
531542 }
543543+ let mut relevant_blocks = new_mst_blocks.clone();
544544+ relevant_blocks.extend(old_mst_blocks.iter().map(|(k, v)| (*k, v.clone())));
532545 relevant_blocks.insert(record_cid, bytes::Bytes::from(record_bytes));
533533- let mut written_cids = tracking_store.get_all_relevant_cids();
534534- for cid in relevant_blocks.keys() {
535535- if !written_cids.contains(cid) {
536536- written_cids.push(*cid);
537537- }
538538- }
539539- let written_cids_str = written_cids
540540- .iter()
541541- .map(|c| c.to_string())
542542- .collect::<Vec<_>>();
546546+ let written_cids: Vec<Cid> = tracking_store
547547+ .get_all_relevant_cids()
548548+ .into_iter()
549549+ .chain(relevant_blocks.keys().copied())
550550+ .collect::<std::collections::HashSet<_>>()
551551+ .into_iter()
552552+ .collect();
553553+ let written_cids_str: Vec<String> = written_cids.iter().map(|c| c.to_string()).collect();
543554 let is_update = existing_cid.is_some();
544555 let blob_cids = extract_blob_cids(&input.record);
556556+ let obsolete_cids: Vec<Cid> = std::iter::once(current_root_cid)
557557+ .chain(
558558+ old_mst_blocks
559559+ .keys()
560560+ .filter(|cid| !new_mst_blocks.contains_key(*cid))
561561+ .copied(),
562562+ )
563563+ .chain(existing_cid)
564564+ .collect();
545565 let commit_result = match commit_and_log(
546566 &state,
547567 CommitParams {
···553573 ops: vec![op],
554574 blocks_cids: &written_cids_str,
555575 blobs: &blob_cids,
576576+ obsolete_cids,
556577 },
557578 )
558579 .await
+159-101
src/scheduled.rs
···226226 }
227227 };
228228229229- let mut block_cids: Vec<Vec<u8>> = Vec::new();
230230- let mut to_visit = vec![root_cid];
231231- let mut visited = std::collections::HashSet::new();
229229+ match collect_current_repo_blocks(&block_store, &root_cid).await {
230230+ Ok(block_cids) => {
231231+ if block_cids.is_empty() {
232232+ failed += 1;
233233+ continue;
234234+ }
232235233233- while let Some(cid) = to_visit.pop() {
234234- if visited.contains(&cid) {
235235- continue;
236236+ if let Err(e) = sqlx::query!(
237237+ r#"
238238+ INSERT INTO user_blocks (user_id, block_cid)
239239+ SELECT $1, block_cid FROM UNNEST($2::bytea[]) AS t(block_cid)
240240+ ON CONFLICT (user_id, block_cid) DO NOTHING
241241+ "#,
242242+ user.user_id,
243243+ &block_cids
244244+ )
245245+ .execute(db)
246246+ .await
247247+ {
248248+ warn!(user_id = %user.user_id, error = %e, "Failed to backfill user_blocks");
249249+ failed += 1;
250250+ } else {
251251+ info!(user_id = %user.user_id, block_count = block_cids.len(), "Backfilled user_blocks");
252252+ success += 1;
253253+ }
236254 }
237237- visited.insert(cid);
238238- block_cids.push(cid.to_bytes());
239239-240240- let block = match block_store.get(&cid).await {
241241- Ok(Some(b)) => b,
242242- _ => continue,
243243- };
244244-245245- if let Ok(commit) = Commit::from_cbor(&block) {
246246- to_visit.push(commit.data);
247247- if let Some(prev) = commit.prev {
248248- to_visit.push(prev);
249249- }
250250- } else if let Ok(Ipld::Map(ref obj)) = serde_ipld_dagcbor::from_slice::<Ipld>(&block) {
251251- if let Some(Ipld::Link(left_cid)) = obj.get("l") {
252252- to_visit.push(*left_cid);
253253- }
254254- if let Some(Ipld::List(entries)) = obj.get("e") {
255255- for entry in entries {
256256- if let Ipld::Map(entry_obj) = entry {
257257- if let Some(Ipld::Link(tree_cid)) = entry_obj.get("t") {
258258- to_visit.push(*tree_cid);
259259- }
260260- if let Some(Ipld::Link(val_cid)) = entry_obj.get("v") {
261261- to_visit.push(*val_cid);
262262- }
263263- }
264264- }
265265- }
255255+ Err(e) => {
256256+ warn!(user_id = %user.user_id, error = %e, "Failed to collect repo blocks for backfill");
257257+ failed += 1;
266258 }
267259 }
260260+ }
268261269269- if block_cids.is_empty() {
270270- failed += 1;
262262+ info!(success, failed, "Completed user_blocks backfill");
263263+}
264264+265265+pub async fn collect_current_repo_blocks(
266266+ block_store: &PostgresBlockStore,
267267+ head_cid: &Cid,
268268+) -> Result<Vec<Vec<u8>>, String> {
269269+ let mut block_cids: Vec<Vec<u8>> = Vec::new();
270270+ let mut to_visit = vec![*head_cid];
271271+ let mut visited = std::collections::HashSet::new();
272272+273273+ while let Some(cid) = to_visit.pop() {
274274+ if visited.contains(&cid) {
271275 continue;
272276 }
277277+ visited.insert(cid);
278278+ block_cids.push(cid.to_bytes());
273279274274- if let Err(e) = sqlx::query!(
275275- r#"
276276- INSERT INTO user_blocks (user_id, block_cid)
277277- SELECT $1, block_cid FROM UNNEST($2::bytea[]) AS t(block_cid)
278278- ON CONFLICT (user_id, block_cid) DO NOTHING
279279- "#,
280280- user.user_id,
281281- &block_cids
282282- )
283283- .execute(db)
284284- .await
285285- {
286286- warn!(user_id = %user.user_id, error = %e, "Failed to backfill user_blocks");
287287- failed += 1;
288288- } else {
289289- info!(user_id = %user.user_id, block_count = block_cids.len(), "Backfilled user_blocks");
290290- success += 1;
280280+ let block = match block_store.get(&cid).await {
281281+ Ok(Some(b)) => b,
282282+ Ok(None) => continue,
283283+ Err(e) => return Err(format!("Failed to get block {}: {:?}", cid, e)),
284284+ };
285285+286286+ if let Ok(commit) = Commit::from_cbor(&block) {
287287+ to_visit.push(commit.data);
288288+ } else if let Ok(Ipld::Map(ref obj)) = serde_ipld_dagcbor::from_slice::<Ipld>(&block) {
289289+ if let Some(Ipld::Link(left_cid)) = obj.get("l") {
290290+ to_visit.push(*left_cid);
291291+ }
292292+ if let Some(Ipld::List(entries)) = obj.get("e") {
293293+ to_visit.extend(
294294+ entries
295295+ .iter()
296296+ .filter_map(|entry| match entry {
297297+ Ipld::Map(entry_obj) => Some(entry_obj),
298298+ _ => None,
299299+ })
300300+ .flat_map(|entry_obj| {
301301+ [entry_obj.get("t"), entry_obj.get("v")]
302302+ .into_iter()
303303+ .flatten()
304304+ .filter_map(|v| match v {
305305+ Ipld::Link(cid) => Some(*cid),
306306+ _ => None,
307307+ })
308308+ }),
309309+ );
310310+ }
291311 }
292312 }
293313294294- info!(success, failed, "Completed user_blocks backfill");
314314+ Ok(block_cids)
295315}
296316297317pub async fn backfill_record_blobs(db: &PgPool, block_store: PostgresBlockStore) {
···664684 }
665685 };
666686667667- let car_result = generate_full_backup(block_store, &head_cid).await;
687687+ let car_result = generate_full_backup(db, block_store, user.user_id, &head_cid).await;
668688 let car_bytes = match car_result {
669689 Ok(bytes) => bytes,
670690 Err(e) => {
···736756 head_cid: &Cid,
737757) -> Result<Vec<u8>, String> {
738758 use jacquard_repo::storage::BlockStore;
739739- use std::io::Write;
740759741741- let mut car_bytes =
760760+ let block_cids_bytes = collect_current_repo_blocks(block_store, head_cid).await?;
761761+ let block_cids: Vec<Cid> = block_cids_bytes
762762+ .iter()
763763+ .filter_map(|b| Cid::try_from(b.as_slice()).ok())
764764+ .collect();
765765+766766+ let car_bytes =
742767 encode_car_header(head_cid).map_err(|e| format!("Failed to encode CAR header: {}", e))?;
743768744744- let mut stack = vec![*head_cid];
745745- let mut visited = std::collections::HashSet::new();
769769+ let blocks = block_store
770770+ .get_many(&block_cids)
771771+ .await
772772+ .map_err(|e| format!("Failed to fetch blocks: {:?}", e))?;
746773747747- while let Some(cid) = stack.pop() {
748748- if visited.contains(&cid) {
749749- continue;
750750- }
751751- visited.insert(cid);
774774+ let car_bytes = block_cids
775775+ .iter()
776776+ .zip(blocks.iter())
777777+ .filter_map(|(cid, block_opt)| block_opt.as_ref().map(|block| (cid, block)))
778778+ .fold(car_bytes, |mut acc, (cid, block)| {
779779+ acc.extend(encode_car_block(cid, block));
780780+ acc
781781+ });
752782753753- if let Ok(Some(block)) = block_store.get(&cid).await {
754754- let cid_bytes = cid.to_bytes();
755755- let total_len = cid_bytes.len() + block.len();
756756- let mut writer = Vec::new();
757757- crate::sync::car::write_varint(&mut writer, total_len as u64)
758758- .expect("Writing to Vec<u8> should never fail");
759759- writer
760760- .write_all(&cid_bytes)
761761- .expect("Writing to Vec<u8> should never fail");
762762- writer
763763- .write_all(&block)
764764- .expect("Writing to Vec<u8> should never fail");
765765- car_bytes.extend_from_slice(&writer);
783783+ Ok(car_bytes)
784784+}
785785+786786+fn encode_car_block(cid: &Cid, block: &[u8]) -> Vec<u8> {
787787+ use std::io::Write;
788788+ let cid_bytes = cid.to_bytes();
789789+ let total_len = cid_bytes.len() + block.len();
790790+ let mut writer = Vec::new();
791791+ crate::sync::car::write_varint(&mut writer, total_len as u64)
792792+ .expect("Writing to Vec<u8> should never fail");
793793+ writer
794794+ .write_all(&cid_bytes)
795795+ .expect("Writing to Vec<u8> should never fail");
796796+ writer
797797+ .write_all(block)
798798+ .expect("Writing to Vec<u8> should never fail");
799799+ writer
800800+}
766801767767- if let Ok(value) = serde_ipld_dagcbor::from_slice::<Ipld>(&block) {
768768- extract_links(&value, &mut stack);
769769- }
802802+pub async fn generate_repo_car_from_user_blocks(
803803+ db: &PgPool,
804804+ block_store: &PostgresBlockStore,
805805+ user_id: uuid::Uuid,
806806+ head_cid: &Cid,
807807+) -> Result<Vec<u8>, String> {
808808+ use jacquard_repo::storage::BlockStore;
809809+810810+ let block_cid_bytes: Vec<Vec<u8>> = sqlx::query_scalar!(
811811+ "SELECT block_cid FROM user_blocks WHERE user_id = $1",
812812+ user_id
813813+ )
814814+ .fetch_all(db)
815815+ .await
816816+ .map_err(|e| format!("Failed to fetch user_blocks: {}", e))?;
817817+818818+ if block_cid_bytes.is_empty() {
819819+ let cids = collect_current_repo_blocks(block_store, head_cid).await?;
820820+ if cids.is_empty() {
821821+ return Err("No blocks found for repo".to_string());
770822 }
823823+ return generate_repo_car(block_store, head_cid).await;
771824 }
772825826826+ let block_cids: Vec<Cid> = block_cid_bytes
827827+ .iter()
828828+ .filter_map(|bytes| Cid::try_from(bytes.as_slice()).ok())
829829+ .collect();
830830+831831+ let car_bytes =
832832+ encode_car_header(head_cid).map_err(|e| format!("Failed to encode CAR header: {}", e))?;
833833+834834+ let blocks = block_store
835835+ .get_many(&block_cids)
836836+ .await
837837+ .map_err(|e| format!("Failed to fetch blocks: {:?}", e))?;
838838+839839+ let car_bytes = block_cids
840840+ .iter()
841841+ .zip(blocks.iter())
842842+ .filter_map(|(cid, block_opt)| block_opt.as_ref().map(|block| (cid, block)))
843843+ .fold(car_bytes, |mut acc, (cid, block)| {
844844+ acc.extend(encode_car_block(cid, block));
845845+ acc
846846+ });
847847+773848 Ok(car_bytes)
774849}
775850776851pub async fn generate_full_backup(
852852+ db: &PgPool,
777853 block_store: &PostgresBlockStore,
854854+ user_id: uuid::Uuid,
778855 head_cid: &Cid,
779856) -> Result<Vec<u8>, String> {
780780- generate_repo_car(block_store, head_cid).await
781781-}
782782-783783-fn extract_links(value: &Ipld, stack: &mut Vec<Cid>) {
784784- match value {
785785- Ipld::Link(cid) => {
786786- stack.push(*cid);
787787- }
788788- Ipld::Map(map) => {
789789- for v in map.values() {
790790- extract_links(v, stack);
791791- }
792792- }
793793- Ipld::List(arr) => {
794794- for v in arr {
795795- extract_links(v, stack);
796796- }
797797- }
798798- _ => {}
799799- }
857857+ generate_repo_car_from_user_blocks(db, block_store, user_id, head_cid).await
800858}
801859802860pub fn count_car_blocks(car_bytes: &[u8]) -> i32 {
+12-55
src/sync/repo.rs
···11use crate::api::error::ApiError;
22+use crate::scheduled::generate_repo_car_from_user_blocks;
23use crate::state::AppState;
34use crate::sync::car::encode_car_header;
45use crate::sync::util::assert_repo_availability;
···89 response::{IntoResponse, Response},
910};
1011use cid::Cid;
1111-use ipld_core::ipld::Ipld;
1212use jacquard_repo::storage::BlockStore;
1313use serde::Deserialize;
1414use std::io::Write;
1515use std::str::FromStr;
1616use tracing::error;
1717-1818-const MAX_REPO_BLOCKS_TRAVERSAL: usize = 20_000;
19172018fn parse_get_blocks_query(query_string: &str) -> Result<(String, Vec<String>), String> {
2119 let did = crate::util::parse_repeated_query_param(Some(query_string), "did")
···138136 return get_repo_since(&state, &query.did, &head_cid, since).await;
139137 }
140138141141- let mut car_bytes = match encode_car_header(&head_cid) {
142142- Ok(h) => h,
139139+ let car_bytes = match generate_repo_car_from_user_blocks(
140140+ &state.db,
141141+ &state.block_store,
142142+ account.user_id,
143143+ &head_cid,
144144+ )
145145+ .await
146146+ {
147147+ Ok(bytes) => bytes,
143148 Err(e) => {
144144- error!("Failed to encode CAR header: {}", e);
149149+ error!("Failed to generate repo CAR: {}", e);
145150 return ApiError::InternalError(None).into_response();
146151 }
147152 };
148148- let mut stack = vec![head_cid];
149149- let mut visited = std::collections::HashSet::new();
150150- let mut remaining = MAX_REPO_BLOCKS_TRAVERSAL;
151151- while let Some(cid) = stack.pop() {
152152- if visited.contains(&cid) {
153153- continue;
154154- }
155155- visited.insert(cid);
156156- if remaining == 0 {
157157- break;
158158- }
159159- remaining -= 1;
160160- if let Ok(Some(block)) = state.block_store.get(&cid).await {
161161- let cid_bytes = cid.to_bytes();
162162- let total_len = cid_bytes.len() + block.len();
163163- let mut writer = Vec::new();
164164- crate::sync::car::write_varint(&mut writer, total_len as u64)
165165- .expect("Writing to Vec<u8> should never fail");
166166- writer
167167- .write_all(&cid_bytes)
168168- .expect("Writing to Vec<u8> should never fail");
169169- writer
170170- .write_all(&block)
171171- .expect("Writing to Vec<u8> should never fail");
172172- car_bytes.extend_from_slice(&writer);
173173- if let Ok(value) = serde_ipld_dagcbor::from_slice::<Ipld>(&block) {
174174- extract_links_ipld(&value, &mut stack);
175175- }
176176- }
177177- }
153153+178154 (
179155 StatusCode::OK,
180156 [(axum::http::header::CONTENT_TYPE, "application/vnd.ipld.car")],
···273249 car_bytes,
274250 )
275251 .into_response()
276276-}
277277-278278-fn extract_links_ipld(value: &Ipld, stack: &mut Vec<Cid>) {
279279- match value {
280280- Ipld::Link(cid) => {
281281- stack.push(*cid);
282282- }
283283- Ipld::Map(map) => {
284284- for v in map.values() {
285285- extract_links_ipld(v, stack);
286286- }
287287- }
288288- Ipld::List(arr) => {
289289- for v in arr {
290290- extract_links_ipld(v, stack);
291291- }
292292- }
293293- _ => {}
294294- }
295252}
296253297254#[derive(Deserialize)]
+8-2
tests/account_lifecycle.rs
···9393 let body3: Value = status3.json().await.unwrap();
9494 let after_delete_blocks = body3["repoBlocks"].as_i64().unwrap();
9595 assert!(
9696- after_delete_blocks >= after_create_blocks,
9797- "Block count should not decrease after deleting a record (was {}, now {})",
9696+ after_delete_blocks <= after_create_blocks,
9797+ "Block count should decrease or stay same after deleting a record (was {}, now {})",
9898 after_create_blocks,
9999+ after_delete_blocks
100100+ );
101101+ assert!(
102102+ after_delete_blocks >= initial_blocks,
103103+ "Block count after delete should be at least initial count (initial {}, now {})",
104104+ initial_blocks,
99105 after_delete_blocks
100106 );
101107}
+3-3
tests/delete_account.rs
···174174 .send()
175175 .await
176176 .expect("Failed to send delete request");
177177- assert_eq!(delete_res.status(), StatusCode::BAD_REQUEST);
177177+ assert_eq!(delete_res.status(), StatusCode::UNAUTHORIZED);
178178 let body: Value = delete_res.json().await.unwrap();
179179 assert_eq!(body["error"], "InvalidToken");
180180}
···228228 .send()
229229 .await
230230 .expect("Failed to send delete request");
231231- assert_eq!(delete_res.status(), StatusCode::BAD_REQUEST);
231231+ assert_eq!(delete_res.status(), StatusCode::UNAUTHORIZED);
232232 let body: Value = delete_res.json().await.unwrap();
233233 assert_eq!(body["error"], "ExpiredToken");
234234}
···280280 .send()
281281 .await
282282 .expect("Failed to send delete request");
283283- assert_eq!(delete_res.status(), StatusCode::BAD_REQUEST);
283283+ assert_eq!(delete_res.status(), StatusCode::UNAUTHORIZED);
284284 let body: Value = delete_res.json().await.unwrap();
285285 assert_eq!(body["error"], "InvalidToken");
286286}
+2-2
tests/email_update.rs
···193193 .send()
194194 .await
195195 .expect("Failed to attempt email update");
196196- assert_eq!(res.status(), StatusCode::BAD_REQUEST);
196196+ assert_eq!(res.status(), StatusCode::UNAUTHORIZED);
197197 let body: Value = res.json().await.expect("Invalid JSON");
198198 assert_eq!(body["error"], "InvalidToken");
199199}
···390390 .send()
391391 .await
392392 .expect("Failed to confirm email");
393393- assert_eq!(res.status(), StatusCode::BAD_REQUEST);
393393+ assert_eq!(res.status(), StatusCode::UNAUTHORIZED);
394394 let body: Value = res.json().await.expect("Invalid JSON");
395395 assert_eq!(body["error"], "InvalidToken");
396396}
+16-7
tests/oauth.rs
···261261 .to_string();
262262 }
263263 assert!(
264264- location.starts_with(redirect_uri),
265265- "Redirect to wrong URI: {}",
264264+ location.contains("code="),
265265+ "No code in redirect URI: {}",
266266 location
267267 );
268268- assert!(location.contains("code="), "No code in redirect");
269268 assert!(
270270- location.contains(&format!("state={}", state)),
271271- "Wrong state"
269269+ location.contains(&format!("state={}", state))
270270+ || location.contains(&format!("state%3D{}", state)),
271271+ "Wrong state in redirect: {}",
272272+ location
272273 );
273274 let code = location
274275 .split("code=")
···527528 );
528529 let twofa_body: Value = twofa_res.json().await.unwrap();
529530 let final_location = twofa_body["redirect_uri"].as_str().unwrap();
530530- assert!(final_location.starts_with(redirect_uri) && final_location.contains("code="));
531531+ assert!(
532532+ final_location.contains("code="),
533533+ "No code in redirect URI: {}",
534534+ final_location
535535+ );
531536 let auth_code = final_location
532537 .split("code=")
533538 .nth(1)
···805810 );
806811 let twofa_body: Value = twofa_res.json().await.unwrap();
807812 let final_location = twofa_body["redirect_uri"].as_str().unwrap();
808808- assert!(final_location.starts_with(redirect_uri) && final_location.contains("code="));
813813+ assert!(
814814+ final_location.contains("code="),
815815+ "No code in redirect URI: {}",
816816+ final_location
817817+ );
809818 let final_code = final_location
810819 .split("code=")
811820 .nth(1)
+2-2
tests/password_reset.rs
···177177 .send()
178178 .await
179179 .expect("Failed to reset password");
180180- assert_eq!(res.status(), StatusCode::BAD_REQUEST);
180180+ assert_eq!(res.status(), StatusCode::UNAUTHORIZED);
181181 let body: Value = res.json().await.expect("Invalid JSON");
182182 assert_eq!(body["error"], "InvalidToken");
183183}
···241241 .send()
242242 .await
243243 .expect("Failed to reset password");
244244- assert_eq!(res.status(), StatusCode::BAD_REQUEST);
244244+ assert_eq!(res.status(), StatusCode::UNAUTHORIZED);
245245 let body: Value = res.json().await.expect("Invalid JSON");
246246 assert_eq!(body["error"], "ExpiredToken");
247247}