add network.slices.slice.getSyncSummary xrpc and corresponding ui, shows a summary of the sync before syncing, limits to 5000 repos but can change via env var
···1919# System slice URI
2020SYSTEM_SLICE_URI=at://did:plc:bcgltzqazw5tb6k2g3ttenbj/network.slices.slice/3lymhd4jhrd2z
21212222+# Default maximum repositories per sync operation
2323+DEFAULT_MAX_SYNC_REPOS=5000
2424+2225# Logging level
2326RUST_LOG=debug
2427
···667667 /// Fetch all repositories that have records in a given collection.
668668 ///
669669 /// Uses cursor-based pagination to fetch all repos from the relay.
670670- async fn get_repos_for_collection(
670670+ pub async fn get_repos_for_collection(
671671 &self,
672672 collection: &str,
673673 slice_uri: &str,
···11+use crate::{AppState, auth, errors::AppError, sync::SyncService};
22+use axum::{extract::{Query, State}, http::HeaderMap, response::Json};
33+use serde::{Deserialize, Serialize};
44+use std::collections::HashMap;
55+66+#[derive(Debug, Deserialize)]
77+#[serde(rename_all = "camelCase")]
88+pub struct Params {
99+ pub slice: String,
1010+ #[serde(default, deserialize_with = "deserialize_string_or_vec")]
1111+ pub collections: Option<Vec<String>>,
1212+ #[serde(default, deserialize_with = "deserialize_string_or_vec")]
1313+ pub external_collections: Option<Vec<String>>,
1414+ #[serde(default, deserialize_with = "deserialize_string_or_vec")]
1515+ pub repos: Option<Vec<String>>,
1616+}
1717+1818+fn deserialize_string_or_vec<'de, D>(deserializer: D) -> Result<Option<Vec<String>>, D::Error>
1919+where
2020+ D: serde::Deserializer<'de>,
2121+{
2222+ use serde::de::{self, Visitor};
2323+ use std::fmt;
2424+2525+ struct StringOrVec;
2626+2727+ impl<'de> Visitor<'de> for StringOrVec {
2828+ type Value = Option<Vec<String>>;
2929+3030+ fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
3131+ formatter.write_str("string or list of strings")
3232+ }
3333+3434+ fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
3535+ where
3636+ E: de::Error,
3737+ {
3838+ Ok(Some(vec![value.to_string()]))
3939+ }
4040+4141+ fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
4242+ where
4343+ A: de::SeqAccess<'de>,
4444+ {
4545+ let mut vec = Vec::new();
4646+ while let Some(item) = seq.next_element::<String>()? {
4747+ vec.push(item);
4848+ }
4949+ Ok(if vec.is_empty() { None } else { Some(vec) })
5050+ }
5151+5252+ fn visit_none<E>(self) -> Result<Self::Value, E>
5353+ where
5454+ E: de::Error,
5555+ {
5656+ Ok(None)
5757+ }
5858+5959+ fn visit_unit<E>(self) -> Result<Self::Value, E>
6060+ where
6161+ E: de::Error,
6262+ {
6363+ Ok(None)
6464+ }
6565+ }
6666+6767+ deserializer.deserialize_any(StringOrVec)
6868+}
6969+7070+#[derive(Debug, Serialize)]
7171+#[serde(rename_all = "camelCase")]
7272+pub struct Output {
7373+ pub total_repos: i64,
7474+ pub capped_repos: i64,
7575+ pub collections_summary: Vec<CollectionSummary>,
7676+ pub would_be_capped: bool,
7777+ pub applied_limit: i32,
7878+}
7979+8080+#[derive(Debug, Serialize)]
8181+#[serde(rename_all = "camelCase")]
8282+pub struct CollectionSummary {
8383+ pub collection: String,
8484+ pub estimated_repos: i64,
8585+ pub is_external: bool,
8686+}
8787+8888+pub async fn handler(
8989+ State(state): State<AppState>,
9090+ headers: HeaderMap,
9191+ Query(params): Query<Params>,
9292+) -> Result<Json<Output>, AppError> {
9393+ tracing::info!("getSyncSummary called with params: {:?}", params);
9494+9595+ let token = auth::extract_bearer_token(&headers)?;
9696+ let _user_info = auth::verify_oauth_token_cached(
9797+ &token,
9898+ &state.config.auth_base_url,
9999+ Some(state.auth_cache.clone()),
100100+ )
101101+ .await?;
102102+103103+ let slice_uri = ¶ms.slice;
104104+ let primary_collections = params.collections.unwrap_or_default();
105105+ let external_collections = params.external_collections.unwrap_or_default();
106106+ let user_provided_repos = params.repos;
107107+108108+ // Use the system default limit
109109+ let applied_limit = state.config.default_max_sync_repos;
110110+111111+ // Get slice domain for categorizing collections
112112+ let slice_domain = state
113113+ .database
114114+ .get_slice_domain(slice_uri)
115115+ .await
116116+ .map_err(|e| AppError::Internal(format!("Failed to get slice domain: {}", e)))?
117117+ .ok_or_else(|| AppError::NotFound(format!("Slice not found: {}", slice_uri)))?;
118118+119119+ // Create sync service for repo discovery
120120+ let sync_service = SyncService::with_cache(
121121+ state.database.clone(),
122122+ state.config.relay_endpoint.clone(),
123123+ state.auth_cache.clone(),
124124+ );
125125+126126+ // Discover repos if not provided
127127+ let all_repos = if let Some(provided_repos) = user_provided_repos {
128128+ provided_repos
129129+ } else {
130130+ // Discover repos from collections
131131+ let mut discovered_repos = std::collections::HashSet::new();
132132+133133+ // Get repos from primary collections
134134+ for collection in &primary_collections {
135135+ match sync_service.get_repos_for_collection(collection, slice_uri).await {
136136+ Ok(repos) => {
137137+ discovered_repos.extend(repos);
138138+ }
139139+ Err(e) => {
140140+ tracing::warn!("Failed to get repos for collection {}: {}", collection, e);
141141+ }
142142+ }
143143+ }
144144+145145+ // Get repos from external collections
146146+ for collection in &external_collections {
147147+ match sync_service.get_repos_for_collection(collection, slice_uri).await {
148148+ Ok(repos) => {
149149+ discovered_repos.extend(repos);
150150+ }
151151+ Err(e) => {
152152+ tracing::warn!("Failed to get repos for collection {}: {}", collection, e);
153153+ }
154154+ }
155155+ }
156156+157157+ discovered_repos.into_iter().collect()
158158+ };
159159+160160+ let total_repos = all_repos.len() as i64;
161161+ let capped_repos = std::cmp::min(total_repos, applied_limit as i64);
162162+ let would_be_capped = total_repos > applied_limit as i64;
163163+164164+ // Build collections summary
165165+ let mut collections_summary = Vec::new();
166166+ let mut collection_repo_counts: HashMap<String, i64> = HashMap::new();
167167+168168+ // Count repos per collection (this is an approximation)
169169+ for collection in &primary_collections {
170170+ let is_external = !collection.starts_with(&slice_domain);
171171+ let estimated_repos = if let Ok(repos) = sync_service.get_repos_for_collection(collection, slice_uri).await {
172172+ repos.len() as i64
173173+ } else {
174174+ 0
175175+ };
176176+177177+ collection_repo_counts.insert(collection.clone(), estimated_repos);
178178+ collections_summary.push(CollectionSummary {
179179+ collection: collection.clone(),
180180+ estimated_repos,
181181+ is_external,
182182+ });
183183+ }
184184+185185+ for collection in &external_collections {
186186+ let is_external = !collection.starts_with(&slice_domain);
187187+ let estimated_repos = if let Ok(repos) = sync_service.get_repos_for_collection(collection, slice_uri).await {
188188+ repos.len() as i64
189189+ } else {
190190+ 0
191191+ };
192192+193193+ collection_repo_counts.insert(collection.clone(), estimated_repos);
194194+ collections_summary.push(CollectionSummary {
195195+ collection: collection.clone(),
196196+ estimated_repos,
197197+ is_external,
198198+ });
199199+ }
200200+201201+ Ok(Json(Output {
202202+ total_repos,
203203+ capped_repos,
204204+ collections_summary,
205205+ would_be_capped,
206206+ applied_limit,
207207+ }))
208208+}
+1
api/src/xrpc/network/slices/slice/mod.rs
···1010pub mod get_oauth_clients;
1111pub mod get_slice_records;
1212pub mod get_sparklines;
1313+pub mod get_sync_summary;
1314pub mod openapi;
1415pub mod start_sync;
1516pub mod stats;
···118118 className="leading-relaxed"
119119 >
120120 When enabled, records for this lexicon will not be
121121- synced from the AT Protocol firehose or during bulk sync
121121+ synced from the AT Protocol firehose or during sync
122122 operations.
123123 </Text>
124124 </div>