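Add the network.slices.slice.getSyncSummary XRPC endpoint and its corresponding UI. The endpoint shows a summary of a sync before it runs. Syncs are limited to 5000 repos by default; the limit can be changed via the DEFAULT_MAX_SYNC_REPOS environment variable.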
···19# System slice URI
20SYSTEM_SLICE_URI=at://did:plc:bcgltzqazw5tb6k2g3ttenbj/network.slices.slice/3lymhd4jhrd2z
2100022# Logging level
23RUST_LOG=debug
24
···19# System slice URI
20SYSTEM_SLICE_URI=at://did:plc:bcgltzqazw5tb6k2g3ttenbj/network.slices.slice/3lymhd4jhrd2z
2122+# Default maximum repositories per sync operation
23+DEFAULT_MAX_SYNC_REPOS=5000
24+25# Logging level
26RUST_LOG=debug
27
···667 /// Fetch all repositories that have records in a given collection.
668 ///
669 /// Uses cursor-based pagination to fetch all repos from the relay.
670- async fn get_repos_for_collection(
671 &self,
672 collection: &str,
673 slice_uri: &str,
···667 /// Fetch all repositories that have records in a given collection.
668 ///
669 /// Uses cursor-based pagination to fetch all repos from the relay.
670+ pub async fn get_repos_for_collection(
671 &self,
672 collection: &str,
673 slice_uri: &str,
···1+use crate::{AppState, auth, errors::AppError, sync::SyncService};
2+use axum::{extract::{Query, State}, http::HeaderMap, response::Json};
3+use serde::{Deserialize, Serialize};
4+use std::collections::HashMap;
/// Query parameters accepted by the `getSyncSummary` handler.
///
/// Field names are read in camelCase. The three list fields go through
/// `deserialize_string_or_vec`, so each may be given as a single string or
/// as a list of strings, and may be omitted entirely.
5+6+#[derive(Debug, Deserialize)]
7+#[serde(rename_all = "camelCase")]
8+pub struct Params {
    /// URI of the slice to summarize; the handler uses it to look up the
    /// slice's domain and passes it to repo discovery.
9+ pub slice: String,
    /// Primary collections to include; the handler treats a missing value as
    /// an empty list.
10+ #[serde(default, deserialize_with = "deserialize_string_or_vec")]
11+ pub collections: Option<Vec<String>>,
    /// External collections to include; the handler treats a missing value as
    /// an empty list.
12+ #[serde(default, deserialize_with = "deserialize_string_or_vec")]
13+ pub external_collections: Option<Vec<String>>,
    /// Explicit repo list. When present, the handler counts these repos
    /// instead of discovering repos from the collections.
14+ #[serde(default, deserialize_with = "deserialize_string_or_vec")]
15+ pub repos: Option<Vec<String>>,
16+}
17+18+fn deserialize_string_or_vec<'de, D>(deserializer: D) -> Result<Option<Vec<String>>, D::Error>
19+where
20+ D: serde::Deserializer<'de>,
21+{
22+ use serde::de::{self, Visitor};
23+ use std::fmt;
24+25+ struct StringOrVec;
26+27+ impl<'de> Visitor<'de> for StringOrVec {
28+ type Value = Option<Vec<String>>;
29+30+ fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
31+ formatter.write_str("string or list of strings")
32+ }
33+34+ fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
35+ where
36+ E: de::Error,
37+ {
38+ Ok(Some(vec![value.to_string()]))
39+ }
40+41+ fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
42+ where
43+ A: de::SeqAccess<'de>,
44+ {
45+ let mut vec = Vec::new();
46+ while let Some(item) = seq.next_element::<String>()? {
47+ vec.push(item);
48+ }
49+ Ok(if vec.is_empty() { None } else { Some(vec) })
50+ }
51+52+ fn visit_none<E>(self) -> Result<Self::Value, E>
53+ where
54+ E: de::Error,
55+ {
56+ Ok(None)
57+ }
58+59+ fn visit_unit<E>(self) -> Result<Self::Value, E>
60+ where
61+ E: de::Error,
62+ {
63+ Ok(None)
64+ }
65+ }
66+67+ deserializer.deserialize_any(StringOrVec)
68+}
/// Response body of the `getSyncSummary` handler, serialized with camelCase
/// field names.
69+70+#[derive(Debug, Serialize)]
71+#[serde(rename_all = "camelCase")]
72+pub struct Output {
    /// Number of repos the sync would cover: the caller-provided repo list if
    /// one was given, otherwise the repos discovered from the collections.
73+ pub total_repos: i64,
    /// The smaller of `total_repos` and `applied_limit`.
74+ pub capped_repos: i64,
    /// One entry per requested collection (primary first, then external).
75+ pub collections_summary: Vec<CollectionSummary>,
    /// `true` when `total_repos` is greater than `applied_limit`.
76+ pub would_be_capped: bool,
    /// The repo limit used for this summary; the handler takes it from
    /// `state.config.default_max_sync_repos`.
77+ pub applied_limit: i32,
78+}
/// Per-collection entry of the sync summary, serialized with camelCase field
/// names.
79+80+#[derive(Debug, Serialize)]
81+#[serde(rename_all = "camelCase")]
82+pub struct CollectionSummary {
    /// Collection name as passed in the request.
83+ pub collection: String,
    /// Number of repos returned by repo discovery for this collection; the
    /// handler reports 0 when discovery fails.
84+ pub estimated_repos: i64,
    /// `true` when the collection name does not start with the slice's domain.
85+ pub is_external: bool,
86+}
87+88+pub async fn handler(
89+ State(state): State<AppState>,
90+ headers: HeaderMap,
91+ Query(params): Query<Params>,
92+) -> Result<Json<Output>, AppError> {
93+ tracing::info!("getSyncSummary called with params: {:?}", params);
94+95+ let token = auth::extract_bearer_token(&headers)?;
96+ let _user_info = auth::verify_oauth_token_cached(
97+ &token,
98+ &state.config.auth_base_url,
99+ Some(state.auth_cache.clone()),
100+ )
101+ .await?;
102+103+ let slice_uri = ¶ms.slice;
104+ let primary_collections = params.collections.unwrap_or_default();
105+ let external_collections = params.external_collections.unwrap_or_default();
106+ let user_provided_repos = params.repos;
107+108+ // Use the system default limit
109+ let applied_limit = state.config.default_max_sync_repos;
110+111+ // Get slice domain for categorizing collections
112+ let slice_domain = state
113+ .database
114+ .get_slice_domain(slice_uri)
115+ .await
116+ .map_err(|e| AppError::Internal(format!("Failed to get slice domain: {}", e)))?
117+ .ok_or_else(|| AppError::NotFound(format!("Slice not found: {}", slice_uri)))?;
118+119+ // Create sync service for repo discovery
120+ let sync_service = SyncService::with_cache(
121+ state.database.clone(),
122+ state.config.relay_endpoint.clone(),
123+ state.auth_cache.clone(),
124+ );
125+126+ // Discover repos if not provided
127+ let all_repos = if let Some(provided_repos) = user_provided_repos {
128+ provided_repos
129+ } else {
130+ // Discover repos from collections
131+ let mut discovered_repos = std::collections::HashSet::new();
132+133+ // Get repos from primary collections
134+ for collection in &primary_collections {
135+ match sync_service.get_repos_for_collection(collection, slice_uri).await {
136+ Ok(repos) => {
137+ discovered_repos.extend(repos);
138+ }
139+ Err(e) => {
140+ tracing::warn!("Failed to get repos for collection {}: {}", collection, e);
141+ }
142+ }
143+ }
144+145+ // Get repos from external collections
146+ for collection in &external_collections {
147+ match sync_service.get_repos_for_collection(collection, slice_uri).await {
148+ Ok(repos) => {
149+ discovered_repos.extend(repos);
150+ }
151+ Err(e) => {
152+ tracing::warn!("Failed to get repos for collection {}: {}", collection, e);
153+ }
154+ }
155+ }
156+157+ discovered_repos.into_iter().collect()
158+ };
159+160+ let total_repos = all_repos.len() as i64;
161+ let capped_repos = std::cmp::min(total_repos, applied_limit as i64);
162+ let would_be_capped = total_repos > applied_limit as i64;
163+164+ // Build collections summary
165+ let mut collections_summary = Vec::new();
166+ let mut collection_repo_counts: HashMap<String, i64> = HashMap::new();
167+168+ // Count repos per collection (this is an approximation)
169+ for collection in &primary_collections {
170+ let is_external = !collection.starts_with(&slice_domain);
171+ let estimated_repos = if let Ok(repos) = sync_service.get_repos_for_collection(collection, slice_uri).await {
172+ repos.len() as i64
173+ } else {
174+ 0
175+ };
176+177+ collection_repo_counts.insert(collection.clone(), estimated_repos);
178+ collections_summary.push(CollectionSummary {
179+ collection: collection.clone(),
180+ estimated_repos,
181+ is_external,
182+ });
183+ }
184+185+ for collection in &external_collections {
186+ let is_external = !collection.starts_with(&slice_domain);
187+ let estimated_repos = if let Ok(repos) = sync_service.get_repos_for_collection(collection, slice_uri).await {
188+ repos.len() as i64
189+ } else {
190+ 0
191+ };
192+193+ collection_repo_counts.insert(collection.clone(), estimated_repos);
194+ collections_summary.push(CollectionSummary {
195+ collection: collection.clone(),
196+ estimated_repos,
197+ is_external,
198+ });
199+ }
200+201+ Ok(Json(Output {
202+ total_repos,
203+ capped_repos,
204+ collections_summary,
205+ would_be_capped,
206+ applied_limit,
207+ }))
208+}
+1
api/src/xrpc/network/slices/slice/mod.rs
···10pub mod get_oauth_clients;
11pub mod get_slice_records;
12pub mod get_sparklines;
013pub mod openapi;
14pub mod start_sync;
15pub mod stats;
···10pub mod get_oauth_clients;
11pub mod get_slice_records;
12pub mod get_sparklines;
13+pub mod get_sync_summary;
14pub mod openapi;
15pub mod start_sync;
16pub mod stats;
+36-2
frontend/src/client.ts
···1// Generated TypeScript client for AT Protocol records
2-// Generated at: 2025-09-28 21:47:20 UTC
3-// Lexicons: 41
45/**
6 * @example Usage
···1046 connected: boolean;
1047}
104800000000000000000000001049export interface NetworkSlicesSlice {
1050 /** Name of the slice */
1051 name: string;
···15521553export interface NetworkSlicesSliceGetJobLogs {
1554 readonly LogEntry: NetworkSlicesSliceGetJobLogsLogEntry;
00001555}
15561557export interface NetworkSlicesSliceGetJobStatus {
···2243 return await this.client.makeRequest<
2244 NetworkSlicesSliceGetJetstreamStatusOutput
2245 >("network.slices.slice.getJetstreamStatus", "GET", {});
000000002246 }
22472248 async getJobStatus(
···118 className="leading-relaxed"
119 >
120 When enabled, records for this lexicon will not be
121- synced from the AT Protocol firehose or during bulk sync
122 operations.
123 </Text>
124 </div>
···118 className="leading-relaxed"
119 >
120 When enabled, records for this lexicon will not be
121+ synced from the AT Protocol firehose or during sync
122 operations.
123 </Text>
124 </div>