Forked from atscan.net/plcbundle-rs.
A high-performance implementation of plcbundle, written in Rust.
1use anyhow::Result;
2use clap::{Args, ValueEnum};
3use plcbundle::BundleManager;
4use sonic_rs::JsonValueTrait;
5use std::collections::HashMap;
6use std::path::PathBuf;
7
8use super::utils;
9
// Clap argument definition for the `stats` subcommand.
//
// NOTE(review): `long_about`, the `help_template` examples, and the `///`
// field docs below are all surfaced verbatim in the generated --help output,
// so they are runtime-visible text and are left byte-for-byte unchanged.
#[derive(Args)]
#[command(
    about = "Display statistics about bundles",
    long_about = "Generate comprehensive statistics about bundles, operations, DIDs, and
timeline patterns. Useful for understanding repository composition, analyzing
growth trends, and identifying patterns in the data.

Supports multiple statistic types:
  • summary - Overall repository metrics (default)
  • operations - Operation type distribution
  • dids - DID activity patterns
  • timeline - Temporal distribution and growth rates

Use --stat-type to select which analysis to perform. Statistics can be
computed for specific bundle ranges using --bundles, or for the entire
repository if omitted. Use --json for machine-readable output suitable for
further processing or visualization.

This command provides insights into repository health, data distribution,
and usage patterns that help with capacity planning and optimization.",
    help_template = crate::clap_help!(
        examples: " # Summary statistics (default)\n \
                   {bin} stats\n\n \
                   # Statistics for specific bundles\n \
                   {bin} stats --bundles 1-100\n\n \
                   # Operation type distribution\n \
                   {bin} stats --stat-type operations\n\n \
                   # DID statistics\n \
                   {bin} stats --stat-type dids\n\n \
                   # Timeline statistics\n \
                   {bin} stats --stat-type timeline\n\n \
                   # JSON output for scripting\n \
                   {bin} stats --json\n\n \
                   # Parallel processing\n \
                   {bin} stats --stat-type operations -j 8"
    )
)]
pub struct StatsCommand {
    /// Bundle range
    // Parsed by utils::parse_bundle_spec in run(); None selects everything
    // up to the index's last_bundle.
    #[arg(short, long)]
    pub bundles: Option<String>,

    /// Statistics type
    // Selects which collect_* function run() dispatches to.
    #[arg(short = 't', long, default_value = "summary")]
    pub stat_type: StatType,

    /// Number of threads to use (0 = auto-detect)
    // NOTE(review): this field is never read anywhere in this file — confirm
    // whether -j is wired up elsewhere or is currently a no-op for `stats`.
    #[arg(short = 'j', long, default_value = "0")]
    pub threads: usize,

    /// Output as JSON
    // When set, print_stats emits pretty-printed JSON instead of the
    // human-readable report.
    #[arg(long)]
    pub json: bool,
}
64
// Which analysis the `stats` subcommand performs.
//
// NOTE(review): the variant doc comments double as the user-visible value
// descriptions clap shows for `--stat-type` (ValueEnum), so they are kept
// exactly as written.
#[derive(Debug, Clone, ValueEnum)]
pub enum StatType {
    /// Summary statistics
    Summary,
    /// Operation type distribution
    Operations,
    /// DID statistics
    Dids,
    /// Timeline statistics
    Timeline,
}
76
77pub fn run(cmd: StatsCommand, dir: PathBuf) -> Result<()> {
78 let manager = utils::create_manager(dir.clone(), false, false, false)?;
79 let index = manager.get_index();
80
81 if utils::is_repository_empty(&manager) {
82 println!("Repository is empty (no bundles)");
83 return Ok(());
84 }
85
86 let bundle_nums = utils::parse_bundle_spec(cmd.bundles, index.last_bundle)?;
87
88 match cmd.stat_type {
89 StatType::Summary => {
90 let stats = collect_summary_stats(&manager, &index, &bundle_nums)?;
91 print_stats(&stats, cmd.json, StatType::Summary)?;
92 }
93 StatType::Operations => {
94 let stats = collect_operation_stats(&manager, &bundle_nums)?;
95 print_stats(&stats, cmd.json, StatType::Operations)?;
96 }
97 StatType::Dids => {
98 let stats = collect_did_stats(&manager, &index, &bundle_nums)?;
99 print_stats(&stats, cmd.json, StatType::Dids)?;
100 }
101 StatType::Timeline => {
102 let stats = collect_timeline_stats(&manager, &index, &bundle_nums)?;
103 print_stats(&stats, cmd.json, StatType::Timeline)?;
104 }
105 }
106
107 Ok(())
108}
109
/// Aggregate metrics over a selection of bundles, computed from index
/// metadata only (no operation scan). Serialized to JSON for output.
#[derive(Debug, Clone, serde::Serialize)]
struct SummaryStats {
    /// Number of bundles in the selection.
    bundle_count: usize,
    /// Sum of per-bundle operation counts.
    total_operations: u64,
    /// Sum of per-bundle DID counts; a DID present in several bundles is
    /// counted once per bundle.
    total_dids: u64,
    /// Total compressed (on-disk) size in bytes.
    total_compressed_size: u64,
    /// Total uncompressed (logical) size in bytes.
    total_uncompressed_size: u64,
    /// Space saved by compression as a percentage; 0.0 when nothing stored.
    compression_ratio: f64,
    /// Mean operations per bundle (0.0 for an empty selection).
    avg_operations_per_bundle: f64,
    /// Mean DIDs per bundle (0.0 for an empty selection).
    avg_dids_per_bundle: f64,
    /// Human-readable selection, e.g. "7", "1-100", or "all".
    bundle_range: String,
}
122
/// Operation-type distribution over a bundle range, built by streaming
/// every operation in collect_operation_stats.
#[derive(Debug, Clone, serde::Serialize)]
struct OperationStats {
    /// Total operations scanned.
    total_operations: usize,
    /// Operations whose `nullified` flag was set.
    nullified_operations: usize,
    /// Count per operation "type" value ("unknown" when the field is
    /// missing or not a string).
    operation_types: HashMap<String, usize>,
    /// Share of each type as a percentage of `total_operations`.
    operation_type_percentages: HashMap<String, f64>,
}
130
/// DID-level statistics approximated from index metadata.
///
/// NOTE(review): the last three fields are always 0 as produced by
/// collect_did_stats — exact values would require a full operation scan.
#[derive(Debug, Clone, serde::Serialize)]
struct DIDStats {
    /// Sum of per-bundle DID counts (may overcount DIDs spanning bundles).
    total_unique_dids: usize,
    /// Sum of per-bundle operation counts.
    total_did_operations: u64,
    /// total_did_operations / total_unique_dids (0.0 when no DIDs).
    avg_operations_per_did: f64,
    /// Placeholder — always 0 (would need full iteration).
    dids_with_single_operation: usize,
    /// Placeholder — always 0 (would need full iteration).
    dids_with_multiple_operations: usize,
    /// Placeholder — always 0 (would need full iteration).
    max_operations_for_did: usize,
}
140
/// Temporal distribution of operations and bundles. All `Option` fields are
/// `None` when the selection is empty, timestamps fail to parse as RFC 3339,
/// or the time span is not strictly positive.
#[derive(Debug, Clone, serde::Serialize)]
struct TimelineStats {
    /// Minimum bundle start_time (lexicographic min of the raw strings).
    earliest_time: Option<String>,
    /// Maximum bundle end_time (lexicographic max of the raw strings).
    latest_time: Option<String>,
    /// Span between earliest and latest, in fractional days.
    time_span_days: Option<f64>,
    /// Total operations divided by the span in days.
    operations_per_day: Option<f64>,
    /// Bundle count divided by the span in days.
    bundles_per_day: Option<f64>,
    /// Operation counts bucketed by bundle start date (YYYY-MM-DD).
    /// NOTE(review): only surfaced via --json; print_human_stats ignores it.
    time_distribution: HashMap<String, usize>,
}
150
151fn collect_summary_stats(
152 _manager: &BundleManager,
153 index: &crate::index::Index,
154 bundle_nums: &[u32],
155) -> Result<serde_json::Value> {
156 let bundle_metadatas: Vec<_> = index
157 .bundles
158 .iter()
159 .filter(|b| bundle_nums.contains(&b.bundle_number))
160 .collect();
161
162 let bundle_count = bundle_metadatas.len();
163 let total_operations: u64 = bundle_metadatas
164 .iter()
165 .map(|b| b.operation_count as u64)
166 .sum();
167 let total_dids: u64 = bundle_metadatas.iter().map(|b| b.did_count as u64).sum();
168 let total_compressed_size: u64 = bundle_metadatas.iter().map(|b| b.compressed_size).sum();
169 let total_uncompressed_size: u64 = bundle_metadatas.iter().map(|b| b.uncompressed_size).sum();
170
171 let compression_ratio = if total_uncompressed_size > 0 {
172 (1.0 - total_compressed_size as f64 / total_uncompressed_size as f64) * 100.0
173 } else {
174 0.0
175 };
176
177 let avg_operations_per_bundle = if bundle_count > 0 {
178 total_operations as f64 / bundle_count as f64
179 } else {
180 0.0
181 };
182
183 let avg_dids_per_bundle = if bundle_count > 0 {
184 total_dids as f64 / bundle_count as f64
185 } else {
186 0.0
187 };
188
189 let bundle_range = if bundle_nums.len() == 1 {
190 format!("{}", bundle_nums[0])
191 } else if let (Some(&min), Some(&max)) = (bundle_nums.first(), bundle_nums.last()) {
192 if min == max {
193 format!("{}", min)
194 } else {
195 format!("{}-{}", min, max)
196 }
197 } else {
198 "all".to_string()
199 };
200
201 let stats = SummaryStats {
202 bundle_count,
203 total_operations,
204 total_dids,
205 total_compressed_size,
206 total_uncompressed_size,
207 compression_ratio,
208 avg_operations_per_bundle,
209 avg_dids_per_bundle,
210 bundle_range,
211 };
212
213 Ok(serde_json::to_value(stats)?)
214}
215
216fn collect_operation_stats(
217 manager: &BundleManager,
218 bundle_nums: &[u32],
219) -> Result<serde_json::Value> {
220 let mut operation_types: HashMap<String, usize> = HashMap::new();
221 let mut total_operations = 0;
222 let mut nullified_operations = 0;
223
224 if let (Some(&start), Some(&end)) = (bundle_nums.first(), bundle_nums.last()) {
225 let iter = manager.get_operations_range(start, end, None);
226 for op_result in iter {
227 let op = op_result?;
228 total_operations += 1;
229
230 if op.nullified {
231 nullified_operations += 1;
232 }
233
234 if let Some(op_type) = op.operation.get("type").and_then(|v| v.as_str()) {
235 *operation_types.entry(op_type.to_string()).or_insert(0) += 1;
236 } else {
237 *operation_types.entry("unknown".to_string()).or_insert(0) += 1;
238 }
239 }
240 }
241
242 let mut operation_type_percentages: HashMap<String, f64> = HashMap::new();
243 if total_operations > 0 {
244 for (op_type, count) in &operation_types {
245 let percentage = (*count as f64 / total_operations as f64) * 100.0;
246 operation_type_percentages.insert(op_type.clone(), percentage);
247 }
248 }
249
250 let stats = OperationStats {
251 total_operations,
252 nullified_operations,
253 operation_types,
254 operation_type_percentages,
255 };
256
257 Ok(serde_json::to_value(stats)?)
258}
259
260fn collect_did_stats(
261 _manager: &BundleManager,
262 index: &crate::index::Index,
263 bundle_nums: &[u32],
264) -> Result<serde_json::Value> {
265 let bundle_metadatas: Vec<_> = index
266 .bundles
267 .iter()
268 .filter(|b| bundle_nums.contains(&b.bundle_number))
269 .collect();
270
271 let total_did_operations: u64 = bundle_metadatas
272 .iter()
273 .map(|b| b.operation_count as u64)
274 .sum();
275 let total_unique_dids: usize = bundle_metadatas.iter().map(|b| b.did_count as usize).sum();
276
277 // For more detailed stats, we'd need to iterate operations, but that's expensive
278 // So we'll use approximations from metadata
279 let avg_operations_per_did = if total_unique_dids > 0 {
280 total_did_operations as f64 / total_unique_dids as f64
281 } else {
282 0.0
283 };
284
285 // These would require full iteration, so we'll approximate or skip
286 let dids_with_single_operation = 0; // Would need full iteration
287 let dids_with_multiple_operations = 0; // Would need full iteration
288 let max_operations_for_did = 0; // Would need full iteration
289
290 let stats = DIDStats {
291 total_unique_dids,
292 total_did_operations,
293 avg_operations_per_did,
294 dids_with_single_operation,
295 dids_with_multiple_operations,
296 max_operations_for_did,
297 };
298
299 Ok(serde_json::to_value(stats)?)
300}
301
302fn collect_timeline_stats(
303 _manager: &BundleManager,
304 index: &crate::index::Index,
305 bundle_nums: &[u32],
306) -> Result<serde_json::Value> {
307 let bundle_metadatas: Vec<_> = index
308 .bundles
309 .iter()
310 .filter(|b| bundle_nums.contains(&b.bundle_number))
311 .collect();
312
313 if bundle_metadatas.is_empty() {
314 return Ok(serde_json::json!({
315 "earliest_time": null,
316 "latest_time": null,
317 "time_span_days": null,
318 "operations_per_day": null,
319 "bundles_per_day": null,
320 }));
321 }
322
323 let earliest_time = bundle_metadatas
324 .iter()
325 .map(|b| &b.start_time)
326 .min()
327 .cloned();
328 let latest_time = bundle_metadatas.iter().map(|b| &b.end_time).max().cloned();
329
330 let time_span_days =
331 if let (Some(earliest), Some(latest)) = (earliest_time.as_ref(), latest_time.as_ref()) {
332 if let (Ok(e), Ok(l)) = (
333 chrono::DateTime::parse_from_rfc3339(earliest),
334 chrono::DateTime::parse_from_rfc3339(latest),
335 ) {
336 let duration = l.signed_duration_since(e);
337 Some(duration.num_seconds() as f64 / 86400.0)
338 } else {
339 None
340 }
341 } else {
342 None
343 };
344
345 let total_operations: u64 = bundle_metadatas
346 .iter()
347 .map(|b| b.operation_count as u64)
348 .sum();
349 let operations_per_day = time_span_days.and_then(|days| {
350 if days > 0.0 {
351 Some(total_operations as f64 / days)
352 } else {
353 None
354 }
355 });
356
357 let bundles_per_day = time_span_days.and_then(|days| {
358 if days > 0.0 {
359 Some(bundle_metadatas.len() as f64 / days)
360 } else {
361 None
362 }
363 });
364
365 // Group by date (YYYY-MM-DD)
366 let mut time_distribution: HashMap<String, usize> = HashMap::new();
367 for meta in &bundle_metadatas {
368 if let Ok(dt) = chrono::DateTime::parse_from_rfc3339(&meta.start_time) {
369 let date_str = dt.format("%Y-%m-%d").to_string();
370 *time_distribution.entry(date_str).or_insert(0) += meta.operation_count as usize;
371 }
372 }
373
374 let stats = TimelineStats {
375 earliest_time,
376 latest_time,
377 time_span_days,
378 operations_per_day,
379 bundles_per_day,
380 time_distribution,
381 };
382
383 Ok(serde_json::to_value(stats)?)
384}
385
386fn print_stats(stats: &serde_json::Value, json: bool, stat_type: StatType) -> Result<()> {
387 if json {
388 println!("{}", sonic_rs::to_string_pretty(stats)?);
389 Ok(())
390 } else {
391 print_human_stats(stats, stat_type)
392 }
393}
394
/// Render statistics to stdout in human-readable form, dispatching on the
/// statistic type that produced `stats`.
///
/// Values are read back out of the JSON object defensively: every lookup
/// falls back to 0 / 0.0 or is skipped when a key is missing or has the
/// wrong type, so a partially-populated object never panics.
fn print_human_stats(stats: &serde_json::Value, stat_type: StatType) -> Result<()> {
    match stat_type {
        StatType::Summary => {
            println!("📊 Summary Statistics");
            println!("═══════════════════════════════════════════════════════════════");
            println!();
            // NOTE(review): this prints the JSON value's Display form, so a
            // string range appears with surrounding quotes (e.g. "1-100") —
            // confirm whether that is intended.
            println!(" Bundle Range: {}", stats["bundle_range"]);
            println!(
                " Total Bundles: {}",
                utils::format_number(stats["bundle_count"].as_u64().unwrap_or(0))
            );
            println!(
                " Total Operations: {}",
                utils::format_number(stats["total_operations"].as_u64().unwrap_or(0))
            );
            println!(
                " Total DIDs: {}",
                utils::format_number(stats["total_dids"].as_u64().unwrap_or(0))
            );
            println!();
            // Storage section: sizes plus compression savings percentage.
            println!(" Storage:");
            println!(
                " Compressed: {}",
                utils::format_bytes(stats["total_compressed_size"].as_u64().unwrap_or(0))
            );
            println!(
                " Uncompressed: {}",
                utils::format_bytes(stats["total_uncompressed_size"].as_u64().unwrap_or(0))
            );
            println!(
                " Compression: {:.1}%",
                stats["compression_ratio"].as_f64().unwrap_or(0.0)
            );
            println!();
            println!(" Averages:");
            println!(
                " Ops per Bundle: {:.1}",
                stats["avg_operations_per_bundle"].as_f64().unwrap_or(0.0)
            );
            println!(
                " DIDs per Bundle: {:.1}",
                stats["avg_dids_per_bundle"].as_f64().unwrap_or(0.0)
            );
        }
        StatType::Operations => {
            println!("🔧 Operation Statistics");
            println!("═══════════════════════════════════════════════════════════════");
            println!();
            println!(
                " Total Operations: {}",
                utils::format_number(stats["total_operations"].as_u64().unwrap_or(0))
            );
            println!(
                " Nullified: {}",
                utils::format_number(stats["nullified_operations"].as_u64().unwrap_or(0))
            );
            println!();

            if let Some(types) = stats.get("operation_types").and_then(|v| v.as_object()) {
                println!(" Operation Types:");
                // Sort descending by count so the dominant types print first.
                let mut type_vec: Vec<_> = types.iter().collect();
                type_vec.sort_by(|a, b| {
                    let count_a = a.1.as_u64().unwrap_or(0);
                    let count_b = b.1.as_u64().unwrap_or(0);
                    count_b.cmp(&count_a)
                });

                for (op_type, count_val) in type_vec {
                    let count = count_val.as_u64().unwrap_or(0);
                    // Percentage comes from the sibling map keyed by the
                    // same type name; 0.0 when absent.
                    let percentage = stats["operation_type_percentages"]
                        .as_object()
                        .and_then(|p| p.get(op_type))
                        .and_then(|v| v.as_f64())
                        .unwrap_or(0.0);
                    println!(
                        " {:<20} {:>10} ({:>5.1}%)",
                        op_type,
                        utils::format_number(count),
                        percentage
                    );
                }
            }
        }
        StatType::Dids => {
            println!("🆔 DID Statistics");
            println!("═══════════════════════════════════════════════════════════════");
            println!();
            println!(
                " Unique DIDs: {}",
                utils::format_number(stats["total_unique_dids"].as_u64().unwrap_or(0))
            );
            println!(
                " Total Operations: {}",
                utils::format_number(stats["total_did_operations"].as_u64().unwrap_or(0))
            );
            println!(
                " Avg Ops per DID: {:.2}",
                stats["avg_operations_per_did"].as_f64().unwrap_or(0.0)
            );
        }
        StatType::Timeline => {
            println!("⏰ Timeline Statistics");
            println!("═══════════════════════════════════════════════════════════════");
            println!();
            // Each line is optional: absent/null fields are simply skipped.
            if let Some(earliest) = stats["earliest_time"].as_str() {
                println!(" Earliest: {}", earliest);
            }
            if let Some(latest) = stats["latest_time"].as_str() {
                println!(" Latest: {}", latest);
            }
            if let Some(days) = stats["time_span_days"].as_f64() {
                println!(" Time Span: {:.1} days", days);
            }
            if let Some(ops_per_day) = stats["operations_per_day"].as_f64() {
                println!(" Operations/Day: {:.1}", ops_per_day);
            }
            if let Some(bundles_per_day) = stats["bundles_per_day"].as_f64() {
                println!(" Bundles/Day: {:.2}", bundles_per_day);
            }
            // NOTE(review): `time_distribution` is present in the stats
            // object but never rendered here — only available via --json.
        }
    }
    println!();
    Ok(())
}