High-performance implementation of plcbundle written in Rust
(Captured from the repository file viewer: branch `main`, 518 lines, 18 kB, raw view.)
1use anyhow::Result; 2use clap::{Args, ValueEnum}; 3use plcbundle::BundleManager; 4use sonic_rs::JsonValueTrait; 5use std::collections::HashMap; 6use std::path::PathBuf; 7 8use super::utils; 9 10#[derive(Args)] 11#[command( 12 about = "Display statistics about bundles", 13 long_about = "Generate comprehensive statistics about bundles, operations, DIDs, and 14timeline patterns. Useful for understanding repository composition, analyzing 15growth trends, and identifying patterns in the data. 16 17Supports multiple statistic types: 18 • summary - Overall repository metrics (default) 19 • operations - Operation type distribution 20 • dids - DID activity patterns 21 • timeline - Temporal distribution and growth rates 22 23Use --stat-type to select which analysis to perform. Statistics can be 24computed for specific bundle ranges using --bundles, or for the entire 25repository if omitted. Use --json for machine-readable output suitable for 26further processing or visualization. 27 28This command provides insights into repository health, data distribution, 29and usage patterns that help with capacity planning and optimization.", 30 help_template = crate::clap_help!( 31 examples: " # Summary statistics (default)\n \ 32 {bin} stats\n\n \ 33 # Statistics for specific bundles\n \ 34 {bin} stats --bundles 1-100\n\n \ 35 # Operation type distribution\n \ 36 {bin} stats --stat-type operations\n\n \ 37 # DID statistics\n \ 38 {bin} stats --stat-type dids\n\n \ 39 # Timeline statistics\n \ 40 {bin} stats --stat-type timeline\n\n \ 41 # JSON output for scripting\n \ 42 {bin} stats --json\n\n \ 43 # Parallel processing\n \ 44 {bin} stats --stat-type operations -j 8" 45 ) 46)] 47pub struct StatsCommand { 48 /// Bundle range 49 #[arg(short, long)] 50 pub bundles: Option<String>, 51 52 /// Statistics type 53 #[arg(short = 't', long, default_value = "summary")] 54 pub stat_type: StatType, 55 56 /// Number of threads to use (0 = auto-detect) 57 #[arg(short = 'j', long, default_value = 
"0")] 58 pub threads: usize, 59 60 /// Output as JSON 61 #[arg(long)] 62 pub json: bool, 63} 64 65#[derive(Debug, Clone, ValueEnum)] 66pub enum StatType { 67 /// Summary statistics 68 Summary, 69 /// Operation type distribution 70 Operations, 71 /// DID statistics 72 Dids, 73 /// Timeline statistics 74 Timeline, 75} 76 77pub fn run(cmd: StatsCommand, dir: PathBuf) -> Result<()> { 78 let manager = utils::create_manager(dir.clone(), false, false, false)?; 79 let index = manager.get_index(); 80 81 if utils::is_repository_empty(&manager) { 82 println!("Repository is empty (no bundles)"); 83 return Ok(()); 84 } 85 86 let bundle_nums = utils::parse_bundle_spec(cmd.bundles, index.last_bundle)?; 87 88 match cmd.stat_type { 89 StatType::Summary => { 90 let stats = collect_summary_stats(&manager, &index, &bundle_nums)?; 91 print_stats(&stats, cmd.json, StatType::Summary)?; 92 } 93 StatType::Operations => { 94 let stats = collect_operation_stats(&manager, &bundle_nums)?; 95 print_stats(&stats, cmd.json, StatType::Operations)?; 96 } 97 StatType::Dids => { 98 let stats = collect_did_stats(&manager, &index, &bundle_nums)?; 99 print_stats(&stats, cmd.json, StatType::Dids)?; 100 } 101 StatType::Timeline => { 102 let stats = collect_timeline_stats(&manager, &index, &bundle_nums)?; 103 print_stats(&stats, cmd.json, StatType::Timeline)?; 104 } 105 } 106 107 Ok(()) 108} 109 110#[derive(Debug, Clone, serde::Serialize)] 111struct SummaryStats { 112 bundle_count: usize, 113 total_operations: u64, 114 total_dids: u64, 115 total_compressed_size: u64, 116 total_uncompressed_size: u64, 117 compression_ratio: f64, 118 avg_operations_per_bundle: f64, 119 avg_dids_per_bundle: f64, 120 bundle_range: String, 121} 122 123#[derive(Debug, Clone, serde::Serialize)] 124struct OperationStats { 125 total_operations: usize, 126 nullified_operations: usize, 127 operation_types: HashMap<String, usize>, 128 operation_type_percentages: HashMap<String, f64>, 129} 130 131#[derive(Debug, Clone, 
serde::Serialize)] 132struct DIDStats { 133 total_unique_dids: usize, 134 total_did_operations: u64, 135 avg_operations_per_did: f64, 136 dids_with_single_operation: usize, 137 dids_with_multiple_operations: usize, 138 max_operations_for_did: usize, 139} 140 141#[derive(Debug, Clone, serde::Serialize)] 142struct TimelineStats { 143 earliest_time: Option<String>, 144 latest_time: Option<String>, 145 time_span_days: Option<f64>, 146 operations_per_day: Option<f64>, 147 bundles_per_day: Option<f64>, 148 time_distribution: HashMap<String, usize>, 149} 150 151fn collect_summary_stats( 152 _manager: &BundleManager, 153 index: &crate::index::Index, 154 bundle_nums: &[u32], 155) -> Result<serde_json::Value> { 156 let bundle_metadatas: Vec<_> = index 157 .bundles 158 .iter() 159 .filter(|b| bundle_nums.contains(&b.bundle_number)) 160 .collect(); 161 162 let bundle_count = bundle_metadatas.len(); 163 let total_operations: u64 = bundle_metadatas 164 .iter() 165 .map(|b| b.operation_count as u64) 166 .sum(); 167 let total_dids: u64 = bundle_metadatas.iter().map(|b| b.did_count as u64).sum(); 168 let total_compressed_size: u64 = bundle_metadatas.iter().map(|b| b.compressed_size).sum(); 169 let total_uncompressed_size: u64 = bundle_metadatas.iter().map(|b| b.uncompressed_size).sum(); 170 171 let compression_ratio = if total_uncompressed_size > 0 { 172 (1.0 - total_compressed_size as f64 / total_uncompressed_size as f64) * 100.0 173 } else { 174 0.0 175 }; 176 177 let avg_operations_per_bundle = if bundle_count > 0 { 178 total_operations as f64 / bundle_count as f64 179 } else { 180 0.0 181 }; 182 183 let avg_dids_per_bundle = if bundle_count > 0 { 184 total_dids as f64 / bundle_count as f64 185 } else { 186 0.0 187 }; 188 189 let bundle_range = if bundle_nums.len() == 1 { 190 format!("{}", bundle_nums[0]) 191 } else if let (Some(&min), Some(&max)) = (bundle_nums.first(), bundle_nums.last()) { 192 if min == max { 193 format!("{}", min) 194 } else { 195 format!("{}-{}", min, max) 
196 } 197 } else { 198 "all".to_string() 199 }; 200 201 let stats = SummaryStats { 202 bundle_count, 203 total_operations, 204 total_dids, 205 total_compressed_size, 206 total_uncompressed_size, 207 compression_ratio, 208 avg_operations_per_bundle, 209 avg_dids_per_bundle, 210 bundle_range, 211 }; 212 213 Ok(serde_json::to_value(stats)?) 214} 215 216fn collect_operation_stats( 217 manager: &BundleManager, 218 bundle_nums: &[u32], 219) -> Result<serde_json::Value> { 220 let mut operation_types: HashMap<String, usize> = HashMap::new(); 221 let mut total_operations = 0; 222 let mut nullified_operations = 0; 223 224 if let (Some(&start), Some(&end)) = (bundle_nums.first(), bundle_nums.last()) { 225 let iter = manager.get_operations_range(start, end, None); 226 for op_result in iter { 227 let op = op_result?; 228 total_operations += 1; 229 230 if op.nullified { 231 nullified_operations += 1; 232 } 233 234 if let Some(op_type) = op.operation.get("type").and_then(|v| v.as_str()) { 235 *operation_types.entry(op_type.to_string()).or_insert(0) += 1; 236 } else { 237 *operation_types.entry("unknown".to_string()).or_insert(0) += 1; 238 } 239 } 240 } 241 242 let mut operation_type_percentages: HashMap<String, f64> = HashMap::new(); 243 if total_operations > 0 { 244 for (op_type, count) in &operation_types { 245 let percentage = (*count as f64 / total_operations as f64) * 100.0; 246 operation_type_percentages.insert(op_type.clone(), percentage); 247 } 248 } 249 250 let stats = OperationStats { 251 total_operations, 252 nullified_operations, 253 operation_types, 254 operation_type_percentages, 255 }; 256 257 Ok(serde_json::to_value(stats)?) 
258} 259 260fn collect_did_stats( 261 _manager: &BundleManager, 262 index: &crate::index::Index, 263 bundle_nums: &[u32], 264) -> Result<serde_json::Value> { 265 let bundle_metadatas: Vec<_> = index 266 .bundles 267 .iter() 268 .filter(|b| bundle_nums.contains(&b.bundle_number)) 269 .collect(); 270 271 let total_did_operations: u64 = bundle_metadatas 272 .iter() 273 .map(|b| b.operation_count as u64) 274 .sum(); 275 let total_unique_dids: usize = bundle_metadatas.iter().map(|b| b.did_count as usize).sum(); 276 277 // For more detailed stats, we'd need to iterate operations, but that's expensive 278 // So we'll use approximations from metadata 279 let avg_operations_per_did = if total_unique_dids > 0 { 280 total_did_operations as f64 / total_unique_dids as f64 281 } else { 282 0.0 283 }; 284 285 // These would require full iteration, so we'll approximate or skip 286 let dids_with_single_operation = 0; // Would need full iteration 287 let dids_with_multiple_operations = 0; // Would need full iteration 288 let max_operations_for_did = 0; // Would need full iteration 289 290 let stats = DIDStats { 291 total_unique_dids, 292 total_did_operations, 293 avg_operations_per_did, 294 dids_with_single_operation, 295 dids_with_multiple_operations, 296 max_operations_for_did, 297 }; 298 299 Ok(serde_json::to_value(stats)?) 
300} 301 302fn collect_timeline_stats( 303 _manager: &BundleManager, 304 index: &crate::index::Index, 305 bundle_nums: &[u32], 306) -> Result<serde_json::Value> { 307 let bundle_metadatas: Vec<_> = index 308 .bundles 309 .iter() 310 .filter(|b| bundle_nums.contains(&b.bundle_number)) 311 .collect(); 312 313 if bundle_metadatas.is_empty() { 314 return Ok(serde_json::json!({ 315 "earliest_time": null, 316 "latest_time": null, 317 "time_span_days": null, 318 "operations_per_day": null, 319 "bundles_per_day": null, 320 })); 321 } 322 323 let earliest_time = bundle_metadatas 324 .iter() 325 .map(|b| &b.start_time) 326 .min() 327 .cloned(); 328 let latest_time = bundle_metadatas.iter().map(|b| &b.end_time).max().cloned(); 329 330 let time_span_days = 331 if let (Some(earliest), Some(latest)) = (earliest_time.as_ref(), latest_time.as_ref()) { 332 if let (Ok(e), Ok(l)) = ( 333 chrono::DateTime::parse_from_rfc3339(earliest), 334 chrono::DateTime::parse_from_rfc3339(latest), 335 ) { 336 let duration = l.signed_duration_since(e); 337 Some(duration.num_seconds() as f64 / 86400.0) 338 } else { 339 None 340 } 341 } else { 342 None 343 }; 344 345 let total_operations: u64 = bundle_metadatas 346 .iter() 347 .map(|b| b.operation_count as u64) 348 .sum(); 349 let operations_per_day = time_span_days.and_then(|days| { 350 if days > 0.0 { 351 Some(total_operations as f64 / days) 352 } else { 353 None 354 } 355 }); 356 357 let bundles_per_day = time_span_days.and_then(|days| { 358 if days > 0.0 { 359 Some(bundle_metadatas.len() as f64 / days) 360 } else { 361 None 362 } 363 }); 364 365 // Group by date (YYYY-MM-DD) 366 let mut time_distribution: HashMap<String, usize> = HashMap::new(); 367 for meta in &bundle_metadatas { 368 if let Ok(dt) = chrono::DateTime::parse_from_rfc3339(&meta.start_time) { 369 let date_str = dt.format("%Y-%m-%d").to_string(); 370 *time_distribution.entry(date_str).or_insert(0) += meta.operation_count as usize; 371 } 372 } 373 374 let stats = TimelineStats { 375 
earliest_time, 376 latest_time, 377 time_span_days, 378 operations_per_day, 379 bundles_per_day, 380 time_distribution, 381 }; 382 383 Ok(serde_json::to_value(stats)?) 384} 385 386fn print_stats(stats: &serde_json::Value, json: bool, stat_type: StatType) -> Result<()> { 387 if json { 388 println!("{}", sonic_rs::to_string_pretty(stats)?); 389 Ok(()) 390 } else { 391 print_human_stats(stats, stat_type) 392 } 393} 394 395fn print_human_stats(stats: &serde_json::Value, stat_type: StatType) -> Result<()> { 396 match stat_type { 397 StatType::Summary => { 398 println!("📊 Summary Statistics"); 399 println!("═══════════════════════════════════════════════════════════════"); 400 println!(); 401 println!(" Bundle Range: {}", stats["bundle_range"]); 402 println!( 403 " Total Bundles: {}", 404 utils::format_number(stats["bundle_count"].as_u64().unwrap_or(0)) 405 ); 406 println!( 407 " Total Operations: {}", 408 utils::format_number(stats["total_operations"].as_u64().unwrap_or(0)) 409 ); 410 println!( 411 " Total DIDs: {}", 412 utils::format_number(stats["total_dids"].as_u64().unwrap_or(0)) 413 ); 414 println!(); 415 println!(" Storage:"); 416 println!( 417 " Compressed: {}", 418 utils::format_bytes(stats["total_compressed_size"].as_u64().unwrap_or(0)) 419 ); 420 println!( 421 " Uncompressed: {}", 422 utils::format_bytes(stats["total_uncompressed_size"].as_u64().unwrap_or(0)) 423 ); 424 println!( 425 " Compression: {:.1}%", 426 stats["compression_ratio"].as_f64().unwrap_or(0.0) 427 ); 428 println!(); 429 println!(" Averages:"); 430 println!( 431 " Ops per Bundle: {:.1}", 432 stats["avg_operations_per_bundle"].as_f64().unwrap_or(0.0) 433 ); 434 println!( 435 " DIDs per Bundle: {:.1}", 436 stats["avg_dids_per_bundle"].as_f64().unwrap_or(0.0) 437 ); 438 } 439 StatType::Operations => { 440 println!("🔧 Operation Statistics"); 441 println!("═══════════════════════════════════════════════════════════════"); 442 println!(); 443 println!( 444 " Total Operations: {}", 445 
utils::format_number(stats["total_operations"].as_u64().unwrap_or(0)) 446 ); 447 println!( 448 " Nullified: {}", 449 utils::format_number(stats["nullified_operations"].as_u64().unwrap_or(0)) 450 ); 451 println!(); 452 453 if let Some(types) = stats.get("operation_types").and_then(|v| v.as_object()) { 454 println!(" Operation Types:"); 455 let mut type_vec: Vec<_> = types.iter().collect(); 456 type_vec.sort_by(|a, b| { 457 let count_a = a.1.as_u64().unwrap_or(0); 458 let count_b = b.1.as_u64().unwrap_or(0); 459 count_b.cmp(&count_a) 460 }); 461 462 for (op_type, count_val) in type_vec { 463 let count = count_val.as_u64().unwrap_or(0); 464 let percentage = stats["operation_type_percentages"] 465 .as_object() 466 .and_then(|p| p.get(op_type)) 467 .and_then(|v| v.as_f64()) 468 .unwrap_or(0.0); 469 println!( 470 " {:<20} {:>10} ({:>5.1}%)", 471 op_type, 472 utils::format_number(count), 473 percentage 474 ); 475 } 476 } 477 } 478 StatType::Dids => { 479 println!("🆔 DID Statistics"); 480 println!("═══════════════════════════════════════════════════════════════"); 481 println!(); 482 println!( 483 " Unique DIDs: {}", 484 utils::format_number(stats["total_unique_dids"].as_u64().unwrap_or(0)) 485 ); 486 println!( 487 " Total Operations: {}", 488 utils::format_number(stats["total_did_operations"].as_u64().unwrap_or(0)) 489 ); 490 println!( 491 " Avg Ops per DID: {:.2}", 492 stats["avg_operations_per_did"].as_f64().unwrap_or(0.0) 493 ); 494 } 495 StatType::Timeline => { 496 println!("⏰ Timeline Statistics"); 497 println!("═══════════════════════════════════════════════════════════════"); 498 println!(); 499 if let Some(earliest) = stats["earliest_time"].as_str() { 500 println!(" Earliest: {}", earliest); 501 } 502 if let Some(latest) = stats["latest_time"].as_str() { 503 println!(" Latest: {}", latest); 504 } 505 if let Some(days) = stats["time_span_days"].as_f64() { 506 println!(" Time Span: {:.1} days", days); 507 } 508 if let Some(ops_per_day) = 
stats["operations_per_day"].as_f64() { 509 println!(" Operations/Day: {:.1}", ops_per_day); 510 } 511 if let Some(bundles_per_day) = stats["bundles_per_day"].as_f64() { 512 println!(" Bundles/Day: {:.2}", bundles_per_day); 513 } 514 } 515 } 516 println!(); 517 Ok(()) 518}