[DEPRECATED] Go implementation of plcbundle — superseded by the Rust implementation (rust-test, 982 lines, 31 kB).
1package commands 2 3import ( 4 "context" 5 "fmt" 6 "os" 7 "path/filepath" 8 "sort" 9 "strings" 10 "time" 11 12 "github.com/goccy/go-json" 13 "github.com/spf13/cobra" 14 "tangled.org/atscan.net/plcbundle/internal/storage" 15) 16 17// ============================================================================ 18// TYPES (defined at package level to avoid conflicts) 19// ============================================================================ 20 21type DIDActivity struct { 22 DID string `json:"did"` 23 Count int `json:"count"` 24} 25 26type DomainCount struct { 27 Domain string `json:"domain"` 28 Count int `json:"count"` 29} 30 31type EndpointCount struct { 32 Endpoint string `json:"endpoint"` 33 Count int `json:"count"` 34} 35 36type TimeSlot struct { 37 Time time.Time `json:"time"` 38 Count int `json:"count"` 39} 40 41type inspectOptions struct { 42 showJSON bool 43 verify bool 44 showSamples bool 45 sampleCount int 46 skipMetadata bool 47 skipPatterns bool 48 skipCrypto bool 49 verbose bool 50} 51 52type inspectResult struct { 53 // Metadata 54 Metadata *storage.BundleMetadata `json:"metadata,omitempty"` 55 56 // Basic stats 57 FilePath string `json:"file_path"` 58 FileSize int64 `json:"file_size"` 59 HasMetadataFrame bool `json:"has_metadata_frame"` 60 HasFrameIndex bool `json:"has_frame_index"` 61 62 // Operation analysis 63 TotalOps int `json:"total_ops"` 64 NullifiedOps int `json:"nullified_ops"` 65 ActiveOps int `json:"active_ops"` 66 UniqueDIDs int `json:"unique_dids"` 67 OperationTypes map[string]int `json:"operation_types"` 68 69 // DID patterns 70 TopDIDs []DIDActivity `json:"top_dids"` 71 SingleOpDIDs int `json:"single_op_dids"` 72 MultiOpDIDs int `json:"multi_op_dids"` 73 74 // Handle patterns 75 TotalHandles int `json:"total_handles"` 76 TopDomains []DomainCount `json:"top_domains"` 77 InvalidHandles int `json:"invalid_handles"` 78 79 // Service patterns 80 TotalServices int `json:"total_services"` 81 UniqueEndpoints int `json:"unique_endpoints"` 
82 TopPDSEndpoints []EndpointCount `json:"top_pds_endpoints"` 83 84 // Temporal 85 TimeDistribution *TimeDistributionSummary `json:"time_distribution,omitempty"` 86 AvgOpsPerMinute float64 `json:"avg_ops_per_minute"` 87 88 // Size analysis 89 AvgOpSize int `json:"avg_op_size"` 90 MinOpSize int `json:"min_op_size"` 91 MaxOpSize int `json:"max_op_size"` 92 TotalOpSize int64 `json:"total_op_size"` 93 94 // Crypto verification 95 ContentHashValid bool `json:"content_hash_valid"` 96 CompressedHashValid bool `json:"compressed_hash_valid"` 97 MetadataValid bool `json:"metadata_valid"` 98 99 // Timing 100 LoadTime time.Duration `json:"load_time"` 101 AnalyzeTime time.Duration `json:"analyze_time"` 102 VerifyTime time.Duration `json:"verify_time"` 103 TotalTime time.Duration `json:"total_time"` 104} 105 106type bundleAnalysis struct { 107 TotalOps int `json:"total_ops"` 108 NullifiedOps int `json:"nullified_ops"` 109 ActiveOps int `json:"active_ops"` 110 UniqueDIDs int `json:"unique_dids"` 111 OperationTypes map[string]int `json:"operation_types"` 112 SingleOpDIDs int `json:"single_op_dids"` 113 MultiOpDIDs int `json:"multi_op_dids"` 114 TotalHandles int `json:"total_handles"` 115 InvalidHandles int `json:"invalid_handles"` 116 TotalServices int `json:"total_services"` 117 UniqueEndpoints int `json:"unique_endpoints"` 118 AvgOpsPerMinute float64 `json:"avg_ops_per_minute"` 119 AvgOpSize int `json:"avg_op_size"` 120 MinOpSize int `json:"min_op_size"` 121 MaxOpSize int `json:"max_op_size"` 122 TotalOpSize int64 `json:"total_op_size"` 123 124 // For top-N calculations (unexported, won't appear in JSON) 125 didActivity map[string]int 126 domainCounts map[string]int 127 endpointCounts map[string]int 128 129 // For time calculations 130 timeSlots map[int64]int 131 132 // Results 133 TopDIDs []DIDActivity `json:"top_dids"` 134 TopDomains []DomainCount `json:"top_domains"` 135 TopPDSEndpoints []EndpointCount `json:"top_pds_endpoints"` 136 TimeDistribution *TimeDistributionSummary 
`json:"time_distribution,omitempty"` 137} 138 139type TimeDistributionSummary struct { 140 EarliestOp time.Time `json:"earliest_op"` 141 LatestOp time.Time `json:"latest_op"` 142 TimeSpan string `json:"time_span"` 143 PeakHour time.Time `json:"peak_hour"` 144 PeakHourOps int `json:"peak_hour_ops"` 145 TotalHours int `json:"total_hours"` 146} 147 148// ============================================================================ 149// COMMAND DEFINITION 150// ============================================================================ 151 152func NewInspectCommand() *cobra.Command { 153 var ( 154 showJSON bool 155 verify bool 156 showSamples bool 157 sampleCount int 158 skipMetadata bool 159 skipPatterns bool 160 skipCrypto bool 161 ) 162 163 cmd := &cobra.Command{ 164 Use: "inspect <bundle-number|bundle-file>", 165 Short: "Deep analysis of bundle contents", 166 Long: `Deep analysis of bundle contents 167 168Performs comprehensive analysis of a bundle including: 169 • Embedded metadata (from skippable frame) 170 • Operation type breakdown 171 • DID activity patterns 172 • Handle and domain statistics 173 • Service endpoint analysis 174 • Temporal distribution 175 • Cryptographic verification 176 • Size analysis 177 178Can inspect either by bundle number (from repository) or direct file path.`, 179 180 Example: ` # Inspect from repository 181 plcbundle inspect 42 182 183 # Inspect specific file 184 plcbundle inspect /path/to/000042.jsonl.zst 185 plcbundle inspect 000042.jsonl.zst 186 187 # Skip certain analysis sections 188 plcbundle inspect 42 --skip-patterns --skip-crypto 189 190 # Show sample operations 191 plcbundle inspect 42 --samples --sample-count 20 192 193 # Verify all hashes 194 plcbundle inspect 42 --verify 195 196 # JSON output (for scripting) 197 plcbundle inspect 42 --json`, 198 199 Args: cobra.ExactArgs(1), 200 201 RunE: func(cmd *cobra.Command, args []string) error { 202 input := args[0] 203 verbose, _ := 
cmd.Root().PersistentFlags().GetBool("verbose") 204 205 return runInspect(cmd, input, inspectOptions{ 206 showJSON: showJSON, 207 verify: verify, 208 showSamples: showSamples, 209 sampleCount: sampleCount, 210 skipMetadata: skipMetadata, 211 skipPatterns: skipPatterns, 212 skipCrypto: skipCrypto, 213 verbose: verbose, 214 }) 215 }, 216 } 217 218 cmd.Flags().BoolVar(&showJSON, "json", false, "Output as JSON") 219 cmd.Flags().BoolVar(&verify, "verify", false, "Verify cryptographic hashes") 220 cmd.Flags().BoolVar(&showSamples, "samples", false, "Show sample operations") 221 cmd.Flags().IntVar(&sampleCount, "sample-count", 10, "Number of samples to show") 222 cmd.Flags().BoolVar(&skipMetadata, "skip-metadata", false, "Skip embedded metadata section") 223 cmd.Flags().BoolVar(&skipPatterns, "skip-patterns", false, "Skip pattern analysis") 224 cmd.Flags().BoolVar(&skipCrypto, "skip-crypto", false, "Skip cryptographic verification") 225 226 return cmd 227} 228 229// ============================================================================ 230// MAIN LOGIC 231// ============================================================================ 232 233func runInspect(cmd *cobra.Command, input string, opts inspectOptions) error { 234 totalStart := time.Now() 235 236 // Determine if input is bundle number or file path 237 bundlePath, bundleNum, err := resolveBundlePath(cmd, input) 238 if err != nil { 239 return err 240 } 241 242 result := &inspectResult{ 243 FilePath: bundlePath, 244 OperationTypes: make(map[string]int), 245 TopDIDs: make([]DIDActivity, 0), 246 TopDomains: make([]DomainCount, 0), 247 TopPDSEndpoints: make([]EndpointCount, 0), 248 } 249 250 // Check file exists 251 info, err := os.Stat(bundlePath) 252 if err != nil { 253 return fmt.Errorf("bundle file not found: %w", err) 254 } 255 result.FileSize = info.Size() 256 257 // Check for frame index 258 ops, _ := storage.NewOperations(nil, opts.verbose) 259 260 if _, err := ops.ExtractBundleMetadata(bundlePath); err == 
nil { 261 result.HasFrameIndex = true // Has embedded index 262 } else { 263 // Check for external .idx file (legacy) 264 indexPath := bundlePath + ".idx" 265 if _, err := os.Stat(indexPath); err == nil { 266 result.HasFrameIndex = true 267 } 268 } 269 270 fmt.Fprintf(os.Stderr, "Inspecting: %s\n", filepath.Base(bundlePath)) 271 fmt.Fprintf(os.Stderr, "File size: %s\n\n", formatBytes(result.FileSize)) 272 273 // SECTION 1: Extract embedded metadata (fast!) 274 if !opts.skipMetadata { 275 fmt.Fprintf(os.Stderr, "Reading embedded metadata...\n") 276 metaStart := time.Now() 277 278 ops, _ := storage.NewOperations(nil, opts.verbose) 279 280 meta, err := ops.ExtractBundleMetadata(bundlePath) 281 if err != nil { 282 if opts.verbose { 283 fmt.Fprintf(os.Stderr, " No embedded metadata: %v\n", err) 284 } 285 result.HasMetadataFrame = false 286 } else { 287 result.HasMetadataFrame = true 288 result.Metadata = meta 289 if opts.verbose { 290 fmt.Fprintf(os.Stderr, " ✓ Extracted in %s\n", time.Since(metaStart)) 291 } 292 } 293 fmt.Fprintf(os.Stderr, "\n") 294 } 295 296 // SECTION 2: Load and analyze operations 297 fmt.Fprintf(os.Stderr, "Loading and analyzing operations...\n") 298 loadStart := time.Now() 299 300 analysis, err := analyzeBundle(bundlePath, opts) 301 if err != nil { 302 return fmt.Errorf("analysis failed: %w", err) 303 } 304 305 result.LoadTime = time.Since(loadStart) 306 result.TotalOps = analysis.TotalOps 307 result.NullifiedOps = analysis.NullifiedOps 308 result.ActiveOps = analysis.ActiveOps 309 result.UniqueDIDs = analysis.UniqueDIDs 310 result.OperationTypes = analysis.OperationTypes 311 result.TopDIDs = analysis.TopDIDs 312 result.SingleOpDIDs = analysis.SingleOpDIDs 313 result.MultiOpDIDs = analysis.MultiOpDIDs 314 result.TotalHandles = analysis.TotalHandles 315 result.TopDomains = analysis.TopDomains 316 result.InvalidHandles = analysis.InvalidHandles 317 result.TotalServices = analysis.TotalServices 318 result.UniqueEndpoints = analysis.UniqueEndpoints 
319 result.TopPDSEndpoints = analysis.TopPDSEndpoints 320 result.TimeDistribution = analysis.TimeDistribution 321 result.AvgOpsPerMinute = analysis.AvgOpsPerMinute 322 result.AvgOpSize = analysis.AvgOpSize 323 result.MinOpSize = analysis.MinOpSize 324 result.MaxOpSize = analysis.MaxOpSize 325 result.TotalOpSize = analysis.TotalOpSize 326 327 fmt.Fprintf(os.Stderr, " ✓ Analyzed in %s\n\n", result.LoadTime) 328 329 // SECTION 3: Cryptographic verification 330 if opts.verify && !opts.skipCrypto { 331 fmt.Fprintf(os.Stderr, "Verifying cryptographic hashes...\n") 332 verifyStart := time.Now() 333 334 // Pass cmd parameter 335 result.ContentHashValid, result.CompressedHashValid, result.MetadataValid = 336 verifyCrypto(cmd, bundlePath, result.Metadata, bundleNum, opts.verbose) 337 338 result.VerifyTime = time.Since(verifyStart) 339 fmt.Fprintf(os.Stderr, " ✓ Verified in %s\n\n", result.VerifyTime) 340 } 341 342 result.TotalTime = time.Since(totalStart) 343 344 // Display results 345 if opts.showJSON { 346 return displayInspectJSON(result) 347 } 348 349 return displayInspectHuman(result, analysis, opts) 350} 351 352// ============================================================================ 353// ANALYSIS FUNCTIONS 354// ============================================================================ 355 356func analyzeBundle(path string, opts inspectOptions) (*bundleAnalysis, error) { 357 ops, _ := storage.NewOperations(nil, opts.verbose) 358 operations, err := ops.LoadBundle(path) 359 if err != nil { 360 return nil, err 361 } 362 363 analysis := &bundleAnalysis{ 364 TotalOps: len(operations), 365 OperationTypes: make(map[string]int), 366 didActivity: make(map[string]int), 367 domainCounts: make(map[string]int), 368 endpointCounts: make(map[string]int), 369 timeSlots: make(map[int64]int), 370 } 371 372 // Analyze each operation 373 for _, op := range operations { 374 // Nullification 375 if op.IsNullified() { 376 analysis.NullifiedOps++ 377 } else { 378 
analysis.ActiveOps++ 379 } 380 381 // DID activity 382 analysis.didActivity[op.DID]++ 383 384 // Size stats 385 opSize := len(op.RawJSON) 386 if opSize == 0 { 387 data, _ := json.Marshal(op) 388 opSize = len(data) 389 } 390 391 analysis.TotalOpSize += int64(opSize) 392 if analysis.MinOpSize == 0 || opSize < analysis.MinOpSize { 393 analysis.MinOpSize = opSize 394 } 395 if opSize > analysis.MaxOpSize { 396 analysis.MaxOpSize = opSize 397 } 398 399 // Parse operation for detailed analysis 400 opData, err := op.GetOperationData() 401 if err != nil || opData == nil { 402 continue 403 } 404 405 // Operation type 406 if opType, ok := opData["type"].(string); ok { 407 analysis.OperationTypes[opType]++ 408 } 409 410 // Handle analysis 411 if !opts.skipPatterns { 412 analyzeHandles(opData, analysis) 413 analyzeServices(opData, analysis) 414 } 415 416 // Time distribution (group by minute) 417 timeSlot := op.CreatedAt.Unix() / 60 418 analysis.timeSlots[timeSlot]++ 419 } 420 421 // Calculate derived stats 422 analysis.UniqueDIDs = len(analysis.didActivity) 423 if analysis.TotalOps > 0 { 424 analysis.AvgOpSize = int(analysis.TotalOpSize / int64(analysis.TotalOps)) 425 } 426 427 // Count single vs multi-op DIDs 428 for _, count := range analysis.didActivity { 429 if count == 1 { 430 analysis.SingleOpDIDs++ 431 } else { 432 analysis.MultiOpDIDs++ 433 } 434 } 435 436 // Top DIDs 437 analysis.TopDIDs = getTopDIDs(analysis.didActivity, 10) 438 439 // Top domains 440 analysis.TopDomains = getTopDomains(analysis.domainCounts, 10) 441 442 // Top endpoints 443 analysis.TopPDSEndpoints = getTopEndpoints(analysis.endpointCounts, 10) 444 445 // Unique endpoints 446 analysis.UniqueEndpoints = len(analysis.endpointCounts) 447 448 // Time distribution 449 analysis.TimeDistribution = calculateTimeDistributionSummary(analysis.timeSlots) 450 451 // Calculate ops per minute 452 if len(operations) > 1 { 453 duration := operations[len(operations)-1].CreatedAt.Sub(operations[0].CreatedAt) 454 if 
duration.Minutes() > 0 { 455 analysis.AvgOpsPerMinute = float64(len(operations)) / duration.Minutes() 456 } 457 } 458 459 return analysis, nil 460} 461 462func analyzeHandles(opData map[string]interface{}, analysis *bundleAnalysis) { 463 if aka, ok := opData["alsoKnownAs"].([]interface{}); ok { 464 for _, a := range aka { 465 if akaStr, ok := a.(string); ok { 466 if strings.HasPrefix(akaStr, "at://") { 467 analysis.TotalHandles++ 468 469 // Extract domain 470 handle := strings.TrimPrefix(akaStr, "at://") 471 if idx := strings.Index(handle, "/"); idx > 0 { 472 handle = handle[:idx] 473 } 474 475 // Count domain (TLD) 476 parts := strings.Split(handle, ".") 477 if len(parts) >= 2 { 478 domain := parts[len(parts)-1] 479 if len(parts) >= 2 { 480 domain = parts[len(parts)-2] + "." + domain 481 } 482 analysis.domainCounts[domain]++ 483 } 484 485 // Check for invalid patterns 486 if strings.Contains(handle, "_") { 487 analysis.InvalidHandles++ 488 } 489 } 490 } 491 } 492 } 493} 494 495func analyzeServices(opData map[string]interface{}, analysis *bundleAnalysis) { 496 if services, ok := opData["services"].(map[string]interface{}); ok { 497 analysis.TotalServices += len(services) 498 499 // Extract PDS endpoints 500 if pds, ok := services["atproto_pds"].(map[string]interface{}); ok { 501 if endpoint, ok := pds["endpoint"].(string); ok { 502 // Normalize endpoint 503 endpoint = strings.TrimPrefix(endpoint, "https://") 504 endpoint = strings.TrimPrefix(endpoint, "http://") 505 if idx := strings.Index(endpoint, "/"); idx > 0 { 506 endpoint = endpoint[:idx] 507 } 508 analysis.endpointCounts[endpoint]++ 509 } 510 } 511 } 512} 513 514func getTopDIDs(didActivity map[string]int, limit int) []DIDActivity { 515 var results []DIDActivity 516 for did, count := range didActivity { 517 results = append(results, DIDActivity{DID: did, Count: count}) 518 } 519 520 sort.Slice(results, func(i, j int) bool { 521 return results[i].Count > results[j].Count 522 }) 523 524 if len(results) > limit 
{ 525 results = results[:limit] 526 } 527 528 return results 529} 530 531func getTopDomains(domainCounts map[string]int, limit int) []DomainCount { 532 var results []DomainCount 533 for domain, count := range domainCounts { 534 results = append(results, DomainCount{Domain: domain, Count: count}) 535 } 536 537 sort.Slice(results, func(i, j int) bool { 538 return results[i].Count > results[j].Count 539 }) 540 541 if len(results) > limit { 542 results = results[:limit] 543 } 544 545 return results 546} 547 548func getTopEndpoints(endpointCounts map[string]int, limit int) []EndpointCount { 549 var results []EndpointCount 550 for endpoint, count := range endpointCounts { 551 results = append(results, EndpointCount{Endpoint: endpoint, Count: count}) 552 } 553 554 sort.Slice(results, func(i, j int) bool { 555 return results[i].Count > results[j].Count 556 }) 557 558 if len(results) > limit { 559 results = results[:limit] 560 } 561 562 return results 563} 564 565func calculateTimeDistributionSummary(timeSlots map[int64]int) *TimeDistributionSummary { 566 if len(timeSlots) == 0 { 567 return nil 568 } 569 570 var earliest, latest int64 571 var peakHour int64 572 var peakCount int 573 574 // Group by hour and find stats 575 hourlySlots := make(map[int64]int) 576 577 for ts, count := range timeSlots { 578 // Track earliest/latest 579 if earliest == 0 || ts < earliest { 580 earliest = ts 581 } 582 if ts > latest { 583 latest = ts 584 } 585 586 // Group by hour 587 hour := (ts / 3600) * 3600 // Truncate to hour 588 hourlySlots[hour] += count 589 } 590 591 // Find peak hour 592 for hour, count := range hourlySlots { 593 if count > peakCount { 594 peakCount = count 595 peakHour = hour 596 } 597 } 598 599 // Calculate time span 600 duration := time.Unix(latest, 0).Sub(time.Unix(earliest, 0)) 601 timeSpan := formatDuration(duration) 602 603 // Calculate total hours covered 604 totalHours := len(hourlySlots) 605 606 return &TimeDistributionSummary{ 607 EarliestOp: time.Unix(earliest, 
0).UTC(), 608 LatestOp: time.Unix(latest, 0).UTC(), 609 TimeSpan: timeSpan, 610 PeakHour: time.Unix(peakHour, 0).UTC(), 611 PeakHourOps: peakCount, 612 TotalHours: totalHours, 613 } 614} 615 616// ============================================================================ 617// DISPLAY FUNCTIONS 618// ============================================================================ 619 620func displayInspectHuman(result *inspectResult, _ *bundleAnalysis, opts inspectOptions) error { 621 fmt.Printf("\n") 622 fmt.Printf("═══════════════════════════════════════════════════════════════\n") 623 fmt.Printf(" Bundle Deep Inspection\n") 624 fmt.Printf("═══════════════════════════════════════════════════════════════\n\n") 625 626 // File info 627 fmt.Printf("📁 File Information\n") 628 fmt.Printf("───────────────────\n") 629 fmt.Printf(" Path: %s\n", filepath.Base(result.FilePath)) 630 fmt.Printf(" Size: %s\n", formatBytes(result.FileSize)) 631 fmt.Printf(" Has metadata frame: %v\n", result.HasMetadataFrame) 632 fmt.Printf(" Has frame index: %v\n\n", result.HasFrameIndex) 633 634 // Embedded metadata 635 if result.HasMetadataFrame && result.Metadata != nil && !opts.skipMetadata { 636 meta := result.Metadata 637 fmt.Printf("📋 Embedded Metadata (Skippable Frame)\n") 638 fmt.Printf("──────────────────────────────────────\n") 639 fmt.Printf(" Format: %s\n", meta.Format) 640 fmt.Printf(" Origin: %s\n", meta.Origin) 641 fmt.Printf(" Bundle Number: %06d\n", meta.BundleNumber) 642 if meta.CreatedBy != "" { 643 fmt.Printf(" Created by: %s\n", meta.CreatedBy) 644 } 645 if meta.CreatedByHost != "" { 646 fmt.Printf(" Created on: %s\n", meta.CreatedByHost) 647 } 648 fmt.Printf(" Created at: %s\n", meta.CreatedAt.Format("2006-01-02 15:04:05 MST")) 649 650 fmt.Printf("\n Content:\n") 651 fmt.Printf(" Operations: %s\n", formatNumber(meta.OperationCount)) 652 fmt.Printf(" Unique DIDs: %s\n", formatNumber(meta.DIDCount)) 653 fmt.Printf(" Frames: %d × %d ops\n", meta.FrameCount, meta.FrameSize) 
654 fmt.Printf(" Timespan: %s → %s\n", 655 meta.StartTime.Format("2006-01-02 15:04:05"), 656 meta.EndTime.Format("2006-01-02 15:04:05")) 657 fmt.Printf(" Duration: %s\n", 658 formatDuration(meta.EndTime.Sub(meta.StartTime))) 659 660 fmt.Printf("\n Integrity:\n") 661 fmt.Printf(" Content hash: %s\n", meta.ContentHash) 662 if meta.ParentHash != "" { 663 fmt.Printf(" Parent hash: %s\n", meta.ParentHash) 664 } 665 666 if len(meta.FrameOffsets) > 0 { 667 // Calculate metadata size (size of the metadata frame itself) 668 metadataSize := int64(0) 669 if result.HasMetadataFrame { 670 // Metadata is at the end of file, after all data frames 671 // Size = file size - last frame offset 672 if len(meta.FrameOffsets) > 0 { 673 lastFrameOffset := meta.FrameOffsets[len(meta.FrameOffsets)-1] 674 metadataSize = result.FileSize - lastFrameOffset 675 } 676 } 677 678 // Print with fixes 679 fmt.Printf(" Ops Frame Index: %d offsets (embedded)\n", len(meta.FrameOffsets)) 680 fmt.Printf(" Metadata size: %s\n", formatBytes(metadataSize)) 681 fmt.Printf(" Frame offsets: %v\n", formatOffsetArray(meta.FrameOffsets, 5)) // Show first 5 682 } 683 fmt.Printf("\n") 684 } 685 686 // Operations breakdown 687 fmt.Printf("📊 Operations Analysis\n") 688 fmt.Printf("──────────────────────\n") 689 fmt.Printf(" Total operations: %s\n", formatNumber(result.TotalOps)) 690 fmt.Printf(" Active: %s (%.1f%%)\n", 691 formatNumber(result.ActiveOps), 692 float64(result.ActiveOps)/float64(result.TotalOps)*100) 693 if result.NullifiedOps > 0 { 694 fmt.Printf(" Nullified: %s (%.1f%%)\n", 695 formatNumber(result.NullifiedOps), 696 float64(result.NullifiedOps)/float64(result.TotalOps)*100) 697 } 698 699 if len(result.OperationTypes) > 0 { 700 fmt.Printf("\n Operation Types:\n") 701 702 // Sort by count 703 var types []struct { 704 name string 705 count int 706 } 707 for name, count := range result.OperationTypes { 708 types = append(types, struct { 709 name string 710 count int 711 }{name, count}) 712 } 713 
sort.Slice(types, func(i, j int) bool { 714 return types[i].count > types[j].count 715 }) 716 717 for _, t := range types { 718 pct := float64(t.count) / float64(result.TotalOps) * 100 719 fmt.Printf(" %-25s %s (%.1f%%)\n", t.name, formatNumber(t.count), pct) 720 } 721 } 722 fmt.Printf("\n") 723 724 // DID patterns 725 fmt.Printf("👤 DID Activity Patterns\n") 726 fmt.Printf("────────────────────────\n") 727 fmt.Printf(" Unique DIDs: %s\n", formatNumber(result.UniqueDIDs)) 728 fmt.Printf(" Single-op DIDs: %s (%.1f%%)\n", 729 formatNumber(result.SingleOpDIDs), 730 float64(result.SingleOpDIDs)/float64(result.UniqueDIDs)*100) 731 fmt.Printf(" Multi-op DIDs: %s (%.1f%%)\n", 732 formatNumber(result.MultiOpDIDs), 733 float64(result.MultiOpDIDs)/float64(result.UniqueDIDs)*100) 734 735 if len(result.TopDIDs) > 0 { 736 fmt.Printf("\n Most Active DIDs:\n") 737 for i, da := range result.TopDIDs { 738 if i >= 5 { 739 break 740 } 741 fmt.Printf(" %d. %s (%d ops)\n", i+1, da.DID, da.Count) 742 } 743 } 744 fmt.Printf("\n") 745 746 // Handle patterns 747 if !opts.skipPatterns && result.TotalHandles > 0 { 748 fmt.Printf("🏷️ Handle Statistics\n") 749 fmt.Printf("────────────────────\n") 750 fmt.Printf(" Total handles: %s\n", formatNumber(result.TotalHandles)) 751 if result.InvalidHandles > 0 { 752 fmt.Printf(" Invalid patterns: %s (%.1f%%)\n", 753 formatNumber(result.InvalidHandles), 754 float64(result.InvalidHandles)/float64(result.TotalHandles)*100) 755 } 756 757 if len(result.TopDomains) > 0 { 758 fmt.Printf("\n Top Domains:\n") 759 for i, dc := range result.TopDomains { 760 if i >= 10 { 761 break 762 } 763 pct := float64(dc.Count) / float64(result.TotalHandles) * 100 764 fmt.Printf(" %-25s %s (%.1f%%)\n", dc.Domain, formatNumber(dc.Count), pct) 765 } 766 } 767 fmt.Printf("\n") 768 } 769 770 // Service patterns 771 if !opts.skipPatterns && result.TotalServices > 0 { 772 fmt.Printf("🌐 Service Endpoints\n") 773 fmt.Printf("────────────────────\n") 774 fmt.Printf(" Total services: 
%s\n", formatNumber(result.TotalServices)) 775 fmt.Printf(" Unique endpoints: %s\n", formatNumber(result.UniqueEndpoints)) 776 777 if len(result.TopPDSEndpoints) > 0 { 778 fmt.Printf("\n Top PDS Endpoints:\n") 779 for i, ec := range result.TopPDSEndpoints { 780 if i >= 10 { 781 break 782 } 783 fmt.Printf(" %-40s %s ops\n", ec.Endpoint, formatNumber(ec.Count)) 784 } 785 } 786 fmt.Printf("\n") 787 } 788 789 // Temporal analysis 790 fmt.Printf("⏱️ Time Distribution\n") 791 fmt.Printf("───────────────────────\n") 792 if result.TimeDistribution != nil { 793 td := result.TimeDistribution 794 fmt.Printf(" Earliest operation: %s\n", td.EarliestOp.Format(time.RFC3339)) 795 fmt.Printf(" Latest operation: %s\n", td.LatestOp.Format(time.RFC3339)) 796 fmt.Printf(" Time span: %s\n", td.TimeSpan) 797 fmt.Printf(" Peak hour: %s (%d ops)\n", 798 td.PeakHour.Format("2006-01-02 15:04"), td.PeakHourOps) 799 fmt.Printf(" Total active hours: %d\n", td.TotalHours) 800 fmt.Printf(" Avg ops/minute: %.1f\n", result.AvgOpsPerMinute) 801 } 802 fmt.Printf("\n") 803 804 // Size analysis 805 fmt.Printf("📏 Size Analysis\n") 806 fmt.Printf("────────────────\n") 807 fmt.Printf(" Total data: %s\n", formatBytes(result.TotalOpSize)) 808 fmt.Printf(" Average per op: %s\n", formatBytes(int64(result.AvgOpSize))) 809 fmt.Printf(" Min operation: %s\n", formatBytes(int64(result.MinOpSize))) 810 fmt.Printf(" Max operation: %s\n\n", formatBytes(int64(result.MaxOpSize))) 811 812 // Cryptographic verification 813 if opts.verify && !opts.skipCrypto { 814 fmt.Printf("🔐 Cryptographic Verification\n") 815 fmt.Printf("─────────────────────────────\n") 816 817 status := func(valid bool) string { 818 if valid { 819 return "✓ Valid" 820 } 821 return "✗ Invalid" 822 } 823 824 fmt.Printf(" Content hash: %s\n", status(result.ContentHashValid)) 825 fmt.Printf(" Compressed hash: %s\n", status(result.CompressedHashValid)) 826 if result.HasMetadataFrame { 827 fmt.Printf(" Metadata integrity: %s\n", 
status(result.MetadataValid)) 828 } 829 fmt.Printf("\n") 830 } 831 832 // Performance summary 833 fmt.Printf("⚡ Performance\n") 834 fmt.Printf("──────────────\n") 835 fmt.Printf(" Load time: %s\n", result.LoadTime) 836 if opts.verify { 837 fmt.Printf(" Verify time: %s\n", result.VerifyTime) 838 } 839 fmt.Printf(" Total time: %s\n", result.TotalTime) 840 if result.LoadTime.Seconds() > 0 { 841 opsPerSec := float64(result.TotalOps) / result.LoadTime.Seconds() 842 mbPerSec := float64(result.TotalOpSize) / result.LoadTime.Seconds() / (1024 * 1024) 843 fmt.Printf(" Throughput: %.0f ops/sec, %.2f MB/s\n", opsPerSec, mbPerSec) 844 } 845 fmt.Printf("\n") 846 847 return nil 848} 849 850func displayInspectJSON(result *inspectResult) error { 851 data, _ := json.MarshalIndent(result, "", " ") 852 fmt.Println(string(data)) 853 return nil 854} 855 856func verifyCrypto(cmd *cobra.Command, path string, meta *storage.BundleMetadata, bundleNum int, verbose bool) (contentValid, compressedValid, metadataValid bool) { 857 ops, _ := storage.NewOperations(nil, verbose) 858 859 // Calculate actual hashes from file 860 compHash, compSize, contentHash, contentSize, err := ops.CalculateFileHashes(path) 861 if err != nil { 862 if verbose { 863 fmt.Fprintf(os.Stderr, " Hash calculation failed: %v\n", err) 864 } 865 return false, false, false 866 } 867 868 contentValid = true 869 compressedValid = true 870 metadataValid = true 871 872 // Verify against embedded metadata if available 873 if meta != nil { 874 // Check content hash (this is in the metadata) 875 if meta.ContentHash != "" && meta.ContentHash != contentHash { 876 contentValid = false 877 if verbose { 878 fmt.Fprintf(os.Stderr, " ✗ Content hash mismatch!\n") 879 fmt.Fprintf(os.Stderr, " Expected: %s\n", meta.ContentHash) 880 fmt.Fprintf(os.Stderr, " Actual: %s\n", contentHash) 881 } 882 } 883 884 if meta.OperationCount > 0 { 885 // We can't verify this without loading, so skip 886 metadataValid = true 887 } 888 889 // Note: We don't 
check compressed hash/size because they're not in metadata 890 // (The file IS the compressed data, so it's redundant) 891 892 if verbose { 893 fmt.Fprintf(os.Stderr, " Embedded metadata:\n") 894 fmt.Fprintf(os.Stderr, " Content hash: %s\n", meta.ContentHash[:16]+"...") 895 fmt.Fprintf(os.Stderr, " Operations: %d\n", meta.OperationCount) 896 fmt.Fprintf(os.Stderr, " DIDs: %d\n", meta.DIDCount) 897 } 898 } 899 900 // Also verify against repository index if bundle number is known 901 if bundleNum > 0 { 902 mgr, _, err := getManager(&ManagerOptions{Cmd: cmd}) 903 if err == nil { 904 defer mgr.Close() 905 906 ctx := context.Background() 907 vr, err := mgr.VerifyBundle(ctx, bundleNum) 908 if err == nil && vr != nil { 909 // Index verification 910 indexContentValid := vr.Valid 911 indexHashMatch := vr.HashMatch 912 913 if verbose { 914 fmt.Fprintf(os.Stderr, " Repository index:\n") 915 fmt.Fprintf(os.Stderr, " Content valid: %v\n", indexContentValid) 916 fmt.Fprintf(os.Stderr, " Hash match: %v\n", indexHashMatch) 917 } 918 919 contentValid = contentValid && indexContentValid 920 compressedValid = compressedValid && indexHashMatch 921 } 922 } 923 } 924 925 if verbose { 926 fmt.Fprintf(os.Stderr, " Calculated hashes:\n") 927 fmt.Fprintf(os.Stderr, " Content: %s (%s)\n", contentHash[:16]+"...", formatBytes(contentSize)) 928 fmt.Fprintf(os.Stderr, " Compressed: %s (%s)\n", compHash[:16]+"...", formatBytes(compSize)) 929 } 930 931 return contentValid, compressedValid, metadataValid 932} 933 934func resolveBundlePath(cmd *cobra.Command, input string) (path string, bundleNum int, err error) { 935 // Check if it's a file path 936 if strings.HasSuffix(input, ".zst") || strings.Contains(input, "/") || strings.Contains(input, "\\") { 937 absPath, err := filepath.Abs(input) 938 if err != nil { 939 return "", 0, err 940 } 941 942 // Try to extract bundle number from filename 943 base := filepath.Base(absPath) 944 fmt.Sscanf(base, "%d", &bundleNum) 945 946 return absPath, bundleNum, 
nil 947 } 948 949 // Try to parse as bundle number 950 if _, err := fmt.Sscanf(input, "%d", &bundleNum); err == nil { 951 // Load from repository 952 mgr, dir, err := getManager(&ManagerOptions{Cmd: cmd}) 953 if err != nil { 954 return "", 0, err 955 } 956 defer mgr.Close() 957 958 path := filepath.Join(dir, fmt.Sprintf("%06d.jsonl.zst", bundleNum)) 959 if _, err := os.Stat(path); err != nil { 960 return "", 0, fmt.Errorf("bundle %d not found in repository", bundleNum) 961 } 962 963 return path, bundleNum, nil 964 } 965 966 return "", 0, fmt.Errorf("invalid input: must be bundle number or file path") 967} 968 969func formatOffsetArray(offsets []int64, maxShow int) string { 970 if len(offsets) == 0 { 971 return "[]" 972 } 973 974 if len(offsets) <= maxShow { 975 return fmt.Sprintf("%v", offsets) 976 } 977 978 // Show first maxShow elements 979 shown := make([]int64, maxShow) 980 copy(shown, offsets[:maxShow]) 981 return fmt.Sprintf("%v ... (%d more)", shown, len(offsets)-maxShow) 982}