// Deprecated: Go implementation of plcbundle.
1package commands
2
import (
	"context"
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"strconv"
	"strings"
	"time"

	"github.com/goccy/go-json"
	"github.com/spf13/cobra"
	"tangled.org/atscan.net/plcbundle/internal/storage"
)
16
17// ============================================================================
18// TYPES (defined at package level to avoid conflicts)
19// ============================================================================
20
// DIDActivity pairs a DID with the number of operations it performed in
// the inspected bundle. Used for the "most active DIDs" ranking.
type DIDActivity struct {
	DID   string `json:"did"`
	Count int    `json:"count"`
}
25
// DomainCount pairs a handle domain (e.g. "bsky.social") with the number
// of handles seen under it. Used for the "top domains" ranking.
type DomainCount struct {
	Domain string `json:"domain"`
	Count  int    `json:"count"`
}
30
// EndpointCount pairs a normalized PDS endpoint host with the number of
// operations that referenced it. Used for the "top PDS endpoints" ranking.
type EndpointCount struct {
	Endpoint string `json:"endpoint"`
	Count    int    `json:"count"`
}
35
// TimeSlot pairs a point in time with an operation count.
// NOTE(review): not referenced anywhere in this file — presumably consumed
// by other commands in the package; confirm before removing.
type TimeSlot struct {
	Time  time.Time `json:"time"`
	Count int       `json:"count"`
}
40
// inspectOptions collects the command-line flags that control which
// analysis sections run and how results are reported.
type inspectOptions struct {
	showJSON     bool // --json: machine-readable output instead of the human report
	verify       bool // --verify: verify cryptographic hashes
	showSamples  bool // --samples: show sample operations
	sampleCount  int  // --sample-count: number of samples to show
	skipMetadata bool // --skip-metadata: skip the embedded metadata section
	skipPatterns bool // --skip-patterns: skip handle/service pattern analysis
	skipCrypto   bool // --skip-crypto: skip cryptographic verification
	verbose      bool // inherited from the root command's --verbose flag
}
51
// inspectResult is the complete outcome of one bundle inspection. It is
// rendered either as indented JSON (--json) or as the human report, and
// aggregates file-level facts, operation statistics, pattern analysis,
// verification results, and timing.
type inspectResult struct {
	// Metadata extracted from the bundle's skippable frame, if present.
	Metadata *storage.BundleMetadata `json:"metadata,omitempty"`

	// Basic stats
	FilePath         string `json:"file_path"`
	FileSize         int64  `json:"file_size"`
	HasMetadataFrame bool   `json:"has_metadata_frame"`
	HasFrameIndex    bool   `json:"has_frame_index"`

	// Operation analysis
	TotalOps       int            `json:"total_ops"`
	NullifiedOps   int            `json:"nullified_ops"`
	ActiveOps      int            `json:"active_ops"`
	UniqueDIDs     int            `json:"unique_dids"`
	OperationTypes map[string]int `json:"operation_types"`

	// DID patterns
	TopDIDs      []DIDActivity `json:"top_dids"`
	SingleOpDIDs int           `json:"single_op_dids"`
	MultiOpDIDs  int           `json:"multi_op_dids"`

	// Handle patterns
	TotalHandles   int           `json:"total_handles"`
	TopDomains     []DomainCount `json:"top_domains"`
	InvalidHandles int           `json:"invalid_handles"`

	// Service patterns
	TotalServices   int             `json:"total_services"`
	UniqueEndpoints int             `json:"unique_endpoints"`
	TopPDSEndpoints []EndpointCount `json:"top_pds_endpoints"`

	// Temporal
	TimeDistribution *TimeDistributionSummary `json:"time_distribution,omitempty"`
	AvgOpsPerMinute  float64                  `json:"avg_ops_per_minute"`

	// Size analysis (per-operation JSON sizes, in bytes)
	AvgOpSize   int   `json:"avg_op_size"`
	MinOpSize   int   `json:"min_op_size"`
	MaxOpSize   int   `json:"max_op_size"`
	TotalOpSize int64 `json:"total_op_size"`

	// Crypto verification (meaningful only when --verify was given)
	ContentHashValid    bool `json:"content_hash_valid"`
	CompressedHashValid bool `json:"compressed_hash_valid"`
	MetadataValid       bool `json:"metadata_valid"`

	// Timing
	LoadTime    time.Duration `json:"load_time"`
	AnalyzeTime time.Duration `json:"analyze_time"`
	VerifyTime  time.Duration `json:"verify_time"`
	TotalTime   time.Duration `json:"total_time"`
}
105
// bundleAnalysis holds the aggregate statistics computed by analyzeBundle.
// Exported fields are copied onto inspectResult; the unexported maps are
// intermediate accumulators used to derive the top-N rankings and the
// time distribution.
type bundleAnalysis struct {
	TotalOps        int            `json:"total_ops"`
	NullifiedOps    int            `json:"nullified_ops"`
	ActiveOps       int            `json:"active_ops"`
	UniqueDIDs      int            `json:"unique_dids"`
	OperationTypes  map[string]int `json:"operation_types"`
	SingleOpDIDs    int            `json:"single_op_dids"`
	MultiOpDIDs     int            `json:"multi_op_dids"`
	TotalHandles    int            `json:"total_handles"`
	InvalidHandles  int            `json:"invalid_handles"`
	TotalServices   int            `json:"total_services"`
	UniqueEndpoints int            `json:"unique_endpoints"`
	AvgOpsPerMinute float64        `json:"avg_ops_per_minute"`
	AvgOpSize       int            `json:"avg_op_size"`
	MinOpSize       int            `json:"min_op_size"`
	MaxOpSize       int            `json:"max_op_size"`
	TotalOpSize     int64          `json:"total_op_size"`

	// For top-N calculations (unexported, won't appear in JSON)
	didActivity    map[string]int // ops per DID
	domainCounts   map[string]int // handles per second-level domain
	endpointCounts map[string]int // ops per normalized PDS endpoint host

	// For time calculations. Keys are minute slots: Unix seconds / 60
	// (see analyzeBundle), NOT raw Unix timestamps.
	timeSlots map[int64]int

	// Results
	TopDIDs          []DIDActivity            `json:"top_dids"`
	TopDomains       []DomainCount            `json:"top_domains"`
	TopPDSEndpoints  []EndpointCount          `json:"top_pds_endpoints"`
	TimeDistribution *TimeDistributionSummary `json:"time_distribution,omitempty"`
}
138
// TimeDistributionSummary describes when a bundle's operations occurred:
// the overall time range, the busiest hour, and how many distinct hours
// saw any activity.
type TimeDistributionSummary struct {
	EarliestOp  time.Time `json:"earliest_op"`
	LatestOp    time.Time `json:"latest_op"`
	TimeSpan    string    `json:"time_span"`      // human-formatted LatestOp - EarliestOp
	PeakHour    time.Time `json:"peak_hour"`      // start of the hour with the most ops
	PeakHourOps int       `json:"peak_hour_ops"`  // op count within that hour
	TotalHours  int       `json:"total_hours"`    // number of distinct hours with activity
}
147
148// ============================================================================
149// COMMAND DEFINITION
150// ============================================================================
151
152func NewInspectCommand() *cobra.Command {
153 var (
154 showJSON bool
155 verify bool
156 showSamples bool
157 sampleCount int
158 skipMetadata bool
159 skipPatterns bool
160 skipCrypto bool
161 )
162
163 cmd := &cobra.Command{
164 Use: "inspect <bundle-number|bundle-file>",
165 Short: "Deep analysis of bundle contents",
166 Long: `Deep analysis of bundle contents
167
168Performs comprehensive analysis of a bundle including:
169 • Embedded metadata (from skippable frame)
170 • Operation type breakdown
171 • DID activity patterns
172 • Handle and domain statistics
173 • Service endpoint analysis
174 • Temporal distribution
175 • Cryptographic verification
176 • Size analysis
177
178Can inspect either by bundle number (from repository) or direct file path.`,
179
180 Example: ` # Inspect from repository
181 plcbundle inspect 42
182
183 # Inspect specific file
184 plcbundle inspect /path/to/000042.jsonl.zst
185 plcbundle inspect 000042.jsonl.zst
186
187 # Skip certain analysis sections
188 plcbundle inspect 42 --skip-patterns --skip-crypto
189
190 # Show sample operations
191 plcbundle inspect 42 --samples --sample-count 20
192
193 # Verify all hashes
194 plcbundle inspect 42 --verify
195
196 # JSON output (for scripting)
197 plcbundle inspect 42 --json`,
198
199 Args: cobra.ExactArgs(1),
200
201 RunE: func(cmd *cobra.Command, args []string) error {
202 input := args[0]
203 verbose, _ := cmd.Root().PersistentFlags().GetBool("verbose")
204
205 return runInspect(cmd, input, inspectOptions{
206 showJSON: showJSON,
207 verify: verify,
208 showSamples: showSamples,
209 sampleCount: sampleCount,
210 skipMetadata: skipMetadata,
211 skipPatterns: skipPatterns,
212 skipCrypto: skipCrypto,
213 verbose: verbose,
214 })
215 },
216 }
217
218 cmd.Flags().BoolVar(&showJSON, "json", false, "Output as JSON")
219 cmd.Flags().BoolVar(&verify, "verify", false, "Verify cryptographic hashes")
220 cmd.Flags().BoolVar(&showSamples, "samples", false, "Show sample operations")
221 cmd.Flags().IntVar(&sampleCount, "sample-count", 10, "Number of samples to show")
222 cmd.Flags().BoolVar(&skipMetadata, "skip-metadata", false, "Skip embedded metadata section")
223 cmd.Flags().BoolVar(&skipPatterns, "skip-patterns", false, "Skip pattern analysis")
224 cmd.Flags().BoolVar(&skipCrypto, "skip-crypto", false, "Skip cryptographic verification")
225
226 return cmd
227}
228
229// ============================================================================
230// MAIN LOGIC
231// ============================================================================
232
// runInspect drives the full inspection pipeline for one bundle:
// resolve the input to a file path, probe for a frame index, extract
// embedded metadata, load and analyze every operation, optionally verify
// hashes, and finally render the result as JSON or a human report.
// Progress messages go to stderr so --json output on stdout stays clean.
func runInspect(cmd *cobra.Command, input string, opts inspectOptions) error {
	totalStart := time.Now()

	// Determine if input is bundle number or file path
	bundlePath, bundleNum, err := resolveBundlePath(cmd, input)
	if err != nil {
		return err
	}

	// Slices are initialized empty (not nil) so empty sections encode
	// as [] rather than null in JSON output.
	result := &inspectResult{
		FilePath:        bundlePath,
		OperationTypes:  make(map[string]int),
		TopDIDs:         make([]DIDActivity, 0),
		TopDomains:      make([]DomainCount, 0),
		TopPDSEndpoints: make([]EndpointCount, 0),
	}

	// Check file exists
	info, err := os.Stat(bundlePath)
	if err != nil {
		return fmt.Errorf("bundle file not found: %w", err)
	}
	result.FileSize = info.Size()

	// Check for frame index. A successfully extracted metadata frame
	// implies an embedded index; otherwise fall back to a legacy
	// external .idx file next to the bundle.
	ops, _ := storage.NewOperations(nil, opts.verbose)

	if _, err := ops.ExtractBundleMetadata(bundlePath); err == nil {
		result.HasFrameIndex = true // Has embedded index
	} else {
		// Check for external .idx file (legacy)
		indexPath := bundlePath + ".idx"
		if _, err := os.Stat(indexPath); err == nil {
			result.HasFrameIndex = true
		}
	}

	fmt.Fprintf(os.Stderr, "Inspecting: %s\n", filepath.Base(bundlePath))
	fmt.Fprintf(os.Stderr, "File size: %s\n\n", formatBytes(result.FileSize))

	// SECTION 1: Extract embedded metadata (fast!)
	// NOTE(review): this re-creates the Operations helper and re-extracts
	// the metadata already probed above — looks redundant; confirm
	// ExtractBundleMetadata is cheap enough that this doesn't matter.
	if !opts.skipMetadata {
		fmt.Fprintf(os.Stderr, "Reading embedded metadata...\n")
		metaStart := time.Now()

		ops, _ := storage.NewOperations(nil, opts.verbose)

		meta, err := ops.ExtractBundleMetadata(bundlePath)
		if err != nil {
			if opts.verbose {
				fmt.Fprintf(os.Stderr, " No embedded metadata: %v\n", err)
			}
			result.HasMetadataFrame = false
		} else {
			result.HasMetadataFrame = true
			result.Metadata = meta
			if opts.verbose {
				fmt.Fprintf(os.Stderr, " ✓ Extracted in %s\n", time.Since(metaStart))
			}
		}
		fmt.Fprintf(os.Stderr, "\n")
	}

	// SECTION 2: Load and analyze operations
	fmt.Fprintf(os.Stderr, "Loading and analyzing operations...\n")
	loadStart := time.Now()

	analysis, err := analyzeBundle(bundlePath, opts)
	if err != nil {
		return fmt.Errorf("analysis failed: %w", err)
	}

	// Copy the aggregate statistics onto the result for serialization.
	result.LoadTime = time.Since(loadStart)
	result.TotalOps = analysis.TotalOps
	result.NullifiedOps = analysis.NullifiedOps
	result.ActiveOps = analysis.ActiveOps
	result.UniqueDIDs = analysis.UniqueDIDs
	result.OperationTypes = analysis.OperationTypes
	result.TopDIDs = analysis.TopDIDs
	result.SingleOpDIDs = analysis.SingleOpDIDs
	result.MultiOpDIDs = analysis.MultiOpDIDs
	result.TotalHandles = analysis.TotalHandles
	result.TopDomains = analysis.TopDomains
	result.InvalidHandles = analysis.InvalidHandles
	result.TotalServices = analysis.TotalServices
	result.UniqueEndpoints = analysis.UniqueEndpoints
	result.TopPDSEndpoints = analysis.TopPDSEndpoints
	result.TimeDistribution = analysis.TimeDistribution
	result.AvgOpsPerMinute = analysis.AvgOpsPerMinute
	result.AvgOpSize = analysis.AvgOpSize
	result.MinOpSize = analysis.MinOpSize
	result.MaxOpSize = analysis.MaxOpSize
	result.TotalOpSize = analysis.TotalOpSize

	fmt.Fprintf(os.Stderr, " ✓ Analyzed in %s\n\n", result.LoadTime)

	// SECTION 3: Cryptographic verification (only with --verify and
	// without --skip-crypto)
	if opts.verify && !opts.skipCrypto {
		fmt.Fprintf(os.Stderr, "Verifying cryptographic hashes...\n")
		verifyStart := time.Now()

		// Pass cmd parameter
		result.ContentHashValid, result.CompressedHashValid, result.MetadataValid =
			verifyCrypto(cmd, bundlePath, result.Metadata, bundleNum, opts.verbose)

		result.VerifyTime = time.Since(verifyStart)
		fmt.Fprintf(os.Stderr, " ✓ Verified in %s\n\n", result.VerifyTime)
	}

	result.TotalTime = time.Since(totalStart)

	// Display results
	if opts.showJSON {
		return displayInspectJSON(result)
	}

	return displayInspectHuman(result, analysis, opts)
}
351
352// ============================================================================
353// ANALYSIS FUNCTIONS
354// ============================================================================
355
// analyzeBundle loads every operation from the bundle at path and
// computes aggregate statistics: nullified/active counts, per-DID
// activity, operation-type breakdown, per-op size stats, handle and
// service patterns (unless opts.skipPatterns), and a per-minute time
// histogram from which the time distribution summary is derived.
func analyzeBundle(path string, opts inspectOptions) (*bundleAnalysis, error) {
	ops, _ := storage.NewOperations(nil, opts.verbose)
	operations, err := ops.LoadBundle(path)
	if err != nil {
		return nil, err
	}

	analysis := &bundleAnalysis{
		TotalOps:       len(operations),
		OperationTypes: make(map[string]int),
		didActivity:    make(map[string]int),
		domainCounts:   make(map[string]int),
		endpointCounts: make(map[string]int),
		timeSlots:      make(map[int64]int),
	}

	// Analyze each operation
	for _, op := range operations {
		// Nullification
		if op.IsNullified() {
			analysis.NullifiedOps++
		} else {
			analysis.ActiveOps++
		}

		// DID activity
		analysis.didActivity[op.DID]++

		// Size stats: prefer the retained raw JSON; fall back to
		// re-marshaling when RawJSON is empty.
		opSize := len(op.RawJSON)
		if opSize == 0 {
			data, _ := json.Marshal(op)
			opSize = len(data)
		}

		analysis.TotalOpSize += int64(opSize)
		// MinOpSize uses 0 as an "unset" sentinel for the first op.
		if analysis.MinOpSize == 0 || opSize < analysis.MinOpSize {
			analysis.MinOpSize = opSize
		}
		if opSize > analysis.MaxOpSize {
			analysis.MaxOpSize = opSize
		}

		// Parse operation for detailed analysis; unparseable payloads
		// are skipped (they still count toward the stats above).
		opData, err := op.GetOperationData()
		if err != nil || opData == nil {
			continue
		}

		// Operation type
		if opType, ok := opData["type"].(string); ok {
			analysis.OperationTypes[opType]++
		}

		// Handle analysis
		if !opts.skipPatterns {
			analyzeHandles(opData, analysis)
			analyzeServices(opData, analysis)
		}

		// Time distribution (group by minute): the map key is a minute
		// slot, i.e. Unix seconds divided by 60.
		timeSlot := op.CreatedAt.Unix() / 60
		analysis.timeSlots[timeSlot]++
	}

	// Calculate derived stats
	analysis.UniqueDIDs = len(analysis.didActivity)
	if analysis.TotalOps > 0 {
		analysis.AvgOpSize = int(analysis.TotalOpSize / int64(analysis.TotalOps))
	}

	// Count single vs multi-op DIDs
	for _, count := range analysis.didActivity {
		if count == 1 {
			analysis.SingleOpDIDs++
		} else {
			analysis.MultiOpDIDs++
		}
	}

	// Top DIDs
	analysis.TopDIDs = getTopDIDs(analysis.didActivity, 10)

	// Top domains
	analysis.TopDomains = getTopDomains(analysis.domainCounts, 10)

	// Top endpoints
	analysis.TopPDSEndpoints = getTopEndpoints(analysis.endpointCounts, 10)

	// Unique endpoints
	analysis.UniqueEndpoints = len(analysis.endpointCounts)

	// Time distribution
	analysis.TimeDistribution = calculateTimeDistributionSummary(analysis.timeSlots)

	// Calculate ops per minute from first/last operation timestamps.
	// NOTE(review): assumes operations are in chronological order —
	// confirm LoadBundle guarantees this.
	if len(operations) > 1 {
		duration := operations[len(operations)-1].CreatedAt.Sub(operations[0].CreatedAt)
		if duration.Minutes() > 0 {
			analysis.AvgOpsPerMinute = float64(len(operations)) / duration.Minutes()
		}
	}

	return analysis, nil
}
461
462func analyzeHandles(opData map[string]interface{}, analysis *bundleAnalysis) {
463 if aka, ok := opData["alsoKnownAs"].([]interface{}); ok {
464 for _, a := range aka {
465 if akaStr, ok := a.(string); ok {
466 if strings.HasPrefix(akaStr, "at://") {
467 analysis.TotalHandles++
468
469 // Extract domain
470 handle := strings.TrimPrefix(akaStr, "at://")
471 if idx := strings.Index(handle, "/"); idx > 0 {
472 handle = handle[:idx]
473 }
474
475 // Count domain (TLD)
476 parts := strings.Split(handle, ".")
477 if len(parts) >= 2 {
478 domain := parts[len(parts)-1]
479 if len(parts) >= 2 {
480 domain = parts[len(parts)-2] + "." + domain
481 }
482 analysis.domainCounts[domain]++
483 }
484
485 // Check for invalid patterns
486 if strings.Contains(handle, "_") {
487 analysis.InvalidHandles++
488 }
489 }
490 }
491 }
492 }
493}
494
495func analyzeServices(opData map[string]interface{}, analysis *bundleAnalysis) {
496 if services, ok := opData["services"].(map[string]interface{}); ok {
497 analysis.TotalServices += len(services)
498
499 // Extract PDS endpoints
500 if pds, ok := services["atproto_pds"].(map[string]interface{}); ok {
501 if endpoint, ok := pds["endpoint"].(string); ok {
502 // Normalize endpoint
503 endpoint = strings.TrimPrefix(endpoint, "https://")
504 endpoint = strings.TrimPrefix(endpoint, "http://")
505 if idx := strings.Index(endpoint, "/"); idx > 0 {
506 endpoint = endpoint[:idx]
507 }
508 analysis.endpointCounts[endpoint]++
509 }
510 }
511 }
512}
513
514func getTopDIDs(didActivity map[string]int, limit int) []DIDActivity {
515 var results []DIDActivity
516 for did, count := range didActivity {
517 results = append(results, DIDActivity{DID: did, Count: count})
518 }
519
520 sort.Slice(results, func(i, j int) bool {
521 return results[i].Count > results[j].Count
522 })
523
524 if len(results) > limit {
525 results = results[:limit]
526 }
527
528 return results
529}
530
531func getTopDomains(domainCounts map[string]int, limit int) []DomainCount {
532 var results []DomainCount
533 for domain, count := range domainCounts {
534 results = append(results, DomainCount{Domain: domain, Count: count})
535 }
536
537 sort.Slice(results, func(i, j int) bool {
538 return results[i].Count > results[j].Count
539 })
540
541 if len(results) > limit {
542 results = results[:limit]
543 }
544
545 return results
546}
547
548func getTopEndpoints(endpointCounts map[string]int, limit int) []EndpointCount {
549 var results []EndpointCount
550 for endpoint, count := range endpointCounts {
551 results = append(results, EndpointCount{Endpoint: endpoint, Count: count})
552 }
553
554 sort.Slice(results, func(i, j int) bool {
555 return results[i].Count > results[j].Count
556 })
557
558 if len(results) > limit {
559 results = results[:limit]
560 }
561
562 return results
563}
564
565func calculateTimeDistributionSummary(timeSlots map[int64]int) *TimeDistributionSummary {
566 if len(timeSlots) == 0 {
567 return nil
568 }
569
570 var earliest, latest int64
571 var peakHour int64
572 var peakCount int
573
574 // Group by hour and find stats
575 hourlySlots := make(map[int64]int)
576
577 for ts, count := range timeSlots {
578 // Track earliest/latest
579 if earliest == 0 || ts < earliest {
580 earliest = ts
581 }
582 if ts > latest {
583 latest = ts
584 }
585
586 // Group by hour
587 hour := (ts / 3600) * 3600 // Truncate to hour
588 hourlySlots[hour] += count
589 }
590
591 // Find peak hour
592 for hour, count := range hourlySlots {
593 if count > peakCount {
594 peakCount = count
595 peakHour = hour
596 }
597 }
598
599 // Calculate time span
600 duration := time.Unix(latest, 0).Sub(time.Unix(earliest, 0))
601 timeSpan := formatDuration(duration)
602
603 // Calculate total hours covered
604 totalHours := len(hourlySlots)
605
606 return &TimeDistributionSummary{
607 EarliestOp: time.Unix(earliest, 0).UTC(),
608 LatestOp: time.Unix(latest, 0).UTC(),
609 TimeSpan: timeSpan,
610 PeakHour: time.Unix(peakHour, 0).UTC(),
611 PeakHourOps: peakCount,
612 TotalHours: totalHours,
613 }
614}
615
616// ============================================================================
617// DISPLAY FUNCTIONS
618// ============================================================================
619
// displayInspectHuman renders the inspection result as a sectioned,
// human-readable report on stdout. The bundleAnalysis parameter is
// accepted for signature compatibility but unused — every displayed
// value was already copied onto result by runInspect.
func displayInspectHuman(result *inspectResult, _ *bundleAnalysis, opts inspectOptions) error {
	fmt.Printf("\n")
	fmt.Printf("═══════════════════════════════════════════════════════════════\n")
	fmt.Printf(" Bundle Deep Inspection\n")
	fmt.Printf("═══════════════════════════════════════════════════════════════\n\n")

	// File info
	fmt.Printf("📁 File Information\n")
	fmt.Printf("───────────────────\n")
	fmt.Printf(" Path: %s\n", filepath.Base(result.FilePath))
	fmt.Printf(" Size: %s\n", formatBytes(result.FileSize))
	fmt.Printf(" Has metadata frame: %v\n", result.HasMetadataFrame)
	fmt.Printf(" Has frame index: %v\n\n", result.HasFrameIndex)

	// Embedded metadata (only when present and not skipped)
	if result.HasMetadataFrame && result.Metadata != nil && !opts.skipMetadata {
		meta := result.Metadata
		fmt.Printf("📋 Embedded Metadata (Skippable Frame)\n")
		fmt.Printf("──────────────────────────────────────\n")
		fmt.Printf(" Format: %s\n", meta.Format)
		fmt.Printf(" Origin: %s\n", meta.Origin)
		fmt.Printf(" Bundle Number: %06d\n", meta.BundleNumber)
		if meta.CreatedBy != "" {
			fmt.Printf(" Created by: %s\n", meta.CreatedBy)
		}
		if meta.CreatedByHost != "" {
			fmt.Printf(" Created on: %s\n", meta.CreatedByHost)
		}
		fmt.Printf(" Created at: %s\n", meta.CreatedAt.Format("2006-01-02 15:04:05 MST"))

		fmt.Printf("\n Content:\n")
		fmt.Printf(" Operations: %s\n", formatNumber(meta.OperationCount))
		fmt.Printf(" Unique DIDs: %s\n", formatNumber(meta.DIDCount))
		fmt.Printf(" Frames: %d × %d ops\n", meta.FrameCount, meta.FrameSize)
		fmt.Printf(" Timespan: %s → %s\n",
			meta.StartTime.Format("2006-01-02 15:04:05"),
			meta.EndTime.Format("2006-01-02 15:04:05"))
		fmt.Printf(" Duration: %s\n",
			formatDuration(meta.EndTime.Sub(meta.StartTime)))

		fmt.Printf("\n Integrity:\n")
		fmt.Printf(" Content hash: %s\n", meta.ContentHash)
		if meta.ParentHash != "" {
			fmt.Printf(" Parent hash: %s\n", meta.ParentHash)
		}

		if len(meta.FrameOffsets) > 0 {
			// Calculate metadata size (size of the metadata frame itself)
			metadataSize := int64(0)
			if result.HasMetadataFrame {
				// Metadata is at the end of file, after all data frames
				// Size = file size - last frame offset
				// NOTE(review): if FrameOffsets holds frame START offsets,
				// this also includes the last data frame's bytes — confirm
				// offset semantics in the storage package.
				if len(meta.FrameOffsets) > 0 {
					lastFrameOffset := meta.FrameOffsets[len(meta.FrameOffsets)-1]
					metadataSize = result.FileSize - lastFrameOffset
				}
			}

			// Print with fixes
			fmt.Printf(" Ops Frame Index: %d offsets (embedded)\n", len(meta.FrameOffsets))
			fmt.Printf(" Metadata size: %s\n", formatBytes(metadataSize))
			fmt.Printf(" Frame offsets: %v\n", formatOffsetArray(meta.FrameOffsets, 5)) // Show first 5
		}
		fmt.Printf("\n")
	}

	// Operations breakdown
	// NOTE(review): percentages below divide by TotalOps/UniqueDIDs and
	// print NaN when those are zero — confirm empty bundles cannot reach
	// this point.
	fmt.Printf("📊 Operations Analysis\n")
	fmt.Printf("──────────────────────\n")
	fmt.Printf(" Total operations: %s\n", formatNumber(result.TotalOps))
	fmt.Printf(" Active: %s (%.1f%%)\n",
		formatNumber(result.ActiveOps),
		float64(result.ActiveOps)/float64(result.TotalOps)*100)
	if result.NullifiedOps > 0 {
		fmt.Printf(" Nullified: %s (%.1f%%)\n",
			formatNumber(result.NullifiedOps),
			float64(result.NullifiedOps)/float64(result.TotalOps)*100)
	}

	if len(result.OperationTypes) > 0 {
		fmt.Printf("\n Operation Types:\n")

		// Sort by count
		var types []struct {
			name  string
			count int
		}
		for name, count := range result.OperationTypes {
			types = append(types, struct {
				name  string
				count int
			}{name, count})
		}
		sort.Slice(types, func(i, j int) bool {
			return types[i].count > types[j].count
		})

		for _, t := range types {
			pct := float64(t.count) / float64(result.TotalOps) * 100
			fmt.Printf(" %-25s %s (%.1f%%)\n", t.name, formatNumber(t.count), pct)
		}
	}
	fmt.Printf("\n")

	// DID patterns
	fmt.Printf("👤 DID Activity Patterns\n")
	fmt.Printf("────────────────────────\n")
	fmt.Printf(" Unique DIDs: %s\n", formatNumber(result.UniqueDIDs))
	fmt.Printf(" Single-op DIDs: %s (%.1f%%)\n",
		formatNumber(result.SingleOpDIDs),
		float64(result.SingleOpDIDs)/float64(result.UniqueDIDs)*100)
	fmt.Printf(" Multi-op DIDs: %s (%.1f%%)\n",
		formatNumber(result.MultiOpDIDs),
		float64(result.MultiOpDIDs)/float64(result.UniqueDIDs)*100)

	if len(result.TopDIDs) > 0 {
		// Only the five most active DIDs are shown here even though the
		// analysis keeps the top ten.
		fmt.Printf("\n Most Active DIDs:\n")
		for i, da := range result.TopDIDs {
			if i >= 5 {
				break
			}
			fmt.Printf(" %d. %s (%d ops)\n", i+1, da.DID, da.Count)
		}
	}
	fmt.Printf("\n")

	// Handle patterns
	if !opts.skipPatterns && result.TotalHandles > 0 {
		fmt.Printf("🏷️ Handle Statistics\n")
		fmt.Printf("────────────────────\n")
		fmt.Printf(" Total handles: %s\n", formatNumber(result.TotalHandles))
		if result.InvalidHandles > 0 {
			fmt.Printf(" Invalid patterns: %s (%.1f%%)\n",
				formatNumber(result.InvalidHandles),
				float64(result.InvalidHandles)/float64(result.TotalHandles)*100)
		}

		if len(result.TopDomains) > 0 {
			fmt.Printf("\n Top Domains:\n")
			for i, dc := range result.TopDomains {
				if i >= 10 {
					break
				}
				pct := float64(dc.Count) / float64(result.TotalHandles) * 100
				fmt.Printf(" %-25s %s (%.1f%%)\n", dc.Domain, formatNumber(dc.Count), pct)
			}
		}
		fmt.Printf("\n")
	}

	// Service patterns
	if !opts.skipPatterns && result.TotalServices > 0 {
		fmt.Printf("🌐 Service Endpoints\n")
		fmt.Printf("────────────────────\n")
		fmt.Printf(" Total services: %s\n", formatNumber(result.TotalServices))
		fmt.Printf(" Unique endpoints: %s\n", formatNumber(result.UniqueEndpoints))

		if len(result.TopPDSEndpoints) > 0 {
			fmt.Printf("\n Top PDS Endpoints:\n")
			for i, ec := range result.TopPDSEndpoints {
				if i >= 10 {
					break
				}
				fmt.Printf(" %-40s %s ops\n", ec.Endpoint, formatNumber(ec.Count))
			}
		}
		fmt.Printf("\n")
	}

	// Temporal analysis
	fmt.Printf("⏱️ Time Distribution\n")
	fmt.Printf("───────────────────────\n")
	if result.TimeDistribution != nil {
		td := result.TimeDistribution
		fmt.Printf(" Earliest operation: %s\n", td.EarliestOp.Format(time.RFC3339))
		fmt.Printf(" Latest operation: %s\n", td.LatestOp.Format(time.RFC3339))
		fmt.Printf(" Time span: %s\n", td.TimeSpan)
		fmt.Printf(" Peak hour: %s (%d ops)\n",
			td.PeakHour.Format("2006-01-02 15:04"), td.PeakHourOps)
		fmt.Printf(" Total active hours: %d\n", td.TotalHours)
		fmt.Printf(" Avg ops/minute: %.1f\n", result.AvgOpsPerMinute)
	}
	fmt.Printf("\n")

	// Size analysis
	fmt.Printf("📏 Size Analysis\n")
	fmt.Printf("────────────────\n")
	fmt.Printf(" Total data: %s\n", formatBytes(result.TotalOpSize))
	fmt.Printf(" Average per op: %s\n", formatBytes(int64(result.AvgOpSize)))
	fmt.Printf(" Min operation: %s\n", formatBytes(int64(result.MinOpSize)))
	fmt.Printf(" Max operation: %s\n\n", formatBytes(int64(result.MaxOpSize)))

	// Cryptographic verification
	if opts.verify && !opts.skipCrypto {
		fmt.Printf("🔐 Cryptographic Verification\n")
		fmt.Printf("─────────────────────────────\n")

		// status renders a boolean as a check/cross label.
		status := func(valid bool) string {
			if valid {
				return "✓ Valid"
			}
			return "✗ Invalid"
		}

		fmt.Printf(" Content hash: %s\n", status(result.ContentHashValid))
		fmt.Printf(" Compressed hash: %s\n", status(result.CompressedHashValid))
		if result.HasMetadataFrame {
			fmt.Printf(" Metadata integrity: %s\n", status(result.MetadataValid))
		}
		fmt.Printf("\n")
	}

	// Performance summary
	fmt.Printf("⚡ Performance\n")
	fmt.Printf("──────────────\n")
	fmt.Printf(" Load time: %s\n", result.LoadTime)
	if opts.verify {
		fmt.Printf(" Verify time: %s\n", result.VerifyTime)
	}
	fmt.Printf(" Total time: %s\n", result.TotalTime)
	if result.LoadTime.Seconds() > 0 {
		opsPerSec := float64(result.TotalOps) / result.LoadTime.Seconds()
		mbPerSec := float64(result.TotalOpSize) / result.LoadTime.Seconds() / (1024 * 1024)
		fmt.Printf(" Throughput: %.0f ops/sec, %.2f MB/s\n", opsPerSec, mbPerSec)
	}
	fmt.Printf("\n")

	return nil
}
849
850func displayInspectJSON(result *inspectResult) error {
851 data, _ := json.MarshalIndent(result, "", " ")
852 fmt.Println(string(data))
853 return nil
854}
855
856func verifyCrypto(cmd *cobra.Command, path string, meta *storage.BundleMetadata, bundleNum int, verbose bool) (contentValid, compressedValid, metadataValid bool) {
857 ops, _ := storage.NewOperations(nil, verbose)
858
859 // Calculate actual hashes from file
860 compHash, compSize, contentHash, contentSize, err := ops.CalculateFileHashes(path)
861 if err != nil {
862 if verbose {
863 fmt.Fprintf(os.Stderr, " Hash calculation failed: %v\n", err)
864 }
865 return false, false, false
866 }
867
868 contentValid = true
869 compressedValid = true
870 metadataValid = true
871
872 // Verify against embedded metadata if available
873 if meta != nil {
874 // Check content hash (this is in the metadata)
875 if meta.ContentHash != "" && meta.ContentHash != contentHash {
876 contentValid = false
877 if verbose {
878 fmt.Fprintf(os.Stderr, " ✗ Content hash mismatch!\n")
879 fmt.Fprintf(os.Stderr, " Expected: %s\n", meta.ContentHash)
880 fmt.Fprintf(os.Stderr, " Actual: %s\n", contentHash)
881 }
882 }
883
884 if meta.OperationCount > 0 {
885 // We can't verify this without loading, so skip
886 metadataValid = true
887 }
888
889 // Note: We don't check compressed hash/size because they're not in metadata
890 // (The file IS the compressed data, so it's redundant)
891
892 if verbose {
893 fmt.Fprintf(os.Stderr, " Embedded metadata:\n")
894 fmt.Fprintf(os.Stderr, " Content hash: %s\n", meta.ContentHash[:16]+"...")
895 fmt.Fprintf(os.Stderr, " Operations: %d\n", meta.OperationCount)
896 fmt.Fprintf(os.Stderr, " DIDs: %d\n", meta.DIDCount)
897 }
898 }
899
900 // Also verify against repository index if bundle number is known
901 if bundleNum > 0 {
902 mgr, _, err := getManager(&ManagerOptions{Cmd: cmd})
903 if err == nil {
904 defer mgr.Close()
905
906 ctx := context.Background()
907 vr, err := mgr.VerifyBundle(ctx, bundleNum)
908 if err == nil && vr != nil {
909 // Index verification
910 indexContentValid := vr.Valid
911 indexHashMatch := vr.HashMatch
912
913 if verbose {
914 fmt.Fprintf(os.Stderr, " Repository index:\n")
915 fmt.Fprintf(os.Stderr, " Content valid: %v\n", indexContentValid)
916 fmt.Fprintf(os.Stderr, " Hash match: %v\n", indexHashMatch)
917 }
918
919 contentValid = contentValid && indexContentValid
920 compressedValid = compressedValid && indexHashMatch
921 }
922 }
923 }
924
925 if verbose {
926 fmt.Fprintf(os.Stderr, " Calculated hashes:\n")
927 fmt.Fprintf(os.Stderr, " Content: %s (%s)\n", contentHash[:16]+"...", formatBytes(contentSize))
928 fmt.Fprintf(os.Stderr, " Compressed: %s (%s)\n", compHash[:16]+"...", formatBytes(compSize))
929 }
930
931 return contentValid, compressedValid, metadataValid
932}
933
934func resolveBundlePath(cmd *cobra.Command, input string) (path string, bundleNum int, err error) {
935 // Check if it's a file path
936 if strings.HasSuffix(input, ".zst") || strings.Contains(input, "/") || strings.Contains(input, "\\") {
937 absPath, err := filepath.Abs(input)
938 if err != nil {
939 return "", 0, err
940 }
941
942 // Try to extract bundle number from filename
943 base := filepath.Base(absPath)
944 fmt.Sscanf(base, "%d", &bundleNum)
945
946 return absPath, bundleNum, nil
947 }
948
949 // Try to parse as bundle number
950 if _, err := fmt.Sscanf(input, "%d", &bundleNum); err == nil {
951 // Load from repository
952 mgr, dir, err := getManager(&ManagerOptions{Cmd: cmd})
953 if err != nil {
954 return "", 0, err
955 }
956 defer mgr.Close()
957
958 path := filepath.Join(dir, fmt.Sprintf("%06d.jsonl.zst", bundleNum))
959 if _, err := os.Stat(path); err != nil {
960 return "", 0, fmt.Errorf("bundle %d not found in repository", bundleNum)
961 }
962
963 return path, bundleNum, nil
964 }
965
966 return "", 0, fmt.Errorf("invalid input: must be bundle number or file path")
967}
968
// formatOffsetArray renders a frame-offset list compactly: short lists
// are printed whole, longer ones show only the first maxShow entries
// followed by a count of the elided remainder.
func formatOffsetArray(offsets []int64, maxShow int) string {
	switch {
	case len(offsets) == 0:
		return "[]"
	case len(offsets) <= maxShow:
		return fmt.Sprintf("%v", offsets)
	default:
		// A slice expression prints identically to the copied prefix.
		head := offsets[:maxShow]
		return fmt.Sprintf("%v ... (%d more)", head, len(offsets)-maxShow)
	}
}