[DEPRECATED] Go implementation of plcbundle

bundle metadata frame

+169 -117
+161 -105
cmd/plcbundle/commands/inspect.go
··· 19 19 // ============================================================================ 20 20 21 21 type DIDActivity struct { 22 - DID string 23 - Count int 22 + DID string `json:"did"` 23 + Count int `json:"count"` 24 24 } 25 25 26 26 type DomainCount struct { 27 - Domain string 28 - Count int 27 + Domain string `json:"domain"` 28 + Count int `json:"count"` 29 29 } 30 30 31 31 type EndpointCount struct { 32 - Endpoint string 33 - Count int 32 + Endpoint string `json:"endpoint"` 33 + Count int `json:"count"` 34 34 } 35 35 36 36 type TimeSlot struct { 37 - Time time.Time 38 - Count int 37 + Time time.Time `json:"time"` 38 + Count int `json:"count"` 39 39 } 40 40 41 41 type inspectOptions struct { ··· 51 51 52 52 type inspectResult struct { 53 53 // Metadata 54 - Metadata *storage.BundleMetadata 54 + Metadata *storage.BundleMetadata `json:"metadata,omitempty"` 55 55 56 56 // Basic stats 57 - FilePath string 58 - FileSize int64 59 - HasMetadataFrame bool 60 - HasFrameIndex bool 57 + FilePath string `json:"file_path"` 58 + FileSize int64 `json:"file_size"` 59 + HasMetadataFrame bool `json:"has_metadata_frame"` 60 + HasFrameIndex bool `json:"has_frame_index"` 61 61 62 62 // Operation analysis 63 - TotalOps int 64 - NullifiedOps int 65 - ActiveOps int 66 - UniqueDIDs int 67 - OperationTypes map[string]int 63 + TotalOps int `json:"total_ops"` 64 + NullifiedOps int `json:"nullified_ops"` 65 + ActiveOps int `json:"active_ops"` 66 + UniqueDIDs int `json:"unique_dids"` 67 + OperationTypes map[string]int `json:"operation_types"` 68 68 69 69 // DID patterns 70 - TopDIDs []DIDActivity 71 - SingleOpDIDs int 72 - MultiOpDIDs int 70 + TopDIDs []DIDActivity `json:"top_dids"` 71 + SingleOpDIDs int `json:"single_op_dids"` 72 + MultiOpDIDs int `json:"multi_op_dids"` 73 73 74 74 // Handle patterns 75 - TotalHandles int 76 - TopDomains []DomainCount 77 - InvalidHandles int 75 + TotalHandles int `json:"total_handles"` 76 + TopDomains []DomainCount `json:"top_domains"` 77 + InvalidHandles int `json:"invalid_handles"` 78 78 79 79 // Service patterns 80 - TotalServices int 81 - UniqueEndpoints int 82 - TopPDSEndpoints []EndpointCount 80 + TotalServices int `json:"total_services"` 81 + UniqueEndpoints int `json:"unique_endpoints"` 82 + TopPDSEndpoints []EndpointCount `json:"top_pds_endpoints"` 83 83 84 84 // Temporal 85 - TimeDistribution []TimeSlot 86 - AvgOpsPerMinute float64 85 + TimeDistribution *TimeDistributionSummary `json:"time_distribution,omitempty"` 86 + AvgOpsPerMinute float64 `json:"avg_ops_per_minute"` 87 87 88 88 // Size analysis 89 - AvgOpSize int 90 - MinOpSize int 91 - MaxOpSize int 92 - TotalOpSize int64 89 + AvgOpSize int `json:"avg_op_size"` 90 + MinOpSize int `json:"min_op_size"` 91 + MaxOpSize int `json:"max_op_size"` 92 + TotalOpSize int64 `json:"total_op_size"` 93 93 94 94 // Crypto verification 95 - ContentHashValid bool 96 - CompressedHashValid bool 97 - MetadataValid bool 95 + ContentHashValid bool `json:"content_hash_valid"` 96 + CompressedHashValid bool `json:"compressed_hash_valid"` 97 + MetadataValid bool `json:"metadata_valid"` 98 98 99 99 // Timing 100 - LoadTime time.Duration 101 - AnalyzeTime time.Duration 102 - VerifyTime time.Duration 103 - TotalTime time.Duration 100 + LoadTime time.Duration `json:"load_time"` 101 + AnalyzeTime time.Duration `json:"analyze_time"` 102 + VerifyTime time.Duration `json:"verify_time"` 103 + TotalTime time.Duration `json:"total_time"` 104 104 } 105 105 106 106 type bundleAnalysis struct { 107 - TotalOps int 108 - NullifiedOps int 109 - ActiveOps int 110 - UniqueDIDs int 111 - OperationTypes map[string]int 112 - SingleOpDIDs int 113 - MultiOpDIDs int 114 - TotalHandles int 115 - InvalidHandles int 116 - TotalServices int 117 - UniqueEndpoints int 118 - AvgOpsPerMinute float64 119 - AvgOpSize int 120 - MinOpSize int 121 - MaxOpSize int 122 - TotalOpSize int64 107 + TotalOps int `json:"total_ops"` 108 + NullifiedOps int `json:"nullified_ops"` 109 + ActiveOps int `json:"active_ops"` 110 + UniqueDIDs int `json:"unique_dids"` 111 + OperationTypes map[string]int `json:"operation_types"` 112 + SingleOpDIDs int `json:"single_op_dids"` 113 + MultiOpDIDs int `json:"multi_op_dids"` 114 + TotalHandles int `json:"total_handles"` 115 + InvalidHandles int `json:"invalid_handles"` 116 + TotalServices int `json:"total_services"` 117 + UniqueEndpoints int `json:"unique_endpoints"` 118 + AvgOpsPerMinute float64 `json:"avg_ops_per_minute"` 119 + AvgOpSize int `json:"avg_op_size"` 120 + MinOpSize int `json:"min_op_size"` 121 + MaxOpSize int `json:"max_op_size"` 122 + TotalOpSize int64 `json:"total_op_size"` 123 123 124 - // For top-N calculations 124 + // For top-N calculations (unexported, won't appear in JSON) 125 125 didActivity map[string]int 126 126 domainCounts map[string]int 127 127 endpointCounts map[string]int 128 - timeSlots map[int64]int 128 + 129 + // For time calculations 130 + timeSlots map[int64]int 129 131 130 132 // Results 131 - TopDIDs []DIDActivity 132 - TopDomains []DomainCount 133 - TopPDSEndpoints []EndpointCount 134 - TimeDistribution []TimeSlot 133 + TopDIDs []DIDActivity `json:"top_dids"` 134 + TopDomains []DomainCount `json:"top_domains"` 135 + TopPDSEndpoints []EndpointCount `json:"top_pds_endpoints"` 136 + TimeDistribution *TimeDistributionSummary `json:"time_distribution,omitempty"` 137 + } 138 + 139 + type TimeDistributionSummary struct { 140 + EarliestOp time.Time `json:"earliest_op"` 141 + LatestOp time.Time `json:"latest_op"` 142 + TimeSpan string `json:"time_span"` 143 + PeakHour time.Time `json:"peak_hour"` 144 + PeakHourOps int `json:"peak_hour_ops"` 145 + TotalHours int `json:"total_hours"` 135 146 } 136 147 137 148 // ============================================================================ ··· 433 444 analysis.UniqueEndpoints = len(analysis.endpointCounts) 434 445 435 446 // Time distribution 436 - analysis.TimeDistribution = getTimeDistribution(analysis.timeSlots) 447 + analysis.TimeDistribution = calculateTimeDistributionSummary(analysis.timeSlots) 437 448 438 449 // Calculate ops per minute 439 450 if len(operations) > 1 { ··· 549 560 return results 550 561 } 551 562 552 - func getTimeDistribution(timeSlots map[int64]int) []TimeSlot { 553 - var results []TimeSlot 554 - for slot, count := range timeSlots { 555 - results = append(results, TimeSlot{ 556 - Time: time.Unix(slot*60, 0), 557 - Count: count, 558 - }) 563 + func calculateTimeDistributionSummary(timeSlots map[int64]int) *TimeDistributionSummary { 564 + if len(timeSlots) == 0 { 565 + return nil 566 + } 567 + 568 + var earliest, latest int64 569 + var peakHour int64 570 + var peakCount int 571 + 572 + // Group by hour and find stats 573 + hourlySlots := make(map[int64]int) 574 + 575 + for ts, count := range timeSlots { 576 + // Track earliest/latest 577 + if earliest == 0 || ts < earliest { 578 + earliest = ts 579 + } 580 + if ts > latest { 581 + latest = ts 582 + } 583 + 584 + // Group by hour 585 + hour := (ts / 3600) * 3600 // Truncate to hour 586 + hourlySlots[hour] += count 587 + } 588 + 589 + // Find peak hour 590 + for hour, count := range hourlySlots { 591 + if count > peakCount { 592 + peakCount = count 593 + peakHour = hour 594 + } 559 595 } 560 596 561 - sort.Slice(results, func(i, j int) bool { 562 - return results[i].Time.Before(results[j].Time) 563 - }) 597 + // Calculate time span 598 + duration := time.Unix(latest, 0).Sub(time.Unix(earliest, 0)) 599 + timeSpan := formatDuration(duration) 600 + 601 + // Calculate total hours covered 602 + totalHours := len(hourlySlots) 564 603 565 - return results 604 + return &TimeDistributionSummary{ 605 + EarliestOp: time.Unix(earliest, 0).UTC(), 606 + LatestOp: time.Unix(latest, 0).UTC(), 607 + TimeSpan: timeSpan, 608 + PeakHour: time.Unix(peakHour, 0).UTC(), 609 + PeakHourOps: peakCount, 610 + TotalHours: totalHours, 611 + } 566 612 } 567 613 568 614 // ============================================================================ ··· 588 634 meta := result.Metadata 589 635 fmt.Printf("📋 Embedded Metadata (Skippable Frame)\n") 590 636 fmt.Printf("──────────────────────────────────────\n") 591 - fmt.Printf(" Format: %s (v%d)\n", meta.Format, meta.Version) 592 - if meta.SpecURL != "" { 593 - fmt.Printf(" Specification: %s\n", meta.SpecURL) 594 - } 637 + fmt.Printf(" Format: %s\n", meta.Format) 638 + fmt.Printf(" Origin: %s\n", meta.Origin) 595 639 fmt.Printf(" Bundle Number: %06d\n", meta.BundleNumber) 596 - if meta.Origin != "" { 597 - fmt.Printf(" Origin: %s\n", meta.Origin) 598 - } 599 640 if meta.CreatedBy != "" { 600 641 fmt.Printf(" Created by: %s\n", meta.CreatedBy) 601 642 } ··· 621 662 } 622 663 623 664 if len(meta.FrameOffsets) > 0 { 624 - fmt.Printf("\n Frame Index: %d offsets (embedded)\n", len(meta.FrameOffsets)) 625 - firstDataOffset := meta.FrameOffsets[0] 626 - fmt.Printf(" Metadata size: %s\n", formatBytes(firstDataOffset)) 627 - fmt.Printf(" First data frame: offset %d\n", firstDataOffset) 665 + // Calculate metadata size (size of the metadata frame itself) 666 + metadataSize := int64(0) 667 + if result.HasMetadataFrame { 668 + // Metadata is at the end of file, after all data frames 669 + // Size = file size - last frame offset 670 + if len(meta.FrameOffsets) > 0 { 671 + lastFrameOffset := meta.FrameOffsets[len(meta.FrameOffsets)-1] 672 + metadataSize = result.FileSize - lastFrameOffset 673 + } 674 + } 675 + 676 + // Print with fixes 677 + fmt.Printf(" Ops Frame Index: %d offsets (embedded)\n", len(meta.FrameOffsets)) 678 + fmt.Printf(" Metadata size: %s\n", formatBytes(metadataSize)) 679 + fmt.Printf(" Frame offsets: %v\n", formatOffsetArray(meta.FrameOffsets, 5)) // Show first 5 628 680 } 629 681 fmt.Printf("\n") 630 682 } ··· 733 785 } 734 786 735 787 // Temporal analysis 736 - fmt.Printf("⏱️ Temporal Distribution\n") 788 + fmt.Printf("⏱️ Time Distribution\n") 737 789 fmt.Printf("───────────────────────\n") 738 - if len(result.TimeDistribution) > 0 { 739 - first := result.TimeDistribution[0] 740 - last := result.TimeDistribution[len(result.TimeDistribution)-1] 741 - duration := last.Time.Sub(first.Time) 742 - 743 - fmt.Printf(" Start: %s\n", first.Time.Format("2006-01-02 15:04:05")) 744 - fmt.Printf(" End: %s\n", last.Time.Format("2006-01-02 15:04:05")) 745 - fmt.Printf(" Duration: %s\n", formatDuration(duration)) 790 + if result.TimeDistribution != nil { 791 + td := result.TimeDistribution 792 + fmt.Printf(" Earliest operation: %s\n", td.EarliestOp.Format(time.RFC3339)) 793 + fmt.Printf(" Latest operation: %s\n", td.LatestOp.Format(time.RFC3339)) 794 + fmt.Printf(" Time span: %s\n", td.TimeSpan) 795 + fmt.Printf(" Peak hour: %s (%d ops)\n", 796 + td.PeakHour.Format("2006-01-02 15:04"), td.PeakHourOps) 797 + fmt.Printf(" Total active hours: %d\n", td.TotalHours) 746 798 fmt.Printf(" Avg ops/minute: %.1f\n", result.AvgOpsPerMinute) 747 - fmt.Printf(" Time slots: %d minutes\n", len(result.TimeDistribution)) 748 - 749 - // Find peak activity 750 - maxSlot := result.TimeDistribution[0] 751 - for _, slot := range result.TimeDistribution { 752 - if slot.Count > maxSlot.Count { 753 - maxSlot = slot 754 - } 755 - } 756 - fmt.Printf(" Peak activity: %d ops at %s\n", 757 - maxSlot.Count, maxSlot.Time.Format("15:04")) 758 799 } 759 800 fmt.Printf("\n") 760 801 ··· 922 963 923 964 return "", 0, fmt.Errorf("invalid input: must be bundle number or file path") 924 965 } 966 + 967 + func formatOffsetArray(offsets []int64, maxShow int) string { 968 + if len(offsets) == 0 { 969 + return "[]" 970 + } 971 + 972 + if len(offsets) <= maxShow { 973 + return fmt.Sprintf("%v", offsets) 974 + } 975 + 976 + // Show first maxShow elements 977 + shown := make([]int64, maxShow) 978 + copy(shown, offsets[:maxShow]) 979 + return fmt.Sprintf("%v ... (%d more)", shown, len(offsets)-maxShow) 980 + }
+8 -12
internal/storage/storage.go
··· 23 23 // BundleMetadata - Self-describing bundle (content-focused, not container) 24 24 type BundleMetadata struct { 25 25 // === Format Info === 26 - Version int `json:"version"` // Metadata schema version (1) 27 - Format string `json:"format"` // "plcbundle-v1" 28 - SpecURL string `json:"spec_url"` // "https://github.com/atscan-net/plcbundle" 26 + Format string `json:"format"` // "plcbundle-v1" 29 27 30 28 // === Bundle Identity === 29 + Origin string `json:"origin"` // Source PLC directory URL 31 30 BundleNumber int `json:"bundle_number"` // Sequential bundle number 32 - Origin string `json:"origin"` // Source PLC directory URL 33 - 34 - // === Creation Provenance === 35 - CreatedAt time.Time `json:"created_at"` // When bundle was created 36 - CreatedBy string `json:"created_by"` // "plcbundle/v1.2.3" 37 - CreatedByHost string `json:"created_by_host,omitempty"` // Optional: hostname that created it 38 31 39 32 // === Content Integrity === 40 33 ContentHash string `json:"content_hash"` // SHA256 of uncompressed JSONL content ··· 54 47 // === Optional Context === 55 48 Cursor string `json:"cursor,omitempty"` // PLC export cursor for this bundle 56 49 Notes string `json:"notes,omitempty"` // Optional description 50 + 51 + // === Creation Provenance === 52 + CreatedAt time.Time `json:"created_at"` // When bundle was created 53 + CreatedBy string `json:"created_by"` // "plcbundle/v1.2.3" 54 + CreatedByHost string `json:"created_by_host,omitempty"` // Optional: hostname that created it 57 55 } 58 56 59 57 // Operations handles low-level bundle file operations ··· 180 178 181 179 // 4. ✅ Build metadata with RELATIVE offsets 182 180 metadata := &BundleMetadata{ 183 - Version: MetadataFormatVersion, 184 - Format: "plcbundle-v1", 185 - SpecURL: "https://github.com/atscan-net/plcbundle", 181 + Format: fmt.Sprintf("plcbundle-v%d", MetadataFormatVersion), 186 182 BundleNumber: bundleInfo.BundleNumber, 187 183 Origin: bundleInfo.Origin, 188 184 CreatedAt: time.Now().UTC(),