···6767 }
68686969 var operation plc.PLCOperation
7070- // Use sonic instead of json.Unmarshal
7171- if err := json.Unmarshal(line, &operation); err != nil {
7070+ if err := json.UnmarshalNoEscape(line, &operation); err != nil {
7271 return nil, fmt.Errorf("failed to parse line: %w", err)
7372 }
7473
+8-1
cmd/plcbundle/detector.go
···545545546546// detectOperation runs all detectors on an operation and returns labels + confidence
547547func detectOperation(ctx context.Context, detectors []detector.Detector, op plc.PLCOperation, minConfidence float64) ([]string, float64) {
548548+ // Parse Operation ONCE before running detectors
549549+ opData, err := op.GetOperationData()
550550+ if err != nil {
551551+ return nil, 0
552552+ }
553553+ op.ParsedOperation = opData // Set for detectors to use
554554+548555 var matchedLabels []string
549556 var maxConfidence float64
550557551558 for _, det := range detectors {
552552- match, err := det.Detect(ctx, op)
559559+ match, err := det.Detect(ctx, op) // ← op now has ParsedOperation set
553560 if err != nil || match == nil || match.Confidence < minConfidence {
554561 continue
555562 }
+25-10
plc/types.go
···8899// PLCOperation represents a single operation from the PLC directory
1010type PLCOperation struct {
1111- DID string `json:"did"`
1212- //Operation map[string]interface{} `json:"operation"`
1313- Operation json.RawMessage `json:"operation"`
1111+ DID string `json:"did"`
// NOTE(review): Operation stays as raw JSON bytes; it is only decoded into a
// map when GetOperationData is called, which then stores the result in
// ParsedOperation.
1212+ Operation json.RawMessage `json:"operation"` // Lazy
1413 CID string `json:"cid"`
1514 Nullified interface{} `json:"nullified,omitempty"`
1615 CreatedAt time.Time `json:"createdAt"`
17161818- // RawJSON stores the original JSON bytes for exact reproduction
1919- RawJSON []byte `json:"-"`
// NOTE(review): this change drops the explanatory comment on RawJSON (the
// original JSON bytes, kept for exact reproduction). Consider keeping it, as
// the field's purpose is not obvious from its name and `json:"-"` tag alone.
1717+ RawJSON []byte `json:"-"`
// NOTE(review): ParsedOperation is tagged `json:"-"`, so it is not part of the
// serialized form. It is exported and assigned both by GetOperationData and
// directly by detectOperation; GetOperationData returns it as-is whenever it
// is non-nil, without checking that it still corresponds to Operation.
1818+ ParsedOperation map[string]interface{} `json:"-"` // Pre-parsed cache
2019}
21202221// IsNullified checks if this operation has been nullified
···9291 Endpoint string
9392}
94939595-// GetOperationMap parses Operation RawMessage into a map
9696-func (op *PLCOperation) GetOperationMap() (map[string]interface{}, error) {
9494+// GetOperationData parses Operation into map (with caching)
// GetOperationData returns the Operation payload decoded into a map.
//
// Behaviour visible in this method:
//   - If op.ParsedOperation is already non-nil it is returned directly and
//     Operation is not decoded again.
//   - If Operation is empty, it returns (nil, nil) and caches nothing.
//   - If decoding fails, it returns (nil, err) unchanged and caches nothing.
//   - Otherwise the decoded map is stored in op.ParsedOperation and returned.
//
// NOTE(review): every caller now receives the same map value. The removed
// implementation decoded a fresh map on each call, so a caller that mutates
// the result now affects all later callers — confirm no caller relies on
// getting a private copy.
// NOTE(review): the cache field is read and written here without any visible
// synchronization — confirm a *PLCOperation is never shared across goroutines
// while this is called.
9595+func (op *PLCOperation) GetOperationData() (map[string]interface{}, error) {
9696+ // Return cached if already parsed
9797+ if op.ParsedOperation != nil {
9898+ return op.ParsedOperation, nil
9999+ }
100100+101101+ // Parse on first call
// An empty payload is not treated as an error; callers must handle a nil map.
97102 if len(op.Operation) == 0 {
98103 return nil, nil
99104 }
100100- var result map[string]interface{}
101101- if err := json.Unmarshal(op.Operation, &result); err != nil {
105105+106106+ var data map[string]interface{}
107107+ if err := json.UnmarshalNoEscape(op.Operation, &data); err != nil {
102108 return nil, err
103109 }
104104- return result, nil
110110+111111+ // Cache it
112112+ op.ParsedOperation = data
113113+114114+ return data, nil
115115+}
116116+117117+// GetOperationMap is an alias for compatibility
// GetOperationMap forwards to GetOperationData and returns its result
// unchanged.
// NOTE(review): the removed GetOperationMap decoded Operation into a new map
// on every call; through this alias existing callers now get the cached map
// stored in op.ParsedOperation instead.
118118+func (op *PLCOperation) GetOperationMap() (map[string]interface{}, error) {
119119+ return op.GetOperationData()
105120}
+10-5
scripts/benchmark-detector.go
···16161717// Minimal operation struct
1818type Operation struct {
1919- DID string `json:"did"`
2020- //Operation map[string]interface{} `json:"operation"`
2121- CID string `json:"cid"`
2222- Nullified interface{} `json:"nullified,omitempty"`
2323- CreatedAt time.Time `json:"createdAt"`
1919+ DID string `json:"did"`
2020+ Operation map[string]interface{} `json:"operation"`
2121+ CID string `json:"cid"`
2222+ Nullified interface{} `json:"nullified,omitempty"`
2323+ CreatedAt time.Time `json:"createdAt"`
24242525 // RawJSON stores the original JSON bytes for exact reproduction
2626 RawJSON []byte `json:"-"`
···32323333 if strings.HasPrefix(op.DID, "did:plc:aa") {
3434 labels = append(labels, "test")
3535+ }
3636+3737+ // Log operation.sig (like console.log in JavaScript)
3838+ if sig, ok := op.Operation["sig"]; ok {
3939+ fmt.Fprintf(os.Stderr, "%v\n", sig)
3540 }
36413742 return labels
+12-8
scripts/benchmark-detector.js
// detect receives an object with an `op` property and returns an array of
// label strings; the only label produced here is 'test', for DIDs that start
// with "did:plc:aa".
···44function detect({ op }) {
55 const labels = [];
6677- if (op.did.match(/^did:plc:aa/)) {
// The anchored literal-prefix regex is replaced by an equivalent startsWith check.
77+ if (op.did.startsWith('did:plc:aa')) {
88 labels.push('test')
99 }
// NOTE(review): this runs for every operation and dereferences op.operation
// without a guard, so it throws if an operation has no `operation` field. The
// Go benchmark script checks for the key first and writes to stderr, whereas
// console.log writes to stdout — confirm the two benchmarks are meant to be
// comparable.
1010+1111+ console.log(op.operation.sig)
10121113 return labels;
1214}
13151416// ==========================================
1515-// Pure Bun bundle processor
1717+// Pure Bun bundle processor with native zstd
1618// ==========================================
1717-1818-import { spawn } from 'bun';
1919-import { readdir } from 'fs/promises';
// Command-line configuration: argv[2] is the bundle directory (falls back to
// './' when absent or empty) and argv[3] is the first bundle number.
20192120const BUNDLE_DIR = process.argv[2] || './';
// parseInt yields NaN for a missing or non-numeric argument, and `|| 1` turns
// NaN — and also an explicit 0 — into 1.
2221const START_BUNDLE = parseInt(process.argv[3]) || 1;
···3938 const bundleFile = `${BUNDLE_DIR}/${bundleNum.toString().padStart(6, '0')}.jsonl.zst`;
40394140 try {
4242- // Decompress bundle using zstd command
4343- const proc = spawn(['zstd', '-d', '-c', bundleFile]);
4444- const text = await new Response(proc.stdout).text();
4141+ // Read compressed bundle
4242+ const compressed = await Bun.file(bundleFile).arrayBuffer();
4343+4444+ // Decompress using native Bun zstd (FAST!)
4545+ const decompressed = Bun.zstdDecompressSync(compressed);
4646+4747+ // Convert to text
4848+ const text = new TextDecoder().decode(decompressed);
45494650 const lines = text.split('\n').filter(line => line.trim());
4751