tangled
alpha
login
or
join now
angrydutchman.peedee.es
/
plcbundle
forked from
atscan.net/plcbundle
0
fork
atom
A Transparent and Verifiable Way to Sync the AT Protocol's PLC Directory
0
fork
atom
overview
issues
pulls
pipelines
detectors experiments
tree.fail
4 months ago
40336cb2
d5d20592
+1565
-17
7 changed files
expand all
collapse all
unified
split
cmd
plcbundle
detector.go
main.go
progress.go
detector
builtin.go
detector.go
registry.go
runner.go
+629
cmd/plcbundle/detector.go
···
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
···
1
+
// cmd/plcbundle/detector.go
2
+
package main
3
+
4
+
import (
5
+
"bufio"
6
+
"context"
7
+
"encoding/json"
8
+
"flag"
9
+
"fmt"
10
+
"os"
11
+
"sort"
12
+
"strings"
13
+
"time"
14
+
15
+
"tangled.org/atscan.net/plcbundle/detector"
16
+
"tangled.org/atscan.net/plcbundle/plc"
17
+
)
18
+
19
+
// defaultLogger is the logger handed to the detector runner by the CLI.
// It writes every message to standard error so that standard output stays
// reserved for command results.
type defaultLogger struct{}

// Printf formats the message like fmt.Printf, appends a newline, and writes
// it to os.Stderr.
func (d *defaultLogger) Printf(format string, v ...interface{}) {
	withNewline := format + "\n"
	fmt.Fprintf(os.Stderr, withNewline, v...)
}
24
+
25
+
func cmdDetector() {
26
+
if len(os.Args) < 3 {
27
+
printDetectorUsage()
28
+
os.Exit(1)
29
+
}
30
+
31
+
subcommand := os.Args[2]
32
+
33
+
switch subcommand {
34
+
case "list":
35
+
cmdDetectorList()
36
+
case "test":
37
+
cmdDetectorTest()
38
+
case "run":
39
+
cmdDetectorRun()
40
+
case "filter": // ← Add this
41
+
cmdDetectorFilter()
42
+
case "info":
43
+
cmdDetectorInfo()
44
+
default:
45
+
fmt.Fprintf(os.Stderr, "Unknown detector subcommand: %s\n", subcommand)
46
+
printDetectorUsage()
47
+
os.Exit(1)
48
+
}
49
+
}
50
+
51
+
// printDetectorUsage prints the help text for `plcbundle detector`.
//
// The text goes to standard error: this function is only reached when the
// subcommand is missing or unknown, and standard output is the data channel
// for detector commands (CSV / JSONL), so usage text must never end up in a
// redirected result file.
func printDetectorUsage() {
	// fmt.Fprint (not Fprintf) so the text is emitted verbatim and can never
	// be misread as a format string.
	fmt.Fprint(os.Stderr, `Usage: plcbundle detector <command> [options]

Commands:
list List available detectors
test Test a detector on specific bundles
run Run detector and output CSV results
filter Filter JSONL operations from stdin
info Show detailed detector information

Examples:
plcbundle detector list
plcbundle detector test nostr --bundle 42
plcbundle detector run all --bundles 1-100 > results.csv
plcbundle backfill | plcbundle detector filter all > filtered.jsonl
plcbundle detector info nostr
`)
}
69
+
70
+
// cmdDetectorFilter reads JSONL from stdin, filters OUT spam, outputs clean operations
71
+
func cmdDetectorFilter() {
72
+
if len(os.Args) < 4 {
73
+
fmt.Fprintf(os.Stderr, "Usage: plcbundle detector filter <detector1> [detector2...] [--confidence 0.9]\n")
74
+
fmt.Fprintf(os.Stderr, "\nFilters OUT operations that match detectors (outputs clean data)\n\n")
75
+
fmt.Fprintf(os.Stderr, "Examples:\n")
76
+
fmt.Fprintf(os.Stderr, " plcbundle backfill | plcbundle detector filter all > clean.jsonl\n")
77
+
fmt.Fprintf(os.Stderr, " plcbundle export --bundle 1 | plcbundle detector filter invalid_handle > clean.jsonl\n")
78
+
os.Exit(1)
79
+
}
80
+
81
+
// Manually separate detector names from flags
82
+
var detectorNames []string
83
+
var flagArgs []string
84
+
85
+
for i := 3; i < len(os.Args); i++ {
86
+
arg := os.Args[i]
87
+
if strings.HasPrefix(arg, "-") {
88
+
flagArgs = os.Args[i:]
89
+
break
90
+
}
91
+
detectorNames = append(detectorNames, arg)
92
+
}
93
+
94
+
if len(detectorNames) == 0 {
95
+
fmt.Fprintf(os.Stderr, "Error: at least one detector name required\n")
96
+
os.Exit(1)
97
+
}
98
+
99
+
// Parse flags
100
+
fs := flag.NewFlagSet("detector filter", flag.ExitOnError)
101
+
confidence := fs.Float64("confidence", 0.90, "minimum confidence")
102
+
fs.Parse(flagArgs)
103
+
104
+
// Setup registry
105
+
registry := detector.DefaultRegistry()
106
+
107
+
// Handle "all" keyword
108
+
if len(detectorNames) == 1 && detectorNames[0] == "all" {
109
+
detectorNames = registry.Names()
110
+
fmt.Fprintf(os.Stderr, "Using all detectors: %s\n", strings.Join(detectorNames, ", "))
111
+
}
112
+
113
+
// Get all detectors
114
+
detectors := make([]detector.Detector, 0, len(detectorNames))
115
+
for _, name := range detectorNames {
116
+
d, err := registry.Get(name)
117
+
if err != nil {
118
+
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
119
+
os.Exit(1)
120
+
}
121
+
detectors = append(detectors, d)
122
+
}
123
+
124
+
// Log to stderr
125
+
fmt.Fprintf(os.Stderr, "Filtering OUT spam with %d detector(s)\n", len(detectorNames))
126
+
if len(detectorNames) <= 5 {
127
+
fmt.Fprintf(os.Stderr, "Detectors: %s\n", strings.Join(detectorNames, ", "))
128
+
}
129
+
fmt.Fprintf(os.Stderr, "Min confidence: %.2f\n\n", *confidence)
130
+
131
+
ctx := context.Background()
132
+
scanner := bufio.NewScanner(os.Stdin)
133
+
134
+
// Set large buffer for long lines
135
+
buf := make([]byte, 0, 64*1024)
136
+
scanner.Buffer(buf, 1024*1024)
137
+
138
+
cleanCount := 0
139
+
filteredCount := 0
140
+
totalCount := 0
141
+
totalBytes := int64(0) // ← Add total bytes
142
+
filteredBytes := int64(0) // ← Add filtered bytes
143
+
144
+
// Read JSONL from stdin
145
+
for scanner.Scan() {
146
+
line := scanner.Bytes()
147
+
if len(line) == 0 {
148
+
continue
149
+
}
150
+
151
+
totalCount++
152
+
opSize := int64(len(line))
153
+
totalBytes += opSize // ← Track total
154
+
155
+
// Parse operation
156
+
var op plc.PLCOperation
157
+
if err := json.Unmarshal(line, &op); err != nil {
158
+
fmt.Fprintf(os.Stderr, "Warning: failed to parse line %d: %v\n", totalCount, err)
159
+
continue
160
+
}
161
+
162
+
// Run all detectors on this operation
163
+
isSpam := false
164
+
165
+
for _, det := range detectors {
166
+
match, err := det.Detect(ctx, op)
167
+
if err != nil {
168
+
continue
169
+
}
170
+
171
+
if match != nil && match.Confidence >= *confidence {
172
+
// Detected as spam - filter it out
173
+
isSpam = true
174
+
break
175
+
}
176
+
}
177
+
178
+
// Output only if NOT spam (clean operation)
179
+
if !isSpam {
180
+
cleanCount++
181
+
fmt.Println(string(line))
182
+
} else {
183
+
filteredCount++
184
+
filteredBytes += opSize // ← Track filtered bytes
185
+
}
186
+
187
+
// Progress to stderr
188
+
if totalCount%1000 == 0 {
189
+
fmt.Fprintf(os.Stderr, "Processed: %d | Clean: %d | Filtered: %d | Saved: %s\r",
190
+
totalCount, cleanCount, filteredCount, formatBytes(filteredBytes))
191
+
}
192
+
}
193
+
194
+
if err := scanner.Err(); err != nil {
195
+
fmt.Fprintf(os.Stderr, "\nError reading stdin: %v\n", err)
196
+
os.Exit(1)
197
+
}
198
+
199
+
// Final stats to stderr
200
+
fmt.Fprintf(os.Stderr, "\n\n")
201
+
fmt.Fprintf(os.Stderr, "✓ Filter complete\n")
202
+
fmt.Fprintf(os.Stderr, " Total operations: %d\n", totalCount)
203
+
fmt.Fprintf(os.Stderr, " Clean: %d (%.2f%%)\n", cleanCount, float64(cleanCount)/float64(totalCount)*100)
204
+
fmt.Fprintf(os.Stderr, " Filtered out: %d (%.2f%%)\n", filteredCount, float64(filteredCount)/float64(totalCount)*100)
205
+
fmt.Fprintf(os.Stderr, " Total size: %s\n", formatBytes(totalBytes))
206
+
fmt.Fprintf(os.Stderr, " Filtered size: %s (%.2f%%)\n", formatBytes(filteredBytes), float64(filteredBytes)/float64(totalBytes)*100)
207
+
fmt.Fprintf(os.Stderr, " Clean size: %s (%.2f%%)\n", formatBytes(totalBytes-filteredBytes), float64(totalBytes-filteredBytes)/float64(totalBytes)*100)
208
+
fmt.Fprintf(os.Stderr, " Detectors used: %d\n", len(detectorNames))
209
+
}
210
+
211
+
func cmdDetectorList() {
212
+
registry := detector.DefaultRegistry()
213
+
detectors := registry.List()
214
+
215
+
// Sort by name
216
+
sort.Slice(detectors, func(i, j int) bool {
217
+
return detectors[i].Name() < detectors[j].Name()
218
+
})
219
+
220
+
fmt.Printf("Available detectors:\n\n")
221
+
for _, d := range detectors {
222
+
fmt.Printf(" %-20s %s (v%s)\n", d.Name(), d.Description(), d.Version())
223
+
}
224
+
fmt.Printf("\nUse 'plcbundle detector info <name>' for details\n")
225
+
}
226
+
227
+
func cmdDetectorTest() {
228
+
// Extract detector name first
229
+
if len(os.Args) < 4 {
230
+
fmt.Fprintf(os.Stderr, "Usage: plcbundle detector test <detector-name> --bundle N\n")
231
+
os.Exit(1)
232
+
}
233
+
234
+
detectorName := os.Args[3]
235
+
236
+
// Parse flags from os.Args[4:]
237
+
fs := flag.NewFlagSet("detector test", flag.ExitOnError)
238
+
bundleNum := fs.Int("bundle", 0, "bundle number to test")
239
+
confidence := fs.Float64("confidence", 0.90, "minimum confidence threshold")
240
+
verbose := fs.Bool("v", false, "verbose output")
241
+
fs.Parse(os.Args[4:]) // ← Changed from os.Args[3:]
242
+
243
+
if *bundleNum == 0 {
244
+
fmt.Fprintf(os.Stderr, "Error: --bundle required\n")
245
+
os.Exit(1)
246
+
}
247
+
248
+
// Load bundle
249
+
mgr, _, err := getManager("")
250
+
if err != nil {
251
+
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
252
+
os.Exit(1)
253
+
}
254
+
defer mgr.Close()
255
+
256
+
ctx := context.Background()
257
+
bundle, err := mgr.LoadBundle(ctx, *bundleNum)
258
+
if err != nil {
259
+
fmt.Fprintf(os.Stderr, "Error loading bundle: %v\n", err)
260
+
os.Exit(1)
261
+
}
262
+
263
+
fmt.Printf("Testing detector '%s' on bundle %06d...\n", detectorName, *bundleNum)
264
+
fmt.Printf("Min confidence: %.2f\n\n", *confidence)
265
+
266
+
// Run detector
267
+
registry := detector.DefaultRegistry()
268
+
config := detector.DefaultConfig()
269
+
config.MinConfidence = *confidence
270
+
271
+
runner := detector.NewRunner(registry, config, &defaultLogger{})
272
+
results, err := runner.RunOnBundle(ctx, detectorName, bundle)
273
+
if err != nil {
274
+
fmt.Fprintf(os.Stderr, "Detection failed: %v\n", err)
275
+
os.Exit(1)
276
+
}
277
+
278
+
// Calculate stats
279
+
stats := detector.CalculateStats(results, len(bundle.Operations))
280
+
281
+
// Display results
282
+
fmt.Printf("Results:\n")
283
+
fmt.Printf(" Total operations: %d\n", stats.TotalOperations)
284
+
fmt.Printf(" Matches found: %d (%.2f%%)\n", stats.MatchedCount, stats.MatchRate*100)
285
+
fmt.Printf("\n")
286
+
287
+
if len(stats.ByReason) > 0 {
288
+
fmt.Printf("Breakdown by reason:\n")
289
+
for reason, count := range stats.ByReason {
290
+
pct := float64(count) / float64(stats.MatchedCount) * 100
291
+
fmt.Printf(" %-25s %d (%.1f%%)\n", reason, count, pct)
292
+
}
293
+
fmt.Printf("\n")
294
+
}
295
+
296
+
if len(stats.ByCategory) > 0 {
297
+
fmt.Printf("Breakdown by category:\n")
298
+
for category, count := range stats.ByCategory {
299
+
pct := float64(count) / float64(stats.MatchedCount) * 100
300
+
fmt.Printf(" %-25s %d (%.1f%%)\n", category, count, pct)
301
+
}
302
+
fmt.Printf("\n")
303
+
}
304
+
305
+
if len(stats.ByConfidence) > 0 {
306
+
fmt.Printf("Confidence distribution:\n")
307
+
for bucket, count := range stats.ByConfidence {
308
+
pct := float64(count) / float64(stats.MatchedCount) * 100
309
+
fmt.Printf(" %-25s %d (%.1f%%)\n", bucket, count, pct)
310
+
}
311
+
fmt.Printf("\n")
312
+
}
313
+
314
+
if *verbose && len(results) > 0 {
315
+
fmt.Printf("Sample matches (first 10):\n")
316
+
displayCount := 10
317
+
if len(results) < displayCount {
318
+
displayCount = len(results)
319
+
}
320
+
321
+
for i := 0; i < displayCount; i++ {
322
+
res := results[i]
323
+
fmt.Printf(" %d. Position %d: %s\n", i+1, res.Position, res.DID)
324
+
fmt.Printf(" Reason: %s (confidence: %.2f)\n", res.Match.Reason, res.Match.Confidence)
325
+
if res.Match.Note != "" {
326
+
fmt.Printf(" Note: %s\n", res.Match.Note)
327
+
}
328
+
}
329
+
330
+
if len(results) > displayCount {
331
+
fmt.Printf(" ... and %d more\n", len(results)-displayCount)
332
+
}
333
+
}
334
+
}
335
+
336
+
func cmdDetectorRun() {
337
+
if len(os.Args) < 4 {
338
+
fmt.Fprintf(os.Stderr, "Usage: plcbundle detector run <detector1> [detector2...] --bundles 1-100\n")
339
+
fmt.Fprintf(os.Stderr, "\nUse 'all' to run all available detectors\n")
340
+
os.Exit(1)
341
+
}
342
+
343
+
// Manually separate detector names from flags
344
+
var detectorNames []string
345
+
var flagArgs []string
346
+
347
+
for i := 3; i < len(os.Args); i++ {
348
+
arg := os.Args[i]
349
+
if strings.HasPrefix(arg, "-") {
350
+
// This and all remaining are flags
351
+
flagArgs = os.Args[i:]
352
+
break
353
+
}
354
+
// Detector name
355
+
detectorNames = append(detectorNames, arg)
356
+
}
357
+
358
+
if len(detectorNames) == 0 {
359
+
fmt.Fprintf(os.Stderr, "Error: at least one detector name required\n")
360
+
fmt.Fprintf(os.Stderr, "\nExamples:\n")
361
+
fmt.Fprintf(os.Stderr, " plcbundle detector run invalid_handle --bundles 1-100\n")
362
+
fmt.Fprintf(os.Stderr, " plcbundle detector run invalid_handle aka_spam --bundles 1-100\n")
363
+
fmt.Fprintf(os.Stderr, " plcbundle detector run all --bundles 1-100\n")
364
+
os.Exit(1)
365
+
}
366
+
367
+
// Parse flags
368
+
fs := flag.NewFlagSet("detector run", flag.ExitOnError)
369
+
bundleRange := fs.String("bundles", "", "bundle range (e.g., '1-100')")
370
+
confidence := fs.Float64("confidence", 0.90, "minimum confidence")
371
+
fs.Parse(flagArgs)
372
+
373
+
if *bundleRange == "" {
374
+
fmt.Fprintf(os.Stderr, "Error: --bundles required\n")
375
+
os.Exit(1)
376
+
}
377
+
378
+
// Parse bundle range
379
+
start, end, err := parseBundleRange(*bundleRange)
380
+
if err != nil {
381
+
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
382
+
os.Exit(1)
383
+
}
384
+
385
+
// Load manager
386
+
mgr, _, err := getManager("")
387
+
if err != nil {
388
+
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
389
+
os.Exit(1)
390
+
}
391
+
defer mgr.Close()
392
+
393
+
// Setup registry
394
+
registry := detector.DefaultRegistry()
395
+
config := detector.DefaultConfig()
396
+
config.MinConfidence = *confidence
397
+
398
+
// Handle "all" keyword - expand to all available detectors
399
+
if len(detectorNames) == 1 && detectorNames[0] == "all" {
400
+
detectorNames = registry.Names()
401
+
fmt.Fprintf(os.Stderr, "Using all available detectors: %s\n", strings.Join(detectorNames, ", "))
402
+
}
403
+
404
+
// Log to stderr
405
+
fmt.Fprintf(os.Stderr, "Running %d detector(s) on bundles %d-%d...\n", len(detectorNames), start, end)
406
+
if len(detectorNames) <= 5 {
407
+
fmt.Fprintf(os.Stderr, "Detectors: %s\n", strings.Join(detectorNames, ", "))
408
+
}
409
+
fmt.Fprintf(os.Stderr, "Min confidence: %.2f\n\n", *confidence)
410
+
411
+
// Get all detectors
412
+
detectors := make([]detector.Detector, 0, len(detectorNames))
413
+
for _, name := range detectorNames {
414
+
d, err := registry.Get(name)
415
+
if err != nil {
416
+
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
417
+
os.Exit(1)
418
+
}
419
+
detectors = append(detectors, d)
420
+
}
421
+
422
+
ctx := context.Background()
423
+
424
+
// Write CSV header to stdout
425
+
fmt.Println("bundle,position,cid,detectors,confidence,detected_at,size")
426
+
427
+
// Track statistics
428
+
totalOps := 0
429
+
matchCount := 0
430
+
totalBytes := int64(0)
431
+
matchedBytes := int64(0)
432
+
bundlesProcessed := 0
433
+
detectorMatchCounts := make(map[string]int)
434
+
435
+
totalBundles := end - start + 1
436
+
437
+
// Create progress bar with byte tracking enabled
438
+
fmt.Fprintf(os.Stderr, "Processing bundles:\n")
439
+
progress := NewProgressBar(totalBundles)
440
+
progress.showBytes = true // Enable byte tracking
441
+
442
+
// Process bundles and stream results
443
+
for bundleNum := start; bundleNum <= end; bundleNum++ {
444
+
bundle, err := mgr.LoadBundle(ctx, bundleNum)
445
+
if err != nil {
446
+
// Don't update progress on error, just log
447
+
progress.Finish()
448
+
fmt.Fprintf(os.Stderr, "\n⚠️ Warning: failed to load bundle %d: %v\n", bundleNum, err)
449
+
progress = NewProgressBar(totalBundles)
450
+
progress.showBytes = true
451
+
progress.SetWithBytes(bundleNum-start, totalBytes)
452
+
continue
453
+
}
454
+
455
+
bundlesProcessed++
456
+
totalOps += len(bundle.Operations)
457
+
458
+
// Process each operation with all detectors
459
+
for position, op := range bundle.Operations {
460
+
// Calculate operation size first
461
+
var opSize int
462
+
if len(op.RawJSON) > 0 {
463
+
opSize = len(op.RawJSON)
464
+
} else {
465
+
// Fallback: marshal to get size
466
+
data, _ := json.Marshal(op)
467
+
opSize = len(data)
468
+
}
469
+
totalBytes += int64(opSize)
470
+
471
+
// Collect all matches for this operation
472
+
var matchedDetectors []string
473
+
var maxConfidence float64
474
+
var detectedAt time.Time
475
+
476
+
// Run all detectors on this operation
477
+
for _, det := range detectors {
478
+
match, err := det.Detect(ctx, op)
479
+
if err != nil {
480
+
continue
481
+
}
482
+
483
+
// Skip if no match or confidence too low
484
+
if match == nil || match.Confidence < *confidence {
485
+
continue
486
+
}
487
+
488
+
// Collect detector name
489
+
matchedDetectors = append(matchedDetectors, det.Name())
490
+
detectorMatchCounts[det.Name()]++
491
+
492
+
// Track highest confidence
493
+
if match.Confidence > maxConfidence {
494
+
maxConfidence = match.Confidence
495
+
}
496
+
497
+
// Use current time for first match
498
+
if detectedAt.IsZero() {
499
+
detectedAt = time.Now()
500
+
}
501
+
}
502
+
503
+
// Output only if at least one detector matched
504
+
if len(matchedDetectors) > 0 {
505
+
matchCount++
506
+
matchedBytes += int64(opSize)
507
+
508
+
fmt.Printf("%d,%d,%s,%s,%.2f,%s,%d\n",
509
+
bundleNum,
510
+
position,
511
+
op.CID,
512
+
strings.Join(matchedDetectors, ";"),
513
+
maxConfidence,
514
+
detectedAt.Format("2006-01-02T15:04:05Z"),
515
+
opSize,
516
+
)
517
+
}
518
+
}
519
+
520
+
// Update progress with bytes
521
+
progress.SetWithBytes(bundleNum-start+1, totalBytes)
522
+
}
523
+
524
+
// Finish progress bar
525
+
progress.Finish()
526
+
527
+
// Final stats to stderr
528
+
fmt.Fprintf(os.Stderr, "\n")
529
+
fmt.Fprintf(os.Stderr, "✓ Detection complete\n")
530
+
fmt.Fprintf(os.Stderr, " Bundles processed: %d\n", bundlesProcessed)
531
+
fmt.Fprintf(os.Stderr, " Total operations: %d\n", totalOps)
532
+
fmt.Fprintf(os.Stderr, " Matches found: %d (%.2f%%)\n", matchCount, float64(matchCount)/float64(totalOps)*100)
533
+
fmt.Fprintf(os.Stderr, " Clean operations: %d (%.2f%%)\n", totalOps-matchCount, float64(totalOps-matchCount)/float64(totalOps)*100)
534
+
fmt.Fprintf(os.Stderr, "\n")
535
+
fmt.Fprintf(os.Stderr, " Total size: %s\n", formatBytes(totalBytes))
536
+
fmt.Fprintf(os.Stderr, " Matched size: %s (%.2f%%)\n", formatBytes(matchedBytes), float64(matchedBytes)/float64(totalBytes)*100)
537
+
fmt.Fprintf(os.Stderr, " Clean size: %s (%.2f%%)\n", formatBytes(totalBytes-matchedBytes), float64(totalBytes-matchedBytes)/float64(totalBytes)*100)
538
+
539
+
if matchedBytes > 0 {
540
+
fmt.Fprintf(os.Stderr, "\n")
541
+
fmt.Fprintf(os.Stderr, " 💾 Potential savings if filtered: %s (%.2f%% reduction)\n",
542
+
formatBytes(matchedBytes),
543
+
float64(matchedBytes)/float64(totalBytes)*100)
544
+
}
545
+
546
+
fmt.Fprintf(os.Stderr, "\n")
547
+
fmt.Fprintf(os.Stderr, " Detectors used: %d\n", len(detectorNames))
548
+
549
+
// Show breakdown by detector if multiple used
550
+
if len(detectorNames) > 1 {
551
+
fmt.Fprintf(os.Stderr, "\n")
552
+
fmt.Fprintf(os.Stderr, " Matches by detector:\n")
553
+
for _, name := range detectorNames {
554
+
count := detectorMatchCounts[name]
555
+
if count > 0 {
556
+
pct := float64(count) / float64(matchCount) * 100
557
+
fmt.Fprintf(os.Stderr, " %-20s %d (%.1f%%)\n", name, count, pct)
558
+
} else {
559
+
fmt.Fprintf(os.Stderr, " %-20s 0\n", name)
560
+
}
561
+
}
562
+
}
563
+
}
564
+
565
+
func cmdDetectorInfo() {
566
+
if len(os.Args) < 4 {
567
+
fmt.Fprintf(os.Stderr, "Usage: plcbundle detector info <name>\n")
568
+
os.Exit(1)
569
+
}
570
+
571
+
detectorName := os.Args[3]
572
+
573
+
registry := detector.DefaultRegistry()
574
+
d, err := registry.Get(detectorName)
575
+
if err != nil {
576
+
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
577
+
os.Exit(1)
578
+
}
579
+
580
+
fmt.Printf("Detector: %s\n", d.Name())
581
+
fmt.Printf("Version: %s\n", d.Version())
582
+
fmt.Printf("Description: %s\n", d.Description())
583
+
fmt.Printf("\n")
584
+
585
+
// Show example usage
586
+
fmt.Printf("Usage examples:\n")
587
+
fmt.Printf(" # Test on single bundle\n")
588
+
fmt.Printf(" plcbundle detector test %s --bundle 42\n\n", d.Name())
589
+
fmt.Printf(" # Run on range and save\n")
590
+
fmt.Printf(" plcbundle detector run %s --bundles 1-100 --output results.csv\n\n", d.Name())
591
+
fmt.Printf(" # Use with filter creation\n")
592
+
fmt.Printf(" plcbundle filter detect --detector %s --bundles 1-100\n", d.Name())
593
+
}
594
+
595
+
// Helper functions
596
+
597
+
// parseBundleRange parses a bundle selector of the form "N" or "start-end"
// (both ends inclusive) and returns the first and last bundle number.
//
// A single number N yields (N, N). Surrounding whitespace around each number
// is ignored. An error is returned when a number is missing or malformed
// (including trailing characters such as "42abc"), when the selector has more
// than one "-", or when start is greater than end.
func parseBundleRange(rangeStr string) (start, end int, err error) {
	// parseNum accepts only an optional "+" followed by decimal digits.
	// fmt.Sscanf("%d") on its own stops at the first non-digit and would
	// silently accept input like "42abc", so the characters are validated
	// first; Sscanf then does the conversion and reports overflow.
	parseNum := func(s string) (int, error) {
		s = strings.TrimSpace(s)
		digits := strings.TrimPrefix(s, "+")
		if digits == "" {
			return 0, fmt.Errorf("expected a number, got %q", s)
		}
		for _, r := range digits {
			if r < '0' || r > '9' {
				return 0, fmt.Errorf("expected a number, got %q", s)
			}
		}
		var n int
		if _, scanErr := fmt.Sscanf(digits, "%d", &n); scanErr != nil {
			return 0, scanErr
		}
		return n, nil
	}

	// Single bundle number.
	if !strings.Contains(rangeStr, "-") {
		num, numErr := parseNum(rangeStr)
		if numErr != nil {
			return 0, 0, fmt.Errorf("invalid bundle number: %w", numErr)
		}
		return num, num, nil
	}

	// Range, e.g. "1-100".
	parts := strings.Split(rangeStr, "-")
	if len(parts) != 2 {
		return 0, 0, fmt.Errorf("invalid range format (expected: N or start-end)")
	}

	if start, err = parseNum(parts[0]); err != nil {
		return 0, 0, fmt.Errorf("invalid start: %w", err)
	}
	if end, err = parseNum(parts[1]); err != nil {
		return 0, 0, fmt.Errorf("invalid end: %w", err)
	}

	if start > end {
		return 0, 0, fmt.Errorf("start must be <= end")
	}

	return start, end, nil
}
+97
-12
cmd/plcbundle/main.go
···
2
3
import (
4
"context"
0
5
"flag"
6
"fmt"
7
"net/http"
···
82
cmdServe()
83
case "compare":
84
cmdCompare()
0
0
85
case "version":
86
fmt.Printf("plcbundle version %s\n", version)
87
fmt.Printf(" commit: %s\n", gitCommit)
···
110
mempool Show mempool status and operations
111
serve Start HTTP server to serve bundle data
112
compare Compare local index with target index
0
113
version Show version
114
115
Security Model:
···
845
846
func cmdExport() {
847
fs := flag.NewFlagSet("export", flag.ExitOnError)
848
-
count := fs.Int("count", 1000, "number of operations to export")
0
0
849
after := fs.String("after", "", "timestamp to start after (RFC3339)")
850
fs.Parse(os.Args[2:])
851
0
0
0
0
0
0
0
0
0
0
0
0
0
0
852
mgr, _, err := getManager("")
853
if err != nil {
854
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
···
856
}
857
defer mgr.Close()
858
859
-
// Parse after time
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
860
var afterTime time.Time
861
if *after != "" {
862
afterTime, err = time.Parse(time.RFC3339, *after)
···
867
}
868
869
ctx := context.Background()
870
-
ops, err := mgr.ExportOperations(ctx, afterTime, *count)
871
-
if err != nil {
872
-
fmt.Fprintf(os.Stderr, "Export failed: %v\n", err)
873
-
os.Exit(1)
874
-
}
875
876
-
// Output as JSONL
877
-
for _, op := range ops {
878
-
if len(op.RawJSON) > 0 {
879
-
fmt.Println(string(op.RawJSON))
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
880
}
881
}
882
883
-
fmt.Fprintf(os.Stderr, "Exported %d operations\n", len(ops))
0
0
0
884
}
885
886
func cmdBackfill() {
···
2
3
import (
4
"context"
5
+
"encoding/json"
6
"flag"
7
"fmt"
8
"net/http"
···
83
cmdServe()
84
case "compare":
85
cmdCompare()
86
+
case "detector":
87
+
cmdDetector()
88
case "version":
89
fmt.Printf("plcbundle version %s\n", version)
90
fmt.Printf(" commit: %s\n", gitCommit)
···
113
mempool Show mempool status and operations
114
serve Start HTTP server to serve bundle data
115
compare Compare local index with target index
116
+
detector
117
version Show version
118
119
Security Model:
···
849
850
func cmdExport() {
851
fs := flag.NewFlagSet("export", flag.ExitOnError)
852
+
bundles := fs.String("bundles", "", "bundle number or range (e.g., '42' or '1-100')")
853
+
all := fs.Bool("all", false, "export all bundles")
854
+
count := fs.Int("count", 0, "limit number of operations (0 = all)")
855
after := fs.String("after", "", "timestamp to start after (RFC3339)")
856
fs.Parse(os.Args[2:])
857
858
+
// Validate flags
859
+
if !*all && *bundles == "" {
860
+
fmt.Fprintf(os.Stderr, "Usage: plcbundle export --bundles <number|range> [options]\n")
861
+
fmt.Fprintf(os.Stderr, " or: plcbundle export --all [options]\n")
862
+
fmt.Fprintf(os.Stderr, "\nExamples:\n")
863
+
fmt.Fprintf(os.Stderr, " plcbundle export --bundles 42\n")
864
+
fmt.Fprintf(os.Stderr, " plcbundle export --bundles 1-100\n")
865
+
fmt.Fprintf(os.Stderr, " plcbundle export --all\n")
866
+
fmt.Fprintf(os.Stderr, " plcbundle export --all --count 50000\n")
867
+
fmt.Fprintf(os.Stderr, " plcbundle export --bundles 42 | jq .\n")
868
+
os.Exit(1)
869
+
}
870
+
871
+
// Load manager
872
mgr, _, err := getManager("")
873
if err != nil {
874
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
···
876
}
877
defer mgr.Close()
878
879
+
// Determine bundle range
880
+
var start, end int
881
+
if *all {
882
+
// Export all bundles
883
+
index := mgr.GetIndex()
884
+
bundles := index.GetBundles()
885
+
if len(bundles) == 0 {
886
+
fmt.Fprintf(os.Stderr, "No bundles available\n")
887
+
os.Exit(1)
888
+
}
889
+
start = bundles[0].BundleNumber
890
+
end = bundles[len(bundles)-1].BundleNumber
891
+
892
+
fmt.Fprintf(os.Stderr, "Exporting all bundles (%d-%d)\n", start, end)
893
+
} else {
894
+
// Parse bundle range
895
+
start, end, err = parseBundleRange(*bundles)
896
+
if err != nil {
897
+
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
898
+
os.Exit(1)
899
+
}
900
+
fmt.Fprintf(os.Stderr, "Exporting bundles %d-%d\n", start, end)
901
+
}
902
+
903
+
// Log to stderr
904
+
if *count > 0 {
905
+
fmt.Fprintf(os.Stderr, "Limit: %d operations\n", *count)
906
+
}
907
+
if *after != "" {
908
+
fmt.Fprintf(os.Stderr, "After: %s\n", *after)
909
+
}
910
+
fmt.Fprintf(os.Stderr, "\n")
911
+
912
+
// Parse after time if provided
913
var afterTime time.Time
914
if *after != "" {
915
afterTime, err = time.Parse(time.RFC3339, *after)
···
920
}
921
922
ctx := context.Background()
923
+
exported := 0
0
0
0
0
924
925
+
// Export operations from bundles
926
+
for bundleNum := start; bundleNum <= end; bundleNum++ {
927
+
// Check if we've reached the limit
928
+
if *count > 0 && exported >= *count {
929
+
break
930
+
}
931
+
932
+
fmt.Fprintf(os.Stderr, "Processing bundle %d...\r", bundleNum)
933
+
934
+
bundle, err := mgr.LoadBundle(ctx, bundleNum)
935
+
if err != nil {
936
+
fmt.Fprintf(os.Stderr, "\nWarning: failed to load bundle %d: %v\n", bundleNum, err)
937
+
continue
938
+
}
939
+
940
+
// Output operations
941
+
for _, op := range bundle.Operations {
942
+
// Check after time filter
943
+
if !afterTime.IsZero() && op.CreatedAt.Before(afterTime) {
944
+
continue
945
+
}
946
+
947
+
// Check count limit
948
+
if *count > 0 && exported >= *count {
949
+
break
950
+
}
951
+
952
+
// Output operation as JSONL
953
+
if len(op.RawJSON) > 0 {
954
+
fmt.Println(string(op.RawJSON))
955
+
} else {
956
+
// Fallback to marshaling
957
+
data, _ := json.Marshal(op)
958
+
fmt.Println(string(data))
959
+
}
960
+
961
+
exported++
962
}
963
}
964
965
+
// Final stats to stderr
966
+
fmt.Fprintf(os.Stderr, "\n\n")
967
+
fmt.Fprintf(os.Stderr, "✓ Export complete\n")
968
+
fmt.Fprintf(os.Stderr, " Exported: %d operations\n", exported)
969
}
970
971
func cmdBackfill() {
+6
-5
cmd/plcbundle/progress.go
···
2
3
import (
4
"fmt"
0
5
"strings"
6
"sync"
7
"time"
···
80
pb.current = pb.total
81
pb.currentBytes = pb.totalBytes
82
pb.print()
83
-
fmt.Println() // New line after completion
84
}
85
86
// print renders the progress bar (must be called with lock held)
···
113
eta = time.Duration(float64(remaining)/speed) * time.Second
114
}
115
116
-
// Print progress bar
117
-
if pb.showBytes && pb.totalBytes > 0 {
118
// Calculate MB/s (using decimal units: 1 MB = 1,000,000 bytes)
119
mbProcessed := float64(pb.currentBytes) / (1000 * 1000)
120
mbPerSec := mbProcessed / elapsed.Seconds()
121
122
-
fmt.Printf("\r [%s] %6.2f%% | %d/%d bundles | %.1f/s | %.1f MB/s | ETA: %s ",
123
bar,
124
percent,
125
pb.current,
···
128
mbPerSec,
129
formatETA(eta))
130
} else {
131
-
fmt.Printf("\r [%s] %6.2f%% | %d/%d bundles | %.1f/s | ETA: %s ",
132
bar,
133
percent,
134
pb.current,
···
2
3
import (
4
"fmt"
5
+
"os"
6
"strings"
7
"sync"
8
"time"
···
81
pb.current = pb.total
82
pb.currentBytes = pb.totalBytes
83
pb.print()
84
+
fmt.Fprintf(os.Stderr, "\n") // ← FIXED: Use stderr
85
}
86
87
// print renders the progress bar (must be called with lock held)
···
114
eta = time.Duration(float64(remaining)/speed) * time.Second
115
}
116
117
+
// Show MB/s if bytes are being tracked (changed condition)
118
+
if pb.showBytes && pb.currentBytes > 0 {
119
// Calculate MB/s (using decimal units: 1 MB = 1,000,000 bytes)
120
mbProcessed := float64(pb.currentBytes) / (1000 * 1000)
121
mbPerSec := mbProcessed / elapsed.Seconds()
122
123
+
fmt.Fprintf(os.Stderr, "\r [%s] %6.2f%% | %d/%d bundles | %.1f/s | %.1f MB/s | ETA: %s ",
124
bar,
125
percent,
126
pb.current,
···
129
mbPerSec,
130
formatETA(eta))
131
} else {
132
+
fmt.Fprintf(os.Stderr, "\r [%s] %6.2f%% | %d/%d bundles | %.1f/s | ETA: %s ",
133
bar,
134
percent,
135
pb.current,
+467
detector/builtin.go
···
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
···
1
+
// detector/builtin.go
2
+
package detector
3
+
4
+
import (
5
+
"context"
6
+
"regexp"
7
+
"strings"
8
+
9
+
"tangled.org/atscan.net/plcbundle/plc"
10
+
)
11
+
12
+
// InvalidHandleDetector flags operations whose alsoKnownAs list contains an
// at:// handle that is not well formed.
type InvalidHandleDetector struct {
	// validHandlePattern matches a well-formed "at://" handle: dot-separated
	// labels made of lowercase letters, digits and hyphens, each starting
	// with a letter or digit, followed by a final label of lowercase letters
	// only.
	//
	// NOTE(review): because the final label only admits a-z, handles under
	// punycode TLDs (e.g. ".xn--p1ai") are reported as pattern mismatches —
	// confirm this is intended.
	validHandlePattern *regexp.Regexp
}

// NewInvalidHandleDetector returns a detector with its handle pattern
// compiled. Underscores are not part of the pattern, so handles containing
// them never match it.
func NewInvalidHandleDetector() *InvalidHandleDetector {
	compiled := regexp.MustCompile(`^at://[a-z0-9][a-z0-9-]*(\.[a-z0-9][a-z0-9-]*)*\.[a-z]+$`)
	return &InvalidHandleDetector{validHandlePattern: compiled}
}

// Name returns the registry name of the detector.
func (d *InvalidHandleDetector) Name() string {
	return "invalid_handle"
}

// Description returns a one-line summary of what the detector looks for.
func (d *InvalidHandleDetector) Description() string {
	return "Detects operations with invalid handle patterns (underscores, invalid chars)"
}

// Version returns the detector's version string.
func (d *InvalidHandleDetector) Version() string {
	return "1.0.0"
}
30
+
31
+
func (d *InvalidHandleDetector) Detect(ctx context.Context, op plc.PLCOperation) (*Match, error) {
32
+
if alsoKnownAs, ok := op.Operation["alsoKnownAs"].([]interface{}); ok {
33
+
for _, aka := range alsoKnownAs {
34
+
if str, ok := aka.(string); ok {
35
+
// Check if it's an at:// handle
36
+
if !strings.HasPrefix(str, "at://") {
37
+
continue
38
+
}
39
+
40
+
// Check for underscore (invalid in Bluesky handles)
41
+
if strings.Contains(str, "_") {
42
+
return &Match{
43
+
Reason: "underscore_in_handle",
44
+
Category: "invalid_handle",
45
+
Confidence: 0.99,
46
+
Note: "Handle contains underscore which is invalid in Bluesky",
47
+
Metadata: map[string]interface{}{
48
+
"invalid_handle": str,
49
+
"violation": "underscore_character",
50
+
},
51
+
}, nil
52
+
}
53
+
54
+
// Check if handle matches valid pattern
55
+
if !d.validHandlePattern.MatchString(str) {
56
+
return &Match{
57
+
Reason: "invalid_handle_pattern",
58
+
Category: "invalid_handle",
59
+
Confidence: 0.95,
60
+
Note: "Handle does not match valid Bluesky handle pattern",
61
+
Metadata: map[string]interface{}{
62
+
"invalid_handle": str,
63
+
"violation": "pattern_mismatch",
64
+
},
65
+
}, nil
66
+
}
67
+
}
68
+
}
69
+
}
70
+
71
+
return nil, nil
72
+
}
73
+
74
+
// AlsoKnownAsSpamDetector flags operations whose alsoKnownAs list is
// unusually long or contains very long strings that are not at:// URIs.
type AlsoKnownAsSpamDetector struct {
	// maxLegitimateEntries is the largest entry count not reported as excessive.
	maxLegitimateEntries int
	// minGarbageLength is the length a non-at:// entry must exceed to count
	// as garbage.
	minGarbageLength int
}

// NewAlsoKnownAsSpamDetector returns a detector using the built-in thresholds.
func NewAlsoKnownAsSpamDetector() *AlsoKnownAsSpamDetector {
	d := new(AlsoKnownAsSpamDetector)
	d.maxLegitimateEntries = 3 // normal operations have 1-3 entries
	d.minGarbageLength = 100   // garbage strings are very long
	return d
}

// Name returns the identifier of this detector.
func (d *AlsoKnownAsSpamDetector) Name() string {
	return "aka_spam"
}

// Description returns a human-readable summary of what the detector looks for.
func (d *AlsoKnownAsSpamDetector) Description() string {
	return "Detects spam through excessive or garbage alsoKnownAs entries"
}

// Version returns the detector version string.
func (d *AlsoKnownAsSpamDetector) Version() string {
	return "1.0.0"
}
92
+
93
+
func (d *AlsoKnownAsSpamDetector) Detect(ctx context.Context, op plc.PLCOperation) (*Match, error) {
94
+
if alsoKnownAs, ok := op.Operation["alsoKnownAs"].([]interface{}); ok {
95
+
entryCount := len(alsoKnownAs)
96
+
97
+
// Count different types of entries
98
+
atURICount := 0
99
+
garbageCount := 0
100
+
var garbageExamples []string
101
+
102
+
for _, aka := range alsoKnownAs {
103
+
if str, ok := aka.(string); ok {
104
+
if strings.HasPrefix(str, "at://") {
105
+
atURICount++
106
+
} else if len(str) > d.minGarbageLength {
107
+
garbageCount++
108
+
if len(garbageExamples) < 2 {
109
+
// Store first few for evidence
110
+
preview := str
111
+
if len(preview) > 50 {
112
+
preview = preview[:50] + "..."
113
+
}
114
+
garbageExamples = append(garbageExamples, preview)
115
+
}
116
+
}
117
+
}
118
+
}
119
+
120
+
// Detection: Excessive entries
121
+
if entryCount > d.maxLegitimateEntries {
122
+
confidence := 0.80
123
+
if garbageCount > 0 {
124
+
confidence = 0.95 // Higher confidence if garbage detected
125
+
}
126
+
127
+
return &Match{
128
+
Reason: "excessive_aka_entries",
129
+
Category: "spam",
130
+
Confidence: confidence,
131
+
Note: "Operation has excessive alsoKnownAs entries",
132
+
Metadata: map[string]interface{}{
133
+
"total_entries": entryCount,
134
+
"at_uri_count": atURICount,
135
+
"garbage_count": garbageCount,
136
+
"garbage_examples": garbageExamples,
137
+
},
138
+
}, nil
139
+
}
140
+
141
+
// Detection: Garbage entries present (even if count is low)
142
+
if garbageCount > 0 {
143
+
return &Match{
144
+
Reason: "garbage_aka_entries",
145
+
Category: "spam",
146
+
Confidence: 0.98,
147
+
Note: "Operation contains garbage/random strings in alsoKnownAs",
148
+
Metadata: map[string]interface{}{
149
+
"total_entries": entryCount,
150
+
"garbage_count": garbageCount,
151
+
"garbage_examples": garbageExamples,
152
+
},
153
+
}, nil
154
+
}
155
+
}
156
+
157
+
return nil, nil
158
+
}
159
+
160
+
// CompositeSpamDetector combines multiple signals for higher confidence
161
+
type CompositeSpamDetector struct {
162
+
invalidHandle *InvalidHandleDetector
163
+
akaSpam *AlsoKnownAsSpamDetector
164
+
}
165
+
166
+
func NewCompositeSpamDetector() *CompositeSpamDetector {
167
+
return &CompositeSpamDetector{
168
+
invalidHandle: NewInvalidHandleDetector(),
169
+
akaSpam: NewAlsoKnownAsSpamDetector(),
170
+
}
171
+
}
172
+
173
+
func (d *CompositeSpamDetector) Name() string { return "composite_spam" }
174
+
func (d *CompositeSpamDetector) Description() string {
175
+
return "Combines multiple spam signals for high-confidence detection"
176
+
}
177
+
func (d *CompositeSpamDetector) Version() string { return "1.0.0" }
178
+
179
+
func (d *CompositeSpamDetector) Detect(ctx context.Context, op plc.PLCOperation) (*Match, error) {
180
+
// Check both detectors
181
+
invalidHandleMatch, _ := d.invalidHandle.Detect(ctx, op)
182
+
akaSpamMatch, _ := d.akaSpam.Detect(ctx, op)
183
+
184
+
// If both match, very high confidence
185
+
if invalidHandleMatch != nil && akaSpamMatch != nil {
186
+
return &Match{
187
+
Reason: "multiple_spam_indicators",
188
+
Category: "spam",
189
+
Confidence: 0.99,
190
+
Note: "Operation has both invalid handle and excessive alsoKnownAs entries",
191
+
Metadata: map[string]interface{}{
192
+
"invalid_handle_reason": invalidHandleMatch.Reason,
193
+
"aka_spam_reason": akaSpamMatch.Reason,
194
+
"invalid_handle_data": invalidHandleMatch.Metadata,
195
+
"aka_spam_data": akaSpamMatch.Metadata,
196
+
},
197
+
}, nil
198
+
}
199
+
200
+
// Return whichever matched
201
+
if invalidHandleMatch != nil {
202
+
return invalidHandleMatch, nil
203
+
}
204
+
if akaSpamMatch != nil {
205
+
return akaSpamMatch, nil
206
+
}
207
+
208
+
return nil, nil
209
+
}
210
+
211
+
// SpamPDSDetector flags operations that use a listed spam PDS host or claim
// a handle under a listed spam domain.
type SpamPDSDetector struct {
	// spamEndpoints holds PDS host names that are reported on exact match.
	spamEndpoints map[string]bool
	// spamDomains holds domains that are reported on exact match or when a
	// claimed handle is a subdomain of one of them.
	spamDomains map[string]bool
}

// NewSpamPDSDetector returns a detector preloaded with the built-in lists.
func NewSpamPDSDetector() *SpamPDSDetector {
	// Extend these lists as more offenders are discovered.
	endpoints := map[string]bool{
		"pds.trump.com": true,
	}
	domains := map[string]bool{
		"trump.com":        true,
		"donald.trump.com": true,
	}
	return &SpamPDSDetector{spamEndpoints: endpoints, spamDomains: domains}
}

// Name returns the identifier of this detector.
func (d *SpamPDSDetector) Name() string {
	return "spam_pds"
}

// Description returns a human-readable summary of what the detector looks for.
func (d *SpamPDSDetector) Description() string {
	return "Detects operations using known spam PDS endpoints and fake domain claims"
}

// Version returns the detector version string.
func (d *SpamPDSDetector) Version() string {
	return "1.0.0"
}
236
+
237
+
func (d *SpamPDSDetector) Detect(ctx context.Context, op plc.PLCOperation) (*Match, error) {
238
+
// Check PDS endpoint
239
+
if services, ok := op.Operation["services"].(map[string]interface{}); ok {
240
+
if pds, ok := services["atproto_pds"].(map[string]interface{}); ok {
241
+
if endpoint, ok := pds["endpoint"].(string); ok {
242
+
host := extractHost(endpoint)
243
+
244
+
// Check if it's a known spam PDS
245
+
if d.spamEndpoints[host] {
246
+
return &Match{
247
+
Reason: "spam_pds_endpoint",
248
+
Category: "spam",
249
+
Confidence: 0.99,
250
+
Note: "Operation uses known spam PDS endpoint",
251
+
Metadata: map[string]interface{}{
252
+
"endpoint": endpoint,
253
+
"host": host,
254
+
},
255
+
}, nil
256
+
}
257
+
}
258
+
}
259
+
}
260
+
261
+
// Check for spam domain claims in alsoKnownAs
262
+
if alsoKnownAs, ok := op.Operation["alsoKnownAs"].([]interface{}); ok {
263
+
for _, aka := range alsoKnownAs {
264
+
if str, ok := aka.(string); ok {
265
+
if !strings.HasPrefix(str, "at://") {
266
+
continue
267
+
}
268
+
269
+
// Extract domain from at:// URI
270
+
domain := strings.TrimPrefix(str, "at://")
271
+
if idx := strings.Index(domain, "/"); idx > 0 {
272
+
domain = domain[:idx]
273
+
}
274
+
275
+
// Check if claiming spam domain
276
+
if d.spamDomains[domain] {
277
+
return &Match{
278
+
Reason: "fake_domain_claim",
279
+
Category: "impersonation",
280
+
Confidence: 0.99,
281
+
Note: "Operation claims known spam/fake domain",
282
+
Metadata: map[string]interface{}{
283
+
"claimed_domain": domain,
284
+
"handle": str,
285
+
},
286
+
}, nil
287
+
}
288
+
289
+
// Check for subdomain patterns (like jr.donald.trump.com)
290
+
for spamDomain := range d.spamDomains {
291
+
if strings.HasSuffix(domain, "."+spamDomain) || domain == spamDomain {
292
+
return &Match{
293
+
Reason: "fake_domain_claim",
294
+
Category: "impersonation",
295
+
Confidence: 0.99,
296
+
Note: "Operation claims domain related to known spam domain",
297
+
Metadata: map[string]interface{}{
298
+
"claimed_domain": domain,
299
+
"spam_domain": spamDomain,
300
+
},
301
+
}, nil
302
+
}
303
+
}
304
+
}
305
+
}
306
+
}
307
+
308
+
return nil, nil
309
+
}
310
+
311
+
// ServiceAbuseDetector flags operations whose services map or handles carry
// abnormally long values or digit-only service keys.
type ServiceAbuseDetector struct {
	maxServiceTypeLength int // service "type" strings longer than this are reported
	maxEndpointLength    int // service "endpoint" strings longer than this are reported
	maxHandleLength      int // at:// handles longer than this are reported
}

// NewServiceAbuseDetector returns a detector using the built-in length limits.
func NewServiceAbuseDetector() *ServiceAbuseDetector {
	d := new(ServiceAbuseDetector)
	d.maxServiceTypeLength = 100 // normal types are short (e.g., "AtprotoPersonalDataServer")
	d.maxEndpointLength = 200    // normal endpoints are reasonable URLs
	d.maxHandleLength = 100      // normal handles are short
	return d
}

// Name returns the identifier of this detector.
func (d *ServiceAbuseDetector) Name() string {
	return "service_abuse"
}

// Description returns a human-readable summary of what the detector looks for.
func (d *ServiceAbuseDetector) Description() string {
	return "Detects operations with abused service structures (random strings, numeric keys)"
}

// Version returns the detector version string.
func (d *ServiceAbuseDetector) Version() string {
	return "1.0.0"
}
331
+
332
+
func (d *ServiceAbuseDetector) Detect(ctx context.Context, op plc.PLCOperation) (*Match, error) {
333
+
if services, ok := op.Operation["services"].(map[string]interface{}); ok {
334
+
// Check for numeric service keys (spam uses "0", "1", "2" instead of proper names)
335
+
hasNumericKeys := false
336
+
numericKeyCount := 0
337
+
338
+
for key := range services {
339
+
// Check if key is a digit
340
+
if len(key) == 1 && key >= "0" && key <= "9" {
341
+
hasNumericKeys = true
342
+
numericKeyCount++
343
+
}
344
+
}
345
+
346
+
if hasNumericKeys && numericKeyCount > 1 {
347
+
return &Match{
348
+
Reason: "numeric_service_keys",
349
+
Category: "service_abuse",
350
+
Confidence: 0.98,
351
+
Note: "Services use numeric keys instead of proper names",
352
+
Metadata: map[string]interface{}{
353
+
"numeric_key_count": numericKeyCount,
354
+
},
355
+
}, nil
356
+
}
357
+
358
+
// Check each service for abuse patterns
359
+
for serviceName, serviceData := range services {
360
+
if serviceMap, ok := serviceData.(map[string]interface{}); ok {
361
+
// Check service type length
362
+
if serviceType, ok := serviceMap["type"].(string); ok {
363
+
if len(serviceType) > d.maxServiceTypeLength {
364
+
return &Match{
365
+
Reason: "excessive_service_type_length",
366
+
Category: "service_abuse",
367
+
Confidence: 0.99,
368
+
Note: "Service type field contains excessively long random string",
369
+
Metadata: map[string]interface{}{
370
+
"service_name": serviceName,
371
+
"type_length": len(serviceType),
372
+
"type_preview": serviceType[:50] + "...",
373
+
},
374
+
}, nil
375
+
}
376
+
}
377
+
378
+
// Check endpoint length
379
+
if endpoint, ok := serviceMap["endpoint"].(string); ok {
380
+
if len(endpoint) > d.maxEndpointLength {
381
+
return &Match{
382
+
Reason: "excessive_endpoint_length",
383
+
Category: "service_abuse",
384
+
Confidence: 0.99,
385
+
Note: "Service endpoint contains excessively long random string",
386
+
Metadata: map[string]interface{}{
387
+
"service_name": serviceName,
388
+
"endpoint_length": len(endpoint),
389
+
"endpoint_preview": endpoint[:min(100, len(endpoint))] + "...",
390
+
},
391
+
}, nil
392
+
}
393
+
}
394
+
}
395
+
}
396
+
}
397
+
398
+
// Check for excessively long handles in alsoKnownAs
399
+
if alsoKnownAs, ok := op.Operation["alsoKnownAs"].([]interface{}); ok {
400
+
for _, aka := range alsoKnownAs {
401
+
if str, ok := aka.(string); ok {
402
+
if strings.HasPrefix(str, "at://") {
403
+
handle := strings.TrimPrefix(str, "at://")
404
+
if len(handle) > d.maxHandleLength {
405
+
return &Match{
406
+
Reason: "excessive_handle_length",
407
+
Category: "service_abuse",
408
+
Confidence: 0.98,
409
+
Note: "Handle contains excessively long random string",
410
+
Metadata: map[string]interface{}{
411
+
"handle_length": len(handle),
412
+
"handle_preview": handle[:min(50, len(handle))] + "...",
413
+
},
414
+
}, nil
415
+
}
416
+
}
417
+
}
418
+
}
419
+
}
420
+
421
+
// Check for empty verificationMethods (common in this spam)
422
+
if vm, ok := op.Operation["verificationMethods"].(map[string]interface{}); ok {
423
+
if len(vm) == 0 {
424
+
// Empty verificationMethods alone isn't enough, but combined with other signals...
425
+
// Check if there are other suspicious signals
426
+
if services, ok := op.Operation["services"].(map[string]interface{}); ok {
427
+
if len(services) > 2 {
428
+
// Multiple services + empty verificationMethods = suspicious
429
+
return &Match{
430
+
Reason: "empty_verification_methods",
431
+
Category: "service_abuse",
432
+
Confidence: 0.85,
433
+
Note: "Empty verificationMethods with multiple services",
434
+
Metadata: map[string]interface{}{
435
+
"service_count": len(services),
436
+
},
437
+
}, nil
438
+
}
439
+
}
440
+
}
441
+
}
442
+
443
+
return nil, nil
444
+
}
445
+
446
+
// min returns the smaller of a and b.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
453
+
454
+
// Helper functions
455
+
456
+
// extractHost returns the lowercase host name of an endpoint URL.
//
// It strips surrounding whitespace, a leading "http://" or "https://"
// (case-insensitively), everything from the first "/", "?" or "#", any
// "user:pass@" userinfo, the ":port" suffix and a trailing dot. A bracketed
// IPv6 literal such as "[::1]:3000" is returned without brackets ("::1").
// The result is meant for exact-match lookups, so variants like
// "HTTPS://PDS.Example.com?x=1" normalize to the same key as
// "https://pds.example.com".
func extractHost(endpoint string) string {
	host := strings.ToLower(strings.TrimSpace(endpoint))
	host = strings.TrimPrefix(host, "http://")
	host = strings.TrimPrefix(host, "https://")

	// Drop path, query and fragment, whichever comes first.
	if idx := strings.IndexAny(host, "/?#"); idx >= 0 {
		host = host[:idx]
	}

	// Drop userinfo; the host follows the last "@" of the authority.
	if idx := strings.LastIndex(host, "@"); idx >= 0 {
		host = host[idx+1:]
	}

	// IPv6 literals contain colons, so they must not go through port stripping.
	if strings.HasPrefix(host, "[") {
		if end := strings.Index(host, "]"); end > 0 {
			return host[1:end]
		}
		return host
	}

	if idx := strings.Index(host, ":"); idx >= 0 {
		host = host[:idx]
	}

	// "example.com." and "example.com" name the same host.
	return strings.TrimSuffix(host, ".")
}
+63
detector/detector.go
···
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
···
1
+
// detector/detector.go
2
+
package detector
3
+
4
+
import (
5
+
"context"
6
+
"time"
7
+
8
+
"tangled.org/atscan.net/plcbundle/plc"
9
+
)
10
+
11
+
// Detector represents a spam detection algorithm that examines one PLC
// operation at a time.
type Detector interface {
	// Name returns the detector's unique identifier.
	Name() string

	// Description returns a human-readable description of the detector.
	Description() string

	// Detect analyzes an operation and returns a match result.
	// A nil *Match together with a nil error means the operation was not flagged.
	Detect(ctx context.Context, op plc.PLCOperation) (*Match, error)

	// Version returns the detector version.
	Version() string
}
25
+
26
+
// Match represents a positive spam detection produced by a Detector.
type Match struct {
	Reason     string                 // Short identifier (e.g., "nostr_crosspost")
	Category   string                 // Broader category (e.g., "cross_posting")
	Confidence float64                // 0.0 to 1.0
	Note       string                 // Optional human-readable explanation
	Metadata   map[string]interface{} // Additional context; keys vary by detector
}
34
+
35
+
// Result represents the outcome of running a detector on an operation.
type Result struct {
	BundleNumber int       // Number of the bundle the operation came from
	Position     int       // Index of the operation within the bundle
	DID          string    // DID of the inspected operation
	CID          string    // CID of the inspected operation
	Match        *Match    // nil if no match
	Error        error     // Error returned by the detector, if any
	DetectorName string    // Name of the detector that produced this result
	DetectedAt   time.Time // When the result was created
}
46
+
47
+
// Config holds the settings a Runner uses when executing detectors.
type Config struct {
	// MinConfidence is the lowest Match.Confidence that is kept in results.
	MinConfidence float64
	// Timeout bounds each individual Detect call.
	Timeout time.Duration
	// Parallel selects the worker-pool execution path over the sequential one.
	Parallel bool
	// Workers is the number of goroutines used when Parallel is set.
	Workers int
}

// DefaultConfig returns sensible defaults: a 0.90 confidence floor, a
// five-second per-operation timeout and four parallel workers.
func DefaultConfig() *Config {
	cfg := new(Config)
	cfg.MinConfidence = 0.90
	cfg.Timeout = 5 * time.Second
	cfg.Parallel = true
	cfg.Workers = 4
	return cfg
}
+87
detector/registry.go
···
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
···
1
+
// detector/registry.go
2
+
package detector
3
+
4
+
import (
5
+
"fmt"
6
+
"sync"
7
+
)
8
+
9
+
// Registry manages available detectors
10
+
type Registry struct {
11
+
detectors map[string]Detector
12
+
mu sync.RWMutex
13
+
}
14
+
15
+
// NewRegistry creates a new detector registry
16
+
func NewRegistry() *Registry {
17
+
return &Registry{
18
+
detectors: make(map[string]Detector),
19
+
}
20
+
}
21
+
22
+
// Register adds a detector to the registry
23
+
func (r *Registry) Register(d Detector) error {
24
+
r.mu.Lock()
25
+
defer r.mu.Unlock()
26
+
27
+
name := d.Name()
28
+
if _, exists := r.detectors[name]; exists {
29
+
return fmt.Errorf("detector %q already registered", name)
30
+
}
31
+
32
+
r.detectors[name] = d
33
+
return nil
34
+
}
35
+
36
+
// Get retrieves a detector by name
37
+
func (r *Registry) Get(name string) (Detector, error) {
38
+
r.mu.RLock()
39
+
defer r.mu.RUnlock()
40
+
41
+
d, ok := r.detectors[name]
42
+
if !ok {
43
+
return nil, fmt.Errorf("detector %q not found", name)
44
+
}
45
+
46
+
return d, nil
47
+
}
48
+
49
+
// List returns all registered detectors
50
+
func (r *Registry) List() []Detector {
51
+
r.mu.RLock()
52
+
defer r.mu.RUnlock()
53
+
54
+
detectors := make([]Detector, 0, len(r.detectors))
55
+
for _, d := range r.detectors {
56
+
detectors = append(detectors, d)
57
+
}
58
+
59
+
return detectors
60
+
}
61
+
62
+
// Names returns all detector names
63
+
func (r *Registry) Names() []string {
64
+
r.mu.RLock()
65
+
defer r.mu.RUnlock()
66
+
67
+
names := make([]string, 0, len(r.detectors))
68
+
for name := range r.detectors {
69
+
names = append(names, name)
70
+
}
71
+
72
+
return names
73
+
}
74
+
75
+
// DefaultRegistry returns a registry with built-in detectors
76
+
func DefaultRegistry() *Registry {
77
+
r := NewRegistry()
78
+
79
+
// Register real spam detectors
80
+
r.Register(NewInvalidHandleDetector())
81
+
r.Register(NewAlsoKnownAsSpamDetector())
82
+
r.Register(NewCompositeSpamDetector())
83
+
r.Register(NewSpamPDSDetector())
84
+
r.Register(NewServiceAbuseDetector())
85
+
86
+
return r
87
+
}
+216
detector/runner.go
···
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
···
1
+
// detector/runner.go
2
+
package detector
3
+
4
+
import (
5
+
"context"
6
+
"fmt"
7
+
"sync"
8
+
"time"
9
+
10
+
"tangled.org/atscan.net/plcbundle/bundle"
11
+
"tangled.org/atscan.net/plcbundle/plc"
12
+
)
13
+
14
+
// Runner executes detectors against operations
15
+
type Runner struct {
16
+
registry *Registry
17
+
config *Config
18
+
logger Logger
19
+
}
20
+
21
+
type Logger interface {
22
+
Printf(format string, v ...interface{})
23
+
}
24
+
25
+
// NewRunner creates a new detector runner
26
+
func NewRunner(registry *Registry, config *Config, logger Logger) *Runner {
27
+
if config == nil {
28
+
config = DefaultConfig()
29
+
}
30
+
return &Runner{
31
+
registry: registry,
32
+
config: config,
33
+
logger: logger,
34
+
}
35
+
}
36
+
37
+
// RunOnBundle runs detector(s) on all operations in a bundle
38
+
func (r *Runner) RunOnBundle(ctx context.Context, detectorName string, b *bundle.Bundle) ([]*Result, error) {
39
+
detector, err := r.registry.Get(detectorName)
40
+
if err != nil {
41
+
return nil, err
42
+
}
43
+
44
+
var results []*Result
45
+
46
+
if r.config.Parallel {
47
+
results = r.runParallel(ctx, detector, b)
48
+
} else {
49
+
results = r.runSequential(ctx, detector, b)
50
+
}
51
+
52
+
// Filter by minimum confidence
53
+
filtered := make([]*Result, 0)
54
+
for _, res := range results {
55
+
if res.Match != nil && res.Match.Confidence >= r.config.MinConfidence {
56
+
filtered = append(filtered, res)
57
+
}
58
+
}
59
+
60
+
return filtered, nil
61
+
}
62
+
63
+
func (r *Runner) runSequential(ctx context.Context, detector Detector, b *bundle.Bundle) []*Result {
64
+
results := make([]*Result, 0)
65
+
66
+
for pos, op := range b.Operations {
67
+
select {
68
+
case <-ctx.Done():
69
+
return results
70
+
default:
71
+
}
72
+
73
+
result := r.detectOne(ctx, detector, b.BundleNumber, pos, op)
74
+
if result.Match != nil || result.Error != nil {
75
+
results = append(results, result)
76
+
}
77
+
}
78
+
79
+
return results
80
+
}
81
+
82
+
func (r *Runner) runParallel(ctx context.Context, detector Detector, b *bundle.Bundle) []*Result {
83
+
type job struct {
84
+
pos int
85
+
op plc.PLCOperation
86
+
}
87
+
88
+
jobs := make(chan job, len(b.Operations))
89
+
resultsChan := make(chan *Result, len(b.Operations))
90
+
91
+
// Start workers
92
+
var wg sync.WaitGroup
93
+
for i := 0; i < r.config.Workers; i++ {
94
+
wg.Add(1)
95
+
go func() {
96
+
defer wg.Done()
97
+
for j := range jobs {
98
+
select {
99
+
case <-ctx.Done():
100
+
return
101
+
default:
102
+
}
103
+
104
+
result := r.detectOne(ctx, detector, b.BundleNumber, j.pos, j.op)
105
+
if result.Match != nil || result.Error != nil {
106
+
resultsChan <- result
107
+
}
108
+
}
109
+
}()
110
+
}
111
+
112
+
// Send jobs
113
+
for pos, op := range b.Operations {
114
+
jobs <- job{pos: pos, op: op}
115
+
}
116
+
close(jobs)
117
+
118
+
// Wait for completion
119
+
go func() {
120
+
wg.Wait()
121
+
close(resultsChan)
122
+
}()
123
+
124
+
// Collect results
125
+
results := make([]*Result, 0)
126
+
for result := range resultsChan {
127
+
results = append(results, result)
128
+
}
129
+
130
+
return results
131
+
}
132
+
133
+
func (r *Runner) detectOne(ctx context.Context, detector Detector, bundleNum, pos int, op plc.PLCOperation) *Result {
134
+
// Create timeout context
135
+
detectCtx, cancel := context.WithTimeout(ctx, r.config.Timeout)
136
+
defer cancel()
137
+
138
+
result := &Result{
139
+
BundleNumber: bundleNum,
140
+
Position: pos,
141
+
DID: op.DID,
142
+
CID: op.CID, // ← Add this
143
+
DetectorName: detector.Name(),
144
+
DetectedAt: time.Now(),
145
+
}
146
+
147
+
match, err := detector.Detect(detectCtx, op)
148
+
result.Match = match
149
+
result.Error = err
150
+
151
+
return result
152
+
}
153
+
154
+
// RunMultipleDetectors runs multiple detectors on a bundle
155
+
func (r *Runner) RunMultipleDetectors(ctx context.Context, detectorNames []string, b *bundle.Bundle) (map[string][]*Result, error) {
156
+
allResults := make(map[string][]*Result)
157
+
158
+
for _, name := range detectorNames {
159
+
results, err := r.RunOnBundle(ctx, name, b)
160
+
if err != nil {
161
+
return nil, fmt.Errorf("detector %s failed: %w", name, err)
162
+
}
163
+
allResults[name] = results
164
+
}
165
+
166
+
return allResults, nil
167
+
}
168
+
169
+
// Stats represents detection statistics
170
+
type Stats struct {
171
+
TotalOperations int
172
+
MatchedCount int
173
+
MatchRate float64
174
+
ByReason map[string]int
175
+
ByCategory map[string]int
176
+
ByConfidence map[string]int // 0.9-1.0, 0.8-0.9, etc.
177
+
}
178
+
179
+
// CalculateStats computes statistics from results
180
+
func CalculateStats(results []*Result, totalOps int) *Stats {
181
+
stats := &Stats{
182
+
TotalOperations: totalOps,
183
+
MatchedCount: len(results),
184
+
ByReason: make(map[string]int),
185
+
ByCategory: make(map[string]int),
186
+
ByConfidence: make(map[string]int),
187
+
}
188
+
189
+
if totalOps > 0 {
190
+
stats.MatchRate = float64(len(results)) / float64(totalOps)
191
+
}
192
+
193
+
for _, res := range results {
194
+
if res.Match == nil {
195
+
continue
196
+
}
197
+
198
+
stats.ByReason[res.Match.Reason]++
199
+
stats.ByCategory[res.Match.Category]++
200
+
201
+
// Confidence buckets
202
+
conf := res.Match.Confidence
203
+
switch {
204
+
case conf >= 0.95:
205
+
stats.ByConfidence["0.95-1.00"]++
206
+
case conf >= 0.90:
207
+
stats.ByConfidence["0.90-0.95"]++
208
+
case conf >= 0.85:
209
+
stats.ByConfidence["0.85-0.90"]++
210
+
default:
211
+
stats.ByConfidence["0.00-0.85"]++
212
+
}
213
+
}
214
+
215
+
return stats
216
+
}