tangled
alpha
login
or
join now
angrydutchman.peedee.es
/
plcbundle
forked from
atscan.net/plcbundle
0
fork
atom
A Transparent and Verifiable Way to Sync the AT Protocol's PLC Directory
0
fork
atom
overview
issues
pulls
pipelines
detectors experiments
tree.fail
4 months ago
40336cb2
d5d20592
+1565
-17
7 changed files
expand all
collapse all
unified
split
cmd
plcbundle
detector.go
main.go
progress.go
detector
builtin.go
detector.go
registry.go
runner.go
+629
cmd/plcbundle/detector.go
···
1
1
+
// cmd/plcbundle/detector.go
2
2
+
package main
3
3
+
4
4
+
import (
5
5
+
"bufio"
6
6
+
"context"
7
7
+
"encoding/json"
8
8
+
"flag"
9
9
+
"fmt"
10
10
+
"os"
11
11
+
"sort"
12
12
+
"strings"
13
13
+
"time"
14
14
+
15
15
+
"tangled.org/atscan.net/plcbundle/detector"
16
16
+
"tangled.org/atscan.net/plcbundle/plc"
17
17
+
)
18
18
+
19
19
+
// defaultLogger is a minimal logger that writes every message to stderr.
type defaultLogger struct{}

// Printf formats the message like fmt.Printf, appends a newline, and writes
// the result to stderr in a single write.
func (d *defaultLogger) Printf(format string, v ...interface{}) {
	message := fmt.Sprintf(format+"\n", v...)
	_, _ = os.Stderr.WriteString(message)
}
24
24
+
25
25
+
func cmdDetector() {
26
26
+
if len(os.Args) < 3 {
27
27
+
printDetectorUsage()
28
28
+
os.Exit(1)
29
29
+
}
30
30
+
31
31
+
subcommand := os.Args[2]
32
32
+
33
33
+
switch subcommand {
34
34
+
case "list":
35
35
+
cmdDetectorList()
36
36
+
case "test":
37
37
+
cmdDetectorTest()
38
38
+
case "run":
39
39
+
cmdDetectorRun()
40
40
+
case "filter": // ← Add this
41
41
+
cmdDetectorFilter()
42
42
+
case "info":
43
43
+
cmdDetectorInfo()
44
44
+
default:
45
45
+
fmt.Fprintf(os.Stderr, "Unknown detector subcommand: %s\n", subcommand)
46
46
+
printDetectorUsage()
47
47
+
os.Exit(1)
48
48
+
}
49
49
+
}
50
50
+
51
51
+
// printDetectorUsage writes the help text for the detector command group to
// stdout.
func printDetectorUsage() {
	const usageText = `Usage: plcbundle detector <command> [options]

Commands:
list List available detectors
test Test a detector on specific bundles
run Run detector and output CSV results
filter Filter JSONL operations from stdin
info Show detailed detector information

Examples:
plcbundle detector list
plcbundle detector test nostr --bundle 42
plcbundle detector run all --bundles 1-100 > results.csv
plcbundle backfill | plcbundle detector filter all > filtered.jsonl
plcbundle detector info nostr
`
	// The text contains no formatting verbs, so it is emitted verbatim.
	fmt.Print(usageText)
}
69
69
+
70
70
+
// cmdDetectorFilter reads JSONL from stdin, filters OUT spam, outputs clean operations
71
71
+
func cmdDetectorFilter() {
72
72
+
if len(os.Args) < 4 {
73
73
+
fmt.Fprintf(os.Stderr, "Usage: plcbundle detector filter <detector1> [detector2...] [--confidence 0.9]\n")
74
74
+
fmt.Fprintf(os.Stderr, "\nFilters OUT operations that match detectors (outputs clean data)\n\n")
75
75
+
fmt.Fprintf(os.Stderr, "Examples:\n")
76
76
+
fmt.Fprintf(os.Stderr, " plcbundle backfill | plcbundle detector filter all > clean.jsonl\n")
77
77
+
fmt.Fprintf(os.Stderr, " plcbundle export --bundle 1 | plcbundle detector filter invalid_handle > clean.jsonl\n")
78
78
+
os.Exit(1)
79
79
+
}
80
80
+
81
81
+
// Manually separate detector names from flags
82
82
+
var detectorNames []string
83
83
+
var flagArgs []string
84
84
+
85
85
+
for i := 3; i < len(os.Args); i++ {
86
86
+
arg := os.Args[i]
87
87
+
if strings.HasPrefix(arg, "-") {
88
88
+
flagArgs = os.Args[i:]
89
89
+
break
90
90
+
}
91
91
+
detectorNames = append(detectorNames, arg)
92
92
+
}
93
93
+
94
94
+
if len(detectorNames) == 0 {
95
95
+
fmt.Fprintf(os.Stderr, "Error: at least one detector name required\n")
96
96
+
os.Exit(1)
97
97
+
}
98
98
+
99
99
+
// Parse flags
100
100
+
fs := flag.NewFlagSet("detector filter", flag.ExitOnError)
101
101
+
confidence := fs.Float64("confidence", 0.90, "minimum confidence")
102
102
+
fs.Parse(flagArgs)
103
103
+
104
104
+
// Setup registry
105
105
+
registry := detector.DefaultRegistry()
106
106
+
107
107
+
// Handle "all" keyword
108
108
+
if len(detectorNames) == 1 && detectorNames[0] == "all" {
109
109
+
detectorNames = registry.Names()
110
110
+
fmt.Fprintf(os.Stderr, "Using all detectors: %s\n", strings.Join(detectorNames, ", "))
111
111
+
}
112
112
+
113
113
+
// Get all detectors
114
114
+
detectors := make([]detector.Detector, 0, len(detectorNames))
115
115
+
for _, name := range detectorNames {
116
116
+
d, err := registry.Get(name)
117
117
+
if err != nil {
118
118
+
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
119
119
+
os.Exit(1)
120
120
+
}
121
121
+
detectors = append(detectors, d)
122
122
+
}
123
123
+
124
124
+
// Log to stderr
125
125
+
fmt.Fprintf(os.Stderr, "Filtering OUT spam with %d detector(s)\n", len(detectorNames))
126
126
+
if len(detectorNames) <= 5 {
127
127
+
fmt.Fprintf(os.Stderr, "Detectors: %s\n", strings.Join(detectorNames, ", "))
128
128
+
}
129
129
+
fmt.Fprintf(os.Stderr, "Min confidence: %.2f\n\n", *confidence)
130
130
+
131
131
+
ctx := context.Background()
132
132
+
scanner := bufio.NewScanner(os.Stdin)
133
133
+
134
134
+
// Set large buffer for long lines
135
135
+
buf := make([]byte, 0, 64*1024)
136
136
+
scanner.Buffer(buf, 1024*1024)
137
137
+
138
138
+
cleanCount := 0
139
139
+
filteredCount := 0
140
140
+
totalCount := 0
141
141
+
totalBytes := int64(0) // ← Add total bytes
142
142
+
filteredBytes := int64(0) // ← Add filtered bytes
143
143
+
144
144
+
// Read JSONL from stdin
145
145
+
for scanner.Scan() {
146
146
+
line := scanner.Bytes()
147
147
+
if len(line) == 0 {
148
148
+
continue
149
149
+
}
150
150
+
151
151
+
totalCount++
152
152
+
opSize := int64(len(line))
153
153
+
totalBytes += opSize // ← Track total
154
154
+
155
155
+
// Parse operation
156
156
+
var op plc.PLCOperation
157
157
+
if err := json.Unmarshal(line, &op); err != nil {
158
158
+
fmt.Fprintf(os.Stderr, "Warning: failed to parse line %d: %v\n", totalCount, err)
159
159
+
continue
160
160
+
}
161
161
+
162
162
+
// Run all detectors on this operation
163
163
+
isSpam := false
164
164
+
165
165
+
for _, det := range detectors {
166
166
+
match, err := det.Detect(ctx, op)
167
167
+
if err != nil {
168
168
+
continue
169
169
+
}
170
170
+
171
171
+
if match != nil && match.Confidence >= *confidence {
172
172
+
// Detected as spam - filter it out
173
173
+
isSpam = true
174
174
+
break
175
175
+
}
176
176
+
}
177
177
+
178
178
+
// Output only if NOT spam (clean operation)
179
179
+
if !isSpam {
180
180
+
cleanCount++
181
181
+
fmt.Println(string(line))
182
182
+
} else {
183
183
+
filteredCount++
184
184
+
filteredBytes += opSize // ← Track filtered bytes
185
185
+
}
186
186
+
187
187
+
// Progress to stderr
188
188
+
if totalCount%1000 == 0 {
189
189
+
fmt.Fprintf(os.Stderr, "Processed: %d | Clean: %d | Filtered: %d | Saved: %s\r",
190
190
+
totalCount, cleanCount, filteredCount, formatBytes(filteredBytes))
191
191
+
}
192
192
+
}
193
193
+
194
194
+
if err := scanner.Err(); err != nil {
195
195
+
fmt.Fprintf(os.Stderr, "\nError reading stdin: %v\n", err)
196
196
+
os.Exit(1)
197
197
+
}
198
198
+
199
199
+
// Final stats to stderr
200
200
+
fmt.Fprintf(os.Stderr, "\n\n")
201
201
+
fmt.Fprintf(os.Stderr, "✓ Filter complete\n")
202
202
+
fmt.Fprintf(os.Stderr, " Total operations: %d\n", totalCount)
203
203
+
fmt.Fprintf(os.Stderr, " Clean: %d (%.2f%%)\n", cleanCount, float64(cleanCount)/float64(totalCount)*100)
204
204
+
fmt.Fprintf(os.Stderr, " Filtered out: %d (%.2f%%)\n", filteredCount, float64(filteredCount)/float64(totalCount)*100)
205
205
+
fmt.Fprintf(os.Stderr, " Total size: %s\n", formatBytes(totalBytes))
206
206
+
fmt.Fprintf(os.Stderr, " Filtered size: %s (%.2f%%)\n", formatBytes(filteredBytes), float64(filteredBytes)/float64(totalBytes)*100)
207
207
+
fmt.Fprintf(os.Stderr, " Clean size: %s (%.2f%%)\n", formatBytes(totalBytes-filteredBytes), float64(totalBytes-filteredBytes)/float64(totalBytes)*100)
208
208
+
fmt.Fprintf(os.Stderr, " Detectors used: %d\n", len(detectorNames))
209
209
+
}
210
210
+
211
211
+
func cmdDetectorList() {
212
212
+
registry := detector.DefaultRegistry()
213
213
+
detectors := registry.List()
214
214
+
215
215
+
// Sort by name
216
216
+
sort.Slice(detectors, func(i, j int) bool {
217
217
+
return detectors[i].Name() < detectors[j].Name()
218
218
+
})
219
219
+
220
220
+
fmt.Printf("Available detectors:\n\n")
221
221
+
for _, d := range detectors {
222
222
+
fmt.Printf(" %-20s %s (v%s)\n", d.Name(), d.Description(), d.Version())
223
223
+
}
224
224
+
fmt.Printf("\nUse 'plcbundle detector info <name>' for details\n")
225
225
+
}
226
226
+
227
227
+
func cmdDetectorTest() {
228
228
+
// Extract detector name first
229
229
+
if len(os.Args) < 4 {
230
230
+
fmt.Fprintf(os.Stderr, "Usage: plcbundle detector test <detector-name> --bundle N\n")
231
231
+
os.Exit(1)
232
232
+
}
233
233
+
234
234
+
detectorName := os.Args[3]
235
235
+
236
236
+
// Parse flags from os.Args[4:]
237
237
+
fs := flag.NewFlagSet("detector test", flag.ExitOnError)
238
238
+
bundleNum := fs.Int("bundle", 0, "bundle number to test")
239
239
+
confidence := fs.Float64("confidence", 0.90, "minimum confidence threshold")
240
240
+
verbose := fs.Bool("v", false, "verbose output")
241
241
+
fs.Parse(os.Args[4:]) // ← Changed from os.Args[3:]
242
242
+
243
243
+
if *bundleNum == 0 {
244
244
+
fmt.Fprintf(os.Stderr, "Error: --bundle required\n")
245
245
+
os.Exit(1)
246
246
+
}
247
247
+
248
248
+
// Load bundle
249
249
+
mgr, _, err := getManager("")
250
250
+
if err != nil {
251
251
+
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
252
252
+
os.Exit(1)
253
253
+
}
254
254
+
defer mgr.Close()
255
255
+
256
256
+
ctx := context.Background()
257
257
+
bundle, err := mgr.LoadBundle(ctx, *bundleNum)
258
258
+
if err != nil {
259
259
+
fmt.Fprintf(os.Stderr, "Error loading bundle: %v\n", err)
260
260
+
os.Exit(1)
261
261
+
}
262
262
+
263
263
+
fmt.Printf("Testing detector '%s' on bundle %06d...\n", detectorName, *bundleNum)
264
264
+
fmt.Printf("Min confidence: %.2f\n\n", *confidence)
265
265
+
266
266
+
// Run detector
267
267
+
registry := detector.DefaultRegistry()
268
268
+
config := detector.DefaultConfig()
269
269
+
config.MinConfidence = *confidence
270
270
+
271
271
+
runner := detector.NewRunner(registry, config, &defaultLogger{})
272
272
+
results, err := runner.RunOnBundle(ctx, detectorName, bundle)
273
273
+
if err != nil {
274
274
+
fmt.Fprintf(os.Stderr, "Detection failed: %v\n", err)
275
275
+
os.Exit(1)
276
276
+
}
277
277
+
278
278
+
// Calculate stats
279
279
+
stats := detector.CalculateStats(results, len(bundle.Operations))
280
280
+
281
281
+
// Display results
282
282
+
fmt.Printf("Results:\n")
283
283
+
fmt.Printf(" Total operations: %d\n", stats.TotalOperations)
284
284
+
fmt.Printf(" Matches found: %d (%.2f%%)\n", stats.MatchedCount, stats.MatchRate*100)
285
285
+
fmt.Printf("\n")
286
286
+
287
287
+
if len(stats.ByReason) > 0 {
288
288
+
fmt.Printf("Breakdown by reason:\n")
289
289
+
for reason, count := range stats.ByReason {
290
290
+
pct := float64(count) / float64(stats.MatchedCount) * 100
291
291
+
fmt.Printf(" %-25s %d (%.1f%%)\n", reason, count, pct)
292
292
+
}
293
293
+
fmt.Printf("\n")
294
294
+
}
295
295
+
296
296
+
if len(stats.ByCategory) > 0 {
297
297
+
fmt.Printf("Breakdown by category:\n")
298
298
+
for category, count := range stats.ByCategory {
299
299
+
pct := float64(count) / float64(stats.MatchedCount) * 100
300
300
+
fmt.Printf(" %-25s %d (%.1f%%)\n", category, count, pct)
301
301
+
}
302
302
+
fmt.Printf("\n")
303
303
+
}
304
304
+
305
305
+
if len(stats.ByConfidence) > 0 {
306
306
+
fmt.Printf("Confidence distribution:\n")
307
307
+
for bucket, count := range stats.ByConfidence {
308
308
+
pct := float64(count) / float64(stats.MatchedCount) * 100
309
309
+
fmt.Printf(" %-25s %d (%.1f%%)\n", bucket, count, pct)
310
310
+
}
311
311
+
fmt.Printf("\n")
312
312
+
}
313
313
+
314
314
+
if *verbose && len(results) > 0 {
315
315
+
fmt.Printf("Sample matches (first 10):\n")
316
316
+
displayCount := 10
317
317
+
if len(results) < displayCount {
318
318
+
displayCount = len(results)
319
319
+
}
320
320
+
321
321
+
for i := 0; i < displayCount; i++ {
322
322
+
res := results[i]
323
323
+
fmt.Printf(" %d. Position %d: %s\n", i+1, res.Position, res.DID)
324
324
+
fmt.Printf(" Reason: %s (confidence: %.2f)\n", res.Match.Reason, res.Match.Confidence)
325
325
+
if res.Match.Note != "" {
326
326
+
fmt.Printf(" Note: %s\n", res.Match.Note)
327
327
+
}
328
328
+
}
329
329
+
330
330
+
if len(results) > displayCount {
331
331
+
fmt.Printf(" ... and %d more\n", len(results)-displayCount)
332
332
+
}
333
333
+
}
334
334
+
}
335
335
+
336
336
+
func cmdDetectorRun() {
337
337
+
if len(os.Args) < 4 {
338
338
+
fmt.Fprintf(os.Stderr, "Usage: plcbundle detector run <detector1> [detector2...] --bundles 1-100\n")
339
339
+
fmt.Fprintf(os.Stderr, "\nUse 'all' to run all available detectors\n")
340
340
+
os.Exit(1)
341
341
+
}
342
342
+
343
343
+
// Manually separate detector names from flags
344
344
+
var detectorNames []string
345
345
+
var flagArgs []string
346
346
+
347
347
+
for i := 3; i < len(os.Args); i++ {
348
348
+
arg := os.Args[i]
349
349
+
if strings.HasPrefix(arg, "-") {
350
350
+
// This and all remaining are flags
351
351
+
flagArgs = os.Args[i:]
352
352
+
break
353
353
+
}
354
354
+
// Detector name
355
355
+
detectorNames = append(detectorNames, arg)
356
356
+
}
357
357
+
358
358
+
if len(detectorNames) == 0 {
359
359
+
fmt.Fprintf(os.Stderr, "Error: at least one detector name required\n")
360
360
+
fmt.Fprintf(os.Stderr, "\nExamples:\n")
361
361
+
fmt.Fprintf(os.Stderr, " plcbundle detector run invalid_handle --bundles 1-100\n")
362
362
+
fmt.Fprintf(os.Stderr, " plcbundle detector run invalid_handle aka_spam --bundles 1-100\n")
363
363
+
fmt.Fprintf(os.Stderr, " plcbundle detector run all --bundles 1-100\n")
364
364
+
os.Exit(1)
365
365
+
}
366
366
+
367
367
+
// Parse flags
368
368
+
fs := flag.NewFlagSet("detector run", flag.ExitOnError)
369
369
+
bundleRange := fs.String("bundles", "", "bundle range (e.g., '1-100')")
370
370
+
confidence := fs.Float64("confidence", 0.90, "minimum confidence")
371
371
+
fs.Parse(flagArgs)
372
372
+
373
373
+
if *bundleRange == "" {
374
374
+
fmt.Fprintf(os.Stderr, "Error: --bundles required\n")
375
375
+
os.Exit(1)
376
376
+
}
377
377
+
378
378
+
// Parse bundle range
379
379
+
start, end, err := parseBundleRange(*bundleRange)
380
380
+
if err != nil {
381
381
+
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
382
382
+
os.Exit(1)
383
383
+
}
384
384
+
385
385
+
// Load manager
386
386
+
mgr, _, err := getManager("")
387
387
+
if err != nil {
388
388
+
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
389
389
+
os.Exit(1)
390
390
+
}
391
391
+
defer mgr.Close()
392
392
+
393
393
+
// Setup registry
394
394
+
registry := detector.DefaultRegistry()
395
395
+
config := detector.DefaultConfig()
396
396
+
config.MinConfidence = *confidence
397
397
+
398
398
+
// Handle "all" keyword - expand to all available detectors
399
399
+
if len(detectorNames) == 1 && detectorNames[0] == "all" {
400
400
+
detectorNames = registry.Names()
401
401
+
fmt.Fprintf(os.Stderr, "Using all available detectors: %s\n", strings.Join(detectorNames, ", "))
402
402
+
}
403
403
+
404
404
+
// Log to stderr
405
405
+
fmt.Fprintf(os.Stderr, "Running %d detector(s) on bundles %d-%d...\n", len(detectorNames), start, end)
406
406
+
if len(detectorNames) <= 5 {
407
407
+
fmt.Fprintf(os.Stderr, "Detectors: %s\n", strings.Join(detectorNames, ", "))
408
408
+
}
409
409
+
fmt.Fprintf(os.Stderr, "Min confidence: %.2f\n\n", *confidence)
410
410
+
411
411
+
// Get all detectors
412
412
+
detectors := make([]detector.Detector, 0, len(detectorNames))
413
413
+
for _, name := range detectorNames {
414
414
+
d, err := registry.Get(name)
415
415
+
if err != nil {
416
416
+
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
417
417
+
os.Exit(1)
418
418
+
}
419
419
+
detectors = append(detectors, d)
420
420
+
}
421
421
+
422
422
+
ctx := context.Background()
423
423
+
424
424
+
// Write CSV header to stdout
425
425
+
fmt.Println("bundle,position,cid,detectors,confidence,detected_at,size")
426
426
+
427
427
+
// Track statistics
428
428
+
totalOps := 0
429
429
+
matchCount := 0
430
430
+
totalBytes := int64(0)
431
431
+
matchedBytes := int64(0)
432
432
+
bundlesProcessed := 0
433
433
+
detectorMatchCounts := make(map[string]int)
434
434
+
435
435
+
totalBundles := end - start + 1
436
436
+
437
437
+
// Create progress bar with byte tracking enabled
438
438
+
fmt.Fprintf(os.Stderr, "Processing bundles:\n")
439
439
+
progress := NewProgressBar(totalBundles)
440
440
+
progress.showBytes = true // Enable byte tracking
441
441
+
442
442
+
// Process bundles and stream results
443
443
+
for bundleNum := start; bundleNum <= end; bundleNum++ {
444
444
+
bundle, err := mgr.LoadBundle(ctx, bundleNum)
445
445
+
if err != nil {
446
446
+
// Don't update progress on error, just log
447
447
+
progress.Finish()
448
448
+
fmt.Fprintf(os.Stderr, "\n⚠️ Warning: failed to load bundle %d: %v\n", bundleNum, err)
449
449
+
progress = NewProgressBar(totalBundles)
450
450
+
progress.showBytes = true
451
451
+
progress.SetWithBytes(bundleNum-start, totalBytes)
452
452
+
continue
453
453
+
}
454
454
+
455
455
+
bundlesProcessed++
456
456
+
totalOps += len(bundle.Operations)
457
457
+
458
458
+
// Process each operation with all detectors
459
459
+
for position, op := range bundle.Operations {
460
460
+
// Calculate operation size first
461
461
+
var opSize int
462
462
+
if len(op.RawJSON) > 0 {
463
463
+
opSize = len(op.RawJSON)
464
464
+
} else {
465
465
+
// Fallback: marshal to get size
466
466
+
data, _ := json.Marshal(op)
467
467
+
opSize = len(data)
468
468
+
}
469
469
+
totalBytes += int64(opSize)
470
470
+
471
471
+
// Collect all matches for this operation
472
472
+
var matchedDetectors []string
473
473
+
var maxConfidence float64
474
474
+
var detectedAt time.Time
475
475
+
476
476
+
// Run all detectors on this operation
477
477
+
for _, det := range detectors {
478
478
+
match, err := det.Detect(ctx, op)
479
479
+
if err != nil {
480
480
+
continue
481
481
+
}
482
482
+
483
483
+
// Skip if no match or confidence too low
484
484
+
if match == nil || match.Confidence < *confidence {
485
485
+
continue
486
486
+
}
487
487
+
488
488
+
// Collect detector name
489
489
+
matchedDetectors = append(matchedDetectors, det.Name())
490
490
+
detectorMatchCounts[det.Name()]++
491
491
+
492
492
+
// Track highest confidence
493
493
+
if match.Confidence > maxConfidence {
494
494
+
maxConfidence = match.Confidence
495
495
+
}
496
496
+
497
497
+
// Use current time for first match
498
498
+
if detectedAt.IsZero() {
499
499
+
detectedAt = time.Now()
500
500
+
}
501
501
+
}
502
502
+
503
503
+
// Output only if at least one detector matched
504
504
+
if len(matchedDetectors) > 0 {
505
505
+
matchCount++
506
506
+
matchedBytes += int64(opSize)
507
507
+
508
508
+
fmt.Printf("%d,%d,%s,%s,%.2f,%s,%d\n",
509
509
+
bundleNum,
510
510
+
position,
511
511
+
op.CID,
512
512
+
strings.Join(matchedDetectors, ";"),
513
513
+
maxConfidence,
514
514
+
detectedAt.Format("2006-01-02T15:04:05Z"),
515
515
+
opSize,
516
516
+
)
517
517
+
}
518
518
+
}
519
519
+
520
520
+
// Update progress with bytes
521
521
+
progress.SetWithBytes(bundleNum-start+1, totalBytes)
522
522
+
}
523
523
+
524
524
+
// Finish progress bar
525
525
+
progress.Finish()
526
526
+
527
527
+
// Final stats to stderr
528
528
+
fmt.Fprintf(os.Stderr, "\n")
529
529
+
fmt.Fprintf(os.Stderr, "✓ Detection complete\n")
530
530
+
fmt.Fprintf(os.Stderr, " Bundles processed: %d\n", bundlesProcessed)
531
531
+
fmt.Fprintf(os.Stderr, " Total operations: %d\n", totalOps)
532
532
+
fmt.Fprintf(os.Stderr, " Matches found: %d (%.2f%%)\n", matchCount, float64(matchCount)/float64(totalOps)*100)
533
533
+
fmt.Fprintf(os.Stderr, " Clean operations: %d (%.2f%%)\n", totalOps-matchCount, float64(totalOps-matchCount)/float64(totalOps)*100)
534
534
+
fmt.Fprintf(os.Stderr, "\n")
535
535
+
fmt.Fprintf(os.Stderr, " Total size: %s\n", formatBytes(totalBytes))
536
536
+
fmt.Fprintf(os.Stderr, " Matched size: %s (%.2f%%)\n", formatBytes(matchedBytes), float64(matchedBytes)/float64(totalBytes)*100)
537
537
+
fmt.Fprintf(os.Stderr, " Clean size: %s (%.2f%%)\n", formatBytes(totalBytes-matchedBytes), float64(totalBytes-matchedBytes)/float64(totalBytes)*100)
538
538
+
539
539
+
if matchedBytes > 0 {
540
540
+
fmt.Fprintf(os.Stderr, "\n")
541
541
+
fmt.Fprintf(os.Stderr, " 💾 Potential savings if filtered: %s (%.2f%% reduction)\n",
542
542
+
formatBytes(matchedBytes),
543
543
+
float64(matchedBytes)/float64(totalBytes)*100)
544
544
+
}
545
545
+
546
546
+
fmt.Fprintf(os.Stderr, "\n")
547
547
+
fmt.Fprintf(os.Stderr, " Detectors used: %d\n", len(detectorNames))
548
548
+
549
549
+
// Show breakdown by detector if multiple used
550
550
+
if len(detectorNames) > 1 {
551
551
+
fmt.Fprintf(os.Stderr, "\n")
552
552
+
fmt.Fprintf(os.Stderr, " Matches by detector:\n")
553
553
+
for _, name := range detectorNames {
554
554
+
count := detectorMatchCounts[name]
555
555
+
if count > 0 {
556
556
+
pct := float64(count) / float64(matchCount) * 100
557
557
+
fmt.Fprintf(os.Stderr, " %-20s %d (%.1f%%)\n", name, count, pct)
558
558
+
} else {
559
559
+
fmt.Fprintf(os.Stderr, " %-20s 0\n", name)
560
560
+
}
561
561
+
}
562
562
+
}
563
563
+
}
564
564
+
565
565
+
func cmdDetectorInfo() {
566
566
+
if len(os.Args) < 4 {
567
567
+
fmt.Fprintf(os.Stderr, "Usage: plcbundle detector info <name>\n")
568
568
+
os.Exit(1)
569
569
+
}
570
570
+
571
571
+
detectorName := os.Args[3]
572
572
+
573
573
+
registry := detector.DefaultRegistry()
574
574
+
d, err := registry.Get(detectorName)
575
575
+
if err != nil {
576
576
+
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
577
577
+
os.Exit(1)
578
578
+
}
579
579
+
580
580
+
fmt.Printf("Detector: %s\n", d.Name())
581
581
+
fmt.Printf("Version: %s\n", d.Version())
582
582
+
fmt.Printf("Description: %s\n", d.Description())
583
583
+
fmt.Printf("\n")
584
584
+
585
585
+
// Show example usage
586
586
+
fmt.Printf("Usage examples:\n")
587
587
+
fmt.Printf(" # Test on single bundle\n")
588
588
+
fmt.Printf(" plcbundle detector test %s --bundle 42\n\n", d.Name())
589
589
+
fmt.Printf(" # Run on range and save\n")
590
590
+
fmt.Printf(" plcbundle detector run %s --bundles 1-100 --output results.csv\n\n", d.Name())
591
591
+
fmt.Printf(" # Use with filter creation\n")
592
592
+
fmt.Printf(" plcbundle filter detect --detector %s --bundles 1-100\n", d.Name())
593
593
+
}
594
594
+
595
595
+
// Helper functions
596
596
+
597
597
+
// parseBundleRange parses a bundle selector of the form "N" or "START-END".
//
// A single number yields start == end == N. For a range, both ends must be
// integers and START must not exceed END. Surrounding whitespace around each
// number is ignored; any other extra characters (for example "5x" or
// "1-100abc") are rejected rather than silently dropped.
func parseBundleRange(rangeStr string) (start, end int, err error) {
	// parseNum accepts exactly one decimal integer. The trailing %c probe
	// only succeeds when something follows the number, so a scan count of
	// exactly 1 means "an integer and nothing else".
	parseNum := func(text string) (int, error) {
		var value int
		var trailing rune
		scanned, scanErr := fmt.Sscanf(strings.TrimSpace(text), "%d%c", &value, &trailing)
		switch {
		case scanned == 0:
			return 0, scanErr
		case scanned > 1:
			return 0, fmt.Errorf("unexpected characters after number in %q", text)
		}
		return value, nil
	}

	// Single bundle number.
	if !strings.Contains(rangeStr, "-") {
		num, numErr := parseNum(rangeStr)
		if numErr != nil {
			return 0, 0, fmt.Errorf("invalid bundle number: %w", numErr)
		}
		return num, num, nil
	}

	// Range (e.g., "1-100").
	parts := strings.Split(rangeStr, "-")
	if len(parts) != 2 {
		return 0, 0, fmt.Errorf("invalid range format (expected: N or start-end)")
	}

	start, err = parseNum(parts[0])
	if err != nil {
		return 0, 0, fmt.Errorf("invalid start: %w", err)
	}

	end, err = parseNum(parts[1])
	if err != nil {
		return 0, 0, fmt.Errorf("invalid end: %w", err)
	}

	if start > end {
		return 0, 0, fmt.Errorf("start must be <= end")
	}

	return start, end, nil
}
+97
-12
cmd/plcbundle/main.go
···
2
2
3
3
import (
4
4
"context"
5
5
+
"encoding/json"
5
6
"flag"
6
7
"fmt"
7
8
"net/http"
···
82
83
cmdServe()
83
84
case "compare":
84
85
cmdCompare()
86
86
+
case "detector":
87
87
+
cmdDetector()
85
88
case "version":
86
89
fmt.Printf("plcbundle version %s\n", version)
87
90
fmt.Printf(" commit: %s\n", gitCommit)
···
110
113
mempool Show mempool status and operations
111
114
serve Start HTTP server to serve bundle data
112
115
compare Compare local index with target index
116
116
+
detector
113
117
version Show version
114
118
115
119
Security Model:
···
845
849
846
850
func cmdExport() {
847
851
fs := flag.NewFlagSet("export", flag.ExitOnError)
848
848
-
count := fs.Int("count", 1000, "number of operations to export")
852
852
+
bundles := fs.String("bundles", "", "bundle number or range (e.g., '42' or '1-100')")
853
853
+
all := fs.Bool("all", false, "export all bundles")
854
854
+
count := fs.Int("count", 0, "limit number of operations (0 = all)")
849
855
after := fs.String("after", "", "timestamp to start after (RFC3339)")
850
856
fs.Parse(os.Args[2:])
851
857
858
858
+
// Validate flags
859
859
+
if !*all && *bundles == "" {
860
860
+
fmt.Fprintf(os.Stderr, "Usage: plcbundle export --bundles <number|range> [options]\n")
861
861
+
fmt.Fprintf(os.Stderr, " or: plcbundle export --all [options]\n")
862
862
+
fmt.Fprintf(os.Stderr, "\nExamples:\n")
863
863
+
fmt.Fprintf(os.Stderr, " plcbundle export --bundles 42\n")
864
864
+
fmt.Fprintf(os.Stderr, " plcbundle export --bundles 1-100\n")
865
865
+
fmt.Fprintf(os.Stderr, " plcbundle export --all\n")
866
866
+
fmt.Fprintf(os.Stderr, " plcbundle export --all --count 50000\n")
867
867
+
fmt.Fprintf(os.Stderr, " plcbundle export --bundles 42 | jq .\n")
868
868
+
os.Exit(1)
869
869
+
}
870
870
+
871
871
+
// Load manager
852
872
mgr, _, err := getManager("")
853
873
if err != nil {
854
874
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
···
856
876
}
857
877
defer mgr.Close()
858
878
859
859
-
// Parse after time
879
879
+
// Determine bundle range
880
880
+
var start, end int
881
881
+
if *all {
882
882
+
// Export all bundles
883
883
+
index := mgr.GetIndex()
884
884
+
bundles := index.GetBundles()
885
885
+
if len(bundles) == 0 {
886
886
+
fmt.Fprintf(os.Stderr, "No bundles available\n")
887
887
+
os.Exit(1)
888
888
+
}
889
889
+
start = bundles[0].BundleNumber
890
890
+
end = bundles[len(bundles)-1].BundleNumber
891
891
+
892
892
+
fmt.Fprintf(os.Stderr, "Exporting all bundles (%d-%d)\n", start, end)
893
893
+
} else {
894
894
+
// Parse bundle range
895
895
+
start, end, err = parseBundleRange(*bundles)
896
896
+
if err != nil {
897
897
+
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
898
898
+
os.Exit(1)
899
899
+
}
900
900
+
fmt.Fprintf(os.Stderr, "Exporting bundles %d-%d\n", start, end)
901
901
+
}
902
902
+
903
903
+
// Log to stderr
904
904
+
if *count > 0 {
905
905
+
fmt.Fprintf(os.Stderr, "Limit: %d operations\n", *count)
906
906
+
}
907
907
+
if *after != "" {
908
908
+
fmt.Fprintf(os.Stderr, "After: %s\n", *after)
909
909
+
}
910
910
+
fmt.Fprintf(os.Stderr, "\n")
911
911
+
912
912
+
// Parse after time if provided
860
913
var afterTime time.Time
861
914
if *after != "" {
862
915
afterTime, err = time.Parse(time.RFC3339, *after)
···
867
920
}
868
921
869
922
ctx := context.Background()
870
870
-
ops, err := mgr.ExportOperations(ctx, afterTime, *count)
871
871
-
if err != nil {
872
872
-
fmt.Fprintf(os.Stderr, "Export failed: %v\n", err)
873
873
-
os.Exit(1)
874
874
-
}
923
923
+
exported := 0
875
924
876
876
-
// Output as JSONL
877
877
-
for _, op := range ops {
878
878
-
if len(op.RawJSON) > 0 {
879
879
-
fmt.Println(string(op.RawJSON))
925
925
+
// Export operations from bundles
926
926
+
for bundleNum := start; bundleNum <= end; bundleNum++ {
927
927
+
// Check if we've reached the limit
928
928
+
if *count > 0 && exported >= *count {
929
929
+
break
930
930
+
}
931
931
+
932
932
+
fmt.Fprintf(os.Stderr, "Processing bundle %d...\r", bundleNum)
933
933
+
934
934
+
bundle, err := mgr.LoadBundle(ctx, bundleNum)
935
935
+
if err != nil {
936
936
+
fmt.Fprintf(os.Stderr, "\nWarning: failed to load bundle %d: %v\n", bundleNum, err)
937
937
+
continue
938
938
+
}
939
939
+
940
940
+
// Output operations
941
941
+
for _, op := range bundle.Operations {
942
942
+
// Check after time filter
943
943
+
if !afterTime.IsZero() && op.CreatedAt.Before(afterTime) {
944
944
+
continue
945
945
+
}
946
946
+
947
947
+
// Check count limit
948
948
+
if *count > 0 && exported >= *count {
949
949
+
break
950
950
+
}
951
951
+
952
952
+
// Output operation as JSONL
953
953
+
if len(op.RawJSON) > 0 {
954
954
+
fmt.Println(string(op.RawJSON))
955
955
+
} else {
956
956
+
// Fallback to marshaling
957
957
+
data, _ := json.Marshal(op)
958
958
+
fmt.Println(string(data))
959
959
+
}
960
960
+
961
961
+
exported++
880
962
}
881
963
}
882
964
883
883
-
fmt.Fprintf(os.Stderr, "Exported %d operations\n", len(ops))
965
965
+
// Final stats to stderr
966
966
+
fmt.Fprintf(os.Stderr, "\n\n")
967
967
+
fmt.Fprintf(os.Stderr, "✓ Export complete\n")
968
968
+
fmt.Fprintf(os.Stderr, " Exported: %d operations\n", exported)
884
969
}
885
970
886
971
func cmdBackfill() {
+6
-5
cmd/plcbundle/progress.go
···
2
2
3
3
import (
4
4
"fmt"
5
5
+
"os"
5
6
"strings"
6
7
"sync"
7
8
"time"
···
80
81
pb.current = pb.total
81
82
pb.currentBytes = pb.totalBytes
82
83
pb.print()
83
83
-
fmt.Println() // New line after completion
84
84
+
fmt.Fprintf(os.Stderr, "\n") // ← FIXED: Use stderr
84
85
}
85
86
86
87
// print renders the progress bar (must be called with lock held)
···
113
114
eta = time.Duration(float64(remaining)/speed) * time.Second
114
115
}
115
116
116
116
-
// Print progress bar
117
117
-
if pb.showBytes && pb.totalBytes > 0 {
117
117
+
// Show MB/s if bytes are being tracked (changed condition)
118
118
+
if pb.showBytes && pb.currentBytes > 0 {
118
119
// Calculate MB/s (using decimal units: 1 MB = 1,000,000 bytes)
119
120
mbProcessed := float64(pb.currentBytes) / (1000 * 1000)
120
121
mbPerSec := mbProcessed / elapsed.Seconds()
121
122
122
122
-
fmt.Printf("\r [%s] %6.2f%% | %d/%d bundles | %.1f/s | %.1f MB/s | ETA: %s ",
123
123
+
fmt.Fprintf(os.Stderr, "\r [%s] %6.2f%% | %d/%d bundles | %.1f/s | %.1f MB/s | ETA: %s ",
123
124
bar,
124
125
percent,
125
126
pb.current,
···
128
129
mbPerSec,
129
130
formatETA(eta))
130
131
} else {
131
131
-
fmt.Printf("\r [%s] %6.2f%% | %d/%d bundles | %.1f/s | ETA: %s ",
132
132
+
fmt.Fprintf(os.Stderr, "\r [%s] %6.2f%% | %d/%d bundles | %.1f/s | ETA: %s ",
132
133
bar,
133
134
percent,
134
135
pb.current,
+467
detector/builtin.go
···
1
1
+
// detector/builtin.go
2
2
+
package detector
3
3
+
4
4
+
import (
5
5
+
"context"
6
6
+
"regexp"
7
7
+
"strings"
8
8
+
9
9
+
"tangled.org/atscan.net/plcbundle/plc"
10
10
+
)
11
11
+
12
12
+
// InvalidHandleDetector flags operations whose alsoKnownAs entries contain
// an at:// handle that does not look like a valid handle.
type InvalidHandleDetector struct {
	// validHandlePattern accepts at:// handles built from lowercase
	// letters, digits, hyphens and dots, ending in an alphabetic label.
	validHandlePattern *regexp.Regexp
}

// NewInvalidHandleDetector returns a detector with its handle pattern
// compiled and ready to use.
func NewInvalidHandleDetector() *InvalidHandleDetector {
	// Underscores are deliberately absent from every character class.
	pattern := regexp.MustCompile(`^at://[a-z0-9][a-z0-9-]*(\.[a-z0-9][a-z0-9-]*)*\.[a-z]+$`)
	return &InvalidHandleDetector{validHandlePattern: pattern}
}

// Name returns the registry name of the detector.
func (d *InvalidHandleDetector) Name() string {
	return "invalid_handle"
}

// Description returns a one-line summary of what the detector looks for.
func (d *InvalidHandleDetector) Description() string {
	const summary = "Detects operations with invalid handle patterns (underscores, invalid chars)"
	return summary
}

// Version returns the detector's version string.
func (d *InvalidHandleDetector) Version() string {
	return "1.0.0"
}
30
30
+
31
31
+
func (d *InvalidHandleDetector) Detect(ctx context.Context, op plc.PLCOperation) (*Match, error) {
32
32
+
if alsoKnownAs, ok := op.Operation["alsoKnownAs"].([]interface{}); ok {
33
33
+
for _, aka := range alsoKnownAs {
34
34
+
if str, ok := aka.(string); ok {
35
35
+
// Check if it's an at:// handle
36
36
+
if !strings.HasPrefix(str, "at://") {
37
37
+
continue
38
38
+
}
39
39
+
40
40
+
// Check for underscore (invalid in Bluesky handles)
41
41
+
if strings.Contains(str, "_") {
42
42
+
return &Match{
43
43
+
Reason: "underscore_in_handle",
44
44
+
Category: "invalid_handle",
45
45
+
Confidence: 0.99,
46
46
+
Note: "Handle contains underscore which is invalid in Bluesky",
47
47
+
Metadata: map[string]interface{}{
48
48
+
"invalid_handle": str,
49
49
+
"violation": "underscore_character",
50
50
+
},
51
51
+
}, nil
52
52
+
}
53
53
+
54
54
+
// Check if handle matches valid pattern
55
55
+
if !d.validHandlePattern.MatchString(str) {
56
56
+
return &Match{
57
57
+
Reason: "invalid_handle_pattern",
58
58
+
Category: "invalid_handle",
59
59
+
Confidence: 0.95,
60
60
+
Note: "Handle does not match valid Bluesky handle pattern",
61
61
+
Metadata: map[string]interface{}{
62
62
+
"invalid_handle": str,
63
63
+
"violation": "pattern_mismatch",
64
64
+
},
65
65
+
}, nil
66
66
+
}
67
67
+
}
68
68
+
}
69
69
+
}
70
70
+
71
71
+
return nil, nil
72
72
+
}
73
73
+
74
74
+
// AlsoKnownAsSpamDetector flags operations whose alsoKnownAs list is
// unusually long or contains very long non-"at://" strings.
type AlsoKnownAsSpamDetector struct {
	// maxLegitimateEntries is the largest alsoKnownAs length that is not
	// reported as excessive.
	maxLegitimateEntries int
	// minGarbageLength is the byte length a non-"at://" entry must exceed to
	// be counted as garbage.
	minGarbageLength int
}

// NewAlsoKnownAsSpamDetector returns a detector with the default thresholds.
func NewAlsoKnownAsSpamDetector() *AlsoKnownAsSpamDetector {
	det := new(AlsoKnownAsSpamDetector)
	det.maxLegitimateEntries = 3 // normal operations have 1-3 entries
	det.minGarbageLength = 100   // garbage strings are very long
	return det
}

// Name returns the detector's registry identifier.
func (d *AlsoKnownAsSpamDetector) Name() string {
	return "aka_spam"
}

// Description returns a one-line, human-readable summary of the detector.
func (d *AlsoKnownAsSpamDetector) Description() string {
	return "Detects spam through excessive or garbage alsoKnownAs entries"
}

// Version returns the detector's version string.
func (d *AlsoKnownAsSpamDetector) Version() string {
	return "1.0.0"
}
92
92
+
93
93
+
func (d *AlsoKnownAsSpamDetector) Detect(ctx context.Context, op plc.PLCOperation) (*Match, error) {
94
94
+
if alsoKnownAs, ok := op.Operation["alsoKnownAs"].([]interface{}); ok {
95
95
+
entryCount := len(alsoKnownAs)
96
96
+
97
97
+
// Count different types of entries
98
98
+
atURICount := 0
99
99
+
garbageCount := 0
100
100
+
var garbageExamples []string
101
101
+
102
102
+
for _, aka := range alsoKnownAs {
103
103
+
if str, ok := aka.(string); ok {
104
104
+
if strings.HasPrefix(str, "at://") {
105
105
+
atURICount++
106
106
+
} else if len(str) > d.minGarbageLength {
107
107
+
garbageCount++
108
108
+
if len(garbageExamples) < 2 {
109
109
+
// Store first few for evidence
110
110
+
preview := str
111
111
+
if len(preview) > 50 {
112
112
+
preview = preview[:50] + "..."
113
113
+
}
114
114
+
garbageExamples = append(garbageExamples, preview)
115
115
+
}
116
116
+
}
117
117
+
}
118
118
+
}
119
119
+
120
120
+
// Detection: Excessive entries
121
121
+
if entryCount > d.maxLegitimateEntries {
122
122
+
confidence := 0.80
123
123
+
if garbageCount > 0 {
124
124
+
confidence = 0.95 // Higher confidence if garbage detected
125
125
+
}
126
126
+
127
127
+
return &Match{
128
128
+
Reason: "excessive_aka_entries",
129
129
+
Category: "spam",
130
130
+
Confidence: confidence,
131
131
+
Note: "Operation has excessive alsoKnownAs entries",
132
132
+
Metadata: map[string]interface{}{
133
133
+
"total_entries": entryCount,
134
134
+
"at_uri_count": atURICount,
135
135
+
"garbage_count": garbageCount,
136
136
+
"garbage_examples": garbageExamples,
137
137
+
},
138
138
+
}, nil
139
139
+
}
140
140
+
141
141
+
// Detection: Garbage entries present (even if count is low)
142
142
+
if garbageCount > 0 {
143
143
+
return &Match{
144
144
+
Reason: "garbage_aka_entries",
145
145
+
Category: "spam",
146
146
+
Confidence: 0.98,
147
147
+
Note: "Operation contains garbage/random strings in alsoKnownAs",
148
148
+
Metadata: map[string]interface{}{
149
149
+
"total_entries": entryCount,
150
150
+
"garbage_count": garbageCount,
151
151
+
"garbage_examples": garbageExamples,
152
152
+
},
153
153
+
}, nil
154
154
+
}
155
155
+
}
156
156
+
157
157
+
return nil, nil
158
158
+
}
159
159
+
160
160
+
// CompositeSpamDetector combines multiple signals for higher confidence
161
161
+
type CompositeSpamDetector struct {
162
162
+
invalidHandle *InvalidHandleDetector
163
163
+
akaSpam *AlsoKnownAsSpamDetector
164
164
+
}
165
165
+
166
166
+
func NewCompositeSpamDetector() *CompositeSpamDetector {
167
167
+
return &CompositeSpamDetector{
168
168
+
invalidHandle: NewInvalidHandleDetector(),
169
169
+
akaSpam: NewAlsoKnownAsSpamDetector(),
170
170
+
}
171
171
+
}
172
172
+
173
173
+
func (d *CompositeSpamDetector) Name() string { return "composite_spam" }
174
174
+
func (d *CompositeSpamDetector) Description() string {
175
175
+
return "Combines multiple spam signals for high-confidence detection"
176
176
+
}
177
177
+
func (d *CompositeSpamDetector) Version() string { return "1.0.0" }
178
178
+
179
179
+
func (d *CompositeSpamDetector) Detect(ctx context.Context, op plc.PLCOperation) (*Match, error) {
180
180
+
// Check both detectors
181
181
+
invalidHandleMatch, _ := d.invalidHandle.Detect(ctx, op)
182
182
+
akaSpamMatch, _ := d.akaSpam.Detect(ctx, op)
183
183
+
184
184
+
// If both match, very high confidence
185
185
+
if invalidHandleMatch != nil && akaSpamMatch != nil {
186
186
+
return &Match{
187
187
+
Reason: "multiple_spam_indicators",
188
188
+
Category: "spam",
189
189
+
Confidence: 0.99,
190
190
+
Note: "Operation has both invalid handle and excessive alsoKnownAs entries",
191
191
+
Metadata: map[string]interface{}{
192
192
+
"invalid_handle_reason": invalidHandleMatch.Reason,
193
193
+
"aka_spam_reason": akaSpamMatch.Reason,
194
194
+
"invalid_handle_data": invalidHandleMatch.Metadata,
195
195
+
"aka_spam_data": akaSpamMatch.Metadata,
196
196
+
},
197
197
+
}, nil
198
198
+
}
199
199
+
200
200
+
// Return whichever matched
201
201
+
if invalidHandleMatch != nil {
202
202
+
return invalidHandleMatch, nil
203
203
+
}
204
204
+
if akaSpamMatch != nil {
205
205
+
return akaSpamMatch, nil
206
206
+
}
207
207
+
208
208
+
return nil, nil
209
209
+
}
210
210
+
211
211
+
// SpamPDSDetector flags operations that point at a known spam PDS host or
// that claim a handle under a known spam domain.
type SpamPDSDetector struct {
	// spamEndpoints is the set of PDS hosts treated as spam.
	spamEndpoints map[string]bool
	// spamDomains is the set of handle domains treated as spam; handles on
	// subdomains of these are flagged as well.
	spamDomains map[string]bool
}

// NewSpamPDSDetector returns a detector preloaded with the known spam hosts
// and domains. Extend the two sets below as more are discovered.
func NewSpamPDSDetector() *SpamPDSDetector {
	endpoints := map[string]bool{
		"pds.trump.com": true,
	}
	domains := map[string]bool{
		"trump.com":        true,
		"donald.trump.com": true,
	}
	return &SpamPDSDetector{
		spamEndpoints: endpoints,
		spamDomains:   domains,
	}
}

// Name returns the detector's registry identifier.
func (d *SpamPDSDetector) Name() string {
	return "spam_pds"
}

// Description returns a one-line, human-readable summary of the detector.
func (d *SpamPDSDetector) Description() string {
	return "Detects operations using known spam PDS endpoints and fake domain claims"
}

// Version returns the detector's version string.
func (d *SpamPDSDetector) Version() string {
	return "1.0.0"
}
236
236
+
237
237
+
func (d *SpamPDSDetector) Detect(ctx context.Context, op plc.PLCOperation) (*Match, error) {
238
238
+
// Check PDS endpoint
239
239
+
if services, ok := op.Operation["services"].(map[string]interface{}); ok {
240
240
+
if pds, ok := services["atproto_pds"].(map[string]interface{}); ok {
241
241
+
if endpoint, ok := pds["endpoint"].(string); ok {
242
242
+
host := extractHost(endpoint)
243
243
+
244
244
+
// Check if it's a known spam PDS
245
245
+
if d.spamEndpoints[host] {
246
246
+
return &Match{
247
247
+
Reason: "spam_pds_endpoint",
248
248
+
Category: "spam",
249
249
+
Confidence: 0.99,
250
250
+
Note: "Operation uses known spam PDS endpoint",
251
251
+
Metadata: map[string]interface{}{
252
252
+
"endpoint": endpoint,
253
253
+
"host": host,
254
254
+
},
255
255
+
}, nil
256
256
+
}
257
257
+
}
258
258
+
}
259
259
+
}
260
260
+
261
261
+
// Check for spam domain claims in alsoKnownAs
262
262
+
if alsoKnownAs, ok := op.Operation["alsoKnownAs"].([]interface{}); ok {
263
263
+
for _, aka := range alsoKnownAs {
264
264
+
if str, ok := aka.(string); ok {
265
265
+
if !strings.HasPrefix(str, "at://") {
266
266
+
continue
267
267
+
}
268
268
+
269
269
+
// Extract domain from at:// URI
270
270
+
domain := strings.TrimPrefix(str, "at://")
271
271
+
if idx := strings.Index(domain, "/"); idx > 0 {
272
272
+
domain = domain[:idx]
273
273
+
}
274
274
+
275
275
+
// Check if claiming spam domain
276
276
+
if d.spamDomains[domain] {
277
277
+
return &Match{
278
278
+
Reason: "fake_domain_claim",
279
279
+
Category: "impersonation",
280
280
+
Confidence: 0.99,
281
281
+
Note: "Operation claims known spam/fake domain",
282
282
+
Metadata: map[string]interface{}{
283
283
+
"claimed_domain": domain,
284
284
+
"handle": str,
285
285
+
},
286
286
+
}, nil
287
287
+
}
288
288
+
289
289
+
// Check for subdomain patterns (like jr.donald.trump.com)
290
290
+
for spamDomain := range d.spamDomains {
291
291
+
if strings.HasSuffix(domain, "."+spamDomain) || domain == spamDomain {
292
292
+
return &Match{
293
293
+
Reason: "fake_domain_claim",
294
294
+
Category: "impersonation",
295
295
+
Confidence: 0.99,
296
296
+
Note: "Operation claims domain related to known spam domain",
297
297
+
Metadata: map[string]interface{}{
298
298
+
"claimed_domain": domain,
299
299
+
"spam_domain": spamDomain,
300
300
+
},
301
301
+
}, nil
302
302
+
}
303
303
+
}
304
304
+
}
305
305
+
}
306
306
+
}
307
307
+
308
308
+
return nil, nil
309
309
+
}
310
310
+
311
311
+
// ServiceAbuseDetector flags operations whose services, endpoints or handles
// carry abnormally long values or otherwise abused structures.
type ServiceAbuseDetector struct {
	// maxServiceTypeLength is the longest service "type" (in bytes) that is
	// not reported.
	maxServiceTypeLength int
	// maxEndpointLength is the longest service "endpoint" (in bytes) that is
	// not reported.
	maxEndpointLength int
	// maxHandleLength is the longest handle (in bytes, without the "at://"
	// prefix) that is not reported.
	maxHandleLength int
}

// NewServiceAbuseDetector returns a detector with the default length limits.
func NewServiceAbuseDetector() *ServiceAbuseDetector {
	det := new(ServiceAbuseDetector)
	det.maxServiceTypeLength = 100 // normal types are short (e.g., "AtprotoPersonalDataServer")
	det.maxEndpointLength = 200    // normal endpoints are reasonable URLs
	det.maxHandleLength = 100      // normal handles are short
	return det
}

// Name returns the detector's registry identifier.
func (d *ServiceAbuseDetector) Name() string {
	return "service_abuse"
}

// Description returns a one-line, human-readable summary of the detector.
func (d *ServiceAbuseDetector) Description() string {
	return "Detects operations with abused service structures (random strings, numeric keys)"
}

// Version returns the detector's version string.
func (d *ServiceAbuseDetector) Version() string {
	return "1.0.0"
}
331
331
+
332
332
+
func (d *ServiceAbuseDetector) Detect(ctx context.Context, op plc.PLCOperation) (*Match, error) {
333
333
+
if services, ok := op.Operation["services"].(map[string]interface{}); ok {
334
334
+
// Check for numeric service keys (spam uses "0", "1", "2" instead of proper names)
335
335
+
hasNumericKeys := false
336
336
+
numericKeyCount := 0
337
337
+
338
338
+
for key := range services {
339
339
+
// Check if key is a digit
340
340
+
if len(key) == 1 && key >= "0" && key <= "9" {
341
341
+
hasNumericKeys = true
342
342
+
numericKeyCount++
343
343
+
}
344
344
+
}
345
345
+
346
346
+
if hasNumericKeys && numericKeyCount > 1 {
347
347
+
return &Match{
348
348
+
Reason: "numeric_service_keys",
349
349
+
Category: "service_abuse",
350
350
+
Confidence: 0.98,
351
351
+
Note: "Services use numeric keys instead of proper names",
352
352
+
Metadata: map[string]interface{}{
353
353
+
"numeric_key_count": numericKeyCount,
354
354
+
},
355
355
+
}, nil
356
356
+
}
357
357
+
358
358
+
// Check each service for abuse patterns
359
359
+
for serviceName, serviceData := range services {
360
360
+
if serviceMap, ok := serviceData.(map[string]interface{}); ok {
361
361
+
// Check service type length
362
362
+
if serviceType, ok := serviceMap["type"].(string); ok {
363
363
+
if len(serviceType) > d.maxServiceTypeLength {
364
364
+
return &Match{
365
365
+
Reason: "excessive_service_type_length",
366
366
+
Category: "service_abuse",
367
367
+
Confidence: 0.99,
368
368
+
Note: "Service type field contains excessively long random string",
369
369
+
Metadata: map[string]interface{}{
370
370
+
"service_name": serviceName,
371
371
+
"type_length": len(serviceType),
372
372
+
"type_preview": serviceType[:50] + "...",
373
373
+
},
374
374
+
}, nil
375
375
+
}
376
376
+
}
377
377
+
378
378
+
// Check endpoint length
379
379
+
if endpoint, ok := serviceMap["endpoint"].(string); ok {
380
380
+
if len(endpoint) > d.maxEndpointLength {
381
381
+
return &Match{
382
382
+
Reason: "excessive_endpoint_length",
383
383
+
Category: "service_abuse",
384
384
+
Confidence: 0.99,
385
385
+
Note: "Service endpoint contains excessively long random string",
386
386
+
Metadata: map[string]interface{}{
387
387
+
"service_name": serviceName,
388
388
+
"endpoint_length": len(endpoint),
389
389
+
"endpoint_preview": endpoint[:min(100, len(endpoint))] + "...",
390
390
+
},
391
391
+
}, nil
392
392
+
}
393
393
+
}
394
394
+
}
395
395
+
}
396
396
+
}
397
397
+
398
398
+
// Check for excessively long handles in alsoKnownAs
399
399
+
if alsoKnownAs, ok := op.Operation["alsoKnownAs"].([]interface{}); ok {
400
400
+
for _, aka := range alsoKnownAs {
401
401
+
if str, ok := aka.(string); ok {
402
402
+
if strings.HasPrefix(str, "at://") {
403
403
+
handle := strings.TrimPrefix(str, "at://")
404
404
+
if len(handle) > d.maxHandleLength {
405
405
+
return &Match{
406
406
+
Reason: "excessive_handle_length",
407
407
+
Category: "service_abuse",
408
408
+
Confidence: 0.98,
409
409
+
Note: "Handle contains excessively long random string",
410
410
+
Metadata: map[string]interface{}{
411
411
+
"handle_length": len(handle),
412
412
+
"handle_preview": handle[:min(50, len(handle))] + "...",
413
413
+
},
414
414
+
}, nil
415
415
+
}
416
416
+
}
417
417
+
}
418
418
+
}
419
419
+
}
420
420
+
421
421
+
// Check for empty verificationMethods (common in this spam)
422
422
+
if vm, ok := op.Operation["verificationMethods"].(map[string]interface{}); ok {
423
423
+
if len(vm) == 0 {
424
424
+
// Empty verificationMethods alone isn't enough, but combined with other signals...
425
425
+
// Check if there are other suspicious signals
426
426
+
if services, ok := op.Operation["services"].(map[string]interface{}); ok {
427
427
+
if len(services) > 2 {
428
428
+
// Multiple services + empty verificationMethods = suspicious
429
429
+
return &Match{
430
430
+
Reason: "empty_verification_methods",
431
431
+
Category: "service_abuse",
432
432
+
Confidence: 0.85,
433
433
+
Note: "Empty verificationMethods with multiple services",
434
434
+
Metadata: map[string]interface{}{
435
435
+
"service_count": len(services),
436
436
+
},
437
437
+
}, nil
438
438
+
}
439
439
+
}
440
440
+
}
441
441
+
}
442
442
+
443
443
+
return nil, nil
444
444
+
}
445
445
+
446
446
+
// min returns the smaller of two ints (either one when they are equal).
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
453
453
+
454
454
+
// Helper functions
455
455
+
456
456
+
// extractHost returns the lower-cased host part of a service endpoint.
//
// It accepts both full URLs ("https://host:443/path") and bare hosts
// ("host", "host:443"). Handling, in order:
//
//   - the value is lower-cased, because host names are case-insensitive and
//     the result is used as a lookup key;
//   - a leading "http://" or "https://" is dropped;
//   - everything from the first "/", "?" or "#" onward is dropped;
//   - userinfo ("user:pass@") is dropped, so it cannot mask the real host;
//   - a bracketed IPv6 literal is returned without its brackets and port;
//   - otherwise a trailing ":port" is dropped.
//
// An empty string is returned when no host is present.
func extractHost(endpoint string) string {
	host := strings.ToLower(endpoint)
	host = strings.TrimPrefix(host, "http://")
	host = strings.TrimPrefix(host, "https://")

	// The authority ends at the first path, query or fragment delimiter.
	if idx := strings.IndexAny(host, "/?#"); idx >= 0 {
		host = host[:idx]
	}

	// Drop userinfo; the host is what follows the last "@".
	if idx := strings.LastIndex(host, "@"); idx >= 0 {
		host = host[idx+1:]
	}

	// IPv6 literals contain colons themselves, so they must be handled
	// before the port is stripped at the first ":".
	if strings.HasPrefix(host, "[") {
		if end := strings.Index(host, "]"); end > 0 {
			return host[1:end]
		}
		return host
	}

	if idx := strings.Index(host, ":"); idx >= 0 {
		host = host[:idx]
	}
	return host
}
+63
detector/detector.go
···
1
1
+
// detector/detector.go
2
2
+
package detector
3
3
+
4
4
+
import (
5
5
+
"context"
6
6
+
"time"
7
7
+
8
8
+
"tangled.org/atscan.net/plcbundle/plc"
9
9
+
)
10
10
+
11
11
+
// Detector represents a spam detection algorithm
12
12
+
type Detector interface {
13
13
+
// Name returns the detector's unique identifier
14
14
+
Name() string
15
15
+
16
16
+
// Description returns a human-readable description
17
17
+
Description() string
18
18
+
19
19
+
// Detect analyzes an operation and returns a match result
20
20
+
Detect(ctx context.Context, op plc.PLCOperation) (*Match, error)
21
21
+
22
22
+
// Version returns the detector version
23
23
+
Version() string
24
24
+
}
25
25
+
26
26
+
// Match describes one positive spam detection.
type Match struct {
	// Reason is a short identifier (e.g., "nostr_crosspost").
	Reason string
	// Category is the broader category (e.g., "cross_posting").
	Category string
	// Confidence ranges from 0.0 to 1.0.
	Confidence float64
	// Note is an optional human-readable explanation.
	Note string
	// Metadata carries additional context about the detection.
	Metadata map[string]interface{}
}
34
34
+
35
35
+
// Result represents the outcome of running a detector on an operation
36
36
+
type Result struct {
37
37
+
BundleNumber int
38
38
+
Position int
39
39
+
DID string
40
40
+
CID string // ← Add this field
41
41
+
Match *Match // nil if no match
42
42
+
Error error
43
43
+
DetectorName string
44
44
+
DetectedAt time.Time
45
45
+
}
46
46
+
47
47
+
// Config holds the settings a Runner uses when executing detectors.
type Config struct {
	// MinConfidence is the lowest Match.Confidence that is kept in results.
	MinConfidence float64
	// Timeout bounds each individual Detect call.
	Timeout time.Duration
	// Parallel selects the worker-pool execution path over the sequential one.
	Parallel bool
	// Workers is the number of goroutines used when Parallel is set.
	Workers int
}

// DefaultConfig returns sensible defaults: confidence threshold 0.90, a
// five-second per-operation timeout, and parallel execution on four workers.
func DefaultConfig() *Config {
	cfg := new(Config)
	cfg.MinConfidence = 0.90
	cfg.Timeout = 5 * time.Second
	cfg.Parallel = true
	cfg.Workers = 4
	return cfg
}
+87
detector/registry.go
···
1
1
+
// detector/registry.go
2
2
+
package detector
3
3
+
4
4
+
import (
5
5
+
"fmt"
6
6
+
"sync"
7
7
+
)
8
8
+
9
9
+
// Registry manages available detectors
10
10
+
type Registry struct {
11
11
+
detectors map[string]Detector
12
12
+
mu sync.RWMutex
13
13
+
}
14
14
+
15
15
+
// NewRegistry creates a new detector registry
16
16
+
func NewRegistry() *Registry {
17
17
+
return &Registry{
18
18
+
detectors: make(map[string]Detector),
19
19
+
}
20
20
+
}
21
21
+
22
22
+
// Register adds a detector to the registry
23
23
+
func (r *Registry) Register(d Detector) error {
24
24
+
r.mu.Lock()
25
25
+
defer r.mu.Unlock()
26
26
+
27
27
+
name := d.Name()
28
28
+
if _, exists := r.detectors[name]; exists {
29
29
+
return fmt.Errorf("detector %q already registered", name)
30
30
+
}
31
31
+
32
32
+
r.detectors[name] = d
33
33
+
return nil
34
34
+
}
35
35
+
36
36
+
// Get retrieves a detector by name
37
37
+
func (r *Registry) Get(name string) (Detector, error) {
38
38
+
r.mu.RLock()
39
39
+
defer r.mu.RUnlock()
40
40
+
41
41
+
d, ok := r.detectors[name]
42
42
+
if !ok {
43
43
+
return nil, fmt.Errorf("detector %q not found", name)
44
44
+
}
45
45
+
46
46
+
return d, nil
47
47
+
}
48
48
+
49
49
+
// List returns all registered detectors
50
50
+
func (r *Registry) List() []Detector {
51
51
+
r.mu.RLock()
52
52
+
defer r.mu.RUnlock()
53
53
+
54
54
+
detectors := make([]Detector, 0, len(r.detectors))
55
55
+
for _, d := range r.detectors {
56
56
+
detectors = append(detectors, d)
57
57
+
}
58
58
+
59
59
+
return detectors
60
60
+
}
61
61
+
62
62
+
// Names returns all detector names
63
63
+
func (r *Registry) Names() []string {
64
64
+
r.mu.RLock()
65
65
+
defer r.mu.RUnlock()
66
66
+
67
67
+
names := make([]string, 0, len(r.detectors))
68
68
+
for name := range r.detectors {
69
69
+
names = append(names, name)
70
70
+
}
71
71
+
72
72
+
return names
73
73
+
}
74
74
+
75
75
+
// DefaultRegistry returns a registry with built-in detectors
76
76
+
func DefaultRegistry() *Registry {
77
77
+
r := NewRegistry()
78
78
+
79
79
+
// Register real spam detectors
80
80
+
r.Register(NewInvalidHandleDetector())
81
81
+
r.Register(NewAlsoKnownAsSpamDetector())
82
82
+
r.Register(NewCompositeSpamDetector())
83
83
+
r.Register(NewSpamPDSDetector())
84
84
+
r.Register(NewServiceAbuseDetector())
85
85
+
86
86
+
return r
87
87
+
}
+216
detector/runner.go
···
1
1
+
// detector/runner.go
2
2
+
package detector
3
3
+
4
4
+
import (
5
5
+
"context"
6
6
+
"fmt"
7
7
+
"sync"
8
8
+
"time"
9
9
+
10
10
+
"tangled.org/atscan.net/plcbundle/bundle"
11
11
+
"tangled.org/atscan.net/plcbundle/plc"
12
12
+
)
13
13
+
14
14
+
// Runner executes detectors against operations
15
15
+
type Runner struct {
16
16
+
registry *Registry
17
17
+
config *Config
18
18
+
logger Logger
19
19
+
}
20
20
+
21
21
+
type Logger interface {
22
22
+
Printf(format string, v ...interface{})
23
23
+
}
24
24
+
25
25
+
// NewRunner creates a new detector runner
26
26
+
func NewRunner(registry *Registry, config *Config, logger Logger) *Runner {
27
27
+
if config == nil {
28
28
+
config = DefaultConfig()
29
29
+
}
30
30
+
return &Runner{
31
31
+
registry: registry,
32
32
+
config: config,
33
33
+
logger: logger,
34
34
+
}
35
35
+
}
36
36
+
37
37
+
// RunOnBundle runs detector(s) on all operations in a bundle
38
38
+
func (r *Runner) RunOnBundle(ctx context.Context, detectorName string, b *bundle.Bundle) ([]*Result, error) {
39
39
+
detector, err := r.registry.Get(detectorName)
40
40
+
if err != nil {
41
41
+
return nil, err
42
42
+
}
43
43
+
44
44
+
var results []*Result
45
45
+
46
46
+
if r.config.Parallel {
47
47
+
results = r.runParallel(ctx, detector, b)
48
48
+
} else {
49
49
+
results = r.runSequential(ctx, detector, b)
50
50
+
}
51
51
+
52
52
+
// Filter by minimum confidence
53
53
+
filtered := make([]*Result, 0)
54
54
+
for _, res := range results {
55
55
+
if res.Match != nil && res.Match.Confidence >= r.config.MinConfidence {
56
56
+
filtered = append(filtered, res)
57
57
+
}
58
58
+
}
59
59
+
60
60
+
return filtered, nil
61
61
+
}
62
62
+
63
63
+
func (r *Runner) runSequential(ctx context.Context, detector Detector, b *bundle.Bundle) []*Result {
64
64
+
results := make([]*Result, 0)
65
65
+
66
66
+
for pos, op := range b.Operations {
67
67
+
select {
68
68
+
case <-ctx.Done():
69
69
+
return results
70
70
+
default:
71
71
+
}
72
72
+
73
73
+
result := r.detectOne(ctx, detector, b.BundleNumber, pos, op)
74
74
+
if result.Match != nil || result.Error != nil {
75
75
+
results = append(results, result)
76
76
+
}
77
77
+
}
78
78
+
79
79
+
return results
80
80
+
}
81
81
+
82
82
+
func (r *Runner) runParallel(ctx context.Context, detector Detector, b *bundle.Bundle) []*Result {
83
83
+
type job struct {
84
84
+
pos int
85
85
+
op plc.PLCOperation
86
86
+
}
87
87
+
88
88
+
jobs := make(chan job, len(b.Operations))
89
89
+
resultsChan := make(chan *Result, len(b.Operations))
90
90
+
91
91
+
// Start workers
92
92
+
var wg sync.WaitGroup
93
93
+
for i := 0; i < r.config.Workers; i++ {
94
94
+
wg.Add(1)
95
95
+
go func() {
96
96
+
defer wg.Done()
97
97
+
for j := range jobs {
98
98
+
select {
99
99
+
case <-ctx.Done():
100
100
+
return
101
101
+
default:
102
102
+
}
103
103
+
104
104
+
result := r.detectOne(ctx, detector, b.BundleNumber, j.pos, j.op)
105
105
+
if result.Match != nil || result.Error != nil {
106
106
+
resultsChan <- result
107
107
+
}
108
108
+
}
109
109
+
}()
110
110
+
}
111
111
+
112
112
+
// Send jobs
113
113
+
for pos, op := range b.Operations {
114
114
+
jobs <- job{pos: pos, op: op}
115
115
+
}
116
116
+
close(jobs)
117
117
+
118
118
+
// Wait for completion
119
119
+
go func() {
120
120
+
wg.Wait()
121
121
+
close(resultsChan)
122
122
+
}()
123
123
+
124
124
+
// Collect results
125
125
+
results := make([]*Result, 0)
126
126
+
for result := range resultsChan {
127
127
+
results = append(results, result)
128
128
+
}
129
129
+
130
130
+
return results
131
131
+
}
132
132
+
133
133
+
func (r *Runner) detectOne(ctx context.Context, detector Detector, bundleNum, pos int, op plc.PLCOperation) *Result {
134
134
+
// Create timeout context
135
135
+
detectCtx, cancel := context.WithTimeout(ctx, r.config.Timeout)
136
136
+
defer cancel()
137
137
+
138
138
+
result := &Result{
139
139
+
BundleNumber: bundleNum,
140
140
+
Position: pos,
141
141
+
DID: op.DID,
142
142
+
CID: op.CID, // ← Add this
143
143
+
DetectorName: detector.Name(),
144
144
+
DetectedAt: time.Now(),
145
145
+
}
146
146
+
147
147
+
match, err := detector.Detect(detectCtx, op)
148
148
+
result.Match = match
149
149
+
result.Error = err
150
150
+
151
151
+
return result
152
152
+
}
153
153
+
154
154
+
// RunMultipleDetectors runs multiple detectors on a bundle
155
155
+
func (r *Runner) RunMultipleDetectors(ctx context.Context, detectorNames []string, b *bundle.Bundle) (map[string][]*Result, error) {
156
156
+
allResults := make(map[string][]*Result)
157
157
+
158
158
+
for _, name := range detectorNames {
159
159
+
results, err := r.RunOnBundle(ctx, name, b)
160
160
+
if err != nil {
161
161
+
return nil, fmt.Errorf("detector %s failed: %w", name, err)
162
162
+
}
163
163
+
allResults[name] = results
164
164
+
}
165
165
+
166
166
+
return allResults, nil
167
167
+
}
168
168
+
169
169
+
// Stats represents detection statistics
170
170
+
type Stats struct {
171
171
+
TotalOperations int
172
172
+
MatchedCount int
173
173
+
MatchRate float64
174
174
+
ByReason map[string]int
175
175
+
ByCategory map[string]int
176
176
+
ByConfidence map[string]int // 0.9-1.0, 0.8-0.9, etc.
177
177
+
}
178
178
+
179
179
+
// CalculateStats computes statistics from results
180
180
+
func CalculateStats(results []*Result, totalOps int) *Stats {
181
181
+
stats := &Stats{
182
182
+
TotalOperations: totalOps,
183
183
+
MatchedCount: len(results),
184
184
+
ByReason: make(map[string]int),
185
185
+
ByCategory: make(map[string]int),
186
186
+
ByConfidence: make(map[string]int),
187
187
+
}
188
188
+
189
189
+
if totalOps > 0 {
190
190
+
stats.MatchRate = float64(len(results)) / float64(totalOps)
191
191
+
}
192
192
+
193
193
+
for _, res := range results {
194
194
+
if res.Match == nil {
195
195
+
continue
196
196
+
}
197
197
+
198
198
+
stats.ByReason[res.Match.Reason]++
199
199
+
stats.ByCategory[res.Match.Category]++
200
200
+
201
201
+
// Confidence buckets
202
202
+
conf := res.Match.Confidence
203
203
+
switch {
204
204
+
case conf >= 0.95:
205
205
+
stats.ByConfidence["0.95-1.00"]++
206
206
+
case conf >= 0.90:
207
207
+
stats.ByConfidence["0.90-0.95"]++
208
208
+
case conf >= 0.85:
209
209
+
stats.ByConfidence["0.85-0.90"]++
210
210
+
default:
211
211
+
stats.ByConfidence["0.00-0.85"]++
212
212
+
}
213
213
+
}
214
214
+
215
215
+
return stats
216
216
+
}