// [DEPRECATED] Go implementation of plcbundle
1// detector/builtin.go
2package detector
3
4import (
5 "context"
6 "regexp"
7 "strings"
8
9 "tangled.org/atscan.net/plcbundle/internal/plcclient"
10)
11
// NoOpDetector is a stateless detector that performs no inspection at all.
// It is intended for speed testing / benchmarking.
type NoOpDetector struct{}

// NewNoOpDetector returns a ready-to-use NoOpDetector.
func NewNoOpDetector() *NoOpDetector {
	return new(NoOpDetector)
}

// Name returns the detector's identifier, "noop".
func (d *NoOpDetector) Name() string {
	return "noop"
}

// Description returns a one-line human-readable summary of the detector.
func (d *NoOpDetector) Description() string {
	const summary = "Empty detector for benchmarking (always returns no match)"
	return summary
}

// Version returns the detector's version string.
func (d *NoOpDetector) Version() string {
	return "1.0.0"
}
24
25func (d *NoOpDetector) Detect(ctx context.Context, op plcclient.PLCOperation) (*Match, error) {
26 // Instant return - no work done
27 return nil, nil
28}
29
// InvalidHandleDetector flags operations whose handles are syntactically
// invalid.
type InvalidHandleDetector struct {
	// validHandlePattern is the regular expression a well-formed handle must
	// match in full (based on the AT Protocol handle specification).
	validHandlePattern *regexp.Regexp
}

// NewInvalidHandleDetector builds an InvalidHandleDetector with its handle
// pattern compiled.
//
// The pattern accepts one or more dot-terminated segments followed by a TLD:
//   - every segment starts and ends with an alphanumeric character, may
//     contain hyphens in between, and is at most 63 characters long;
//   - the TLD additionally has to start with a letter.
func NewInvalidHandleDetector() *InvalidHandleDetector {
	pattern := regexp.MustCompile(`^([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$`)
	return &InvalidHandleDetector{validHandlePattern: pattern}
}

// Name returns the detector's identifier, "invalid_handle".
func (d *InvalidHandleDetector) Name() string {
	return "invalid_handle"
}

// Description returns a one-line human-readable summary of the detector.
func (d *InvalidHandleDetector) Description() string {
	const summary = "Detects operations with invalid handle patterns (underscores, invalid chars, malformed)"
	return summary
}

// Version returns the detector's version string.
func (d *InvalidHandleDetector) Version() string {
	return "1.0.0"
}
50
51func (d *InvalidHandleDetector) Detect(ctx context.Context, op plcclient.PLCOperation) (*Match, error) {
52 // Parse Operation field on-demand
53 operation, err := op.GetOperationMap()
54 if err != nil {
55 return nil, err
56 }
57 if operation == nil {
58 return nil, nil
59 }
60
61 if alsoKnownAs, ok := operation["alsoKnownAs"].([]interface{}); ok {
62
63 for _, aka := range alsoKnownAs {
64 if str, ok := aka.(string); ok {
65 // Check if it's an at:// handle
66 if !strings.HasPrefix(str, "at://") {
67 continue
68 }
69
70 // Extract handle (remove at:// prefix)
71 handle := strings.TrimPrefix(str, "at://")
72
73 // Remove any path component (e.g., at://user.bsky.social/profile -> user.bsky.social)
74 if idx := strings.Index(handle, "/"); idx > 0 {
75 handle = handle[:idx]
76 }
77
78 // Check for underscore (invalid in Bluesky handles)
79 if strings.Contains(handle, "_") {
80 return &Match{
81 Reason: "underscore_in_handle",
82 Category: "invalid_handle",
83 Confidence: 0.99,
84 Note: "Handle contains underscore which is invalid in AT Protocol",
85 Metadata: map[string]interface{}{
86 "invalid_handle": str,
87 "extracted": handle,
88 "violation": "underscore_character",
89 },
90 }, nil
91 }
92
93 // Check for other invalid characters (anything not alphanumeric, hyphen, or dot)
94 invalidChars := regexp.MustCompile(`[^a-zA-Z0-9.-]`)
95 if invalidChars.MatchString(handle) {
96 return &Match{
97 Reason: "invalid_characters",
98 Category: "invalid_handle",
99 Confidence: 0.99,
100 Note: "Handle contains invalid characters",
101 Metadata: map[string]interface{}{
102 "invalid_handle": str,
103 "extracted": handle,
104 "violation": "invalid_characters",
105 },
106 }, nil
107 }
108
109 // Check if handle matches valid AT Protocol pattern
110 if !d.validHandlePattern.MatchString(handle) {
111 return &Match{
112 Reason: "invalid_handle_pattern",
113 Category: "invalid_handle",
114 Confidence: 0.95,
115 Note: "Handle does not match valid AT Protocol handle pattern",
116 Metadata: map[string]interface{}{
117 "invalid_handle": str,
118 "extracted": handle,
119 "violation": "pattern_mismatch",
120 },
121 }, nil
122 }
123
124 // Additional checks: handle length
125 if len(handle) > 253 { // DNS maximum
126 return &Match{
127 Reason: "handle_too_long",
128 Category: "invalid_handle",
129 Confidence: 0.98,
130 Note: "Handle exceeds maximum length (253 characters)",
131 Metadata: map[string]interface{}{
132 "invalid_handle": str,
133 "extracted": handle,
134 "length": len(handle),
135 "violation": "exceeds_max_length",
136 },
137 }, nil
138 }
139
140 // Check segment lengths (each part between dots should be max 63 chars)
141 segments := strings.Split(handle, ".")
142 for i, segment := range segments {
143 if len(segment) == 0 {
144 return &Match{
145 Reason: "empty_segment",
146 Category: "invalid_handle",
147 Confidence: 0.99,
148 Note: "Handle contains empty segment (consecutive dots)",
149 Metadata: map[string]interface{}{
150 "invalid_handle": str,
151 "extracted": handle,
152 "violation": "empty_segment",
153 },
154 }, nil
155 }
156 if len(segment) > 63 {
157 return &Match{
158 Reason: "segment_too_long",
159 Category: "invalid_handle",
160 Confidence: 0.98,
161 Note: "Handle segment exceeds maximum length (63 characters)",
162 Metadata: map[string]interface{}{
163 "invalid_handle": str,
164 "extracted": handle,
165 "segment": i,
166 "segment_value": segment,
167 "length": len(segment),
168 "violation": "segment_exceeds_max_length",
169 },
170 }, nil
171 }
172 }
173
174 // Check minimum segments (at least 2: subdomain.tld)
175 if len(segments) < 2 {
176 return &Match{
177 Reason: "insufficient_segments",
178 Category: "invalid_handle",
179 Confidence: 0.99,
180 Note: "Handle must have at least 2 segments (subdomain.tld)",
181 Metadata: map[string]interface{}{
182 "invalid_handle": str,
183 "extracted": handle,
184 "segments": len(segments),
185 "violation": "insufficient_segments",
186 },
187 }, nil
188 }
189 }
190 }
191 }
192
193 return nil, nil
194}
195
// AlsoKnownAsSpamDetector flags operations whose alsoKnownAs list is
// excessively long or contains garbage strings.
type AlsoKnownAsSpamDetector struct {
	// maxLegitimateEntries is the largest alsoKnownAs entry count that is
	// still considered normal.
	maxLegitimateEntries int
	// minGarbageLength is the length a non-at:// entry must exceed to be
	// counted as garbage.
	minGarbageLength int
}

// NewAlsoKnownAsSpamDetector returns a detector configured with the default
// thresholds.
func NewAlsoKnownAsSpamDetector() *AlsoKnownAsSpamDetector {
	d := new(AlsoKnownAsSpamDetector)
	d.maxLegitimateEntries = 3 // Normal operations have 1-3 entries
	d.minGarbageLength = 100   // Garbage strings are very long
	return d
}

// Name returns the detector's identifier, "aka_spam".
func (d *AlsoKnownAsSpamDetector) Name() string {
	return "aka_spam"
}

// Description returns a one-line human-readable summary of the detector.
func (d *AlsoKnownAsSpamDetector) Description() string {
	const summary = "Detects spam through excessive or garbage alsoKnownAs entries"
	return summary
}

// Version returns the detector's version string.
func (d *AlsoKnownAsSpamDetector) Version() string {
	return "1.0.0"
}
214
215func (d *AlsoKnownAsSpamDetector) Detect(ctx context.Context, op plcclient.PLCOperation) (*Match, error) {
216 // Parse Operation field on-demand
217 operation, err := op.GetOperationMap()
218 if err != nil {
219 return nil, err
220 }
221 if operation == nil {
222 return nil, nil
223 }
224 if alsoKnownAs, ok := operation["alsoKnownAs"].([]interface{}); ok {
225 entryCount := len(alsoKnownAs)
226
227 // Count different types of entries
228 atURICount := 0
229 garbageCount := 0
230 var garbageExamples []string
231
232 for _, aka := range alsoKnownAs {
233 if str, ok := aka.(string); ok {
234 if strings.HasPrefix(str, "at://") {
235 atURICount++
236 } else if len(str) > d.minGarbageLength {
237 garbageCount++
238 if len(garbageExamples) < 2 {
239 // Store first few for evidence
240 preview := str
241 if len(preview) > 50 {
242 preview = preview[:50] + "..."
243 }
244 garbageExamples = append(garbageExamples, preview)
245 }
246 }
247 }
248 }
249
250 // Detection: Excessive entries
251 if entryCount > d.maxLegitimateEntries {
252 confidence := 0.80
253 if garbageCount > 0 {
254 confidence = 0.95 // Higher confidence if garbage detected
255 }
256
257 return &Match{
258 Reason: "excessive_aka_entries",
259 Category: "spam",
260 Confidence: confidence,
261 Note: "Operation has excessive alsoKnownAs entries",
262 Metadata: map[string]interface{}{
263 "total_entries": entryCount,
264 "at_uri_count": atURICount,
265 "garbage_count": garbageCount,
266 "garbage_examples": garbageExamples,
267 },
268 }, nil
269 }
270
271 // Detection: Garbage entries present (even if count is low)
272 if garbageCount > 0 {
273 return &Match{
274 Reason: "garbage_aka_entries",
275 Category: "spam",
276 Confidence: 0.98,
277 Note: "Operation contains garbage/random strings in alsoKnownAs",
278 Metadata: map[string]interface{}{
279 "total_entries": entryCount,
280 "garbage_count": garbageCount,
281 "garbage_examples": garbageExamples,
282 },
283 }, nil
284 }
285 }
286
287 return nil, nil
288}
289
// SpamPDSDetector flags operations that use a known spam PDS endpoint or
// claim a known spam domain.
type SpamPDSDetector struct {
	// spamEndpoints holds PDS host names treated as spam.
	spamEndpoints map[string]bool
	// spamDomains holds domains whose use as a handle is treated as a fake
	// claim.
	spamDomains map[string]bool
}

// NewSpamPDSDetector returns a detector pre-loaded with the built-in lists of
// spam PDS hosts and spam domains.
func NewSpamPDSDetector() *SpamPDSDetector {
	endpoints := map[string]bool{
		"pds.trump.com": true,
		// Add more as discovered
	}
	domains := map[string]bool{
		"trump.com":        true,
		"donald.trump.com": true,
		// Add more as discovered
	}

	d := new(SpamPDSDetector)
	d.spamEndpoints = endpoints
	d.spamDomains = domains
	return d
}

// Name returns the detector's identifier, "spam_pds".
func (d *SpamPDSDetector) Name() string {
	return "spam_pds"
}

// Description returns a one-line human-readable summary of the detector.
func (d *SpamPDSDetector) Description() string {
	const summary = "Detects operations using known spam PDS endpoints and fake domain claims"
	return summary
}

// Version returns the detector's version string.
func (d *SpamPDSDetector) Version() string {
	return "1.0.0"
}
315
316func (d *SpamPDSDetector) Detect(ctx context.Context, op plcclient.PLCOperation) (*Match, error) {
317 // Parse Operation field on-demand
318 operation, err := op.GetOperationMap()
319 if err != nil {
320 return nil, err
321 }
322 if operation == nil {
323 return nil, nil
324 }
325 // Check PDS endpoint
326 if services, ok := operation["services"].(map[string]interface{}); ok {
327 if pds, ok := services["atproto_pds"].(map[string]interface{}); ok {
328 if endpoint, ok := pds["endpoint"].(string); ok {
329 host := extractHost(endpoint)
330
331 // Check if it's a known spam PDS
332 if d.spamEndpoints[host] {
333 return &Match{
334 Reason: "spam_pds_endpoint",
335 Category: "spam",
336 Confidence: 0.99,
337 Note: "Operation uses known spam PDS endpoint",
338 Metadata: map[string]interface{}{
339 "endpoint": endpoint,
340 "host": host,
341 },
342 }, nil
343 }
344 }
345 }
346 }
347
348 // Check for spam domain claims in alsoKnownAs
349 if alsoKnownAs, ok := operation["alsoKnownAs"].([]interface{}); ok {
350 for _, aka := range alsoKnownAs {
351 if str, ok := aka.(string); ok {
352 if !strings.HasPrefix(str, "at://") {
353 continue
354 }
355
356 // Extract domain from at:// URI
357 domain := strings.TrimPrefix(str, "at://")
358 if idx := strings.Index(domain, "/"); idx > 0 {
359 domain = domain[:idx]
360 }
361
362 // Check if claiming spam domain
363 if d.spamDomains[domain] {
364 return &Match{
365 Reason: "fake_domain_claim",
366 Category: "impersonation",
367 Confidence: 0.99,
368 Note: "Operation claims known spam/fake domain",
369 Metadata: map[string]interface{}{
370 "claimed_domain": domain,
371 "handle": str,
372 },
373 }, nil
374 }
375
376 // Check for subdomain patterns (like jr.donald.trump.com)
377 for spamDomain := range d.spamDomains {
378 if strings.HasSuffix(domain, "."+spamDomain) || domain == spamDomain {
379 return &Match{
380 Reason: "fake_domain_claim",
381 Category: "impersonation",
382 Confidence: 0.99,
383 Note: "Operation claims domain related to known spam domain",
384 Metadata: map[string]interface{}{
385 "claimed_domain": domain,
386 "spam_domain": spamDomain,
387 },
388 }, nil
389 }
390 }
391 }
392 }
393 }
394
395 return nil, nil
396}
397
// ServiceAbuseDetector flags operations whose service entries or handles
// carry abusive content such as excessively long strings or numeric keys.
type ServiceAbuseDetector struct {
	// maxServiceTypeLength is the longest service "type" value tolerated.
	maxServiceTypeLength int
	// maxEndpointLength is the longest service "endpoint" value tolerated.
	maxEndpointLength int
	// maxHandleLength is the longest at:// handle tolerated.
	maxHandleLength int
}

// NewServiceAbuseDetector returns a detector configured with the default
// length limits.
func NewServiceAbuseDetector() *ServiceAbuseDetector {
	d := new(ServiceAbuseDetector)
	d.maxServiceTypeLength = 100 // Normal types are short (e.g., "AtprotoPersonalDataServer")
	d.maxEndpointLength = 200    // Normal endpoints are reasonable URLs
	d.maxHandleLength = 100      // Normal handles are short
	return d
}

// Name returns the detector's identifier, "service_abuse".
func (d *ServiceAbuseDetector) Name() string {
	return "service_abuse"
}

// Description returns a one-line human-readable summary of the detector.
func (d *ServiceAbuseDetector) Description() string {
	const summary = "Detects operations with abused service structures (random strings, numeric keys)"
	return summary
}

// Version returns the detector's version string.
func (d *ServiceAbuseDetector) Version() string {
	return "1.0.0"
}
418
419func (d *ServiceAbuseDetector) Detect(ctx context.Context, op plcclient.PLCOperation) (*Match, error) {
420 // Parse Operation field on-demand
421 operation, err := op.GetOperationMap()
422 if err != nil {
423 return nil, err
424 }
425 if operation == nil {
426 return nil, nil
427 }
428 if services, ok := operation["services"].(map[string]interface{}); ok {
429 // Check for numeric service keys (spam uses "0", "1", "2" instead of proper names)
430 hasNumericKeys := false
431 numericKeyCount := 0
432
433 for key := range services {
434 // Check if key is a digit
435 if len(key) == 1 && key >= "0" && key <= "9" {
436 hasNumericKeys = true
437 numericKeyCount++
438 }
439 }
440
441 if hasNumericKeys && numericKeyCount > 1 {
442 return &Match{
443 Reason: "numeric_service_keys",
444 Category: "service_abuse",
445 Confidence: 0.98,
446 Note: "Services use numeric keys instead of proper names",
447 Metadata: map[string]interface{}{
448 "numeric_key_count": numericKeyCount,
449 },
450 }, nil
451 }
452
453 // Check each service for abuse patterns
454 for serviceName, serviceData := range services {
455 if serviceMap, ok := serviceData.(map[string]interface{}); ok {
456 // Check service type length
457 if serviceType, ok := serviceMap["type"].(string); ok {
458 if len(serviceType) > d.maxServiceTypeLength {
459 return &Match{
460 Reason: "excessive_service_type_length",
461 Category: "service_abuse",
462 Confidence: 0.99,
463 Note: "Service type field contains excessively long random string",
464 Metadata: map[string]interface{}{
465 "service_name": serviceName,
466 "type_length": len(serviceType),
467 "type_preview": serviceType[:50] + "...",
468 },
469 }, nil
470 }
471 }
472
473 // Check endpoint length
474 if endpoint, ok := serviceMap["endpoint"].(string); ok {
475 if len(endpoint) > d.maxEndpointLength {
476 return &Match{
477 Reason: "excessive_endpoint_length",
478 Category: "service_abuse",
479 Confidence: 0.99,
480 Note: "Service endpoint contains excessively long random string",
481 Metadata: map[string]interface{}{
482 "service_name": serviceName,
483 "endpoint_length": len(endpoint),
484 "endpoint_preview": endpoint[:min(100, len(endpoint))] + "...",
485 },
486 }, nil
487 }
488 }
489 }
490 }
491 }
492
493 // Check for excessively long handles in alsoKnownAs
494 if alsoKnownAs, ok := operation["alsoKnownAs"].([]interface{}); ok {
495 for _, aka := range alsoKnownAs {
496 if str, ok := aka.(string); ok {
497 if strings.HasPrefix(str, "at://") {
498 handle := strings.TrimPrefix(str, "at://")
499 if len(handle) > d.maxHandleLength {
500 return &Match{
501 Reason: "excessive_handle_length",
502 Category: "service_abuse",
503 Confidence: 0.98,
504 Note: "Handle contains excessively long random string",
505 Metadata: map[string]interface{}{
506 "handle_length": len(handle),
507 "handle_preview": handle[:min(50, len(handle))] + "...",
508 },
509 }, nil
510 }
511 }
512 }
513 }
514 }
515
516 // Check for empty verificationMethods (common in this spam)
517 if vm, ok := operation["verificationMethods"].(map[string]interface{}); ok {
518 if len(vm) == 0 {
519 // Empty verificationMethods alone isn't enough, but combined with other signals...
520 // Check if there are other suspicious signals
521 if services, ok := operation["services"].(map[string]interface{}); ok {
522 if len(services) > 2 {
523 // Multiple services + empty verificationMethods = suspicious
524 return &Match{
525 Reason: "empty_verification_methods",
526 Category: "service_abuse",
527 Confidence: 0.85,
528 Note: "Empty verificationMethods with multiple services",
529 Metadata: map[string]interface{}{
530 "service_count": len(services),
531 },
532 }, nil
533 }
534 }
535 }
536 }
537
538 return nil, nil
539}
540
// min returns the smaller of the two integers a and b (either one when they
// are equal). On Go 1.21+ this package-level definition takes precedence over
// the built-in min inside this package.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
548
549// Helper functions
550
// extractHost returns the host part of an endpoint URL, without scheme,
// userinfo, port, path, query or fragment. The case of the host is preserved.
//
// Examples:
//
//	"https://pds.example.com/xrpc"   -> "pds.example.com"
//	"HTTP://pds.example.com:8080"    -> "pds.example.com"
//	"https://pds.example.com?x=1"    -> "pds.example.com"
//	"https://user@pds.example.com/"  -> "pds.example.com"
//	"https://[::1]:443/"             -> "::1"
//	"pds.example.com"                -> "pds.example.com"
//
// The input is not validated; a malformed endpoint yields a best-effort
// result (possibly the empty string).
func extractHost(endpoint string) string {
	// Drop a leading "<scheme>://" of any scheme and case. A "://" that
	// appears after a path, query or fragment delimiter belongs to that
	// component (e.g. "host/r?to=http://x") and must not be treated as
	// the scheme separator.
	if i := strings.Index(endpoint, "://"); i >= 0 && !strings.ContainsAny(endpoint[:i], "/?#") {
		endpoint = endpoint[i+3:]
	}

	// The authority ends at the first path, query or fragment delimiter.
	if i := strings.IndexAny(endpoint, "/?#"); i >= 0 {
		endpoint = endpoint[:i]
	}

	// Drop userinfo ("user:password@").
	if i := strings.LastIndex(endpoint, "@"); i >= 0 {
		endpoint = endpoint[i+1:]
	}

	// Bracketed IPv6 literal: the host is what is inside the brackets; the
	// colons in it are not port separators.
	if strings.HasPrefix(endpoint, "[") {
		if i := strings.Index(endpoint, "]"); i > 0 {
			return endpoint[1:i]
		}
	}

	// Drop the port.
	if i := strings.Index(endpoint, ":"); i >= 0 {
		endpoint = endpoint[:i]
	}
	return endpoint
}