[DEPRECATED] Go implementation of plcbundle
at rust-test 562 lines 17 kB view raw
1// detector/builtin.go 2package detector 3 4import ( 5 "context" 6 "regexp" 7 "strings" 8 9 "tangled.org/atscan.net/plcbundle/internal/plcclient" 10) 11 12// NoOpDetector is an empty detector for speed testing 13type NoOpDetector struct{} 14 15func NewNoOpDetector() *NoOpDetector { 16 return &NoOpDetector{} 17} 18 19func (d *NoOpDetector) Name() string { return "noop" } 20func (d *NoOpDetector) Description() string { 21 return "Empty detector for benchmarking (always returns no match)" 22} 23func (d *NoOpDetector) Version() string { return "1.0.0" } 24 25func (d *NoOpDetector) Detect(ctx context.Context, op plcclient.PLCOperation) (*Match, error) { 26 // Instant return - no work done 27 return nil, nil 28} 29 30// InvalidHandleDetector detects operations with invalid handle patterns 31type InvalidHandleDetector struct { 32 // Valid handle regex based on AT Protocol handle specification 33 validHandlePattern *regexp.Regexp 34} 35 36func NewInvalidHandleDetector() *InvalidHandleDetector { 37 return &InvalidHandleDetector{ 38 // Valid handle pattern: domain segments + TLD 39 // Each segment: alphanumeric start/end, hyphens allowed in middle, max 63 chars per segment 40 // TLD must start with letter 41 validHandlePattern: regexp.MustCompile(`^([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$`), 42 } 43} 44 45func (d *InvalidHandleDetector) Name() string { return "invalid_handle" } 46func (d *InvalidHandleDetector) Description() string { 47 return "Detects operations with invalid handle patterns (underscores, invalid chars, malformed)" 48} 49func (d *InvalidHandleDetector) Version() string { return "1.0.0" } 50 51func (d *InvalidHandleDetector) Detect(ctx context.Context, op plcclient.PLCOperation) (*Match, error) { 52 // Parse Operation field on-demand 53 operation, err := op.GetOperationMap() 54 if err != nil { 55 return nil, err 56 } 57 if operation == nil { 58 return nil, nil 59 } 60 61 if alsoKnownAs, ok := operation["alsoKnownAs"].([]interface{}); ok { 62 63 for _, aka := range alsoKnownAs { 64 if str, ok := aka.(string); ok { 65 // Check if it's an at:// handle 66 if !strings.HasPrefix(str, "at://") { 67 continue 68 } 69 70 // Extract handle (remove at:// prefix) 71 handle := strings.TrimPrefix(str, "at://") 72 73 // Remove any path component (e.g., at://user.bsky.social/profile -> user.bsky.social) 74 if idx := strings.Index(handle, "/"); idx > 0 { 75 handle = handle[:idx] 76 } 77 78 // Check for underscore (invalid in Bluesky handles) 79 if strings.Contains(handle, "_") { 80 return &Match{ 81 Reason: "underscore_in_handle", 82 Category: "invalid_handle", 83 Confidence: 0.99, 84 Note: "Handle contains underscore which is invalid in AT Protocol", 85 Metadata: map[string]interface{}{ 86 "invalid_handle": str, 87 "extracted": handle, 88 "violation": "underscore_character", 89 }, 90 }, nil 91 } 92 93 // Check for other invalid characters (anything not alphanumeric, hyphen, or dot) 94 invalidChars := regexp.MustCompile(`[^a-zA-Z0-9.-]`) 95 if invalidChars.MatchString(handle) { 96 return &Match{ 97 Reason: "invalid_characters", 98 Category: "invalid_handle", 99 Confidence: 0.99, 100 Note: "Handle contains invalid characters", 101 Metadata: map[string]interface{}{ 102 "invalid_handle": str, 103 "extracted": handle, 104 "violation": "invalid_characters", 105 }, 106 }, nil 107 } 108 109 // Check if handle matches valid AT Protocol pattern 110 if !d.validHandlePattern.MatchString(handle) { 111 return &Match{ 112 Reason: "invalid_handle_pattern", 113 Category: "invalid_handle", 114 Confidence: 0.95, 115 Note: "Handle does not match valid AT Protocol handle pattern", 116 Metadata: map[string]interface{}{ 117 "invalid_handle": str, 118 "extracted": handle, 119 "violation": "pattern_mismatch", 120 }, 121 }, nil 122 } 123 124 // Additional checks: handle length 125 if len(handle) > 253 { // DNS maximum 126 return &Match{ 127 Reason: "handle_too_long", 128 Category: "invalid_handle", 129 Confidence: 0.98, 130 Note: "Handle exceeds maximum length (253 characters)", 131 Metadata: map[string]interface{}{ 132 "invalid_handle": str, 133 "extracted": handle, 134 "length": len(handle), 135 "violation": "exceeds_max_length", 136 }, 137 }, nil 138 } 139 140 // Check segment lengths (each part between dots should be max 63 chars) 141 segments := strings.Split(handle, ".") 142 for i, segment := range segments { 143 if len(segment) == 0 { 144 return &Match{ 145 Reason: "empty_segment", 146 Category: "invalid_handle", 147 Confidence: 0.99, 148 Note: "Handle contains empty segment (consecutive dots)", 149 Metadata: map[string]interface{}{ 150 "invalid_handle": str, 151 "extracted": handle, 152 "violation": "empty_segment", 153 }, 154 }, nil 155 } 156 if len(segment) > 63 { 157 return &Match{ 158 Reason: "segment_too_long", 159 Category: "invalid_handle", 160 Confidence: 0.98, 161 Note: "Handle segment exceeds maximum length (63 characters)", 162 Metadata: map[string]interface{}{ 163 "invalid_handle": str, 164 "extracted": handle, 165 "segment": i, 166 "segment_value": segment, 167 "length": len(segment), 168 "violation": "segment_exceeds_max_length", 169 }, 170 }, nil 171 } 172 } 173 174 // Check minimum segments (at least 2: subdomain.tld) 175 if len(segments) < 2 { 176 return &Match{ 177 Reason: "insufficient_segments", 178 Category: "invalid_handle", 179 Confidence: 0.99, 180 Note: "Handle must have at least 2 segments (subdomain.tld)", 181 Metadata: map[string]interface{}{ 182 "invalid_handle": str, 183 "extracted": handle, 184 "segments": len(segments), 185 "violation": "insufficient_segments", 186 }, 187 }, nil 188 } 189 } 190 } 191 } 192 193 return nil, nil 194} 195 196// AlsoKnownAsSpamDetector detects excessive/garbage alsoKnownAs entries 197type AlsoKnownAsSpamDetector struct { 198 maxLegitimateEntries int 199 minGarbageLength int 200} 201 202func NewAlsoKnownAsSpamDetector() *AlsoKnownAsSpamDetector { 203 return &AlsoKnownAsSpamDetector{ 204 maxLegitimateEntries: 3, // Normal operations have 1-3 entries 205 minGarbageLength: 100, // Garbage strings are very long 206 } 207} 208 209func (d *AlsoKnownAsSpamDetector) Name() string { return "aka_spam" } 210func (d *AlsoKnownAsSpamDetector) Description() string { 211 return "Detects spam through excessive or garbage alsoKnownAs entries" 212} 213func (d *AlsoKnownAsSpamDetector) Version() string { return "1.0.0" } 214 215func (d *AlsoKnownAsSpamDetector) Detect(ctx context.Context, op plcclient.PLCOperation) (*Match, error) { 216 // Parse Operation field on-demand 217 operation, err := op.GetOperationMap() 218 if err != nil { 219 return nil, err 220 } 221 if operation == nil { 222 return nil, nil 223 } 224 if alsoKnownAs, ok := operation["alsoKnownAs"].([]interface{}); ok { 225 entryCount := len(alsoKnownAs) 226 227 // Count different types of entries 228 atURICount := 0 229 garbageCount := 0 230 var garbageExamples []string 231 232 for _, aka := range alsoKnownAs { 233 if str, ok := aka.(string); ok { 234 if strings.HasPrefix(str, "at://") { 235 atURICount++ 236 } else if len(str) > d.minGarbageLength { 237 garbageCount++ 238 if len(garbageExamples) < 2 { 239 // Store first few for evidence 240 preview := str 241 if len(preview) > 50 { 242 preview = preview[:50] + "..." 243 } 244 garbageExamples = append(garbageExamples, preview) 245 } 246 } 247 } 248 } 249 250 // Detection: Excessive entries 251 if entryCount > d.maxLegitimateEntries { 252 confidence := 0.80 253 if garbageCount > 0 { 254 confidence = 0.95 // Higher confidence if garbage detected 255 } 256 257 return &Match{ 258 Reason: "excessive_aka_entries", 259 Category: "spam", 260 Confidence: confidence, 261 Note: "Operation has excessive alsoKnownAs entries", 262 Metadata: map[string]interface{}{ 263 "total_entries": entryCount, 264 "at_uri_count": atURICount, 265 "garbage_count": garbageCount, 266 "garbage_examples": garbageExamples, 267 }, 268 }, nil 269 } 270 271 // Detection: Garbage entries present (even if count is low) 272 if garbageCount > 0 { 273 return &Match{ 274 Reason: "garbage_aka_entries", 275 Category: "spam", 276 Confidence: 0.98, 277 Note: "Operation contains garbage/random strings in alsoKnownAs", 278 Metadata: map[string]interface{}{ 279 "total_entries": entryCount, 280 "garbage_count": garbageCount, 281 "garbage_examples": garbageExamples, 282 }, 283 }, nil 284 } 285 } 286 287 return nil, nil 288} 289 290// SpamPDSDetector detects known spam PDS endpoints 291type SpamPDSDetector struct { 292 spamEndpoints map[string]bool 293 spamDomains map[string]bool 294} 295 296func NewSpamPDSDetector() *SpamPDSDetector { 297 return &SpamPDSDetector{ 298 spamEndpoints: map[string]bool{ 299 "pds.trump.com": true, 300 // Add more as discovered 301 }, 302 spamDomains: map[string]bool{ 303 "trump.com": true, 304 "donald.trump.com": true, 305 // Add more as discovered 306 }, 307 } 308} 309 310func (d *SpamPDSDetector) Name() string { return "spam_pds" } 311func (d *SpamPDSDetector) Description() string { 312 return "Detects operations using known spam PDS endpoints and fake domain claims" 313} 314func (d *SpamPDSDetector) Version() string { return "1.0.0" } 315 316func (d *SpamPDSDetector) Detect(ctx context.Context, op plcclient.PLCOperation) (*Match, error) { 317 // Parse Operation field on-demand 318 operation, err := op.GetOperationMap() 319 if err != nil { 320 return nil, err 321 } 322 if operation == nil { 323 return nil, nil 324 } 325 // Check PDS endpoint 326 if services, ok := operation["services"].(map[string]interface{}); ok { 327 if pds, ok := services["atproto_pds"].(map[string]interface{}); ok { 328 if endpoint, ok := pds["endpoint"].(string); ok { 329 host := extractHost(endpoint) 330 331 // Check if it's a known spam PDS 332 if d.spamEndpoints[host] { 333 return &Match{ 334 Reason: "spam_pds_endpoint", 335 Category: "spam", 336 Confidence: 0.99, 337 Note: "Operation uses known spam PDS endpoint", 338 Metadata: map[string]interface{}{ 339 "endpoint": endpoint, 340 "host": host, 341 }, 342 }, nil 343 } 344 } 345 } 346 } 347 348 // Check for spam domain claims in alsoKnownAs 349 if alsoKnownAs, ok := operation["alsoKnownAs"].([]interface{}); ok { 350 for _, aka := range alsoKnownAs { 351 if str, ok := aka.(string); ok { 352 if !strings.HasPrefix(str, "at://") { 353 continue 354 } 355 356 // Extract domain from at:// URI 357 domain := strings.TrimPrefix(str, "at://") 358 if idx := strings.Index(domain, "/"); idx > 0 { 359 domain = domain[:idx] 360 } 361 362 // Check if claiming spam domain 363 if d.spamDomains[domain] { 364 return &Match{ 365 Reason: "fake_domain_claim", 366 Category: "impersonation", 367 Confidence: 0.99, 368 Note: "Operation claims known spam/fake domain", 369 Metadata: map[string]interface{}{ 370 "claimed_domain": domain, 371 "handle": str, 372 }, 373 }, nil 374 } 375 376 // Check for subdomain patterns (like jr.donald.trump.com) 377 for spamDomain := range d.spamDomains { 378 if strings.HasSuffix(domain, "."+spamDomain) || domain == spamDomain { 379 return &Match{ 380 Reason: "fake_domain_claim", 381 Category: "impersonation", 382 Confidence: 0.99, 383 Note: "Operation claims domain related to known spam domain", 384 Metadata: map[string]interface{}{ 385 "claimed_domain": domain, 386 "spam_domain": spamDomain, 387 }, 388 }, nil 389 } 390 } 391 } 392 } 393 } 394 395 return nil, nil 396} 397 398// ServiceAbuseDetector detects operations with abused service structures 399type ServiceAbuseDetector struct { 400 maxServiceTypeLength int 401 maxEndpointLength int 402 maxHandleLength int 403} 404 405func NewServiceAbuseDetector() *ServiceAbuseDetector { 406 return &ServiceAbuseDetector{ 407 maxServiceTypeLength: 100, // Normal types are short (e.g., "AtprotoPersonalDataServer") 408 maxEndpointLength: 200, // Normal endpoints are reasonable URLs 409 maxHandleLength: 100, // Normal handles are short 410 } 411} 412 413func (d *ServiceAbuseDetector) Name() string { return "service_abuse" } 414func (d *ServiceAbuseDetector) Description() string { 415 return "Detects operations with abused service structures (random strings, numeric keys)" 416} 417func (d *ServiceAbuseDetector) Version() string { return "1.0.0" } 418 419func (d *ServiceAbuseDetector) Detect(ctx context.Context, op plcclient.PLCOperation) (*Match, error) { 420 // Parse Operation field on-demand 421 operation, err := op.GetOperationMap() 422 if err != nil { 423 return nil, err 424 } 425 if operation == nil { 426 return nil, nil 427 } 428 if services, ok := operation["services"].(map[string]interface{}); ok { 429 // Check for numeric service keys (spam uses "0", "1", "2" instead of proper names) 430 hasNumericKeys := false 431 numericKeyCount := 0 432 433 for key := range services { 434 // Check if key is a digit 435 if len(key) == 1 && key >= "0" && key <= "9" { 436 hasNumericKeys = true 437 numericKeyCount++ 438 } 439 } 440 441 if hasNumericKeys && numericKeyCount > 1 { 442 return &Match{ 443 Reason: "numeric_service_keys", 444 Category: "service_abuse", 445 Confidence: 0.98, 446 Note: "Services use numeric keys instead of proper names", 447 Metadata: map[string]interface{}{ 448 "numeric_key_count": numericKeyCount, 449 }, 450 }, nil 451 } 452 453 // Check each service for abuse patterns 454 for serviceName, serviceData := range services { 455 if serviceMap, ok := serviceData.(map[string]interface{}); ok { 456 // Check service type length 457 if serviceType, ok := serviceMap["type"].(string); ok { 458 if len(serviceType) > d.maxServiceTypeLength { 459 return &Match{ 460 Reason: "excessive_service_type_length", 461 Category: "service_abuse", 462 Confidence: 0.99, 463 Note: "Service type field contains excessively long random string", 464 Metadata: map[string]interface{}{ 465 "service_name": serviceName, 466 "type_length": len(serviceType), 467 "type_preview": serviceType[:50] + "...", 468 }, 469 }, nil 470 } 471 } 472 473 // Check endpoint length 474 if endpoint, ok := serviceMap["endpoint"].(string); ok { 475 if len(endpoint) > d.maxEndpointLength { 476 return &Match{ 477 Reason: "excessive_endpoint_length", 478 Category: "service_abuse", 479 Confidence: 0.99, 480 Note: "Service endpoint contains excessively long random string", 481 Metadata: map[string]interface{}{ 482 "service_name": serviceName, 483 "endpoint_length": len(endpoint), 484 "endpoint_preview": endpoint[:min(100, len(endpoint))] + "...", 485 }, 486 }, nil 487 } 488 } 489 } 490 } 491 } 492 493 // Check for excessively long handles in alsoKnownAs 494 if alsoKnownAs, ok := operation["alsoKnownAs"].([]interface{}); ok { 495 for _, aka := range alsoKnownAs { 496 if str, ok := aka.(string); ok { 497 if strings.HasPrefix(str, "at://") { 498 handle := strings.TrimPrefix(str, "at://") 499 if len(handle) > d.maxHandleLength { 500 return &Match{ 501 Reason: "excessive_handle_length", 502 Category: "service_abuse", 503 Confidence: 0.98, 504 Note: "Handle contains excessively long random string", 505 Metadata: map[string]interface{}{ 506 "handle_length": len(handle), 507 "handle_preview": handle[:min(50, len(handle))] + "...", 508 }, 509 }, nil 510 } 511 } 512 } 513 } 514 } 515 516 // Check for empty verificationMethods (common in this spam) 517 if vm, ok := operation["verificationMethods"].(map[string]interface{}); ok { 518 if len(vm) == 0 { 519 // Empty verificationMethods alone isn't enough, but combined with other signals... 520 // Check if there are other suspicious signals 521 if services, ok := operation["services"].(map[string]interface{}); ok { 522 if len(services) > 2 { 523 // Multiple services + empty verificationMethods = suspicious 524 return &Match{ 525 Reason: "empty_verification_methods", 526 Category: "service_abuse", 527 Confidence: 0.85, 528 Note: "Empty verificationMethods with multiple services", 529 Metadata: map[string]interface{}{ 530 "service_count": len(services), 531 }, 532 }, nil 533 } 534 } 535 } 536 } 537 538 return nil, nil 539} 540 541// Helper function for min 542func min(a, b int) int { 543 if a < b { 544 return a 545 } 546 return b 547} 548 549// Helper functions 550 551func extractHost(endpoint string) string { 552 // Extract host from URL 553 endpoint = strings.TrimPrefix(endpoint, "http://") 554 endpoint = strings.TrimPrefix(endpoint, "https://") 555 if idx := strings.Index(endpoint, "/"); idx > 0 { 556 endpoint = endpoint[:idx] 557 } 558 if idx := strings.Index(endpoint, ":"); idx > 0 { 559 endpoint = endpoint[:idx] 560 } 561 return endpoint 562}