Weighs the soul of incoming HTTP requests to stop AI crawlers

fix: make ogtags and dnsbl use the Store instead of memory (#760)

Signed-off-by: Xe Iaso <me@xeiaso.net>

Authored by Xe Iaso and committed by GitHub
7d0c58d1 e870ede1

+134 -86
-15
cmd/anubis/main.go
··· 231 return rp, nil 232 } 233 234 - func startDecayMapCleanup(ctx context.Context, s *libanubis.Server) { 235 - ticker := time.NewTicker(1 * time.Hour) 236 - defer ticker.Stop() 237 - 238 - for { 239 - select { 240 - case <-ticker.C: 241 - s.CleanupDecayMap() 242 - case <-ctx.Done(): 243 - return 244 - } 245 - } 246 - } 247 - 248 func main() { 249 flagenv.Parse() 250 flag.Parse() ··· 421 wg.Add(1) 422 go metricsServer(ctx, wg.Done) 423 } 424 - go startDecayMapCleanup(ctx, s) 425 426 var h http.Handler 427 h = s
··· 231 return rp, nil 232 } 233 234 func main() { 235 flagenv.Parse() 236 flag.Parse() ··· 407 wg.Add(1) 408 go metricsServer(ctx, wg.Done) 409 } 410 411 var h http.Handler 412 h = s
+1
docs/docs/CHANGELOG.md
··· 24 - Remove the "Success" interstitial after a proof of work challenge is concluded. 25 - Anubis now has the concept of [storage backends](./admin/policies.mdx#storage-backends). These allow you to change how Anubis stores temporary data (in memory, on the disk, or in Valkey). If you run Anubis in an environment where you have a low amount of memory available for Anubis (eg: less than 64 megabytes), be sure to configure the [`bbolt`](./admin/policies.mdx#bbolt) storage backend. 26 - The challenge issuance and validation process has been rewritten from scratch. Instead of generating challenge strings from request metadata (under the assumption that the values being compared against are stable), Anubis now generates random data for each challenge. This data is stored in the active [storage backend](./admin/policies.mdx#storage-backends) for up to 30 minutes. Fixes [#564](https://github.com/TecharoHQ/anubis/issues/564), [#746](https://github.com/TecharoHQ/anubis/issues/746), and other similar instances of this issue. 27 - Add option for forcing a specific language ([#742](https://github.com/TecharoHQ/anubis/pull/742)) 28 - Add translation for Turkish language ([#751](https://github.com/TecharoHQ/anubis/pull/751)) 29 - Allow [Common Crawl](https://commoncrawl.org/) by default so scrapers have less incentive to scrape
··· 24 - Remove the "Success" interstitial after a proof of work challenge is concluded. 25 - Anubis now has the concept of [storage backends](./admin/policies.mdx#storage-backends). These allow you to change how Anubis stores temporary data (in memory, on the disk, or in Valkey). If you run Anubis in an environment where you have a low amount of memory available for Anubis (eg: less than 64 megabytes), be sure to configure the [`bbolt`](./admin/policies.mdx#bbolt) storage backend. 26 - The challenge issuance and validation process has been rewritten from scratch. Instead of generating challenge strings from request metadata (under the assumption that the values being compared against are stable), Anubis now generates random data for each challenge. This data is stored in the active [storage backend](./admin/policies.mdx#storage-backends) for up to 30 minutes. Fixes [#564](https://github.com/TecharoHQ/anubis/issues/564), [#746](https://github.com/TecharoHQ/anubis/issues/746), and other similar instances of this issue. 27 + - Make the [Open Graph](./admin/configuration/open-graph.mdx) subsystem and DNSBL subsystem use [storage backends](./admin/policies.mdx#storage-backends) instead of storing everything in memory by default. 28 - Add option for forcing a specific language ([#742](https://github.com/TecharoHQ/anubis/pull/742)) 29 - Add translation for Turkish language ([#751](https://github.com/TecharoHQ/anubis/pull/751)) 30 - Allow [Common Crawl](https://commoncrawl.org/) by default so scrapers have less incentive to scrape
+7 -6
internal/ogtags/cache.go
··· 1 package ogtags 2 3 import ( 4 "errors" 5 "log/slog" 6 "net/url" ··· 8 ) 9 10 // GetOGTags is the main function that retrieves Open Graph tags for a URL 11 - func (c *OGTagCache) GetOGTags(url *url.URL, originalHost string) (map[string]string, error) { 12 if url == nil { 13 return nil, errors.New("nil URL provided, cannot fetch OG tags") 14 } ··· 21 cacheKey := c.generateCacheKey(target, originalHost) 22 23 // Check cache first 24 - if cachedTags := c.checkCache(cacheKey); cachedTags != nil { 25 return cachedTags, nil 26 } 27 28 // Fetch HTML content, passing the original host 29 - doc, err := c.fetchHTMLDocumentWithCache(target, originalHost, cacheKey) 30 if errors.Is(err, syscall.ECONNREFUSED) { 31 slog.Debug("Connection refused, returning empty tags") 32 return nil, nil ··· 42 ogTags := c.extractOGTags(doc) 43 44 // Store in cache 45 - c.cache.Set(cacheKey, ogTags, c.ogTimeToLive) 46 47 return ogTags, nil 48 } ··· 59 } 60 61 // checkCache checks if we have the tags cached and returns them if so 62 - func (c *OGTagCache) checkCache(cacheKey string) map[string]string { 63 - if cachedTags, ok := c.cache.Get(cacheKey); ok { 64 slog.Debug("cache hit", "tags", cachedTags) 65 return cachedTags 66 }
··· 1 package ogtags 2 3 import ( 4 + "context" 5 "errors" 6 "log/slog" 7 "net/url" ··· 9 ) 10 11 // GetOGTags is the main function that retrieves Open Graph tags for a URL 12 + func (c *OGTagCache) GetOGTags(ctx context.Context, url *url.URL, originalHost string) (map[string]string, error) { 13 if url == nil { 14 return nil, errors.New("nil URL provided, cannot fetch OG tags") 15 } ··· 22 cacheKey := c.generateCacheKey(target, originalHost) 23 24 // Check cache first 25 + if cachedTags := c.checkCache(ctx, cacheKey); cachedTags != nil { 26 return cachedTags, nil 27 } 28 29 // Fetch HTML content, passing the original host 30 + doc, err := c.fetchHTMLDocumentWithCache(ctx, target, originalHost, cacheKey) 31 if errors.Is(err, syscall.ECONNREFUSED) { 32 slog.Debug("Connection refused, returning empty tags") 33 return nil, nil ··· 43 ogTags := c.extractOGTags(doc) 44 45 // Store in cache 46 + c.cache.Set(ctx, cacheKey, ogTags, c.ogTimeToLive) 47 48 return ogTags, nil 49 } ··· 60 } 61 62 // checkCache checks if we have the tags cached and returns them if so 63 + func (c *OGTagCache) checkCache(ctx context.Context, cacheKey string) map[string]string { 64 + if cachedTags, err := c.cache.Get(ctx, cacheKey); err == nil { 65 slog.Debug("cache hit", "tags", cachedTags) 66 return cachedTags 67 }
+13 -12
internal/ogtags/cache_test.go
··· 9 "time" 10 11 "github.com/TecharoHQ/anubis/lib/policy/config" 12 ) 13 14 func TestCacheReturnsDefault(t *testing.T) { ··· 21 TimeToLive: time.Minute, 22 ConsiderHost: false, 23 Override: want, 24 - }) 25 26 u, err := url.Parse("https://anubis.techaro.lol") 27 if err != nil { 28 t.Fatal(err) 29 } 30 31 - result, err := cache.GetOGTags(u, "anubis.techaro.lol") 32 if err != nil { 33 t.Fatal(err) 34 } ··· 49 Enabled: true, 50 TimeToLive: time.Minute, 51 ConsiderHost: false, 52 - }) 53 54 // Set up test data 55 urlStr := "http://example.com/page" ··· 60 cacheKey := cache.generateCacheKey(urlStr, "example.com") 61 62 // Test cache miss 63 - tags := cache.checkCache(cacheKey) 64 if tags != nil { 65 t.Errorf("expected nil tags on cache miss, got %v", tags) 66 } 67 68 // Manually add to cache 69 - cache.cache.Set(cacheKey, expectedTags, time.Minute) 70 71 // Test cache hit 72 - tags = cache.checkCache(cacheKey) 73 if tags == nil { 74 t.Fatal("expected non-nil tags on cache hit, got nil") 75 } ··· 112 Enabled: true, 113 TimeToLive: time.Minute, 114 ConsiderHost: false, 115 - }) 116 117 // Parse the test server URL 118 parsedURL, err := url.Parse(ts.URL) ··· 122 123 // Test fetching OG tags from the test server 124 // Pass the host from the parsed test server URL 125 - ogTags, err := cache.GetOGTags(parsedURL, parsedURL.Host) 126 if err != nil { 127 t.Fatalf("failed to get OG tags: %v", err) 128 } ··· 142 143 // Test fetching OG tags from the cache 144 // Pass the host from the parsed test server URL 145 - ogTags, err = cache.GetOGTags(parsedURL, parsedURL.Host) 146 if err != nil { 147 t.Fatalf("failed to get OG tags from cache: %v", err) 148 } 149 150 // Test fetching OG tags from the cache (3rd time) 151 // Pass the host from the parsed test server URL 152 - newOgTags, err := cache.GetOGTags(parsedURL, parsedURL.Host) 153 if err != nil { 154 t.Fatalf("failed to get OG tags from cache: %v", err) 155 } ··· 263 Enabled: true, 264 TimeToLive: time.Minute, 265 ConsiderHost: 
tc.ogCacheConsiderHost, 266 - }) 267 268 for i, req := range tc.requests { 269 - ogTags, err := cache.GetOGTags(parsedURL, req.host) 270 if err != nil { 271 t.Errorf("Request %d (host: %s): unexpected error: %v", i+1, req.host, err) 272 continue // Skip further checks for this request if error occurred
··· 9 "time" 10 11 "github.com/TecharoHQ/anubis/lib/policy/config" 12 + "github.com/TecharoHQ/anubis/lib/store/memory" 13 ) 14 15 func TestCacheReturnsDefault(t *testing.T) { ··· 22 TimeToLive: time.Minute, 23 ConsiderHost: false, 24 Override: want, 25 + }, memory.New(t.Context())) 26 27 u, err := url.Parse("https://anubis.techaro.lol") 28 if err != nil { 29 t.Fatal(err) 30 } 31 32 + result, err := cache.GetOGTags(t.Context(), u, "anubis.techaro.lol") 33 if err != nil { 34 t.Fatal(err) 35 } ··· 50 Enabled: true, 51 TimeToLive: time.Minute, 52 ConsiderHost: false, 53 + }, memory.New(t.Context())) 54 55 // Set up test data 56 urlStr := "http://example.com/page" ··· 61 cacheKey := cache.generateCacheKey(urlStr, "example.com") 62 63 // Test cache miss 64 + tags := cache.checkCache(t.Context(), cacheKey) 65 if tags != nil { 66 t.Errorf("expected nil tags on cache miss, got %v", tags) 67 } 68 69 // Manually add to cache 70 + cache.cache.Set(t.Context(), cacheKey, expectedTags, time.Minute) 71 72 // Test cache hit 73 + tags = cache.checkCache(t.Context(), cacheKey) 74 if tags == nil { 75 t.Fatal("expected non-nil tags on cache hit, got nil") 76 } ··· 113 Enabled: true, 114 TimeToLive: time.Minute, 115 ConsiderHost: false, 116 + }, memory.New(t.Context())) 117 118 // Parse the test server URL 119 parsedURL, err := url.Parse(ts.URL) ··· 123 124 // Test fetching OG tags from the test server 125 // Pass the host from the parsed test server URL 126 + ogTags, err := cache.GetOGTags(t.Context(), parsedURL, parsedURL.Host) 127 if err != nil { 128 t.Fatalf("failed to get OG tags: %v", err) 129 } ··· 143 144 // Test fetching OG tags from the cache 145 // Pass the host from the parsed test server URL 146 + ogTags, err = cache.GetOGTags(t.Context(), parsedURL, parsedURL.Host) 147 if err != nil { 148 t.Fatalf("failed to get OG tags from cache: %v", err) 149 } 150 151 // Test fetching OG tags from the cache (3rd time) 152 // Pass the host from the parsed test server URL 153 + 
newOgTags, err := cache.GetOGTags(t.Context(), parsedURL, parsedURL.Host) 154 if err != nil { 155 t.Fatalf("failed to get OG tags from cache: %v", err) 156 } ··· 264 Enabled: true, 265 TimeToLive: time.Minute, 266 ConsiderHost: tc.ogCacheConsiderHost, 267 + }, memory.New(t.Context())) 268 269 for i, req := range tc.requests { 270 + ogTags, err := cache.GetOGTags(t.Context(), parsedURL, req.host) 271 if err != nil { 272 t.Errorf("Request %d (host: %s): unexpected error: %v", i+1, req.host, err) 273 continue // Skip further checks for this request if error occurred
+4 -4
internal/ogtags/fetch.go
··· 20 21 // fetchHTMLDocumentWithCache fetches the HTML document from the given URL string, 22 // preserving the original host header. 23 - func (c *OGTagCache) fetchHTMLDocumentWithCache(urlStr string, originalHost string, cacheKey string) (*html.Node, error) { 24 - req, err := http.NewRequestWithContext(context.Background(), "GET", urlStr, nil) 25 if err != nil { 26 return nil, fmt.Errorf("failed to create http request: %w", err) 27 } ··· 41 var netErr net.Error 42 if errors.As(err, &netErr) && netErr.Timeout() { 43 slog.Debug("og: request timed out", "url", urlStr) 44 - c.cache.Set(cacheKey, emptyMap, c.ogTimeToLive/2) // Cache empty result for half the TTL to not spam the server 45 } 46 return nil, fmt.Errorf("http get failed: %w", err) 47 } ··· 56 57 if resp.StatusCode != http.StatusOK { 58 slog.Debug("og: received non-OK status code", "url", urlStr, "status", resp.StatusCode) 59 - c.cache.Set(cacheKey, emptyMap, c.ogTimeToLive) // Cache empty result for non-successful status codes 60 return nil, fmt.Errorf("%w: page not found", ErrOgHandled) 61 } 62
··· 20 21 // fetchHTMLDocumentWithCache fetches the HTML document from the given URL string, 22 // preserving the original host header. 23 + func (c *OGTagCache) fetchHTMLDocumentWithCache(ctx context.Context, urlStr string, originalHost string, cacheKey string) (*html.Node, error) { 24 + req, err := http.NewRequestWithContext(ctx, "GET", urlStr, nil) 25 if err != nil { 26 return nil, fmt.Errorf("failed to create http request: %w", err) 27 } ··· 41 var netErr net.Error 42 if errors.As(err, &netErr) && netErr.Timeout() { 43 slog.Debug("og: request timed out", "url", urlStr) 44 + c.cache.Set(ctx, cacheKey, emptyMap, c.ogTimeToLive/2) // Cache empty result for half the TTL to not spam the server 45 } 46 return nil, fmt.Errorf("http get failed: %w", err) 47 } ··· 56 57 if resp.StatusCode != http.StatusOK { 58 slog.Debug("og: received non-OK status code", "url", urlStr, "status", resp.StatusCode) 59 + c.cache.Set(ctx, cacheKey, emptyMap, c.ogTimeToLive) // Cache empty result for non-successful status codes 60 return nil, fmt.Errorf("%w: page not found", ErrOgHandled) 61 } 62
+8 -6
internal/ogtags/fetch_test.go
··· 1 package ogtags 2 3 import ( 4 "fmt" 5 "io" 6 "net/http" ··· 11 "time" 12 13 "github.com/TecharoHQ/anubis/lib/policy/config" 14 "golang.org/x/net/html" 15 ) 16 ··· 85 Enabled: true, 86 TimeToLive: time.Minute, 87 ConsiderHost: false, 88 - }) 89 - doc, err := cache.fetchHTMLDocument(ts.URL, "anything") 90 91 if tt.expectError { 92 if err == nil { ··· 116 Enabled: true, 117 TimeToLive: time.Minute, 118 ConsiderHost: false, 119 - }) 120 121 - doc, err := cache.fetchHTMLDocument("http://invalid.url.that.doesnt.exist.example", "anything") 122 123 if err == nil { 124 t.Error("expected error for invalid URL, got nil") ··· 130 } 131 132 // fetchHTMLDocument allows you to call fetchHTMLDocumentWithCache without a duplicate generateCacheKey call 133 - func (c *OGTagCache) fetchHTMLDocument(urlStr string, originalHost string) (*html.Node, error) { 134 cacheKey := c.generateCacheKey(urlStr, originalHost) 135 - return c.fetchHTMLDocumentWithCache(urlStr, originalHost, cacheKey) 136 }
··· 1 package ogtags 2 3 import ( 4 + "context" 5 "fmt" 6 "io" 7 "net/http" ··· 12 "time" 13 14 "github.com/TecharoHQ/anubis/lib/policy/config" 15 + "github.com/TecharoHQ/anubis/lib/store/memory" 16 "golang.org/x/net/html" 17 ) 18 ··· 87 Enabled: true, 88 TimeToLive: time.Minute, 89 ConsiderHost: false, 90 + }, memory.New(t.Context())) 91 + doc, err := cache.fetchHTMLDocument(t.Context(), ts.URL, "anything") 92 93 if tt.expectError { 94 if err == nil { ··· 118 Enabled: true, 119 TimeToLive: time.Minute, 120 ConsiderHost: false, 121 + }, memory.New(t.Context())) 122 123 + doc, err := cache.fetchHTMLDocument(t.Context(), "http://invalid.url.that.doesnt.exist.example", "anything") 124 125 if err == nil { 126 t.Error("expected error for invalid URL, got nil") ··· 132 } 133 134 // fetchHTMLDocument allows you to call fetchHTMLDocumentWithCache without a duplicate generateCacheKey call 135 + func (c *OGTagCache) fetchHTMLDocument(ctx context.Context, urlStr string, originalHost string) (*html.Node, error) { 136 cacheKey := c.generateCacheKey(urlStr, originalHost) 137 + return c.fetchHTMLDocumentWithCache(ctx, urlStr, originalHost, cacheKey) 138 }
+4 -3
internal/ogtags/integration_test.go
··· 8 "time" 9 10 "github.com/TecharoHQ/anubis/lib/policy/config" 11 ) 12 13 func TestIntegrationGetOGTags(t *testing.T) { ··· 110 Enabled: true, 111 TimeToLive: time.Minute, 112 ConsiderHost: false, 113 - }) 114 115 // Create URL for test 116 testURL, _ := url.Parse(ts.URL) ··· 119 120 // Get OG tags 121 // Pass the host from the test URL 122 - ogTags, err := cache.GetOGTags(testURL, testURL.Host) 123 124 // Check error expectation 125 if tc.expectError { ··· 147 148 // Test cache retrieval 149 // Pass the host from the test URL 150 - cachedOGTags, err := cache.GetOGTags(testURL, testURL.Host) 151 if err != nil { 152 t.Fatalf("failed to get OG tags from cache: %v", err) 153 }
··· 8 "time" 9 10 "github.com/TecharoHQ/anubis/lib/policy/config" 11 + "github.com/TecharoHQ/anubis/lib/store/memory" 12 ) 13 14 func TestIntegrationGetOGTags(t *testing.T) { ··· 111 Enabled: true, 112 TimeToLive: time.Minute, 113 ConsiderHost: false, 114 + }, memory.New(t.Context())) 115 116 // Create URL for test 117 testURL, _ := url.Parse(ts.URL) ··· 120 121 // Get OG tags 122 // Pass the host from the test URL 123 + ogTags, err := cache.GetOGTags(t.Context(), testURL, testURL.Host) 124 125 // Check error expectation 126 if tc.expectError { ··· 148 149 // Test cache retrieval 150 // Pass the host from the test URL 151 + cachedOGTags, err := cache.GetOGTags(t.Context(), testURL, testURL.Host) 152 if err != nil { 153 t.Fatalf("failed to get OG tags from cache: %v", err) 154 }
+4 -3
internal/ogtags/mem_test.go
··· 7 "testing" 8 9 "github.com/TecharoHQ/anubis/lib/policy/config" 10 "golang.org/x/net/html" 11 ) 12 ··· 30 31 for _, tt := range tests { 32 b.Run(tt.name, func(b *testing.B) { 33 - cache := NewOGTagCache(tt.target, config.OpenGraph{}) 34 urls := make([]*url.URL, len(tt.paths)) 35 for i, path := range tt.paths { 36 u, _ := url.Parse(path) ··· 66 </head><body><div><p>Content</p></div></body></html>`, 67 } 68 69 - cache := NewOGTagCache("http://example.com", config.OpenGraph{}) 70 docs := make([]*html.Node, len(htmlSamples)) 71 72 for i, sample := range htmlSamples { ··· 84 85 // Memory usage test 86 func TestMemoryUsage(t *testing.T) { 87 - cache := NewOGTagCache("http://example.com", config.OpenGraph{}) 88 89 // Force GC and wait for it to complete 90 runtime.GC()
··· 7 "testing" 8 9 "github.com/TecharoHQ/anubis/lib/policy/config" 10 + "github.com/TecharoHQ/anubis/lib/store/memory" 11 "golang.org/x/net/html" 12 ) 13 ··· 31 32 for _, tt := range tests { 33 b.Run(tt.name, func(b *testing.B) { 34 + cache := NewOGTagCache(tt.target, config.OpenGraph{}, memory.New(b.Context())) 35 urls := make([]*url.URL, len(tt.paths)) 36 for i, path := range tt.paths { 37 u, _ := url.Parse(path) ··· 67 </head><body><div><p>Content</p></div></body></html>`, 68 } 69 70 + cache := NewOGTagCache("http://example.com", config.OpenGraph{}, memory.New(b.Context())) 71 docs := make([]*html.Node, len(htmlSamples)) 72 73 for i, sample := range htmlSamples { ··· 85 86 // Memory usage test 87 func TestMemoryUsage(t *testing.T) { 88 + cache := NewOGTagCache("http://example.com", config.OpenGraph{}, memory.New(t.Context())) 89 90 // Force GC and wait for it to complete 91 runtime.GC()
+7 -10
internal/ogtags/ogtags.go
··· 9 "strings" 10 "time" 11 12 - "github.com/TecharoHQ/anubis/decaymap" 13 "github.com/TecharoHQ/anubis/lib/policy/config" 14 ) 15 16 const ( ··· 22 ) 23 24 type OGTagCache struct { 25 - cache *decaymap.Impl[string, map[string]string] 26 targetURL *url.URL 27 client *http.Client 28 ··· 36 ogOverride map[string]string 37 } 38 39 - func NewOGTagCache(target string, conf config.OpenGraph) *OGTagCache { 40 // Predefined approved tags and prefixes 41 defaultApprovedTags := []string{"description", "keywords", "author"} 42 defaultApprovedPrefixes := []string{"og:", "twitter:", "fediverse:"} ··· 77 } 78 79 return &OGTagCache{ 80 - cache: decaymap.New[string, map[string]string](), 81 targetURL: parsedTargetURL, 82 ogPassthrough: conf.Enabled, 83 ogTimeToLive: conf.TimeToLive, ··· 124 125 return sb.String() 126 } 127 - 128 - func (c *OGTagCache) Cleanup() { 129 - if c.cache != nil { 130 - c.cache.Cleanup() 131 - } 132 - }
··· 9 "strings" 10 "time" 11 12 "github.com/TecharoHQ/anubis/lib/policy/config" 13 + "github.com/TecharoHQ/anubis/lib/store" 14 ) 15 16 const ( ··· 22 ) 23 24 type OGTagCache struct { 25 + cache store.JSON[map[string]string] 26 targetURL *url.URL 27 client *http.Client 28 ··· 36 ogOverride map[string]string 37 } 38 39 + func NewOGTagCache(target string, conf config.OpenGraph, backend store.Interface) *OGTagCache { 40 // Predefined approved tags and prefixes 41 defaultApprovedTags := []string{"description", "keywords", "author"} 42 defaultApprovedPrefixes := []string{"og:", "twitter:", "fediverse:"} ··· 77 } 78 79 return &OGTagCache{ 80 + cache: store.JSON[map[string]string]{ 81 + Underlying: backend, 82 + Prefix: "ogtags:", 83 + }, 84 targetURL: parsedTargetURL, 85 ogPassthrough: conf.Enabled, 86 ogTimeToLive: conf.TimeToLive, ··· 127 128 return sb.String() 129 }
+7 -5
internal/ogtags/ogtags_fuzz_test.go
··· 1 package ogtags 2 3 import ( 4 "net/url" 5 "strings" 6 "testing" 7 "unicode/utf8" 8 9 "github.com/TecharoHQ/anubis/lib/policy/config" 10 "golang.org/x/net/html" 11 ) 12 ··· 46 } 47 48 // Create cache - should not panic 49 - cache := NewOGTagCache(target, config.OpenGraph{}) 50 51 // Create URL 52 u := &url.URL{ ··· 130 return 131 } 132 133 - cache := NewOGTagCache("http://example.com", config.OpenGraph{}) 134 135 // Should not panic 136 tags := cache.extractOGTags(doc) ··· 186 t.Skip() 187 } 188 189 - cache := NewOGTagCache(target, config.OpenGraph{}) 190 u := &url.URL{Path: path, RawQuery: query} 191 192 result := cache.getTarget(u) ··· 243 }, 244 } 245 246 - cache := NewOGTagCache("http://example.com", config.OpenGraph{}) 247 248 // Should not panic 249 property, content := cache.extractMetaTagInfo(node) ··· 296 297 for _, input := range inputs { 298 b.Run(input.name, func(b *testing.B) { 299 - cache := NewOGTagCache(input.target, config.OpenGraph{}) 300 u := &url.URL{Path: input.path, RawQuery: input.query} 301 302 b.ResetTimer()
··· 1 package ogtags 2 3 import ( 4 + "context" 5 "net/url" 6 "strings" 7 "testing" 8 "unicode/utf8" 9 10 "github.com/TecharoHQ/anubis/lib/policy/config" 11 + "github.com/TecharoHQ/anubis/lib/store/memory" 12 "golang.org/x/net/html" 13 ) 14 ··· 48 } 49 50 // Create cache - should not panic 51 + cache := NewOGTagCache(target, config.OpenGraph{}, memory.New(context.Background())) 52 53 // Create URL 54 u := &url.URL{ ··· 132 return 133 } 134 135 + cache := NewOGTagCache("http://example.com", config.OpenGraph{}, memory.New(context.Background())) 136 137 // Should not panic 138 tags := cache.extractOGTags(doc) ··· 188 t.Skip() 189 } 190 191 + cache := NewOGTagCache(target, config.OpenGraph{}, memory.New(context.Background())) 192 u := &url.URL{Path: path, RawQuery: query} 193 194 result := cache.getTarget(u) ··· 245 }, 246 } 247 248 + cache := NewOGTagCache("http://example.com", config.OpenGraph{}, memory.New(context.Background())) 249 250 // Should not panic 251 property, content := cache.extractMetaTagInfo(node) ··· 298 299 for _, input := range inputs { 300 b.Run(input.name, func(b *testing.B) { 301 + cache := NewOGTagCache(input.target, config.OpenGraph{}, memory.New(context.Background())) 302 u := &url.URL{Path: input.path, RawQuery: input.query} 303 304 b.ResetTimer()
+7 -6
internal/ogtags/ogtags_test.go
··· 15 "time" 16 17 "github.com/TecharoHQ/anubis/lib/policy/config" 18 ) 19 20 func TestNewOGTagCache(t *testing.T) { ··· 44 Enabled: tt.ogPassthrough, 45 TimeToLive: tt.ogTimeToLive, 46 ConsiderHost: false, 47 - }) 48 49 if cache == nil { 50 t.Fatal("expected non-nil cache, got nil") ··· 84 Enabled: true, 85 TimeToLive: 5 * time.Minute, 86 ConsiderHost: false, 87 - }) 88 89 if cache == nil { 90 t.Fatal("expected non-nil cache, got nil") ··· 169 Enabled: true, 170 TimeToLive: time.Minute, 171 ConsiderHost: false, 172 - }) 173 174 u := &url.URL{ 175 Path: tt.path, ··· 242 Enabled: true, 243 TimeToLive: time.Minute, 244 ConsiderHost: false, 245 - }) 246 247 // Create a dummy URL for the request (path and query matter) 248 testReqURL, _ := url.Parse("/some/page?query=1") 249 250 // Get OG tags 251 // Pass an empty string for host, as it's irrelevant for unix sockets 252 - ogTags, err := cache.GetOGTags(testReqURL, "") 253 254 if err != nil { 255 t.Fatalf("GetOGTags failed for unix socket: %v", err) ··· 265 266 // Test cache retrieval (should hit cache) 267 // Pass an empty string for host 268 - cachedTags, err := cache.GetOGTags(testReqURL, "") 269 if err != nil { 270 t.Fatalf("GetOGTags (cache hit) failed for unix socket: %v", err) 271 }
··· 15 "time" 16 17 "github.com/TecharoHQ/anubis/lib/policy/config" 18 + "github.com/TecharoHQ/anubis/lib/store/memory" 19 ) 20 21 func TestNewOGTagCache(t *testing.T) { ··· 45 Enabled: tt.ogPassthrough, 46 TimeToLive: tt.ogTimeToLive, 47 ConsiderHost: false, 48 + }, memory.New(t.Context())) 49 50 if cache == nil { 51 t.Fatal("expected non-nil cache, got nil") ··· 85 Enabled: true, 86 TimeToLive: 5 * time.Minute, 87 ConsiderHost: false, 88 + }, memory.New(t.Context())) 89 90 if cache == nil { 91 t.Fatal("expected non-nil cache, got nil") ··· 170 Enabled: true, 171 TimeToLive: time.Minute, 172 ConsiderHost: false, 173 + }, memory.New(t.Context())) 174 175 u := &url.URL{ 176 Path: tt.path, ··· 243 Enabled: true, 244 TimeToLive: time.Minute, 245 ConsiderHost: false, 246 + }, memory.New(t.Context())) 247 248 // Create a dummy URL for the request (path and query matter) 249 testReqURL, _ := url.Parse("/some/page?query=1") 250 251 // Get OG tags 252 // Pass an empty string for host, as it's irrelevant for unix sockets 253 + ogTags, err := cache.GetOGTags(t.Context(), testReqURL, "") 254 255 if err != nil { 256 t.Fatalf("GetOGTags failed for unix socket: %v", err) ··· 266 267 // Test cache retrieval (should hit cache) 268 // Pass an empty string for host 269 + cachedTags, err := cache.GetOGTags(t.Context(), testReqURL, "") 270 if err != nil { 271 t.Fatalf("GetOGTags (cache hit) failed for unix socket: %v", err) 272 }
+3 -2
internal/ogtags/parse_test.go
··· 7 "time" 8 9 "github.com/TecharoHQ/anubis/lib/policy/config" 10 "golang.org/x/net/html" 11 ) 12 ··· 17 Enabled: false, 18 ConsiderHost: false, 19 TimeToLive: time.Minute, 20 - }) 21 // Manually set approved tags/prefixes based on the user request for clarity 22 testCache.approvedTags = []string{"description"} 23 testCache.approvedPrefixes = []string{"og:"} ··· 198 Enabled: false, 199 ConsiderHost: false, 200 TimeToLive: time.Minute, 201 - }) 202 testCache.approvedTags = []string{"description"} 203 testCache.approvedPrefixes = []string{"og:"} 204
··· 7 "time" 8 9 "github.com/TecharoHQ/anubis/lib/policy/config" 10 + "github.com/TecharoHQ/anubis/lib/store/memory" 11 "golang.org/x/net/html" 12 ) 13 ··· 18 Enabled: false, 19 ConsiderHost: false, 20 TimeToLive: time.Minute, 21 + }, memory.New(t.Context())) 22 // Manually set approved tags/prefixes based on the user request for clarity 23 testCache.approvedTags = []string{"description"} 24 testCache.approvedPrefixes = []string{"og:"} ··· 199 Enabled: false, 200 ConsiderHost: false, 201 TimeToLive: time.Minute, 202 + }, memory.New(t.Context())) 203 testCache.approvedTags = []string{"description"} 204 testCache.approvedPrefixes = []string{"og:"} 205
+4 -9
lib/anubis.go
··· 70 next http.Handler 71 mux *http.ServeMux 72 policy *policy.ParsedConfig 73 - DNSBLCache *decaymap.Impl[string, dnsbl.DroneBLResponse] 74 OGTags *ogtags.OGTagCache 75 ed25519Priv ed25519.PrivateKey 76 hs512Secret []byte ··· 279 } 280 281 func (s *Server) handleDNSBL(w http.ResponseWriter, r *http.Request, ip string, lg *slog.Logger) bool { 282 if s.policy.DNSBL && ip != "" { 283 - resp, ok := s.DNSBLCache.Get(ip) 284 - if !ok { 285 lg.Debug("looking up ip in dnsbl") 286 resp, err := dnsbl.Lookup(ip) 287 if err != nil { 288 lg.Error("can't look up ip in dnsbl", "err", err) 289 } 290 - s.DNSBLCache.Set(ip, resp, 24*time.Hour) 291 droneBLHits.WithLabelValues(resp.String()).Inc() 292 } 293 ··· 551 Rules: &checker.List{}, 552 }, nil 553 } 554 - 555 - func (s *Server) CleanupDecayMap() { 556 - s.DNSBLCache.Cleanup() 557 - s.OGTags.Cleanup() 558 - }
··· 70 next http.Handler 71 mux *http.ServeMux 72 policy *policy.ParsedConfig 73 OGTags *ogtags.OGTagCache 74 ed25519Priv ed25519.PrivateKey 75 hs512Secret []byte ··· 278 } 279 280 func (s *Server) handleDNSBL(w http.ResponseWriter, r *http.Request, ip string, lg *slog.Logger) bool { 281 + db := &store.JSON[dnsbl.DroneBLResponse]{Underlying: s.store, Prefix: "dronebl:"} 282 if s.policy.DNSBL && ip != "" { 283 + resp, err := db.Get(r.Context(), ip) 284 + if err != nil { 285 lg.Debug("looking up ip in dnsbl") 286 resp, err := dnsbl.Lookup(ip) 287 if err != nil { 288 lg.Error("can't look up ip in dnsbl", "err", err) 289 } 290 + db.Set(r.Context(), ip, resp, 24*time.Hour) 291 droneBLHits.WithLabelValues(resp.String()).Inc() 292 } 293 ··· 551 Rules: &checker.List{}, 552 }, nil 553 }
+1 -4
lib/config.go
··· 15 16 "github.com/TecharoHQ/anubis" 17 "github.com/TecharoHQ/anubis/data" 18 - "github.com/TecharoHQ/anubis/decaymap" 19 "github.com/TecharoHQ/anubis/internal" 20 - "github.com/TecharoHQ/anubis/internal/dnsbl" 21 "github.com/TecharoHQ/anubis/internal/ogtags" 22 "github.com/TecharoHQ/anubis/lib/challenge" 23 "github.com/TecharoHQ/anubis/lib/localization" ··· 108 hs512Secret: opts.HS512Secret, 109 policy: opts.Policy, 110 opts: opts, 111 - DNSBLCache: decaymap.New[string, dnsbl.DroneBLResponse](), 112 - OGTags: ogtags.NewOGTagCache(opts.Target, opts.Policy.OpenGraph), 113 store: opts.Policy.Store, 114 } 115
··· 15 16 "github.com/TecharoHQ/anubis" 17 "github.com/TecharoHQ/anubis/data" 18 "github.com/TecharoHQ/anubis/internal" 19 "github.com/TecharoHQ/anubis/internal/ogtags" 20 "github.com/TecharoHQ/anubis/lib/challenge" 21 "github.com/TecharoHQ/anubis/lib/localization" ··· 106 hs512Secret: opts.HS512Secret, 107 policy: opts.Policy, 108 opts: opts, 109 + OGTags: ogtags.NewOGTagCache(opts.Target, opts.Policy.OpenGraph, opts.Policy.Store), 110 store: opts.Policy.Store, 111 } 112
+1 -1
lib/http.go
··· 138 var ogTags map[string]string = nil 139 if s.opts.OpenGraph.Enabled { 140 var err error 141 - ogTags, err = s.OGTags.GetOGTags(r.URL, r.Host) 142 if err != nil { 143 lg.Error("failed to get OG tags", "err", err) 144 }
··· 138 var ogTags map[string]string = nil 139 if s.opts.OpenGraph.Enabled { 140 var err error 141 + ogTags, err = s.OGTags.GetOGTags(r.Context(), r.URL, r.Host) 142 if err != nil { 143 lg.Error("failed to get OG tags", "err", err) 144 }
+13
lib/store/interface.go
··· 43 44 type JSON[T any] struct { 45 Underlying Interface 46 } 47 48 func (j *JSON[T]) Delete(ctx context.Context, key string) error { 49 return j.Underlying.Delete(ctx, key) 50 } 51 52 func (j *JSON[T]) Get(ctx context.Context, key string) (T, error) { 53 data, err := j.Underlying.Get(ctx, key) 54 if err != nil { 55 return z[T](), err ··· 64 } 65 66 func (j *JSON[T]) Set(ctx context.Context, key string, value T, expiry time.Duration) error { 67 data, err := json.Marshal(value) 68 if err != nil { 69 return fmt.Errorf("%w: %w", ErrCantEncode, err)
··· 43 44 type JSON[T any] struct { 45 Underlying Interface 46 + Prefix string 47 } 48 49 func (j *JSON[T]) Delete(ctx context.Context, key string) error { 50 + if j.Prefix != "" { 51 + key = j.Prefix + key 52 + } 53 + 54 return j.Underlying.Delete(ctx, key) 55 } 56 57 func (j *JSON[T]) Get(ctx context.Context, key string) (T, error) { 58 + if j.Prefix != "" { 59 + key = j.Prefix + key 60 + } 61 + 62 data, err := j.Underlying.Get(ctx, key) 63 if err != nil { 64 return z[T](), err ··· 73 } 74 75 func (j *JSON[T]) Set(ctx context.Context, key string, value T, expiry time.Duration) error { 76 + if j.Prefix != "" { 77 + key = j.Prefix + key 78 + } 79 + 80 data, err := json.Marshal(value) 81 if err != nil { 82 return fmt.Errorf("%w: %w", ErrCantEncode, err)
+50
lib/store/json_test.go
···
··· 1 + package store_test 2 + 3 + import ( 4 + "testing" 5 + "time" 6 + 7 + "github.com/TecharoHQ/anubis/lib/store" 8 + "github.com/TecharoHQ/anubis/lib/store/memory" 9 + ) 10 + 11 + func TestJSON(t *testing.T) { 12 + type data struct { 13 + ID string `json:"id"` 14 + } 15 + 16 + st := memory.New(t.Context()) 17 + db := store.JSON[data]{ 18 + Underlying: st, 19 + Prefix: "foo:", 20 + } 21 + 22 + if err := db.Set(t.Context(), "test", data{ID: t.Name()}, time.Minute); err != nil { 23 + t.Fatal(err) 24 + } 25 + 26 + got, err := db.Get(t.Context(), "test") 27 + if err != nil { 28 + t.Fatal(err) 29 + } 30 + 31 + if got.ID != t.Name() { 32 + t.Fatalf("got wrong data for key \"test\", wanted %q but got: %q", t.Name(), got.ID) 33 + } 34 + 35 + if err := db.Delete(t.Context(), "test"); err != nil { 36 + t.Fatal(err) 37 + } 38 + 39 + if _, err := db.Get(t.Context(), "test"); err == nil { 40 + t.Fatal("wanted invalid get to fail, it did not") 41 + } 42 + 43 + if err := st.Set(t.Context(), "foo:test", []byte("}"), time.Minute); err != nil { 44 + t.Fatal(err) 45 + } 46 + 47 + if _, err := db.Get(t.Context(), "test"); err == nil { 48 + t.Fatal("wanted invalid get to fail, it did not") 49 + } 50 + }