A community-based topic aggregation platform built on atproto

feat: add unfurl cache infrastructure for URL metadata persistence

Add PostgreSQL-backed cache for oEmbed and OpenGraph unfurl results to reduce
external API calls and improve performance.

**Database Layer:**
- Migration 017: Create unfurl_cache table with JSONB metadata storage
- Index on expires_at for efficient TTL-based cleanup
- Store provider, metadata, and thumbnail_url with expiration

**Repository Layer:**
- Repository interface with Get/Set operations
- PostgreSQL implementation with JSON marshaling
- Automatic TTL handling via PostgreSQL intervals
- Get returns nil, nil on a cache miss or expired entry (not an error)

**Error Types:**
- ErrNotFound: Cache miss or expired entry (sentinel only; Repository.Get reports a miss as nil, nil rather than returning this error)
- ErrInvalidURL: Invalid URL format
- ErrInvalidTTL: Non-positive TTL value

Design decisions:
- JSONB metadata column for flexible schema evolution
- Separate thumbnail_url for potential query optimization
- ON CONFLICT for upsert behavior (update on re-fetch)
- TTL-based expiration (default: 24 hours)

Part of URL unfurling feature to auto-populate external embeds with rich
metadata from supported providers (Streamable, YouTube, Reddit, Kagi, etc.).

Related: Circuit breaker pattern prevents cascading failures when providers
go down (already implemented in previous commits).

+173
+14
internal/core/unfurl/errors.go
// File header from the original diff: package unfurl; imports "errors".

// Sentinel errors of the unfurl package. Compare against them with errors.Is.
var (
	// ErrNotFound signals that an unfurl cache entry is missing or has expired.
	ErrNotFound = errors.New("unfurl cache entry not found or expired")

	// ErrInvalidURL signals that the supplied URL is not valid.
	ErrInvalidURL = errors.New("invalid URL")

	// ErrInvalidTTL signals that the supplied TTL is zero or negative.
	ErrInvalidTTL = errors.New("invalid TTL: must be positive")
)
+19
internal/core/unfurl/interfaces.go
··· 1 + package unfurl 2 + 3 + import ( 4 + "context" 5 + "time" 6 + ) 7 + 8 + // Repository defines the interface for unfurl cache persistence 9 + type Repository interface { 10 + // Get retrieves a cached unfurl result for the given URL. 11 + // Returns nil, nil if not found or expired (not an error condition). 12 + // Returns error only on database failures. 13 + Get(ctx context.Context, url string) (*UnfurlResult, error) 14 + 15 + // Set stores an unfurl result in the cache with the specified TTL. 16 + // If an entry already exists for the URL, it will be updated. 17 + // The expires_at is calculated as NOW() + ttl. 18 + Set(ctx context.Context, url string, result *UnfurlResult, ttl time.Duration) error 19 + }
+117
internal/core/unfurl/repository.go
··· 1 + package unfurl 2 + 3 + import ( 4 + "context" 5 + "database/sql" 6 + "encoding/json" 7 + "fmt" 8 + "time" 9 + ) 10 + 11 + type postgresUnfurlRepo struct { 12 + db *sql.DB 13 + } 14 + 15 + // NewRepository creates a new PostgreSQL unfurl cache repository 16 + func NewRepository(db *sql.DB) Repository { 17 + return &postgresUnfurlRepo{db: db} 18 + } 19 + 20 + // Get retrieves a cached unfurl result for the given URL. 21 + // Returns nil, nil if not found or expired (not an error condition). 22 + // Returns error only on database failures. 23 + func (r *postgresUnfurlRepo) Get(ctx context.Context, url string) (*UnfurlResult, error) { 24 + query := ` 25 + SELECT metadata, thumbnail_url, provider 26 + FROM unfurl_cache 27 + WHERE url = $1 AND expires_at > NOW() 28 + ` 29 + 30 + var metadataJSON []byte 31 + var thumbnailURL sql.NullString 32 + var provider string 33 + 34 + err := r.db.QueryRowContext(ctx, query, url).Scan(&metadataJSON, &thumbnailURL, &provider) 35 + if err == sql.ErrNoRows { 36 + // Not found or expired is not an error 37 + return nil, nil 38 + } 39 + if err != nil { 40 + return nil, fmt.Errorf("failed to get unfurl cache entry: %w", err) 41 + } 42 + 43 + // Unmarshal metadata JSONB to UnfurlResult 44 + var result UnfurlResult 45 + if err := json.Unmarshal(metadataJSON, &result); err != nil { 46 + return nil, fmt.Errorf("failed to unmarshal metadata: %w", err) 47 + } 48 + 49 + // Ensure provider and thumbnailURL are set (may not be in metadata JSON) 50 + result.Provider = provider 51 + if thumbnailURL.Valid { 52 + result.ThumbnailURL = thumbnailURL.String 53 + } 54 + 55 + return &result, nil 56 + } 57 + 58 + // Set stores an unfurl result in the cache with the specified TTL. 59 + // If an entry already exists for the URL, it will be updated. 60 + // The expires_at is calculated as NOW() + ttl. 
61 + func (r *postgresUnfurlRepo) Set(ctx context.Context, url string, result *UnfurlResult, ttl time.Duration) error { 62 + // Marshal UnfurlResult to JSON for metadata column 63 + metadataJSON, err := json.Marshal(result) 64 + if err != nil { 65 + return fmt.Errorf("failed to marshal metadata: %w", err) 66 + } 67 + 68 + // Store thumbnail_url separately for potential queries 69 + var thumbnailURL sql.NullString 70 + if result.ThumbnailURL != "" { 71 + thumbnailURL.String = result.ThumbnailURL 72 + thumbnailURL.Valid = true 73 + } 74 + 75 + // Convert Go duration to PostgreSQL interval string 76 + // e.g., "1 hour", "24 hours", "7 days" 77 + intervalStr := formatInterval(ttl) 78 + 79 + query := ` 80 + INSERT INTO unfurl_cache (url, provider, metadata, thumbnail_url, expires_at) 81 + VALUES ($1, $2, $3, $4, NOW() + $5::interval) 82 + ON CONFLICT (url) DO UPDATE 83 + SET provider = EXCLUDED.provider, 84 + metadata = EXCLUDED.metadata, 85 + thumbnail_url = EXCLUDED.thumbnail_url, 86 + expires_at = EXCLUDED.expires_at, 87 + fetched_at = NOW() 88 + ` 89 + 90 + _, err = r.db.ExecContext(ctx, query, url, result.Provider, metadataJSON, thumbnailURL, intervalStr) 91 + if err != nil { 92 + return fmt.Errorf("failed to insert/update unfurl cache entry: %w", err) 93 + } 94 + 95 + return nil 96 + } 97 + 98 + // formatInterval converts a Go duration to a PostgreSQL interval string 99 + // PostgreSQL accepts intervals like "1 hour", "24 hours", "7 days" 100 + func formatInterval(d time.Duration) string { 101 + seconds := int64(d.Seconds()) 102 + 103 + // Convert to appropriate unit for readability 104 + switch { 105 + case seconds >= 86400: // >= 1 day 106 + days := seconds / 86400 107 + return fmt.Sprintf("%d days", days) 108 + case seconds >= 3600: // >= 1 hour 109 + hours := seconds / 3600 110 + return fmt.Sprintf("%d hours", hours) 111 + case seconds >= 60: // >= 1 minute 112 + minutes := seconds / 60 113 + return fmt.Sprintf("%d minutes", minutes) 114 + default: 115 + 
return fmt.Sprintf("%d seconds", seconds) 116 + } 117 + }
+23
internal/db/migrations/017_create_unfurl_cache.sql
-- +goose Up
-- Cache of oEmbed/OpenGraph unfurl results, keyed by the unfurled URL.
-- Timestamps are TIMESTAMPTZ because they are written from and compared
-- against NOW(), which is timestamptz; TIMESTAMP (without time zone) would
-- make the expiry check depend on the session's TimeZone setting.
CREATE TABLE unfurl_cache (
    url           TEXT PRIMARY KEY,
    provider      TEXT NOT NULL,
    metadata      JSONB NOT NULL,
    thumbnail_url TEXT,
    fetched_at    TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    expires_at    TIMESTAMPTZ NOT NULL,
    created_at    TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Supports TTL-based cleanup by expiry time.
CREATE INDEX idx_unfurl_cache_expires ON unfurl_cache(expires_at);

COMMENT ON TABLE unfurl_cache IS 'Cache for oEmbed/URL unfurl results to reduce external API calls';
COMMENT ON COLUMN unfurl_cache.url IS 'The URL that was unfurled (primary key)';
COMMENT ON COLUMN unfurl_cache.provider IS 'Provider name (streamable, youtube, reddit, etc.)';
COMMENT ON COLUMN unfurl_cache.metadata IS 'Full unfurl result as JSON (title, description, type, etc.)';
COMMENT ON COLUMN unfurl_cache.thumbnail_url IS 'URL of the thumbnail image';
COMMENT ON COLUMN unfurl_cache.fetched_at IS 'When this entry was last written (reset on every upsert)';
COMMENT ON COLUMN unfurl_cache.expires_at IS 'When this cache entry should be refetched (TTL-based)';
COMMENT ON COLUMN unfurl_cache.created_at IS 'When this row was first inserted';

-- +goose Down
DROP INDEX IF EXISTS idx_unfurl_cache_expires;
DROP TABLE IF EXISTS unfurl_cache;