audio streaming app plyr.fm
at main 229 lines 7.1 kB view raw
#!/usr/bin/env -S uv run --script --quiet
"""backfill thumbnails for existing track/album/playlist images.

## Context

Track artwork and avatars display at 48px but full-resolution images are
served. This generates 96x96 WebP thumbnails (2x retina) and stores them
alongside the originals in R2.

## Usage

```bash
# dry run (show what would be thumbnailed)
uv run scripts/backfill_thumbnails.py --dry-run

# generate first 5 thumbnails
uv run scripts/backfill_thumbnails.py --limit 5

# full backfill with custom concurrency
uv run scripts/backfill_thumbnails.py --concurrency 20
```
"""

import argparse
import asyncio
import logging
import time

import httpx
from sqlalchemy import select, update

from backend._internal.thumbnails import generate_thumbnail
from backend.models import Album, Track
from backend.models.playlist import Playlist
from backend.storage import storage
from backend.utilities.database import db_session

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)


async def _process_one(
    row: dict,
    http: httpx.AsyncClient,
    sem: asyncio.Semaphore,
    counter: dict[str, int],
    total: int,
) -> None:
    """download original image, generate thumbnail, upload and update DB.

    Mutates *counter* in place ("started"/"generated"/"failed") so the
    caller can report aggregate progress. Failures are logged and counted,
    never re-raised, so one bad image cannot abort the whole gather().
    """
    async with sem:
        idx = counter["started"] + 1
        counter["started"] += 1

        try:
            logger.info(
                "thumbnailing [%d/%d] %s %s: %s",
                idx,
                total,
                row["table"],
                row["id"],
                row["image_url"],
            )

            resp = await http.get(row["image_url"])
            resp.raise_for_status()

            thumb_data = generate_thumbnail(resp.content)
            thumbnail_url = await storage.save_thumbnail(thumb_data, row["image_id"])

            # each task opens its own short-lived session so concurrent
            # updates never share connection state
            async with db_session() as db:
                await db.execute(
                    update(row["model"])
                    .where(row["model"].id == row["id"])
                    .values(thumbnail_url=thumbnail_url)
                )
                await db.commit()

            counter["generated"] += 1
            logger.info(
                "generated thumbnail for %s %s (%d bytes)",
                row["table"],
                row["id"],
                len(thumb_data),
            )

        except Exception:
            logger.exception("failed to thumbnail %s %s", row["table"], row["id"])
            counter["failed"] += 1


async def _collect_candidates(db, model, table: str, limit: int | None) -> list[dict]:
    """query *model* rows that have an image but no thumbnail_url yet.

    Returns work-item dicts in the shape consumed by _process_one.
    A *limit* of None means unbounded; otherwise at most *limit* rows
    are fetched from the database.
    """
    stmt = (
        select(model)
        .where(model.image_id.isnot(None), model.thumbnail_url.is_(None))
        .order_by(model.id)
    )
    if limit is not None:
        stmt = stmt.limit(limit)
    result = await db.execute(stmt)

    rows: list[dict] = []
    for obj in result.scalars():
        # image_id can be set while the derived URL is absent; skip those
        if obj.image_url:
            rows.append(
                {
                    "table": table,
                    "id": obj.id,
                    "image_id": obj.image_id,
                    "image_url": obj.image_url,
                    "model": model,
                }
            )
    return rows


async def backfill_thumbnails(
    dry_run: bool = False,
    limit: int | None = None,
    concurrency: int = 10,
) -> None:
    """backfill thumbnails for images missing thumbnail_url.

    Scans tracks, albums, then playlists (in that order), honoring *limit*
    as a combined cap across all three tables. With *dry_run* it only logs
    what would be generated.
    """

    rows: list[dict] = []

    async with db_session() as db:
        for model, table in (
            (Track, "track"),
            (Album, "album"),
            (Playlist, "playlist"),
        ):
            # explicit None check so --limit 0 means "process nothing",
            # not "no limit" (previous truthiness test got this wrong)
            remaining = (limit - len(rows)) if limit is not None else None
            if remaining is not None and remaining <= 0:
                break
            rows.extend(await _collect_candidates(db, model, table, remaining))

    if not rows:
        logger.info("no images found needing thumbnails")
        return

    logger.info("found %d images to thumbnail (concurrency=%d)", len(rows), concurrency)

    if dry_run:
        for row in rows:
            logger.info(
                "would thumbnail: %s %s (image_id=%s)",
                row["table"],
                row["id"],
                row["image_id"],
            )
        return

    # semaphore bounds concurrent downloads/uploads; counter is shared
    # safely because asyncio tasks only interleave at await points
    sem = asyncio.Semaphore(concurrency)
    counter: dict[str, int] = {"started": 0, "generated": 0, "failed": 0}
    t0 = time.monotonic()

    async with httpx.AsyncClient(timeout=httpx.Timeout(60.0)) as http:
        tasks = [_process_one(row, http, sem, counter, len(rows)) for row in rows]
        await asyncio.gather(*tasks)

    elapsed = time.monotonic() - t0
    logger.info(
        "backfill complete: %d generated, %d failed, %d total in %.0fs (%.1f/s)",
        counter["generated"],
        counter["failed"],
        len(rows),
        elapsed,
        len(rows) / elapsed if elapsed > 0 else 0,
    )


async def main() -> None:
    """parse CLI arguments and run the backfill."""
    parser = argparse.ArgumentParser(description="backfill image thumbnails")
    parser.add_argument(
        "--dry-run", action="store_true", help="show what would be done"
    )
    parser.add_argument("--limit", type=int, default=None, help="max images to process")
    parser.add_argument(
        "--concurrency", type=int, default=10, help="concurrent workers"
    )
    args = parser.parse_args()

    if args.dry_run:
        logger.info("running in DRY RUN mode — no uploads will be made")

    await backfill_thumbnails(
        dry_run=args.dry_run,
        limit=args.limit,
        concurrency=args.concurrency,
    )


if __name__ == "__main__":
    asyncio.run(main())