audio streaming app
plyr.fm
1#!/usr/bin/env -S uv run --script --quiet
2"""backfill thumbnails for existing track/album/playlist images.
3
4## Context
5
6Track artwork and avatars display at 48px but full-resolution images are
7served. This generates 96x96 WebP thumbnails (2x retina) and stores them
8alongside the originals in R2.
9
10## Usage
11
12```bash
13# dry run (show what would be thumbnailed)
14uv run scripts/backfill_thumbnails.py --dry-run
15
16# generate first 5 thumbnails
17uv run scripts/backfill_thumbnails.py --limit 5
18
19# full backfill with custom concurrency
20uv run scripts/backfill_thumbnails.py --concurrency 20
21```
22"""
23
24import argparse
25import asyncio
26import logging
27import time
28
29import httpx
30from sqlalchemy import select, update
31
32from backend._internal.thumbnails import generate_thumbnail
33from backend.models import Album, Track
34from backend.models.playlist import Playlist
35from backend.storage import storage
36from backend.utilities.database import db_session
37
# configure logging once at import time: INFO and above, each record
# prefixed with a timestamp and its level
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# module-level logger used by every function in this script
logger = logging.getLogger(__name__)
43
44
async def _process_one(
    row: dict,
    http: httpx.AsyncClient,
    sem: asyncio.Semaphore,
    counter: dict[str, int],
    total: int,
) -> None:
    """download original image, generate thumbnail, upload and update DB.

    args:
        row: work item with keys "table", "id", "image_id", "image_url" and
            "model" (the ORM class whose row receives the thumbnail_url).
        http: shared client used to fetch the original image.
        sem: semaphore bounding how many rows are processed at the same time.
        counter: shared tallies; "started", "generated" and "failed" are
            incremented in place.
        total: number of rows in the whole run, used only in progress logs.

    any exception raised while processing is logged and counted under
    "failed" instead of propagating, so one bad image does not abort the
    rest of the backfill.
    """
    async with sem:
        # claim a 1-based progress index; there is no await between the
        # increment and the read, so two tasks never get the same index
        counter["started"] += 1
        idx = counter["started"]

        try:
            logger.info(
                "thumbnailing [%d/%d] %s %s: %s",
                idx,
                total,
                row["table"],
                row["id"],
                row["image_url"],
            )

            resp = await http.get(row["image_url"])
            resp.raise_for_status()

            # generate_thumbnail is a plain synchronous call; run it in a
            # worker thread so it does not block the event loop while other
            # downloads/uploads are in flight
            thumb_data = await asyncio.to_thread(generate_thumbnail, resp.content)
            thumbnail_url = await storage.save_thumbnail(thumb_data, row["image_id"])

            # update DB row
            model = row["model"]
            async with db_session() as db:
                await db.execute(
                    update(model)
                    .where(model.id == row["id"])
                    .values(thumbnail_url=thumbnail_url)
                )
                await db.commit()

            counter["generated"] += 1
            logger.info(
                "generated thumbnail for %s %s (%d bytes)",
                row["table"],
                row["id"],
                len(thumb_data),
            )

        except Exception:
            # best-effort per row: record the failure and let the others run
            logger.exception("failed to thumbnail %s %s", row["table"], row["id"])
            counter["failed"] += 1
93
94
95async def backfill_thumbnails(
96 dry_run: bool = False,
97 limit: int | None = None,
98 concurrency: int = 10,
99) -> None:
100 """backfill thumbnails for images missing thumbnail_url."""
101
102 rows: list[dict] = []
103
104 async with db_session() as db:
105 # tracks with images but no thumbnail
106 stmt = (
107 select(Track)
108 .where(Track.image_id.isnot(None), Track.thumbnail_url.is_(None))
109 .order_by(Track.id)
110 )
111 if limit:
112 stmt = stmt.limit(limit)
113 result = await db.execute(stmt)
114 for track in result.scalars():
115 if track.image_url:
116 rows.append(
117 {
118 "table": "track",
119 "id": track.id,
120 "image_id": track.image_id,
121 "image_url": track.image_url,
122 "model": Track,
123 }
124 )
125
126 # albums with images but no thumbnail
127 remaining = (limit - len(rows)) if limit else None
128 if remaining is None or remaining > 0:
129 stmt = (
130 select(Album)
131 .where(Album.image_id.isnot(None), Album.thumbnail_url.is_(None))
132 .order_by(Album.id)
133 )
134 if remaining:
135 stmt = stmt.limit(remaining)
136 result = await db.execute(stmt)
137 for album in result.scalars():
138 if album.image_url:
139 rows.append(
140 {
141 "table": "album",
142 "id": album.id,
143 "image_id": album.image_id,
144 "image_url": album.image_url,
145 "model": Album,
146 }
147 )
148
149 # playlists with images but no thumbnail
150 remaining = (limit - len(rows)) if limit else None
151 if remaining is None or remaining > 0:
152 stmt = (
153 select(Playlist)
154 .where(Playlist.image_id.isnot(None), Playlist.thumbnail_url.is_(None))
155 .order_by(Playlist.id)
156 )
157 if remaining:
158 stmt = stmt.limit(remaining)
159 result = await db.execute(stmt)
160 for playlist in result.scalars():
161 if playlist.image_url:
162 rows.append(
163 {
164 "table": "playlist",
165 "id": playlist.id,
166 "image_id": playlist.image_id,
167 "image_url": playlist.image_url,
168 "model": Playlist,
169 }
170 )
171
172 if not rows:
173 logger.info("no images found needing thumbnails")
174 return
175
176 logger.info("found %d images to thumbnail (concurrency=%d)", len(rows), concurrency)
177
178 if dry_run:
179 for row in rows:
180 logger.info(
181 "would thumbnail: %s %s (image_id=%s)",
182 row["table"],
183 row["id"],
184 row["image_id"],
185 )
186 return
187
188 sem = asyncio.Semaphore(concurrency)
189 counter: dict[str, int] = {"started": 0, "generated": 0, "failed": 0}
190 t0 = time.monotonic()
191
192 async with httpx.AsyncClient(timeout=httpx.Timeout(60.0)) as http:
193 tasks = [_process_one(row, http, sem, counter, len(rows)) for row in rows]
194 await asyncio.gather(*tasks)
195
196 elapsed = time.monotonic() - t0
197 logger.info(
198 "backfill complete: %d generated, %d failed, %d total in %.0fs (%.1f/s)",
199 counter["generated"],
200 counter["failed"],
201 len(rows),
202 elapsed,
203 len(rows) / elapsed if elapsed > 0 else 0,
204 )
205
206
async def main() -> None:
    """parse the command-line flags and run the backfill with them."""
    # flag name -> add_argument keyword options, in the order they are registered
    flag_specs = (
        ("--dry-run", {"action": "store_true", "help": "show what would be done"}),
        ("--limit", {"type": int, "default": None, "help": "max images to process"}),
        ("--concurrency", {"type": int, "default": 10, "help": "concurrent workers"}),
    )

    cli = argparse.ArgumentParser(description="backfill image thumbnails")
    for flag, spec in flag_specs:
        cli.add_argument(flag, **spec)
    opts = cli.parse_args()

    if opts.dry_run:
        logger.info("running in DRY RUN mode — no uploads will be made")

    await backfill_thumbnails(
        dry_run=opts.dry_run,
        limit=opts.limit,
        concurrency=opts.concurrency,
    )
226
227
if __name__ == "__main__":
    # executed as a script: drive the async CLI entry point to completion
    asyncio.run(main())