scripts/bench-search at main · zzstoatzz.io/leaflet-search

zzstoatzz.io / leaflet-search
fork atom
search for standard sites pub-search.waow.tech
search zig blog atproto
fork atom
leaflet-search / scripts / bench-search
at main 198 lines 5.5 kB view raw
wrap content
zzstoatzz.io fix: increase VM memory and keep machine running 7w ago
1f07b37f
  1#!/usr/bin/env -S uv run --script --quiet
  2# /// script
  3# requires-python = ">=3.12"
  4# dependencies = ["httpx", "rich"]
  5# ///
  6"""
  7benchmark search API permutations to find performance issues.
  8
  9Usage:
 10    ./scripts/bench-search              # run with defaults
 11    ./scripts/bench-search --runs 5     # more runs per permutation
 12    ./scripts/bench-search --local      # test local server
 13"""
 14
 15import asyncio
 16import statistics
 17import sys
 18import time
 19from dataclasses import dataclass
 20
 21import httpx
 22from rich.console import Console
 23
 24BASE_URL = "https://leaflet-search-backend.fly.dev"
 25
 26QUERIES = ["python", "atproto", "rust", "blog", ""]
 27TAGS = ["atproto", "bluesky", "rust", "Webworld", ""]
 28PLATFORMS = ["leaflet", "pckt", ""]
 29LIMITS = [10, 40, ""]
 30
 31
 32@dataclass
 33class Result:
 34    name: str
 35    params: dict
 36    times: list[float]
 37    count: int
 38    status: int
 39
 40    @property
 41    def avg(self) -> float:
 42        return statistics.mean(self.times) * 1000
 43
 44    @property
 45    def min(self) -> float:
 46        return min(self.times) * 1000
 47
 48    @property
 49    def max(self) -> float:
 50        return max(self.times) * 1000
 51
 52    @property
 53    def p50(self) -> float:
 54        return statistics.median(self.times) * 1000
 55
 56    @property
 57    def stdev(self) -> float:
 58        return statistics.stdev(self.times) * 1000 if len(self.times) > 1 else 0
 59
 60
 61async def bench_search(
 62    client: httpx.AsyncClient, params: dict, runs: int
 63) -> Result:
 64    """benchmark a single search permutation."""
 65    times = []
 66    count = 0
 67    status = 0
 68
 69    # filter empty params
 70    clean_params = {k: v for k, v in params.items() if v}
 71
 72    name = " + ".join(f"{k}={v}" for k, v in clean_params.items()) or "(empty)"
 73
 74    for _ in range(runs):
 75        start = time.perf_counter()
 76        try:
 77            resp = await client.get("/search", params=clean_params)
 78            elapsed = time.perf_counter() - start
 79            times.append(elapsed)
 80            status = resp.status_code
 81            if resp.status_code == 200:
 82                count = len(resp.json())
 83        except Exception as e:
 84            times.append(30.0)  # timeout
 85            status = 0
 86        # small delay between runs to avoid overwhelming server
 87        await asyncio.sleep(0.1)
 88
 89    return Result(name=name, params=clean_params, times=times, count=count, status=status)
 90
 91
 92async def run_benchmarks(runs: int, console: Console) -> list[Result]:
 93    """run all search permutations."""
 94    results = []
 95
 96    # build permutations - focus on meaningful combinations
 97    permutations = []
 98
 99    # query only
100    for q in QUERIES:
101        if q:
102            permutations.append({"q": q})
103
104    # tag only
105    for tag in TAGS:
106        if tag:
107            permutations.append({"tag": tag})
108
109    # query + tag
110    for q in ["python", "blog"]:
111        for tag in ["atproto", "rust"]:
112            permutations.append({"q": q, "tag": tag})
113
114    # platform filter
115    for platform in PLATFORMS:
116        if platform:
117            permutations.append({"q": "blog", "platform": platform})
118
119    # limit variations
120    for limit in LIMITS:
121        if limit:
122            permutations.append({"q": "python", "limit": limit})
123
124    # tag + platform
125    for tag in ["atproto", "bluesky"]:
126        permutations.append({"tag": tag, "platform": "leaflet"})
127
128    # empty (should return recent)
129    permutations.append({})
130
131    console.print(f"[dim]running {len(permutations)} permutations × {runs} runs each...[/dim]\n")
132
133    async with httpx.AsyncClient(base_url=BASE_URL, timeout=30) as client:
134        # warmup
135        await client.get("/health")
136
137        for i, params in enumerate(permutations):
138            result = await bench_search(client, params, runs)
139            results.append(result)
140            # progress dot
141            console.print(".", end="", style="dim")
142            if (i + 1) % 20 == 0:
143                console.print()
144
145    console.print("\n")
146    return results
147
148
def print_results(results: list[Result], console: Console):
    """print results as an aligned plain-text table, slowest p50 first.

    Args:
        results: benchmark results to report; the list is not modified,
            a sorted copy is printed.
        console: rich console the table and summary are written to.

    The p50 cell is colored red above 1000ms, yellow above 500ms and green
    below 200ms. A closing line reports how many permutations had a p50
    over 500ms.
    """
    # sort by p50 descending to show slowest first
    ordered = sorted(results, key=lambda r: r.p50, reverse=True)

    header = f"{'permutation':<40} {'p50':>8} {'avg':>8} {'min':>8} {'max':>8} {'count':>6}"
    console.print("results (sorted by p50, slowest first):\n")
    console.print(header)
    console.print("-" * len(header))

    for r in ordered:
        p50 = r.p50
        # pad BEFORE wrapping in markup: rich strips the tags when rendering,
        # so padding the tagged string would leave colored cells unaligned
        p50_str = f"{p50:.0f}ms".rjust(8)
        if p50 > 1000:
            p50_str = f"[red bold]{p50_str}[/red bold]"
        elif p50 > 500:
            p50_str = f"[yellow]{p50_str}[/yellow]"
        elif p50 < 200:
            p50_str = f"[green]{p50_str}[/green]"

        # numeric cells are 6 digits + "ms" so they line up with the 8-wide headers
        console.print(f"{r.name:<40} {p50_str} {r.avg:>6.0f}ms {r.min:>6.0f}ms {r.max:>6.0f}ms {r.count:>6}")

    # summary
    console.print()
    slow = [r for r in ordered if r.p50 > 500]
    if slow:
        console.print(f"[yellow]⚠ {len(slow)} slow (p50 > 500ms)[/yellow]")
    else:
        console.print("[green]✓ all under 500ms p50[/green]")
176
177
def _parse_runs(argv: list[str], default: int = 3) -> int:
    """return the run count that follows ``--runs`` in *argv*, or *default*.

    The default is used when the flag is absent or is the last argument.
    Exits with a message when the value is not an integer or is below 1,
    since zero runs would leave no samples to compute statistics from.
    """
    if "--runs" not in argv:
        return default

    idx = argv.index("--runs")
    if idx + 1 >= len(argv):
        # flag given without a value: keep the default
        return default

    raw = argv[idx + 1]
    try:
        runs = int(raw)
    except ValueError:
        raise SystemExit(f"--runs expects an integer, got {raw!r}") from None
    if runs < 1:
        raise SystemExit(f"--runs must be at least 1, got {runs}")
    return runs


async def main():
    """parse command-line flags, run every benchmark and print the report.

    Flags read from sys.argv:
        --runs N   timed requests per permutation (default 3, must be >= 1)
        --local    benchmark http://localhost:3000 instead of the default URL
    """
    global BASE_URL

    # validate before any output or network traffic
    runs = _parse_runs(sys.argv)

    if "--local" in sys.argv:
        BASE_URL = "http://localhost:3000"

    console = Console()
    console.print(f"[bold]benchmarking {BASE_URL}[/bold]\n")

    results = await run_benchmarks(runs, console)
    print_results(results, console)
195
196
# start the async entry point only when executed as a script, not on import
if __name__ == "__main__":
    asyncio.run(main())