src/cli.ts at main · danabra.mov/woodshed

danabra.mov / woodshed
fork atom
Create, run, rate, and iterate on your Claude Skills
claude-skills
fork atom
woodshed / src / cli.ts
at main 841 lines 24 kB view raw
wrap content
danabra.mov woodshed 1mo ago
d8ce6966
  1#!/usr/bin/env node
  2
  3import { readdir, mkdir, writeFile, stat } from "node:fs/promises";
  4import { join } from "node:path";
  5import { createInterface } from "node:readline";
  6import { confirm, checkbox, input } from "@inquirer/prompts";
  7import { initProject } from "./init.js";
  8import { runBench, discoverCombos, type BenchResult } from "./index.js";
  9import {
 10  computeProbabilityBest,
 11  runsToTargetProbability,
 12  verdictFromProbability,
 13  comparisonConfidence,
 14  bucketRunEstimate,
 15  probabilityUncertainty,
 16  leaderEffectSize,
 17  type GroupStats,
 18  type VerdictLevel,
 19  type ConfidenceLevel,
 20  type EffectMagnitude,
 21} from "./stats.js";
 22import { renderPanel, type PanelState } from "./panel.js";
 23import {
 24  NAME_PATTERN,
 25  SKILL_BASELINE,
 26  SKILL_EXPERIMENT,
 27  FIXTURE_PROMPT,
 28  FIXTURE_EVAL,
 29} from "./example.js";
 30
 31const DIM = "\x1B[2m";
 32const RESET = "\x1B[0m";
 33const CYAN = "\x1B[36m";
 34const GREEN = "\x1B[32m";
 35const YELLOW = "\x1B[33m";
 36const RED = "\x1B[31m";
 37
 38interface CliArgs {
 39  fixtures: string[];
 40  runs: number;
 41  cacheOnly: boolean;
 42  reset: boolean;
 43  reeval: boolean;
 44}
 45
 46type RunOutcome = number | "fail";
 47
 48interface RunOption {
 49  runs: number;
 50  label: string;
 51}
 52
 53interface VerdictResult {
 54  hasWinner: boolean;
 55  leaderName: string;
 56  options: RunOption[];
 57}
 58
 59async function parseArgs(args: string[]): Promise<CliArgs> {
 60  let runs = 10;
 61  let cacheOnly = false;
 62  let reset = false;
 63  let reeval = false;
 64  const fixtures: string[] = [];
 65
 66  for (let i = 0; i < args.length; i++) {
 67    const arg = args[i];
 68    if (arg === "--runs" && args[i + 1]) {
 69      runs = parseInt(args[i + 1]!, 10);
 70      i++;
 71    } else if (arg === "--cache-only") {
 72      cacheOnly = true;
 73    } else if (arg === "--reset") {
 74      reset = true;
 75    } else if (arg === "--reeval") {
 76      reeval = true;
 77    } else if (!arg?.startsWith("-")) {
 78      fixtures.push(arg!);
 79    }
 80  }
 81
 82  if (fixtures.length === 0) {
 83    let entries;
 84    try {
 85      entries = await readdir("./fixtures", { withFileTypes: true });
 86    } catch {
 87      console.error(`${DIM}No fixtures/ folder found.${RESET}`);
 88      console.error("");
 89      console.error(`${DIM}To create a new workspace:${RESET}`);
 90      console.error(`  ${CYAN}npx woodshed create${RESET} my-idea`);
 91      console.error(`  ${CYAN}cd${RESET} my-idea`);
 92      console.error(`  ${CYAN}npx woodshed${RESET}`);
 93      console.error("");
 94      process.exit(1);
 95    }
 96    for (const entry of entries) {
 97      if (entry.isDirectory() && !entry.name.startsWith("_")) {
 98        fixtures.push(entry.name);
 99      }
100    }
101  }
102
103  if (fixtures.length === 0) {
104    console.error("No fixtures found in ./fixtures");
105    process.exit(1);
106  }
107
108  return { fixtures, runs, cacheOnly, reset, reeval };
109}
110
/**
 * Report whether `path` refers to an existing directory.
 *
 * Any failure while inspecting the path (including the path not existing)
 * is treated as "not a directory".
 */
async function dirExists(path: string): Promise<boolean> {
  return stat(path)
    .then((info) => info.isDirectory())
    .catch(() => false);
}
119
/**
 * List the names of the subdirectories of `path`, skipping any whose name
 * starts with `_`. Returns an empty list when the directory cannot be read.
 */
async function getSubdirs(path: string): Promise<string[]> {
  const names: string[] = [];
  try {
    for (const entry of await readdir(path, { withFileTypes: true })) {
      if (!entry.isDirectory()) continue;
      if (entry.name.startsWith("_")) continue;
      names.push(entry.name);
    }
  } catch {
    return [];
  }
  return names;
}
130
/**
 * List the names of the regular files directly inside `path`, skipping any
 * whose name starts with `.`. Returns an empty list when the directory
 * cannot be read.
 */
async function getFiles(path: string): Promise<string[]> {
  const names: string[] = [];
  try {
    for (const entry of await readdir(path, { withFileTypes: true })) {
      if (!entry.isFile()) continue;
      if (entry.name.startsWith(".")) continue;
      names.push(entry.name);
    }
  } catch {
    return [];
  }
  return names;
}
141
/**
 * Scaffold `skills/<skillName>/` with a `baseline` and an `experiment`
 * variant, each containing a `SKILL.md` generated from the example
 * templates.
 */
async function createSkill(skillName: string): Promise<void> {
  const skillRoot = join("./skills", skillName);
  const baselineDir = join(skillRoot, "baseline");
  const experimentDir = join(skillRoot, "experiment");

  // Both directories are created before either file is written.
  for (const dir of [baselineDir, experimentDir]) {
    await mkdir(dir, { recursive: true });
  }

  await writeFile(join(baselineDir, "SKILL.md"), SKILL_BASELINE(skillName));
  await writeFile(join(experimentDir, "SKILL.md"), SKILL_EXPERIMENT(skillName));
}
154
/**
 * Interactively scaffold `fixtures/<name>/`.
 *
 * Asks which Skills (optionally creating a new one) and which assets the
 * fixture should include, then writes `prompt.md` and `eval.md` from the
 * example templates.
 *
 * @param name Fixture directory name.
 * @returns `true` once the fixture has been written, `false` when no Skill
 *          ended up selected.
 */
async function createFixture(name: string): Promise<boolean> {
  // Sentinel checkbox value meaning "create a new Skill".
  const ADD_NEW = "__add_new__";

  const existingSkills = await getSubdirs("./skills");
  const availableAssets = await getFiles("./assets");

  const picked = await checkbox({
    message: "Include Skills:",
    choices: [
      ...existingSkills.map((skill) => ({ name: skill, value: skill })),
      { name: `${DIM}Add new Skill...${RESET}`, value: ADD_NEW },
    ],
    validate: (items) => items.length > 0 || "Choose at least one Skill",
  });

  // Real Skill names only; the sentinel is replaced by the new Skill below.
  const selectedSkills: string[] = picked.filter((skill) => skill !== ADD_NEW);

  if (picked.includes(ADD_NEW)) {
    const newSkillName = await input({
      message: "New Skill name:",
      validate: (value) => {
        if (!value) return "Name is required";
        if (!NAME_PATTERN.test(value)) {
          return "Only lowercase letters, numbers, and hyphens (max 64 chars)";
        }
        return existingSkills.includes(value) ? "Skill already exists" : true;
      },
    });
    await createSkill(newSkillName);
    console.log(`  ${DIM}Created skills/${newSkillName}/${RESET}`);
    selectedSkills.push(newSkillName);
  }

  // Guard: a fixture needs at least one actual Skill.
  if (selectedSkills.length === 0) {
    console.log(`${RED}At least one Skill is required.${RESET}`);
    return false;
  }

  // The assets prompt is skipped entirely when there is nothing to pick.
  const selectedAssets: string[] =
    availableAssets.length > 0
      ? await checkbox({
          message: "Include assets:",
          choices: availableAssets.map((asset) => ({
            name: asset,
            value: asset,
          })),
        })
      : [];

  const fixtureDir = join("./fixtures", name);
  await mkdir(fixtureDir, { recursive: true });

  const promptPath = join(fixtureDir, "prompt.md");
  await writeFile(promptPath, FIXTURE_PROMPT(selectedSkills, selectedAssets));
  const evalPath = join(fixtureDir, "eval.md");
  await writeFile(evalPath, FIXTURE_EVAL);

  console.log("");
  console.log(`Created ${CYAN}fixtures/${name}/${RESET}`);
  console.log(`  ${DIM}prompt.md${RESET} - edit this to define the task`);
  console.log(`  ${DIM}eval.md${RESET} - edit this to define scoring`);
  console.log("");

  return true;
}
227
/**
 * Check that each requested fixture exists under `./fixtures`, offering to
 * scaffold the ones that do not.
 *
 * - When `./fixtures` itself is missing, prints a hint to run
 *   `npx woodshed create` and exits the process with status 1.
 * - A missing fixture whose name fails `NAME_PATTERN` is reported and
 *   skipped.
 * - A missing fixture with a valid name can be created interactively. A
 *   freshly created fixture is not returned, because its `prompt.md` and
 *   `eval.md` still need to be edited.
 *
 * @param fixtures Fixture names requested on the command line.
 * @returns The subset of `fixtures` that already exist as directories.
 */
async function validateFixtures(fixtures: string[]): Promise<string[]> {
  const hasFixturesDir = await dirExists("./fixtures");

  if (!hasFixturesDir) {
    // No fixtures/ at all - suggest create command
    const name = fixtures[0] || "my-idea";
    console.error(`${DIM}No fixtures/ folder found.${RESET}`);
    console.error("");
    console.error(`${DIM}Did you mean to create a new project?${RESET}`);
    console.error(`  ${CYAN}npx woodshed create ${name}${RESET}`);
    console.error("");
    process.exit(1);
  }

  const validFixtures: string[] = [];

  for (const fixture of fixtures) {
    const fixtureDir = join("./fixtures", fixture);

    if (await dirExists(fixtureDir)) {
      validFixtures.push(fixture);
      continue;
    }

    if (!NAME_PATTERN.test(fixture)) {
      console.error(
        `${RED}Invalid fixture name '${fixture}'. Use only lowercase letters, numbers, and hyphens (max 64 chars).${RESET}`,
      );
      continue;
    }

    console.log(`${DIM}Fixture '${fixture}' doesn't exist.${RESET}`);
    const shouldCreate = await confirm({
      message: `Create fixture '${fixture}'?`,
      default: true,
    });

    if (shouldCreate) {
      // createFixture() reports whether anything was actually written; only
      // tell the user to edit the files when they exist.
      const created = await createFixture(fixture);
      if (created) {
        // Don't add to validFixtures - user needs to edit prompt.md first
        console.log(
          `${DIM}Edit the prompt.md and eval.md files, then run again.${RESET}`,
        );
      }
    }
  }

  return validFixtures;
}
276
// Types shared by the statistics computation behind the final verdict.

/** Statistics for a single combo, as produced by `computeStats`. */
interface ComboRow {
  name: string;
  scores: number[];
  mean: number;
  std: number;
  /** Probability that this combo is the best one. */
  probability: number;
  verdict: VerdictLevel;
  /** Runs needed for 95% confidence; 0 for every combo except the leader. */
  runsToConfidence: number;
}

/** Aggregate result of `computeStats` across all combos. */
interface ComputedStats {
  /** Combos ordered by descending probability of being best. */
  rows: ComboRow[];
  /** First entry of `rows`, or `null` when there are no combos. */
  leader: ComboRow | null;
  verdict: VerdictLevel;
  runOptions: RunOption[];
  confidence: ConfidenceLevel;
}
295
/**
 * Rank combos by their probability of being the best and derive the
 * overall verdict, the suggested number of extra runs and the comparison
 * confidence.
 *
 * @param combos Per-combo scores with their precomputed mean and std.
 * @returns Rows sorted by descending probability plus aggregate fields; an
 *          "insufficient" / "low" placeholder when `combos` is empty.
 */
function computeStats(
  combos: { name: string; scores: number[]; mean: number; std: number }[],
): ComputedStats {
  if (combos.length === 0) {
    return {
      rows: [],
      leader: null,
      verdict: "insufficient",
      runOptions: [],
      confidence: "low",
    };
  }

  // Shape expected by the stats helpers.
  const groupStats: GroupStats[] = combos.map(({ scores, mean, std }) => ({
    n: scores.length,
    mean,
    std,
    scores,
  }));

  const probabilities = computeProbabilityBest(groupStats);

  // Remember each combo's original position so it can still be matched to
  // its GroupStats entry after sorting.
  const ranked = combos
    .map((combo, index) => ({ combo, prob: probabilities[index]!, index }))
    .sort((a, b) => b.prob - a.prob);

  const leaderIndex = ranked[0]?.index ?? 0;
  const rivals = groupStats.filter((_, i) => i !== leaderIndex);
  const leaderRunsToConfidence = runsToTargetProbability(
    groupStats[leaderIndex]!,
    rivals,
    0.95,
  );

  const rows: ComboRow[] = ranked.map(({ combo, prob, index }) => ({
    ...combo,
    probability: prob,
    verdict: verdictFromProbability(prob, combo.scores.length),
    // Runs-to-confidence is only reported for the leader.
    runsToConfidence: index === leaderIndex ? leaderRunsToConfidence : 0,
  }));

  // The overall verdict uses the leader's probability and the smallest
  // sample size among all combos.
  const sampleSizes = combos.map((c) => c.scores.length);
  const overallVerdict = verdictFromProbability(
    probabilities[leaderIndex]!,
    Math.min(...sampleSizes),
  );

  // Offer a bucketed estimate (avoids false precision) when it is usable.
  const bucketed = bucketRunEstimate(leaderRunsToConfidence);
  const runOptions: RunOption[] =
    bucketed > 0 && isFinite(bucketed) ? [{ runs: bucketed, label: "" }] : [];

  const confidence = comparisonConfidence(groupStats);

  return {
    rows,
    leader: rows[0] || null,
    verdict: overallVerdict,
    runOptions,
    confidence,
  };
}
375
/**
 * Reduce a benchmark result to a verdict without printing anything (the
 * live panel already shows the results).
 *
 * A result with no combos is reported as already having a winner, with no
 * leader name and no further run options.
 */
function computeVerdict(result: BenchResult): VerdictResult {
  const combos = Object.entries(result.combos).map(
    ([name, { scores, mean, std }]) => ({ name, scores, mean, std }),
  );

  if (!combos.length) {
    return { hasWinner: true, leaderName: "", options: [] };
  }

  const { verdict, leader, runOptions } = computeStats(combos);
  return {
    hasWinner: verdict === "winner",
    leaderName: leader?.name || "",
    options: runOptions,
  };
}
396
/**
 * Ask a single question on stdin/stdout and resolve with the answer,
 * trimmed and lower-cased.
 */
async function prompt(question: string): Promise<string> {
  const rl = createInterface({ input: process.stdin, output: process.stdout });
  const raw = await new Promise<string>((resolve) => {
    rl.question(question, (answer) => {
      // Release stdin before handing the answer back.
      rl.close();
      resolve(answer);
    });
  });
  return raw.trim().toLowerCase();
}
406
/**
 * Let the user pick one of `options` and resolve with its value.
 *
 * On a TTY the menu is navigated with the arrow keys and confirmed with
 * Enter; `q` or `n` resolves with `null`, and Ctrl+C exits the process with
 * status 0. When stdin is not a TTY (piped input) a numbered list is
 * printed and a 1-based choice is read from a line of input; `n`, an empty
 * line or an out-of-range number resolves with `null`.
 */
async function selectMenu(
  options: { label: string; value: number | null }[],
): Promise<number | null> {
  const stdin = process.stdin;
  const count = options.length;

  // Non-interactive fallback.
  if (!stdin.isTTY) {
    options.forEach((option, i) => {
      console.log(`  [${i + 1}] ${option.label}`);
    });
    const rl = createInterface({ input: stdin, output: process.stdout });
    const answer = await new Promise<string>((resolve) => {
      rl.question("> ", (raw) => {
        rl.close();
        resolve(raw.trim());
      });
    });
    if (answer === "n" || answer === "") return null;
    const idx = parseInt(answer, 10) - 1;
    if (idx >= 0 && idx < count) {
      return options[idx]!.value;
    }
    return null;
  }

  let cursor = 0;

  // Text of one menu row; the highlighted row gets a marker and color.
  const rowText = (i: number): string => {
    const { label } = options[i]!;
    return i === cursor
      ? `${CYAN}▸${RESET} ${CYAN}${label}${RESET}`
      : `  ${label}`;
  };

  // Move back up over the menu and repaint every row in place.
  const repaint = (): void => {
    process.stdout.write(`\x1B[${count}A`);
    for (let i = 0; i < count; i++) {
      process.stdout.write(`\x1B[2K${rowText(i)}\n`);
    }
  };

  // First paint: plain lines, first row highlighted.
  for (let i = 0; i < count; i++) {
    console.log(rowText(i));
  }

  return new Promise((resolve) => {
    stdin.setRawMode(true);
    stdin.resume();

    // Restore the terminal, detach the key handler and settle the promise.
    const finish = (value: number | null): void => {
      stdin.setRawMode(false);
      stdin.pause();
      stdin.removeListener("data", onKey);
      resolve(value);
    };

    const onKey = (key: Buffer): void => {
      switch (key.toString()) {
        case "\x1B[A": // Up arrow
          cursor = (cursor - 1 + count) % count;
          repaint();
          break;
        case "\x1B[B": // Down arrow
          cursor = (cursor + 1) % count;
          repaint();
          break;
        case "\r":
        case "\n": // Enter
          finish(options[cursor]!.value);
          break;
        case "\x03": // Ctrl+C
          stdin.setRawMode(false);
          process.exit(0);
          break;
        case "q":
        case "n": // Quit without choosing
          finish(null);
          break;
      }
    };

    stdin.on("data", onKey);
  });
}
489
/**
 * Run the benchmark for one fixture, rendering a live panel unless
 * `cacheOnly` is set.
 *
 * In live mode a panel is mounted for the duration of the bench and kept
 * on screen for a further two seconds (also when the bench throws) before
 * being unmounted.
 *
 * @param fixture   Fixture name under `./fixtures`.
 * @param runs      Run count passed to the bench and shown as each combo's
 *                  target.
 * @param cacheOnly When true, call the bench with `cacheOnly: true` and no
 *                  live panel.
 * @param reset     Forwarded to the bench.
 * @param reeval    Forwarded to the bench.
 * @returns The bench result.
 */
async function runFixture(
  fixture: string,
  runs: number,
  cacheOnly: boolean,
  reset: boolean = false,
  reeval: boolean = false,
): Promise<BenchResult> {
  const discovery = await discoverCombos(fixture, "./fixtures", "./skills");

  // Options shared by the cache-only and the live code path.
  const baseOptions = {
    fixturesDir: "./fixtures",
    skillsDir: "./skills",
    assetsDir: "./assets",
    resultsDir: "./results",
    runs,
    reset,
    reeval,
    discovery,
  };

  if (cacheOnly) {
    return await runBench(fixture, { ...baseOptions, cacheOnly: true });
  }

  const panelState: PanelState = {
    combos: discovery.combos.map((c) => ({
      name: c.name,
      outcomes: [],
      target: runs,
    })),
    activeLogs: new Map(),
  };

  const panelController = renderPanel(fixture, panelState);

  // Push the current state to the panel. The logs map is copied on every
  // call so the panel always receives a fresh Map instance.
  const refreshPanel = (): void => {
    panelController.update({
      ...panelState,
      activeLogs: new Map(panelState.activeLogs),
    });
  };

  // Pending "linger" timers. They can outlive the two-second grace period
  // in `finally` (a linger lasts up to 2500 ms), so they are tracked and
  // cancelled before unmounting to avoid updating an unmounted panel.
  const lingerTimers = new Set<ReturnType<typeof setTimeout>>();

  try {
    const result = await runBench(fixture, {
      ...baseOptions,
      onRunStart: (comboIndex, runNumber) => {
        const combo = discovery.combos[comboIndex]!;
        const key = `${comboIndex}-${runNumber}`;
        panelState.activeLogs.set(key, {
          comboIndex,
          comboName: combo.name,
          runNumber,
          target: runs,
          lines: [],
          complete: false,
          startTime: Date.now(),
        });
        refreshPanel();
      },
      onStreamLine: (comboIndex, line) => {
        const outputLines = extractVerboseOutput(line);
        // Route the output to this combo's log. A finished run's log may
        // still be lingering next to the log of the run that just started,
        // so prefer one that is not complete and only fall back to the
        // first match.
        const comboLogs = Array.from(panelState.activeLogs.values()).filter(
          (l) => l.comboIndex === comboIndex,
        );
        const log = comboLogs.find((l) => !l.complete) ?? comboLogs[0];
        if (log && outputLines.length > 0) {
          for (const text of outputLines) {
            log.lines.push(text);
          }
          // Keep last N lines per panel: the terminal height (40 when
          // unknown) is split between the active logs, minus 6 rows, with
          // a floor of 5 lines.
          const maxLines =
            Math.floor(
              (process.stdout.rows || 40) /
                Math.max(1, panelState.activeLogs.size),
            ) - 6;
          const keep = Math.max(5, maxLines);
          if (log.lines.length > keep) {
            log.lines = log.lines.slice(-keep);
          }
          refreshPanel();
        }
      },
      onRunComplete: (comboIndex, runNumber) => {
        const key = `${comboIndex}-${runNumber}`;
        const log = panelState.activeLogs.get(key);
        const elapsed = log ? Date.now() - log.startTime : 0;
        // Runs longer than a second stay visible for a quarter of their
        // duration (capped at 2.5 s); shorter ones are removed on the
        // next timer tick.
        const linger = elapsed > 1000 ? Math.min(elapsed * 0.25, 2500) : 0;
        const timer = setTimeout(() => {
          lingerTimers.delete(timer);
          panelState.activeLogs.delete(key);
          refreshPanel();
        }, linger);
        lingerTimers.add(timer);
      },
      onProgress: (_fixture, combos) => {
        for (let i = 0; i < discovery.combos.length; i++) {
          const combo = discovery.combos[i]!;
          const data = combos[combo.name];
          if (data) {
            const outcomes: RunOutcome[] = data.results.map((r) =>
              r.status === "ok" && r.score !== undefined ? r.score : "fail",
            );
            panelState.combos[i]!.outcomes = outcomes;

            // Add score to log panel: the log whose run number equals the
            // number of results recorded so far gets the latest result.
            const log = Array.from(panelState.activeLogs.values()).find(
              (l) => l.comboIndex === i && l.runNumber === data.results.length,
            );
            const lastResult = data.results[data.results.length - 1];
            if (log && lastResult) {
              const scoreText =
                lastResult.status === "ok" && lastResult.score !== undefined
                  ? `Score: ${lastResult.score}`
                  : "Failed";
              const reasonText =
                lastResult.status === "ok" && lastResult.reason
                  ? lastResult.reason
                  : lastResult.status === "fail" && lastResult.error
                    ? lastResult.error
                    : "";
              log.lines.push("");
              log.lines.push(`\x1B[36m${scoreText}\x1B[0m`);
              if (reasonText) {
                log.lines.push(reasonText);
              }
              log.complete = true;
            }
          }
        }
        refreshPanel();
      },
    });

    // Bench finished: drop any remaining log panels.
    panelState.activeLogs.clear();
    refreshPanel();

    return result;
  } finally {
    // Linger for a moment so user can see final scores
    await new Promise((resolve) => setTimeout(resolve, 2000));
    // Cancel linger timers that have not fired yet so none of them can
    // touch the panel after it has been unmounted.
    for (const timer of lingerTimers) {
      clearTimeout(timer);
    }
    lingerTimers.clear();
    panelController.unmount();
  }
}
644
// For each known tool, the input field shown next to the tool name.
const TOOL_DETAIL_FIELD: ReadonlyMap<string, string> = new Map([
  ["Bash", "command"],
  ["Read", "file_path"],
  ["Write", "file_path"],
  ["Edit", "file_path"],
  ["Glob", "pattern"],
  ["Grep", "pattern"],
  ["Task", "description"],
  ["Skill", "skill"],
]);

/**
 * Turn one line of stream JSON into human-readable output lines.
 *
 * - `assistant` events contribute their non-blank text lines, plus one line
 *   per tool call (`Tool: detail`, or just the tool name when no detail is
 *   available).
 * - `user` events contribute the non-blank lines of each tool result,
 *   indented by two spaces.
 *
 * Input that is not valid JSON, or whose shape is unexpected, never throws:
 * whatever was collected up to that point is returned.
 */
export function extractVerboseOutput(jsonLine: string): string[] {
  const collected: string[] = [];
  try {
    const event = JSON.parse(jsonLine);

    switch (event.type) {
      case "assistant": {
        if (!event.message?.content) break;
        for (const part of event.message.content) {
          if (part.type === "text" && part.text) {
            for (const textLine of part.text.trim().split("\n")) {
              if (textLine.trim()) collected.push(textLine);
            }
            continue;
          }
          if (part.type !== "tool_use") continue;

          // Tool call: show the name and, for known tools, its key input.
          const toolName = part.name || "unknown";
          const field = TOOL_DETAIL_FIELD.get(toolName);
          let detail = "";
          if (part.input && field !== undefined && part.input[field]) {
            detail = part.input[field];
          }
          collected.push(detail ? `${toolName}: ${detail}` : toolName);
        }
        break;
      }
      case "user": {
        if (!event.message?.content) break;
        for (const part of event.message.content) {
          if (part.type !== "tool_result") continue;

          // Result content is either a plain string or a list of parts
          // whose text is concatenated; anything else yields no output.
          let resultText = "";
          if (typeof part.content === "string") {
            resultText = part.content;
          } else if (Array.isArray(part.content)) {
            resultText = part.content
              .map((c: { text?: string }) => c.text || "")
              .join("");
          }
          if (!resultText) continue;

          for (const resultLine of resultText.trim().split("\n")) {
            if (resultLine.trim()) collected.push(`  ${resultLine}`);
          }
        }
        break;
      }
    }
  } catch {
    // Not valid JSON (or not the expected shape), ignore
  }
  return collected;
}
720
/** Print the CLI usage, options and examples to stdout. */
function printHelp(): void {
  // The leading and trailing empty entries reproduce the blank line before
  // and after the help text.
  const helpLines = [
    "",
    "shed - Minimal runner and eval framework for Claude Skills.",
    "",
    "Usage:",
    "  shed [options] [fixture...]     Run benchmarks",
    "  shed create <folder>            Create a new project",
    "  shed help                       Show this help",
    "",
    "Options:",
    "  --runs <n>        Number of runs per combo (default: 10)",
    "  --cache-only      Only show cached results, don't run new tests",
    "  --reset           Archive existing results and start fresh",
    "  --reeval          Re-evaluate existing workspaces with current eval.md",
    "",
    "Examples:",
    "  shed                            Run all fixtures",
    "  shed my-fixture                 Run specific fixture",
    "  shed --runs 20 my-fixture       Run with 20 iterations",
    "  shed create my-project          Create new project folder",
    "",
  ];
  console.log(helpLines.join("\n"));
}
743
/**
 * Report whether the raw command line names at least one fixture.
 *
 * Follows the same scanning rule as `parseArgs`: the argument after `--runs`
 * is that flag's value, never a fixture. The check is positional, so a
 * fixture that happens to have the same text as the `--runs` value is still
 * recognized as a fixture.
 */
function hasExplicitFixtures(rawArgs: string[]): boolean {
  for (let i = 0; i < rawArgs.length; i++) {
    const arg = rawArgs[i]!;
    if (arg === "--runs" && rawArgs[i + 1]) {
      i++; // Skip the value of --runs.
      continue;
    }
    if (!arg.startsWith("-")) {
      return true;
    }
  }
  return false;
}

/**
 * CLI entry point.
 *
 * Handles the `help` and `create` commands; otherwise parses the arguments
 * and benchmarks every selected fixture. After each pass the verdict
 * decides whether to stop (a winner was found, or no further run option is
 * available) or to offer a menu for adding more runs to the same fixture.
 */
async function main() {
  const rawArgs = process.argv.slice(2);

  // Handle commands
  if (rawArgs[0] === "help" || rawArgs[0] === "--help" || rawArgs[0] === "-h") {
    printHelp();
    return;
  }

  if (rawArgs[0] === "create") {
    const targetDir = rawArgs[1];
    if (!targetDir) {
      console.error("Usage: shed create <folder>");
      process.exit(1);
    }
    await initProject(targetDir);
    return;
  }

  const args = await parseArgs(rawArgs);

  // Explicitly named fixtures are validated (and possibly created);
  // auto-discovered ones already exist on disk.
  if (hasExplicitFixtures(rawArgs)) {
    const validFixtures = await validateFixtures(args.fixtures);
    if (validFixtures.length === 0) {
      return; // User created fixtures but needs to edit them
    }
    args.fixtures = validFixtures;
  }

  for (let i = 0; i < args.fixtures.length; i++) {
    const fixture = args.fixtures[i]!;
    let runs = args.runs;
    let reset = args.reset;

    // Print a heading per fixture only when there are several.
    if (args.fixtures.length > 1) {
      console.log("");
      console.log(`${DIM}#${RESET} ${fixture}`);
    }

    while (true) {
      const result = await runFixture(
        fixture,
        runs,
        args.cacheOnly,
        reset,
        args.reeval,
      );
      reset = false; // Only reset on first iteration
      const verdict = computeVerdict(result);

      if (verdict.hasWinner) {
        console.log("");
        break;
      }

      if (verdict.options.length === 0) {
        console.log(
          `${DIM}  Pick any — differences are negligible or would need impractical sample sizes.${RESET}`,
        );
        console.log("");
        break;
      }

      // Build menu options
      console.log("");
      const menuOptions: { label: string; value: number | null }[] =
        verdict.options.map((opt) => ({
          label: `+${opt.runs} runs`,
          value: opt.runs,
        }));
      menuOptions.push({ label: `${DIM}stop${RESET}`, value: null });

      const choice = await selectMenu(menuOptions);

      if (choice === null) {
        console.log("");
        break;
      }

      // Raise the run count and make sure the next pass is not cache-only.
      runs += choice;
      args.cacheOnly = false;
    }
  }
}
834
// Start the CLI. An error named "ExitPromptError" ends the process with
// status 0; any other error is printed and ends it with status 1.
main().catch((error) => {
  const isPromptExit = error?.name === "ExitPromptError";
  if (isPromptExit) {
    process.exit(0);
  }
  console.error(error);
  process.exit(1);
});