Create, run, rate, and iterate on your Claude Skills
claude-skills
at main 841 lines 24 kB view raw
#!/usr/bin/env node

import { readdir, mkdir, writeFile, stat } from "node:fs/promises";
import { join } from "node:path";
import { createInterface } from "node:readline";
import { confirm, checkbox, input } from "@inquirer/prompts";
import { initProject } from "./init.js";
import { runBench, discoverCombos, type BenchResult } from "./index.js";
import {
  computeProbabilityBest,
  runsToTargetProbability,
  verdictFromProbability,
  comparisonConfidence,
  bucketRunEstimate,
  probabilityUncertainty,
  leaderEffectSize,
  type GroupStats,
  type VerdictLevel,
  type ConfidenceLevel,
  type EffectMagnitude,
} from "./stats.js";
import { renderPanel, type PanelState } from "./panel.js";
import {
  NAME_PATTERN,
  SKILL_BASELINE,
  SKILL_EXPERIMENT,
  FIXTURE_PROMPT,
  FIXTURE_EVAL,
} from "./example.js";

// ANSI escape sequences used to style terminal output.
const DIM = "\x1B[2m";
const RESET = "\x1B[0m";
const CYAN = "\x1B[36m";
const GREEN = "\x1B[32m";
const YELLOW = "\x1B[33m";
const RED = "\x1B[31m";

/** Options parsed from the command line. */
interface CliArgs {
  /** Fixture names to run (explicit positionals, or every folder in ./fixtures). */
  fixtures: string[];
  /** Number of runs per combo. */
  runs: number;
  /** Set by `--cache-only`. */
  cacheOnly: boolean;
  /** Set by `--reset`. */
  reset: boolean;
  /** Set by `--reeval`. */
  reeval: boolean;
}

/** Outcome of a single run: its numeric score, or "fail". */
type RunOutcome = number | "fail";

/** A "run N more times" choice offered to the user after a fixture finishes. */
interface RunOption {
  runs: number;
  label: string;
}

/** Summary of a finished benchmark used to decide whether to keep iterating. */
interface VerdictResult {
  hasWinner: boolean;
  leaderName: string;
  options: RunOption[];
}

/**
 * Parse command-line arguments (without the `node script` prefix).
 *
 * Recognised flags are `--runs <n>`, `--cache-only`, `--reset` and `--reeval`;
 * every argument that does not start with "-" is taken as a fixture name and
 * other dash-prefixed arguments are ignored. When no fixture is named, every
 * sub-directory of ./fixtures whose name does not start with "_" is used.
 *
 * Exits the process with code 1 when `--runs` is not a non-negative integer,
 * when ./fixtures cannot be read, or when no fixture is found.
 *
 * @param args Raw arguments, typically `process.argv.slice(2)`.
 * @returns The parsed options with at least one fixture.
 */
async function parseArgs(args: string[]): Promise<CliArgs> {
  let runs = 10;
  let cacheOnly = false;
  let reset = false;
  let reeval = false;
  const fixtures: string[] = [];

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === undefined) continue;
    const next = args[i + 1];

    if (arg === "--runs" && next) {
      const parsed = Number.parseInt(next, 10);
      // Reject NaN ("--runs abc") and negative counts instead of handing a
      // nonsensical run target to the benchmark.
      if (Number.isNaN(parsed) || parsed < 0) {
        console.error(
          `${RED}Invalid value for --runs: '${next}'. Expected a non-negative integer.${RESET}`,
        );
        process.exit(1);
      }
      runs = parsed;
      i++; // Skip the value that was just consumed.
    } else if (arg === "--cache-only") {
      cacheOnly = true;
    } else if (arg === "--reset") {
      reset = true;
    } else if (arg === "--reeval") {
      reeval = true;
    } else if (!arg.startsWith("-")) {
      fixtures.push(arg);
    }
  }

  // No explicit fixtures: discover them from the ./fixtures folder.
  if (fixtures.length === 0) {
    const entries = await readdir("./fixtures", { withFileTypes: true }).catch(
      () => null,
    );
    if (entries === null) {
      console.error(`${DIM}No fixtures/ folder found.${RESET}`);
      console.error("");
      console.error(`${DIM}To create a new workspace:${RESET}`);
      console.error(` ${CYAN}npx woodshed create${RESET} my-idea`);
      console.error(` ${CYAN}cd${RESET} my-idea`);
      console.error(` ${CYAN}npx woodshed${RESET}`);
      console.error("");
      process.exit(1);
    }
    for (const entry of entries) {
      if (entry.isDirectory() && !entry.name.startsWith("_")) {
        fixtures.push(entry.name);
      }
    }
  }

  if (fixtures.length === 0) {
    console.error("No fixtures found in ./fixtures");
    process.exit(1);
  }

  return { fixtures, runs, cacheOnly, reset, reeval };
}

/**
 * Report whether `path` exists and is a directory.
 * Any `stat` failure (missing path, permission error, ...) yields `false`.
 */
async function dirExists(path: string): Promise<boolean> {
  try {
    const info = await stat(path);
    return info.isDirectory();
  } catch {
    return false;
  }
}

/**
 * List the names of the sub-directories of `path`, skipping names that start
 * with "_". Returns an empty list when `path` cannot be read.
 */
async function getSubdirs(path: string): Promise<string[]> {
  try {
    const entries = await readdir(path, { withFileTypes: true });
    return entries
      .filter((e) => e.isDirectory() && !e.name.startsWith("_"))
      .map((e) => e.name);
  } catch {
    return [];
  }
}

/**
 * List the names of the regular files in `path`, skipping dotfiles.
 * Returns an empty list when `path` cannot be read.
 */
async function getFiles(path: string): Promise<string[]> {
  try {
    const entries = await readdir(path, { withFileTypes: true });
    return entries
      .filter((e) => e.isFile() && !e.name.startsWith("."))
      .map((e) => e.name);
  } catch {
    return [];
  }
}

/**
 * Scaffold ./skills/<skillName> with a `baseline` and an `experiment` variant,
 * each containing a SKILL.md generated from the matching template.
 */
async function createSkill(skillName: string): Promise<void> {
  const variants = [
    { dir: "baseline", template: SKILL_BASELINE },
    { dir: "experiment", template: SKILL_EXPERIMENT },
  ];

  // Create both folders first, then write the files (same order as before).
  for (const { dir } of variants) {
    await mkdir(join("./skills", skillName, dir), { recursive: true });
  }
  for (const { dir, template } of variants) {
    await writeFile(
      join("./skills", skillName, dir, "SKILL.md"),
      template(skillName),
    );
  }
}
/**
 * Interactively scaffold a new fixture under ./fixtures/<name>.
 *
 * Prompts for the Skills to include (optionally creating a new Skill) and,
 * when ./assets contains files, for the assets to include. Then writes
 * `prompt.md` and `eval.md` from the example templates.
 *
 * @param name Fixture folder name.
 * @returns `true` when the fixture was created, `false` when no Skill ended
 *   up selected.
 */
async function createFixture(name: string): Promise<boolean> {
  const skills = await getSubdirs("./skills");
  const assets = await getFiles("./assets");

  // Sentinel checkbox value for the "create a new Skill" entry.
  const ADD_NEW = "__add_new__";

  const skillSelection = await checkbox({
    message: "Include Skills:",
    choices: [
      ...skills.map((s) => ({ name: s, value: s })),
      { name: `${DIM}Add new Skill...${RESET}`, value: ADD_NEW },
    ],
    validate: (items) => items.length > 0 || "Choose at least one Skill",
  });

  const selectedSkills = skillSelection.filter((s) => s !== ADD_NEW);

  if (skillSelection.includes(ADD_NEW)) {
    const newSkillName = await input({
      message: "New Skill name:",
      validate: (v) => {
        if (!v) return "Name is required";
        if (!NAME_PATTERN.test(v)) {
          return "Only lowercase letters, numbers, and hyphens (max 64 chars)";
        }
        if (skills.includes(v)) return "Skill already exists";
        return true;
      },
    });
    await createSkill(newSkillName);
    console.log(` ${DIM}Created skills/${newSkillName}/${RESET}`);
    selectedSkills.push(newSkillName);
  }

  // Ensure at least one actual skill (not just "Add new")
  if (selectedSkills.length === 0) {
    console.log(`${RED}At least one Skill is required.${RESET}`);
    return false;
  }

  let selectedAssets: string[] = [];
  if (assets.length > 0) {
    selectedAssets = await checkbox({
      message: "Include assets:",
      choices: assets.map((a) => ({ name: a, value: a })),
    });
  }

  // Create fixture directory
  const fixtureDir = join("./fixtures", name);
  await mkdir(fixtureDir, { recursive: true });

  await writeFile(
    join(fixtureDir, "prompt.md"),
    FIXTURE_PROMPT(selectedSkills, selectedAssets),
  );
  await writeFile(join(fixtureDir, "eval.md"), FIXTURE_EVAL);

  console.log("");
  console.log(`Created ${CYAN}fixtures/${name}/${RESET}`);
  console.log(` ${DIM}prompt.md${RESET} - edit this to define the task`);
  console.log(` ${DIM}eval.md${RESET} - edit this to define scoring`);
  console.log("");

  return true;
}

/**
 * Check the explicitly requested fixtures against ./fixtures.
 *
 * Existing fixtures are returned. For a missing fixture with a valid name the
 * user is offered to scaffold it; a freshly created fixture is NOT returned
 * because its prompt.md/eval.md still have to be edited. Exits with code 1
 * when ./fixtures itself does not exist.
 *
 * @param fixtures Fixture names given on the command line.
 * @returns The subset of `fixtures` that already exist on disk.
 */
async function validateFixtures(fixtures: string[]): Promise<string[]> {
  if (!(await dirExists("./fixtures"))) {
    // No fixtures/ at all - suggest create command
    const name = fixtures[0] || "my-idea";
    console.error(`${DIM}No fixtures/ folder found.${RESET}`);
    console.error("");
    console.error(`${DIM}Did you mean to create a new project?${RESET}`);
    console.error(` ${CYAN}npx woodshed create ${name}${RESET}`);
    console.error("");
    process.exit(1);
  }

  const validFixtures: string[] = [];

  for (const fixture of fixtures) {
    if (await dirExists(join("./fixtures", fixture))) {
      validFixtures.push(fixture);
      continue;
    }

    if (!NAME_PATTERN.test(fixture)) {
      console.error(
        `${RED}Invalid fixture name '${fixture}'. Use only lowercase letters, numbers, and hyphens (max 64 chars).${RESET}`,
      );
      continue;
    }

    console.log(`${DIM}Fixture '${fixture}' doesn't exist.${RESET}`);
    const shouldCreate = await confirm({
      message: `Create fixture '${fixture}'?`,
      default: true,
    });

    if (shouldCreate) {
      const created = await createFixture(fixture);
      // Don't add to validFixtures - user needs to edit prompt.md first.
      // Only point at the files when they were actually written.
      if (created) {
        console.log(
          `${DIM}Edit the prompt.md and eval.md files, then run again.${RESET}`,
        );
      }
    }
  }

  return validFixtures;
}

// Shared computation for both live panel and final verdict
interface ComboRow {
  name: string;
  scores: number[];
  mean: number;
  std: number;
  probability: number; // Probability this combo is best
  verdict: VerdictLevel;
  runsToConfidence: number; // Runs needed for 95% confidence
}

interface ComputedStats {
  rows: ComboRow[];
  leader: ComboRow | null;
  verdict: VerdictLevel;
  runOptions: RunOption[];
  confidence: ConfidenceLevel;
}

/**
 * Rank combos by their probability of being the best one.
 *
 * @param combos Per-combo scores with precomputed mean and standard deviation.
 * @returns Rows sorted by descending probability, the leading row, the
 *   overall verdict (leader probability judged against the smallest sample
 *   size), the suggested number of extra runs, and the comparison confidence.
 *   An empty input yields an "insufficient" verdict with no rows.
 */
function computeStats(
  combos: { name: string; scores: number[]; mean: number; std: number }[],
): ComputedStats {
  if (combos.length === 0) {
    return {
      rows: [],
      leader: null,
      verdict: "insufficient",
      runOptions: [],
      confidence: "low",
    };
  }

  // Convert to GroupStats
  const groupStats: GroupStats[] = combos.map((c) => ({
    n: c.scores.length,
    mean: c.mean,
    std: c.std,
    scores: c.scores,
  }));

  // Probability that each combo is the best; parallel to `combos`.
  const probabilities = computeProbabilityBest(groupStats);

  // Rank by probability (descending), remembering each original index.
  const ranked = combos
    .map((combo, index) => ({ combo, index, prob: probabilities[index]! }))
    .sort((a, b) => b.prob - a.prob);

  const leaderIndex = ranked[0]?.index ?? 0;
  const leaderRunsToConfidence = runsToTargetProbability(
    groupStats[leaderIndex]!,
    groupStats.filter((_, i) => i !== leaderIndex),
    0.95,
  );

  const rows: ComboRow[] = ranked.map(({ combo, prob, index }) => ({
    ...combo,
    probability: prob,
    verdict: verdictFromProbability(prob, combo.scores.length),
    // Only show runs-to-confidence for the leader
    runsToConfidence: index === leaderIndex ? leaderRunsToConfidence : 0,
  }));

  // Determine overall verdict from leader
  const minN = Math.min(...combos.map((c) => c.scores.length));
  const overallVerdict = verdictFromProbability(
    probabilities[leaderIndex]!,
    minN,
  );

  // Build run options from runs-to-confidence (bucketed to avoid false precision)
  const bucketed = bucketRunEstimate(leaderRunsToConfidence);
  const runOptions: RunOption[] =
    bucketed > 0 && isFinite(bucketed) ? [{ runs: bucketed, label: "" }] : [];

  const confidence = comparisonConfidence(groupStats);

  return {
    rows,
    leader: rows[0] ?? null,
    verdict: overallVerdict,
    runOptions,
    confidence,
  };
}

/**
 * Compute verdict without printing (live panel already shows results).
 *
 * A result without any combos is reported as `hasWinner: true` so that the
 * caller's "run more?" loop stops instead of offering extra runs.
 */
function computeVerdict(result: BenchResult): VerdictResult {
  const combos = Object.entries(result.combos).map(([name, r]) => ({
    name,
    scores: r.scores,
    mean: r.mean,
    std: r.std,
  }));

  if (combos.length === 0) {
    return { hasWinner: true, leaderName: "", options: [] };
  }

  const stats = computeStats(combos);
  return {
    hasWinner: stats.verdict === "winner",
    leaderName: stats.leader?.name || "",
    options: stats.runOptions,
  };
}

/**
 * Ask a free-form question on stdin/stdout.
 * @returns The answer, trimmed and lower-cased.
 */
async function prompt(question: string): Promise<string> {
  const rl = createInterface({ input: process.stdin, output: process.stdout });
  return new Promise((resolve) => {
    rl.question(question, (answer) => {
      rl.close();
      resolve(answer.trim().toLowerCase());
    });
  });
}

/**
 * Let the user pick one of `options`.
 *
 * On a TTY this is an arrow-key menu: Enter confirms, "q"/"n" cancels and
 * Ctrl+C exits the process. Without a TTY (piped input) the options are
 * printed as a numbered list and one line is read from stdin; an empty line,
 * "n", an out-of-range number or end of input all cancel.
 *
 * @returns The `value` of the chosen option, or `null` when cancelled.
 */
async function selectMenu(
  options: { label: string; value: number | null }[],
): Promise<number | null> {
  // Fallback for non-TTY (piped input)
  if (!process.stdin.isTTY) {
    options.forEach((option, i) => {
      console.log(` [${i + 1}] ${option.label}`);
    });
    const rl = createInterface({
      input: process.stdin,
      output: process.stdout,
    });
    const answer = await new Promise<string>((resolve) => {
      // stdin may reach EOF before a line arrives; treat that as "stop"
      // rather than leaving this promise pending forever.
      rl.once("close", () => resolve(""));
      rl.question("> ", (a) => {
        // Resolve before closing: close() emits "close", which would
        // otherwise win with the empty answer.
        resolve(a.trim());
        rl.close();
      });
    });
    if (answer === "n" || answer === "") return null;
    const idx = parseInt(answer, 10) - 1;
    // NaN and out-of-range indices find no option and therefore cancel.
    return options[idx]?.value ?? null;
  }

  let selected = 0;

  // NOTE(review): the selected-row marker is an empty coloured string while
  // other rows get a space, so rows shift by one column. A marker glyph may
  // have been lost here - confirm against the intended UI.
  const formatRow = (index: number): string => {
    const isSelected = index === selected;
    const prefix = isSelected ? `${CYAN}${RESET}` : " ";
    const label = isSelected
      ? `${CYAN}${options[index]!.label}${RESET}`
      : options[index]!.label;
    return `${prefix} ${label}`;
  };

  const render = () => {
    // Move the cursor back to the first row, then clear and redraw each row.
    process.stdout.write(`\x1B[${options.length}A`);
    for (let i = 0; i < options.length; i++) {
      process.stdout.write(`\x1B[2K${formatRow(i)}\n`);
    }
  };

  // Initial render
  for (let i = 0; i < options.length; i++) {
    console.log(formatRow(i));
  }

  return new Promise((resolve) => {
    process.stdin.setRawMode(true);
    process.stdin.resume();

    // Restore the terminal, detach the key listener and settle the promise.
    const finish = (value: number | null) => {
      process.stdin.setRawMode(false);
      process.stdin.pause();
      process.stdin.removeListener("data", onKey);
      resolve(value);
    };

    const onKey = (key: Buffer) => {
      const str = key.toString();

      if (str === "\x1B[A") {
        // Up arrow (wraps around)
        selected = (selected - 1 + options.length) % options.length;
        render();
      } else if (str === "\x1B[B") {
        // Down arrow (wraps around)
        selected = (selected + 1) % options.length;
        render();
      } else if (str === "\r" || str === "\n") {
        // Enter
        finish(options[selected]!.value);
      } else if (str === "\x03") {
        // Ctrl+C
        process.stdin.setRawMode(false);
        process.exit(0);
      } else if (str === "q" || str === "n") {
        // q or n to quit
        finish(null);
      }
    };

    process.stdin.on("data", onKey);
  });
}

/**
 * Run (or load from cache) the benchmark for one fixture.
 *
 * With `cacheOnly` the benchmark is invoked without any UI. Otherwise a live
 * panel shows per-combo outcomes and the streamed output of active runs; the
 * panel stays visible for two seconds after the benchmark settles and is then
 * unmounted.
 *
 * @param fixture Fixture name under ./fixtures.
 * @param runs Target number of runs per combo.
 * @param cacheOnly Forwarded to `runBench`; also disables the live panel.
 * @param reset Forwarded to `runBench`.
 * @param reeval Forwarded to `runBench`.
 * @returns The benchmark result.
 */
async function runFixture(
  fixture: string,
  runs: number,
  cacheOnly: boolean,
  reset: boolean = false,
  reeval: boolean = false,
): Promise<BenchResult> {
  const discovery = await discoverCombos(fixture, "./fixtures", "./skills");

  // Options shared by the cached and the live invocation.
  const baseOptions = {
    fixturesDir: "./fixtures",
    skillsDir: "./skills",
    assetsDir: "./assets",
    resultsDir: "./results",
    runs,
    reset,
    reeval,
    discovery,
  };

  if (cacheOnly) {
    return await runBench(fixture, { ...baseOptions, cacheOnly: true });
  }

  const panelState: PanelState = {
    combos: discovery.combos.map((c) => ({
      name: c.name,
      outcomes: [],
      target: runs,
    })),
    activeLogs: new Map(),
  };

  const panelController = renderPanel(fixture, panelState);

  // Push the current state to the panel; the log map is copied on each update.
  const refreshPanel = () => {
    panelController.update({
      ...panelState,
      activeLogs: new Map(panelState.activeLogs),
    });
  };

  // Pending "remove finished log" timers, cancelled before unmounting so none
  // of them touches the panel afterwards.
  const lingerTimers = new Set<ReturnType<typeof setTimeout>>();

  try {
    const result = await runBench(fixture, {
      ...baseOptions,
      onRunStart: (comboIndex, runNumber) => {
        const combo = discovery.combos[comboIndex]!;
        panelState.activeLogs.set(`${comboIndex}-${runNumber}`, {
          comboIndex,
          comboName: combo.name,
          runNumber,
          target: runs,
          lines: [],
          complete: false,
          startTime: Date.now(),
        });
        refreshPanel();
      },
      onStreamLine: (comboIndex, line) => {
        const outputLines = extractVerboseOutput(line);
        // Find active log for this combo (the first one when several runs of
        // the same combo are active).
        const log = Array.from(panelState.activeLogs.values()).find(
          (l) => l.comboIndex === comboIndex,
        );
        if (!log || outputLines.length === 0) return;

        log.lines.push(...outputLines);

        // Keep last N lines per panel: split the terminal height between the
        // active logs, reserve 6 rows each, never go below 5 lines.
        const rowsPerLog =
          Math.floor(
            (process.stdout.rows || 40) /
              Math.max(1, panelState.activeLogs.size),
          ) - 6;
        const keep = Math.max(5, rowsPerLog);
        if (log.lines.length > keep) {
          log.lines = log.lines.slice(-keep);
        }
        refreshPanel();
      },
      onRunComplete: (comboIndex, runNumber) => {
        const key = `${comboIndex}-${runNumber}`;
        const log = panelState.activeLogs.get(key);
        const elapsed = log ? Date.now() - log.startTime : 0;
        // Runs longer than a second stay visible for a quarter of their
        // duration, capped at 2.5 s.
        const linger = elapsed > 1000 ? Math.min(elapsed * 0.25, 2500) : 0;
        const timer = setTimeout(() => {
          lingerTimers.delete(timer);
          panelState.activeLogs.delete(key);
          refreshPanel();
        }, linger);
        lingerTimers.add(timer);
      },
      onProgress: (_fixture, combos) => {
        discovery.combos.forEach((combo, i) => {
          const data = combos[combo.name];
          if (!data) return;

          const outcomes: RunOutcome[] = data.results.map((r) =>
            r.status === "ok" && r.score !== undefined ? r.score : "fail",
          );
          panelState.combos[i]!.outcomes = outcomes;

          // Add score to log panel: the log whose run number equals the
          // number of results recorded so far belongs to the latest result.
          const log = Array.from(panelState.activeLogs.values()).find(
            (l) => l.comboIndex === i && l.runNumber === data.results.length,
          );
          const lastResult = data.results[data.results.length - 1];
          if (!log || !lastResult) return;

          const scoreText =
            lastResult.status === "ok" && lastResult.score !== undefined
              ? `Score: ${lastResult.score}`
              : "Failed";

          let reasonText = "";
          if (lastResult.status === "ok" && lastResult.reason) {
            reasonText = lastResult.reason;
          } else if (lastResult.status === "fail" && lastResult.error) {
            reasonText = lastResult.error;
          }

          log.lines.push("");
          log.lines.push(`${CYAN}${scoreText}${RESET}`);
          if (reasonText) {
            log.lines.push(reasonText);
          }
          log.complete = true;
        });
        refreshPanel();
      },
    });

    panelState.activeLogs.clear();
    refreshPanel();

    return result;
  } finally {
    // Linger for a moment so user can see final scores
    await new Promise((resolve) => setTimeout(resolve, 2000));
    for (const timer of lingerTimers) {
      clearTimeout(timer);
    }
    lingerTimers.clear();
    panelController.unmount();
  }
}

// Input field shown next to the tool name for each known tool.
const TOOL_DETAIL_FIELD = new Map<string, string>([
  ["Bash", "command"],
  ["Read", "file_path"],
  ["Write", "file_path"],
  ["Edit", "file_path"],
  ["Glob", "pattern"],
  ["Grep", "pattern"],
  ["Task", "description"],
  ["Skill", "skill"],
]);

/** Narrow an unknown value to a non-null object. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null;
}

/** Split text into lines (LF or CRLF) and drop the blank ones. */
function nonBlankLines(text: string): string[] {
  return text
    .trim()
    .split(/\r?\n/)
    .filter((line) => line.trim() !== "");
}

/**
 * Extract verbose output from a stream JSON line.
 *
 * - `assistant` events: every non-blank line of each text block, and one
 *   `Tool: detail` line per `tool_use` block (just the tool name when no
 *   detail field is known or present).
 * - `user` events: every non-blank line of each `tool_result`, indented by
 *   one space.
 *
 * Invalid JSON, unrelated event types and malformed content entries produce
 * no output.
 *
 * @param jsonLine One line of the JSON event stream.
 * @returns Display lines, possibly empty.
 */
export function extractVerboseOutput(jsonLine: string): string[] {
  let event: unknown;
  try {
    event = JSON.parse(jsonLine);
  } catch {
    // Not valid JSON, ignore
    return [];
  }

  if (!isRecord(event)) return [];
  const eventType = event.type;
  const message = event.message;
  if (!isRecord(message) || !Array.isArray(message.content)) return [];

  const lines: string[] = [];

  for (const block of message.content as unknown[]) {
    if (!isRecord(block)) continue;

    if (eventType === "assistant") {
      if (block.type === "text" && typeof block.text === "string") {
        // Include all text lines
        lines.push(...nonBlankLines(block.text));
      } else if (block.type === "tool_use") {
        // Show tool being called - clean format
        const toolName = String(block.name || "unknown");
        const field = TOOL_DETAIL_FIELD.get(toolName);
        const value =
          field !== undefined && isRecord(block.input)
            ? block.input[field]
            : undefined;
        lines.push(value ? `${toolName}: ${String(value)}` : toolName);
      }
    } else if (eventType === "user" && block.type === "tool_result") {
      // Tool results: either a plain string or a list of parts with `text`.
      const raw = block.content;
      let resultText = "";
      if (typeof raw === "string") {
        resultText = raw;
      } else if (Array.isArray(raw)) {
        resultText = (raw as unknown[])
          .map((part) =>
            isRecord(part) && typeof part.text === "string" ? part.text : "",
          )
          .join("");
      }

      // Show each line of the result
      for (const line of nonBlankLines(resultText)) {
        lines.push(` ${line}`);
      }
    }
  }

  return lines;
}

/** Print the command-line usage summary to stdout. */
function printHelp(): void {
  console.log(`
shed - Minimal runner and eval framework for Claude Skills.

Usage:
  shed [options] [fixture...]   Run benchmarks
  shed create <folder>          Create a new project
  shed help                     Show this help

Options:
  --runs <n>      Number of runs per combo (default: 10)
  --cache-only    Only show cached results, don't run new tests
  --reset         Archive existing results and start fresh
  --reeval        Re-evaluate existing workspaces with current eval.md

Examples:
  shed                          Run all fixtures
  shed my-fixture               Run specific fixture
  shed --runs 20 my-fixture     Run with 20 iterations
  shed create my-project        Create new project folder
`);
}

/**
 * CLI entry point.
 *
 * Handles the `help` and `create` commands; otherwise benchmarks each fixture
 * and, while there is no winner yet, offers to add more runs until the user
 * stops or no further runs are suggested.
 */
async function main() {
  const rawArgs = process.argv.slice(2);
  const command = rawArgs[0];

  // Handle commands
  if (command === "help" || command === "--help" || command === "-h") {
    printHelp();
    return;
  }

  if (command === "create") {
    const targetDir = rawArgs[1];
    if (!targetDir) {
      console.error("Usage: shed create <folder>");
      process.exit(1);
    }
    await initProject(targetDir);
    return;
  }

  const args = await parseArgs(rawArgs);

  // Check if user specified explicit fixtures (vs auto-discovery). The value
  // following --runs is a positional too, but not a fixture name.
  const runsIndex = rawArgs.indexOf("--runs");
  const explicitFixtures = rawArgs.filter(
    (a) =>
      !a.startsWith("-") && (runsIndex === -1 || a !== rawArgs[runsIndex + 1]),
  );
  if (explicitFixtures.length > 0) {
    const validFixtures = await validateFixtures(args.fixtures);
    if (validFixtures.length === 0) {
      return; // User created fixtures but needs to edit them
    }
    args.fixtures = validFixtures;
  }

  for (const fixture of args.fixtures) {
    // Per-fixture state: extra runs, the one-shot reset and leaving
    // cache-only mode must not leak into the next fixture.
    let runs = args.runs;
    let reset = args.reset;
    let cacheOnly = args.cacheOnly;

    if (args.fixtures.length > 1) {
      console.log("");
      console.log(`${DIM}#${RESET} ${fixture}`);
    }

    while (true) {
      const result = await runFixture(
        fixture,
        runs,
        cacheOnly,
        reset,
        args.reeval,
      );
      reset = false; // Only reset on first iteration

      const verdict = computeVerdict(result);

      if (verdict.hasWinner) {
        console.log("");
        break;
      }

      if (verdict.options.length === 0) {
        console.log(
          `${DIM} Pick any — differences are negligible or would need impractical sample sizes.${RESET}`,
        );
        console.log("");
        break;
      }

      // Build menu options
      console.log("");
      const menuOptions: { label: string; value: number | null }[] =
        verdict.options.map((opt) => ({
          label: `+${opt.runs} runs`,
          value: opt.runs,
        }));
      menuOptions.push({ label: `${DIM}stop${RESET}`, value: null });

      const choice = await selectMenu(menuOptions);

      if (choice === null) {
        console.log("");
        break;
      }

      // Raise the run target and allow new runs for this fixture.
      runs += choice;
      cacheOnly = false;
    }
  }
}

main().catch((err: unknown) => {
  // Raised when the user aborts an interactive prompt: exit quietly.
  if (
    typeof err === "object" &&
    err !== null &&
    "name" in err &&
    err.name === "ExitPromptError"
  ) {
    process.exit(0);
  }
  console.error(err);
  process.exit(1);
});