import { mkdir, readdir, readFile, writeFile } from "node:fs/promises";
import { join } from "node:path";
import { mean, sampleStd } from "./stats.js";

/** Outcome of a single run, in the shape that is serialized to `run-NNN.eval.json`. */
export interface RunResult {
  /** Only `"ok"` results contribute a score to {@link calculateStats}. */
  status: "ok" | "fail";
  score?: number;
  reason?: string;
  error?: string;
  durationMs: number;
  costUsd?: number;
}

/** File-name suffix that identifies a persisted run result inside a results directory. */
const EVAL_FILE_SUFFIX = ".eval.json";

/**
 * Content of the `.gitignore` dropped into every results directory.
 * One pattern per line: `run-*` directories (workspaces) and `*.log` files.
 * The trailing slash on `run-*​/` restricts it to directories, so the
 * `run-NNN.eval.json` result files themselves are not ignored.
 */
const GITIGNORE_CONTENT = "run-*/\n*.log\n";

/** Returns true when a directory entry name is a persisted run-result file. */
function isEvalFile(entry: string): boolean {
  return entry.endsWith(EVAL_FILE_SUFFIX);
}

/**
 * Resolves `<resultsBaseDir>/<fixture>/<comboName>/<hash>`, creating it
 * (and any missing parents) if necessary, and writes a `.gitignore` into it.
 *
 * @returns The path of the results directory.
 * @throws If the directory cannot be created. Failure to write the
 *   `.gitignore` is ignored.
 */
export async function getResultsDir(
  resultsBaseDir: string,
  fixture: string,
  comboName: string,
  hash: string,
): Promise<string> {
  const dir = join(resultsBaseDir, fixture, comboName, hash);
  await mkdir(dir, { recursive: true });

  try {
    await writeFile(join(dir, ".gitignore"), GITIGNORE_CONTENT);
  } catch {
    // Best effort: the .gitignore is a convenience, so a failed write must
    // not prevent the caller from using the directory.
  }

  return dir;
}

/**
 * Counts the `*.eval.json` files directly inside `resultsDir`.
 *
 * @returns The number of result files, or `0` if the directory cannot be read
 *   (for example because it does not exist yet).
 */
export async function countExistingRuns(resultsDir: string): Promise<number> {
  try {
    const entries = await readdir(resultsDir);
    return entries.filter(isEvalFile).length;
  } catch {
    return 0;
  }
}

/**
 * Loads every `*.eval.json` file in `resultsDir`, ordered by file name.
 *
 * The parsed JSON is trusted to have the {@link RunResult} shape; it is not
 * validated here.
 *
 * @returns The parsed results, or an empty array if the directory cannot be
 *   read or if ANY file fails to read or parse (the load is all-or-nothing).
 */
export async function loadExistingResults(
  resultsDir: string,
): Promise<RunResult[]> {
  try {
    const entries = await readdir(resultsDir);
    const jsonFiles = entries.filter(isEvalFile).sort();

    const results: RunResult[] = [];
    // Read one file at a time so a directory with many runs cannot exhaust
    // the process's file-descriptor limit.
    for (const file of jsonFiles) {
      const content = await readFile(join(resultsDir, file), "utf-8");
      results.push(JSON.parse(content) as RunResult);
    }
    return results;
  } catch {
    return [];
  }
}

/**
 * Writes `result` as pretty-printed JSON to `run-NNN.eval.json` in
 * `resultsDir`, where `NNN` is `runNumber` left-padded with zeros to at
 * least three digits. An existing file with the same name is overwritten.
 *
 * @throws If the file cannot be written.
 */
export async function saveRunResult(
  resultsDir: string,
  runNumber: number,
  result: RunResult,
): Promise<void> {
  const padded = String(runNumber).padStart(3, "0");
  await writeFile(
    join(resultsDir, `run-${padded}${EVAL_FILE_SUFFIX}`),
    JSON.stringify(result, null, 2),
  );
  // Note: log file is now written via streaming during the run
}

/**
 * Summarizes a set of run results.
 *
 * @returns
 * - `runs`: total number of results
 * - `ok` / `fail`: results with status `"ok"` and all others, respectively
 * - `scores`: the numeric scores of the `"ok"` results, in input order
 * - `mean` / `std`: `mean(scores)` and `sampleStd(scores)` from `./stats.js`;
 *   their value for an empty or single-element `scores` list is whatever
 *   those helpers return.
 */
export function calculateStats(results: RunResult[]): {
  runs: number;
  ok: number;
  fail: number;
  mean: number;
  std: number;
  scores: number[];
} {
  const okResults = results.filter((r) => r.status === "ok");

  // Keep only real numbers. Besides a missing score this also drops `null`,
  // which is what JSON.stringify writes for NaN/Infinity and which can
  // therefore appear in results reloaded from disk despite the declared type.
  const scores: number[] = [];
  for (const r of okResults) {
    if (typeof r.score === "number") {
      scores.push(r.score);
    }
  }

  return {
    runs: results.length,
    ok: okResults.length,
    fail: results.length - okResults.length,
    mean: mean(scores),
    std: sampleStd(scores),
    scores,
  };
}