// benchmark.ts - Benchmark runner for Imhotep bench harness
// Measures wall-clock execution time and heap memory deltas for extraction
// and solving paths, and checks the results against a performance budget.

import { performance } from 'node:perf_hooks'
import {
  type PerformanceProfile,
  type BudgetCheck,
  DEFAULT_PROFILES,
  checkBudget,
} from './profiles.js'
import { type CompilationCache, type ExtractionCache } from './cache.js'
import { PromisePool, type PromisePoolOptions } from './parallel.js'

export interface BenchmarkOptions {
  /** Number of warm-up runs before measurement (default: 1) */
  warmupRuns?: number
  /** Number of measured runs (default: 5); must be greater than 0 */
  measurementRuns?: number
  /** Performance profile to check budgets against (default: DEFAULT_PROFILES.dev) */
  profile?: PerformanceProfile
  /** Budget category name (compile | extract | solve | e2e); default: 'e2e' */
  budgetName?: string
  /** Optional compilation cache injected by caller */
  compilationCache?: CompilationCache
  /** Optional extraction cache injected by caller */
  extractionCache?: ExtractionCache
}

export interface BenchmarkResult {
  /** Benchmark name */
  name: string
  /** Mean duration across measured runs in milliseconds */
  meanDurationMs: number
  /** Minimum duration in milliseconds */
  minDurationMs: number
  /** Maximum duration in milliseconds */
  maxDurationMs: number
  /** Population standard deviation of duration in milliseconds */
  stdDevDurationMs: number
  /** Mean memory delta in bytes */
  meanMemoryDeltaBytes: number
  /**
   * Budget check result. runBenchmark always populates this, using the
   * 'e2e' budget when no budget name was provided.
   */
  budgetCheck?: BudgetCheck
  /** Per-run raw measurements */
  runs: RunMeasurement[]
}

export interface RunMeasurement {
  durationMs: number
  memoryDeltaBytes: number
}

/**
 * Profile a single function execution.
 *
 * Samples `process.memoryUsage().heapUsed` and `performance.now()` immediately
 * before and after awaiting `fn`.
 *
 * @param fn - Function to execute; may be synchronous or return a promise.
 * @returns The elapsed time in milliseconds, the change in `heapUsed` in bytes
 *   (a plain after-minus-before difference, so it can be negative), and the
 *   value produced by `fn`.
 */
export async function profileRun<T>(
  fn: () => T | Promise<T>
): Promise<{ durationMs: number; memoryDeltaBytes: number; result: T }> {
  const memBefore = process.memoryUsage().heapUsed
  const start = performance.now()
  const result = await fn()
  const end = performance.now()
  const memAfter = process.memoryUsage().heapUsed

  return {
    durationMs: end - start,
    memoryDeltaBytes: memAfter - memBefore,
    result,
  }
}

/** Aggregate duration statistics in a single pass plus one variance pass. */
function summarizeDurations(durations: readonly number[]): {
  mean: number
  min: number
  max: number
  stdDev: number
} {
  let total = 0
  let min = Infinity
  let max = -Infinity
  // A plain loop avoids spreading the array into Math.min/Math.max, which
  // passes every element as a call argument.
  for (const d of durations) {
    total += d
    if (d < min) min = d
    if (d > max) max = d
  }
  const mean = total / durations.length

  // Population variance (divides by N, not N - 1).
  let squaredDiffs = 0
  for (const d of durations) {
    squaredDiffs += Math.pow(d - mean, 2)
  }

  return { mean, min, max, stdDev: Math.sqrt(squaredDiffs / durations.length) }
}

/**
 * Run a benchmark with warm-up and measurement phases.
 *
 * Warm-up runs call `fn` and discard everything; measured runs go through
 * `profileRun`. The mean duration and mean memory delta are then checked
 * against the selected budget of the selected profile.
 *
 * @param name - Benchmark name, copied into the result.
 * @param fn - Function under test; may be synchronous or return a promise.
 * @param options - Run counts, profile and budget name; see BenchmarkOptions
 *   for defaults.
 * @returns Aggregated statistics, the budget check and the raw per-run data.
 * @throws RangeError if `measurementRuns` would produce no measured runs
 *   (zero, negative or NaN), since every statistic would be undefined.
 */
export async function runBenchmark<T>(
  name: string,
  fn: () => T | Promise<T>,
  options: BenchmarkOptions = {}
): Promise<BenchmarkResult> {
  const warmupRuns = options.warmupRuns ?? 1
  const measurementRuns = options.measurementRuns ?? 5
  const profile = options.profile ?? DEFAULT_PROFILES.dev
  const budgetName = options.budgetName ?? 'e2e'

  // Written as a negated comparison so that NaN is rejected as well.
  if (!(measurementRuns > 0)) {
    throw new RangeError(
      `runBenchmark("${name}"): measurementRuns must be greater than 0, got ${measurementRuns}`
    )
  }

  // Warm-up phase: discard results
  for (let i = 0; i < warmupRuns; i++) {
    await fn()
  }

  // Measurement phase
  const runs: RunMeasurement[] = []
  for (let i = 0; i < measurementRuns; i++) {
    const { durationMs, memoryDeltaBytes } = await profileRun(fn)
    runs.push({ durationMs, memoryDeltaBytes })
  }

  const stats = summarizeDurations(runs.map(r => r.durationMs))
  const meanMemoryDeltaBytes =
    runs.reduce((sum, r) => sum + r.memoryDeltaBytes, 0) / runs.length

  const budgetCheck = checkBudget(
    budgetName,
    stats.mean,
    meanMemoryDeltaBytes,
    profile
  )

  return {
    name,
    meanDurationMs: stats.mean,
    minDurationMs: stats.min,
    maxDurationMs: stats.max,
    stdDevDurationMs: stats.stdDev,
    meanMemoryDeltaBytes,
    budgetCheck,
    runs,
  }
}

export interface BenchmarkSuiteOptions {
  /** Global benchmark options applied to every benchmark */
  defaults?: BenchmarkOptions
  /** Promise pool options for parallel execution */
  poolOptions?: PromisePoolOptions
}

export interface BenchmarkSuiteResult {
  suiteName: string
  results: BenchmarkResult[]
  totalDurationMs: number
  allBudgetsPassed: boolean
}

/**
 * BenchmarkSuite collects and runs multiple named benchmarks.
 * Supports sequential and parallel execution.
 */
export class BenchmarkSuite {
  private readonly benchmarks: Array<{
    name: string
    fn: () => unknown | Promise<unknown>
    options?: BenchmarkOptions
  }> = []

  constructor(
    private readonly suiteName: string,
    private readonly options: BenchmarkSuiteOptions = {}
  ) {}

  /**
   * Register a benchmark function.
   *
   * @param name - Benchmark name; also used as the task id in runParallel.
   * @param fn - Function under test.
   * @param options - Per-benchmark options; these override the suite defaults.
   * @returns This suite, so calls can be chained.
   */
  add<T>(
    name: string,
    fn: () => T | Promise<T>,
    options?: BenchmarkOptions
  ): this {
    this.benchmarks.push({ name, fn, options })
    return this
  }

  /**
   * Run all benchmarks sequentially, in registration order.
   *
   * @returns Per-benchmark results plus the total elapsed suite time.
   */
  async runSequential(): Promise<BenchmarkSuiteResult> {
    const results: BenchmarkResult[] = []
    const suiteStart = performance.now()

    for (const { name, fn, options } of this.benchmarks) {
      results.push(await runBenchmark(name, fn, this.mergeOptions(options)))
    }

    const suiteEnd = performance.now()
    return this.buildSuiteResult(results, suiteEnd - suiteStart)
  }

  /**
   * Run all benchmarks in parallel with controlled concurrency.
   *
   * Only `maxConcurrency` (default 4) and `taskTimeoutMs` (default 30000) are
   * forwarded from the suite's pool options. The pool is shut down whether or
   * not execution succeeds.
   *
   * NOTE(review): concurrently running benchmarks share one process, so the
   * heapUsed deltas and timings of one task can presumably be influenced by
   * the others — confirm this is acceptable for the budgets being checked.
   *
   * @returns Per-benchmark results plus the total elapsed suite time.
   */
  async runParallel(): Promise<BenchmarkSuiteResult> {
    const poolOptions: PromisePoolOptions = {
      maxConcurrency: this.options.poolOptions?.maxConcurrency ?? 4,
      taskTimeoutMs: this.options.poolOptions?.taskTimeoutMs ?? 30000,
    }

    const pool = new PromisePool(poolOptions)
    const suiteStart = performance.now()

    try {
      const tasks = this.benchmarks.map(({ name, fn, options }) => ({
        id: name,
        input: undefined,
        fn: async () => runBenchmark(name, fn, this.mergeOptions(options)),
      }))

      const results = (await pool.executeAll(tasks)) as BenchmarkResult[]
      const suiteEnd = performance.now()

      return this.buildSuiteResult(results, suiteEnd - suiteStart)
    } finally {
      await pool.shutdown()
    }
  }

  /** Layer per-benchmark options over the suite-wide defaults. */
  private mergeOptions(options?: BenchmarkOptions): BenchmarkOptions {
    return {
      ...this.options.defaults,
      ...options,
    }
  }

  /** Assemble the suite result; a missing budget check counts as passed. */
  private buildSuiteResult(
    results: BenchmarkResult[],
    totalDurationMs: number
  ): BenchmarkSuiteResult {
    return {
      suiteName: this.suiteName,
      results,
      totalDurationMs,
      allBudgetsPassed: results.every(
        r => !r.budgetCheck || r.budgetCheck.passed
      ),
    }
  }
}