// File: Imhotep/packages/imhotep-bench/src/benchmark.ts (TypeScript, 230 lines, 6.3 KiB)

// benchmark.ts - Benchmark runner for Imhotep bench harness
// Measures execution time, memory, and CPU for extraction and solving paths.
import { performance } from 'node:perf_hooks'
import {
type PerformanceProfile,
type BudgetCheck,
DEFAULT_PROFILES,
checkBudget,
} from './profiles.js'
import { type CompilationCache, type ExtractionCache } from './cache.js'
import { PromisePool, type PromisePoolOptions } from './parallel.js'
/**
 * Tunables for a single benchmark. Every field is optional; the defaults
 * mentioned below are the ones `runBenchmark` applies when a field is omitted.
 */
export interface BenchmarkOptions {
  /** Untimed invocations executed before measuring (defaults to 1). */
  warmupRuns?: number
  /** Timed invocations whose measurements are recorded (defaults to 5). */
  measurementRuns?: number
  /** Profile whose budgets are checked (defaults to `DEFAULT_PROFILES.dev`). */
  profile?: PerformanceProfile
  /** Budget category: compile | extract | solve | e2e (defaults to 'e2e'). */
  budgetName?: string
  /** Caller-supplied compilation cache; `runBenchmark` itself does not read it. */
  compilationCache?: CompilationCache<unknown>
  /** Caller-supplied extraction cache; `runBenchmark` itself does not read it. */
  extractionCache?: ExtractionCache<unknown>
}
/** Aggregated statistics for one benchmark, plus the raw per-run samples. */
export interface BenchmarkResult {
  /** Name the benchmark was run under. */
  name: string
  /** Arithmetic mean of the measured run durations, in milliseconds. */
  meanDurationMs: number
  /** Shortest measured run, in milliseconds. */
  minDurationMs: number
  /** Longest measured run, in milliseconds. */
  maxDurationMs: number
  /** Standard deviation of the measured durations, in milliseconds. */
  stdDevDurationMs: number
  /** Arithmetic mean of the per-run memory deltas, in bytes. */
  meanMemoryDeltaBytes: number
  /**
   * Outcome of the budget check. Optional in the type, although
   * `runBenchmark` always sets it (its budget name defaults to 'e2e').
   */
  budgetCheck?: BudgetCheck
  /** Raw measurement of every measured run, in execution order. */
  runs: RunMeasurement[]
}
/** Raw sample captured for one measured invocation. */
export interface RunMeasurement {
  /** Elapsed time of the invocation, in milliseconds. */
  durationMs: number
  /** Change in used heap across the invocation, in bytes; negative when it shrank. */
  memoryDeltaBytes: number
}
/**
 * Execute `fn` once and report how long it took and how much the used heap
 * changed while it ran.
 *
 * The heap is sampled outside the timed window (before the clock starts and
 * after it stops), so the two `process.memoryUsage()` calls are not timed.
 *
 * @param fn - Synchronous or asynchronous function to measure; it is awaited.
 * @returns Elapsed milliseconds, the `heapUsed` difference in bytes, and the
 *   value `fn` resolved to. If `fn` throws or rejects, the error propagates.
 */
export async function profileRun<T>(
  fn: () => T | Promise<T>
): Promise<{ durationMs: number; memoryDeltaBytes: number; result: T }> {
  const heapAtStart = process.memoryUsage().heapUsed
  const startedAt = performance.now()

  const result = await fn()

  const durationMs = performance.now() - startedAt
  const memoryDeltaBytes = process.memoryUsage().heapUsed - heapAtStart

  return { durationMs, memoryDeltaBytes, result }
}
/**
 * Run a benchmark with warm-up and measurement phases.
 *
 * `fn` is first invoked `warmupRuns` times with the results discarded, then
 * `measurementRuns` times through `profileRun`. The mean duration and mean
 * memory delta of the measured runs are passed to `checkBudget` together with
 * the selected budget name and profile.
 *
 * @param name - Label copied into the result (and into error messages).
 * @param fn - Function under test; may be synchronous or asynchronous.
 * @param options - Optional overrides. Defaults: 1 warm-up run, 5 measured
 *   runs, `DEFAULT_PROFILES.dev`, budget name 'e2e'.
 * @returns Duration statistics (mean, min, max, population standard
 *   deviation), mean memory delta, the budget check, and the raw runs.
 * @throws RangeError if `measurementRuns` is not a finite number >= 1; with
 *   no measured runs every statistic would be NaN or +/-Infinity.
 */
export async function runBenchmark<T>(
  name: string,
  fn: () => T | Promise<T>,
  options: BenchmarkOptions = {}
): Promise<BenchmarkResult> {
  const warmupRuns = options.warmupRuns ?? 1
  const measurementRuns = options.measurementRuns ?? 5

  // Validate before doing any work: zero samples cannot produce statistics.
  if (!Number.isFinite(measurementRuns) || measurementRuns < 1) {
    throw new RangeError(
      `runBenchmark("${name}"): measurementRuns must be a finite number >= 1, got ${measurementRuns}`
    )
  }

  const profile = options.profile ?? DEFAULT_PROFILES.dev
  const budgetName = options.budgetName ?? 'e2e'

  // Warm-up phase: results and timings are intentionally discarded.
  for (let i = 0; i < warmupRuns; i++) {
    await fn()
  }

  // Measurement phase. Aggregates are accumulated while sampling rather than
  // via Math.min(...array) / Math.max(...array), which pass every element as
  // a call argument and can exceed the engine's argument limit.
  const runs: RunMeasurement[] = []
  let durationSumMs = 0
  let memoryDeltaSumBytes = 0
  let minDurationMs = Infinity
  let maxDurationMs = -Infinity
  for (let i = 0; i < measurementRuns; i++) {
    const { durationMs, memoryDeltaBytes } = await profileRun(fn)
    runs.push({ durationMs, memoryDeltaBytes })
    durationSumMs += durationMs
    memoryDeltaSumBytes += memoryDeltaBytes
    minDurationMs = Math.min(minDurationMs, durationMs)
    maxDurationMs = Math.max(maxDurationMs, durationMs)
  }

  const meanDurationMs = durationSumMs / runs.length
  const meanMemoryDeltaBytes = memoryDeltaSumBytes / runs.length

  // Population variance (divide by N): the runs are treated as the full set.
  let squaredDeviationSum = 0
  for (const { durationMs } of runs) {
    squaredDeviationSum += (durationMs - meanDurationMs) ** 2
  }
  const stdDevDurationMs = Math.sqrt(squaredDeviationSum / runs.length)

  const budgetCheck = checkBudget(
    budgetName,
    meanDurationMs,
    meanMemoryDeltaBytes,
    profile
  )

  return {
    name,
    meanDurationMs,
    minDurationMs,
    maxDurationMs,
    stdDevDurationMs,
    meanMemoryDeltaBytes,
    budgetCheck,
    runs,
  }
}
/** Suite-wide configuration accepted by the `BenchmarkSuite` constructor. */
export interface BenchmarkSuiteOptions {
  /** Baseline options for every benchmark; per-benchmark options override them. */
  defaults?: BenchmarkOptions
  /** Settings for the promise pool used by `runParallel`. */
  poolOptions?: PromisePoolOptions
}
/** Outcome of running every benchmark registered on a suite. */
export interface BenchmarkSuiteResult {
  /** Name the suite was constructed with. */
  suiteName: string
  /** One entry per benchmark that was run. */
  results: BenchmarkResult[]
  /** Elapsed time for the whole suite run, in milliseconds. */
  totalDurationMs: number
  /** True when no result carries a budget check that did not pass. */
  allBudgetsPassed: boolean
}
/**
 * A named, ordered collection of benchmarks that can be executed one after
 * another (`runSequential`) or through a promise pool (`runParallel`).
 *
 * Suite-level `defaults` are merged with each benchmark's own options; the
 * per-benchmark values win on conflict.
 */
export class BenchmarkSuite {
  private benchmarks: Array<{
    name: string
    fn: () => unknown | Promise<unknown>
    options?: BenchmarkOptions
  }> = []

  constructor(
    private suiteName: string,
    private options: BenchmarkSuiteOptions = {}
  ) {}

  /**
   * Register a benchmark under `name`.
   *
   * @returns The suite itself, so registrations can be chained.
   */
  add<T>(
    name: string,
    fn: () => T | Promise<T>,
    options?: BenchmarkOptions
  ): this {
    const entry = { name, fn, options }
    this.benchmarks.push(entry)
    return this
  }

  /** Execute the registered benchmarks one at a time, in registration order. */
  async runSequential(): Promise<BenchmarkSuiteResult> {
    const collected: BenchmarkResult[] = []
    const startedAt = performance.now()

    for (const entry of this.benchmarks) {
      const effective = this.mergeWithDefaults(entry.options)
      collected.push(await runBenchmark(entry.name, entry.fn, effective))
    }

    return this.summarize(collected, performance.now() - startedAt)
  }

  /**
   * Execute the registered benchmarks through a `PromisePool`.
   *
   * Concurrency defaults to 4 and the per-task timeout to 30000 ms unless
   * the suite's `poolOptions` say otherwise. The pool is always shut down,
   * even when execution fails.
   *
   * NOTE(review): measurements read process-wide heap usage, so if the pool
   * interleaves benchmarks their timings and memory deltas may include each
   * other's work. Confirm against PromisePool's scheduling.
   */
  async runParallel(): Promise<BenchmarkSuiteResult> {
    const requested = this.options.poolOptions
    const poolOptions: PromisePoolOptions = {
      maxConcurrency: requested?.maxConcurrency ?? 4,
      taskTimeoutMs: requested?.taskTimeoutMs ?? 30000,
    }
    const pool = new PromisePool(poolOptions)
    const startedAt = performance.now()

    try {
      const tasks = this.benchmarks.map(entry => ({
        id: entry.name,
        input: undefined,
        // Options are merged when the task actually runs, not when it is queued.
        fn: async () =>
          runBenchmark(
            entry.name,
            entry.fn,
            this.mergeWithDefaults(entry.options)
          ),
      }))
      const collected = (await pool.executeAll(tasks)) as BenchmarkResult[]
      return this.summarize(collected, performance.now() - startedAt)
    } finally {
      await pool.shutdown()
    }
  }

  /** Overlay a benchmark's own options on top of the suite-wide defaults. */
  private mergeWithDefaults(own?: BenchmarkOptions): BenchmarkOptions {
    return { ...this.options.defaults, ...own }
  }

  /** Assemble the suite result; a result without a budget check counts as passing. */
  private summarize(
    results: BenchmarkResult[],
    totalDurationMs: number
  ): BenchmarkSuiteResult {
    return {
      suiteName: this.suiteName,
      results,
      totalDurationMs,
      allBudgetsPassed: results.every(({ budgetCheck }) =>
        budgetCheck ? budgetCheck.passed : true
      ),
    }
  }
}