// benchmark.ts - Benchmark runner for the Imhotep bench harness.
// Measures execution time and heap memory deltas for extraction and solving paths;
// CPU usage is not currently sampled in this file.
|
|
|
|
import { performance } from 'node:perf_hooks'
|
|
import {
|
|
type PerformanceProfile,
|
|
type BudgetCheck,
|
|
DEFAULT_PROFILES,
|
|
checkBudget,
|
|
} from './profiles.js'
|
|
import { type CompilationCache, type ExtractionCache } from './cache.js'
|
|
import { PromisePool, type PromisePoolOptions } from './parallel.js'
|
|
|
|
/**
 * Per-benchmark configuration accepted by `runBenchmark`.
 *
 * Every field is optional; the defaults quoted below are the ones
 * `runBenchmark` applies when a field is left undefined.
 */
export interface BenchmarkOptions {
  /** Number of warm-up runs executed (results discarded) before measurement. Defaults to 1. */
  warmupRuns?: number
  /**
   * Number of measured runs. Defaults to 5.
   * At least one measured run is needed for the reported statistics to be meaningful.
   */
  measurementRuns?: number
  /** Performance profile to check budgets against. Defaults to `DEFAULT_PROFILES.dev`. */
  profile?: PerformanceProfile
  /** Budget category name (compile | extract | solve | e2e). Defaults to 'e2e'. */
  budgetName?: string
  /**
   * Optional compilation cache injected by caller.
   * NOTE(review): nothing in this file reads this field — confirm who consumes it.
   */
  compilationCache?: CompilationCache<unknown>
  /**
   * Optional extraction cache injected by caller.
   * NOTE(review): nothing in this file reads this field — confirm who consumes it.
   */
  extractionCache?: ExtractionCache<unknown>
}
|
|
|
|
/**
 * Aggregated outcome of one benchmark, as produced by `runBenchmark`.
 * All statistics are computed over the measured runs only (warm-up runs are excluded).
 */
export interface BenchmarkResult {
  /** Benchmark name */
  name: string
  /** Mean duration across measured runs in milliseconds */
  meanDurationMs: number
  /** Minimum duration in milliseconds */
  minDurationMs: number
  /** Maximum duration in milliseconds */
  maxDurationMs: number
  /** Population standard deviation of duration in milliseconds (variance divides by the run count, not count - 1) */
  stdDevDurationMs: number
  /** Mean heap-usage delta in bytes; may be negative when the heap shrank during a run */
  meanMemoryDeltaBytes: number
  /**
   * Budget check result.
   * Optional in the type, but `runBenchmark` always populates it: when no
   * budget name is supplied it checks against the 'e2e' category.
   */
  budgetCheck?: BudgetCheck
  /** Per-run raw measurements, in execution order */
  runs: RunMeasurement[]
}
|
|
|
|
/** Raw measurement of a single benchmark run. */
export interface RunMeasurement {
  /** Elapsed time of the run in milliseconds */
  durationMs: number
  /** Change in used heap over the run, in bytes; negative when the heap shrank */
  memoryDeltaBytes: number
}
|
|
|
|
/**
 * Profile a single function execution.
 *
 * Calls `fn` exactly once, awaits its result, and reports how long the call
 * took together with the change in `process.memoryUsage().heapUsed`.
 *
 * The heap reading is process-wide and is taken outside the timed window, so
 * sampling it does not inflate `durationMs`. The delta can be negative (for
 * example if a garbage collection happens during the run).
 *
 * @param fn - Synchronous or asynchronous function to profile.
 * @returns Duration in milliseconds, heap delta in bytes, and the value `fn` resolved to.
 * @throws Whatever `fn` throws or rejects with; no measurement is returned in that case.
 */
export async function profileRun<T>(
  fn: () => T | Promise<T>
): Promise<{ durationMs: number; memoryDeltaBytes: number; result: T }> {
  const heapBefore = process.memoryUsage().heapUsed
  const startedAt = performance.now()

  const result = await fn()

  // Stop the clock first, then sample the heap, mirroring the start order.
  const durationMs = performance.now() - startedAt
  const memoryDeltaBytes = process.memoryUsage().heapUsed - heapBefore

  return { durationMs, memoryDeltaBytes, result }
}
|
|
|
|
/**
 * Run a benchmark with warm-up and measurement phases.
 *
 * 1. Calls `fn` `warmupRuns` times (default 1) and discards the results.
 * 2. Calls `fn` `measurementRuns` times (default 5) through `profileRun`,
 *    recording duration and heap delta for each run.
 * 3. Aggregates mean / min / max / population standard deviation of the
 *    durations and the mean heap delta.
 * 4. Checks the mean duration and mean heap delta against the budget named
 *    `budgetName` (default 'e2e') in `profile` (default `DEFAULT_PROFILES.dev`).
 *
 * @param name - Benchmark name, copied into the result.
 * @param fn - Synchronous or asynchronous function under test.
 * @param options - Run counts and budget selection; see `BenchmarkOptions`.
 * @returns Aggregated statistics, the budget check, and the per-run measurements.
 * @throws RangeError if `measurementRuns` is not a finite number greater than 0.
 *   Without at least one measured run the statistics would be NaN / ±Infinity.
 * @throws Whatever `fn` throws or rejects with, during warm-up or measurement.
 */
export async function runBenchmark<T>(
  name: string,
  fn: () => T | Promise<T>,
  options: BenchmarkOptions = {}
): Promise<BenchmarkResult> {
  const warmupRuns = options.warmupRuns ?? 1
  const measurementRuns = options.measurementRuns ?? 5

  // Validate before running anything: zero/negative/NaN would produce NaN and
  // ±Infinity statistics, and Infinity would never terminate.
  if (!Number.isFinite(measurementRuns) || measurementRuns <= 0) {
    throw new RangeError(
      `runBenchmark("${name}"): measurementRuns must be a finite number greater than 0, got ${measurementRuns}`
    )
  }

  const profile = options.profile ?? DEFAULT_PROFILES.dev
  const budgetName = options.budgetName ?? 'e2e'

  // Warm-up phase: discard results
  for (let i = 0; i < warmupRuns; i++) {
    await fn()
  }

  // Measurement phase
  const runs: RunMeasurement[] = []
  for (let i = 0; i < measurementRuns; i++) {
    const { durationMs, memoryDeltaBytes } = await profileRun(fn)
    runs.push({ durationMs, memoryDeltaBytes })
  }

  // Single pass for sums and extremes; avoids spreading the whole array into
  // Math.min/Math.max argument lists.
  let durationSum = 0
  let memoryDeltaSum = 0
  let minDurationMs = Infinity
  let maxDurationMs = -Infinity
  for (const run of runs) {
    durationSum += run.durationMs
    memoryDeltaSum += run.memoryDeltaBytes
    if (run.durationMs < minDurationMs) minDurationMs = run.durationMs
    if (run.durationMs > maxDurationMs) maxDurationMs = run.durationMs
  }
  const meanDurationMs = durationSum / runs.length
  const meanMemoryDeltaBytes = memoryDeltaSum / runs.length

  // Population variance (divide by N): the runs are the whole data set being described.
  let squaredDeviationSum = 0
  for (const run of runs) {
    const deviation = run.durationMs - meanDurationMs
    squaredDeviationSum += deviation * deviation
  }
  const stdDevDurationMs = Math.sqrt(squaredDeviationSum / runs.length)

  const budgetCheck = checkBudget(
    budgetName,
    meanDurationMs,
    meanMemoryDeltaBytes,
    profile
  )

  return {
    name,
    meanDurationMs,
    minDurationMs,
    maxDurationMs,
    stdDevDurationMs,
    meanMemoryDeltaBytes,
    budgetCheck,
    runs,
  }
}
|
|
|
|
/** Suite-wide configuration for `BenchmarkSuite`. */
export interface BenchmarkSuiteOptions {
  /**
   * Global benchmark options applied to every benchmark.
   * Options passed to `BenchmarkSuite.add` override these field by field.
   */
  defaults?: BenchmarkOptions
  /**
   * Promise pool options for parallel execution.
   * `runParallel` forwards only `maxConcurrency` (default 4) and
   * `taskTimeoutMs` (default 30000) to the pool.
   */
  poolOptions?: PromisePoolOptions
}
|
|
|
|
/** Outcome of running every benchmark registered on a `BenchmarkSuite`. */
export interface BenchmarkSuiteResult {
  /** Name the suite was constructed with */
  suiteName: string
  /** One result per registered benchmark */
  results: BenchmarkResult[]
  /** Elapsed time of the whole suite run in milliseconds, warm-up runs included */
  totalDurationMs: number
  /**
   * True when every result either has no budget check or passed it.
   * An empty suite therefore reports true.
   */
  allBudgetsPassed: boolean
}
|
|
|
|
/**
 * BenchmarkSuite collects and runs multiple named benchmarks.
 * Supports sequential and parallel execution.
 *
 * Benchmarks are registered with `add` and executed with either
 * `runSequential` or `runParallel`; both delegate each benchmark to
 * `runBenchmark` and produce the same result shape.
 */
export class BenchmarkSuite {
  /** Registered benchmarks, in registration order. */
  private readonly benchmarks: Array<{
    name: string
    fn: () => unknown | Promise<unknown>
    options?: BenchmarkOptions
  }> = []

  /**
   * @param suiteName - Name reported in the suite result.
   * @param options - Suite-wide defaults and promise-pool settings.
   */
  constructor(
    private readonly suiteName: string,
    private readonly options: BenchmarkSuiteOptions = {}
  ) {}

  /**
   * Register a benchmark function.
   *
   * @param name - Benchmark name; also used as the task id in parallel runs.
   * @param fn - Synchronous or asynchronous function under test.
   * @param options - Per-benchmark overrides of the suite defaults.
   * @returns This suite, so registrations can be chained.
   */
  add<T>(
    name: string,
    fn: () => T | Promise<T>,
    options?: BenchmarkOptions
  ): this {
    this.benchmarks.push({ name, fn, options })
    return this
  }

  /**
   * Run all benchmarks sequentially, in registration order.
   * A benchmark that throws aborts the suite and the error propagates.
   */
  async runSequential(): Promise<BenchmarkSuiteResult> {
    const results: BenchmarkResult[] = []
    const suiteStart = performance.now()

    for (const { name, fn, options } of this.benchmarks) {
      results.push(await runBenchmark(name, fn, this.mergeOptions(options)))
    }

    return this.summarize(results, performance.now() - suiteStart)
  }

  /**
   * Run all benchmarks in parallel with controlled concurrency.
   *
   * Uses a `PromisePool` with `maxConcurrency` (default 4) and `taskTimeoutMs`
   * (default 30000); the pool is always shut down, even when a task fails.
   *
   * NOTE(review): benchmarks running concurrently share one process, so their
   * durations and heap deltas can influence each other — prefer
   * `runSequential` when measurement fidelity matters.
   */
  async runParallel(): Promise<BenchmarkSuiteResult> {
    const poolOptions: PromisePoolOptions = {
      maxConcurrency: this.options.poolOptions?.maxConcurrency ?? 4,
      taskTimeoutMs: this.options.poolOptions?.taskTimeoutMs ?? 30000,
    }
    const pool = new PromisePool(poolOptions)
    const suiteStart = performance.now()

    try {
      const tasks = this.benchmarks.map(({ name, fn, options }) => ({
        id: name,
        input: undefined,
        // Options are merged when the task runs, not when it is queued.
        fn: async () => runBenchmark(name, fn, this.mergeOptions(options)),
      }))

      // NOTE(review): assumes executeAll resolves to the tasks' return values — confirm against parallel.ts.
      const results = (await pool.executeAll(tasks)) as BenchmarkResult[]

      return this.summarize(results, performance.now() - suiteStart)
    } finally {
      await pool.shutdown()
    }
  }

  /** Layer per-benchmark options over the suite defaults (per-benchmark fields win). */
  private mergeOptions(options?: BenchmarkOptions): BenchmarkOptions {
    return {
      ...this.options.defaults,
      ...options,
    }
  }

  /** Build the suite result; budgets pass when no result carries a failed check. */
  private summarize(
    results: BenchmarkResult[],
    totalDurationMs: number
  ): BenchmarkSuiteResult {
    return {
      suiteName: this.suiteName,
      results,
      totalDurationMs,
      allBudgetsPassed: results.every(
        r => !r.budgetCheck || r.budgetCheck.passed
      ),
    }
  }
}
|