v1.1.0: pooled runtime, 959 tests, production hardening (0 squash)

2025-08-15 10:00:00 -07:00
commit 92deb689cd
321 changed files with 79170 additions and 0 deletions
@@ -0,0 +1,229 @@
+// benchmark.ts - Benchmark runner for Imhotep bench harness
+// Measures execution time and heap-memory deltas for extraction and solving paths (CPU time is not sampled).
+
+import { performance } from 'node:perf_hooks'
+import {
+  type PerformanceProfile,
+  type BudgetCheck,
+  DEFAULT_PROFILES,
+  checkBudget,
+} from './profiles.js'
+import { type CompilationCache, type ExtractionCache } from './cache.js'
+import { PromisePool, type PromisePoolOptions } from './parallel.js'
+
+/**
+ * Per-benchmark settings accepted by `runBenchmark`. Every field is optional.
+ */
+export interface BenchmarkOptions {
+  /** Number of warm-up runs before measurement (`runBenchmark` falls back to 1) */
+  warmupRuns?: number
+  /** Number of measured runs (`runBenchmark` falls back to 5) */
+  measurementRuns?: number
+  /** Performance profile to check budgets against (`runBenchmark` falls back to `DEFAULT_PROFILES.dev`) */
+  profile?: PerformanceProfile
+  /** Budget category name (compile | extract | solve | e2e); `runBenchmark` falls back to 'e2e' */
+  budgetName?: string
+  /**
+   * Optional compilation cache injected by caller.
+   * NOTE(review): no code visible in this file reads this field — confirm the consumer.
+   */
+  compilationCache?: CompilationCache<unknown>
+  /**
+   * Optional extraction cache injected by caller.
+   * NOTE(review): no code visible in this file reads this field — confirm the consumer.
+   */
+  extractionCache?: ExtractionCache<unknown>
+}
+
+/**
+ * Aggregate statistics and raw measurements produced by one `runBenchmark` call.
+ */
+export interface BenchmarkResult {
+  /** Benchmark name */
+  name: string
+  /** Mean duration across measured runs in milliseconds */
+  meanDurationMs: number
+  /** Minimum duration in milliseconds */
+  minDurationMs: number
+  /** Maximum duration in milliseconds */
+  maxDurationMs: number
+  /** Population standard deviation of duration in milliseconds (variance is divided by the run count) */
+  stdDevDurationMs: number
+  /** Mean `heapUsed` delta in bytes; negative when `heapUsed` was lower after a run than before it */
+  meanMemoryDeltaBytes: number
+  /**
+   * Budget check result. `runBenchmark` always populates it, because it falls
+   * back to the 'e2e' budget when no budget name is supplied; the field is
+   * optional only at the type level.
+   */
+  budgetCheck?: BudgetCheck
+  /** Per-run raw measurements, in execution order */
+  runs: RunMeasurement[]
+}
+
+/**
+ * Raw numbers captured for a single measured run.
+ */
+export interface RunMeasurement {
+  /** Elapsed time of the run in milliseconds, from `performance.now()` */
+  durationMs: number
+  /** `heapUsed` after the run minus `heapUsed` before it, in bytes */
+  memoryDeltaBytes: number
+}
+
+/**
+ * Time one invocation of `fn` and record how `heapUsed` changed around it.
+ *
+ * @param fn - Function to invoke once; a returned promise is awaited before
+ *   the end timestamp is taken.
+ * @returns The elapsed milliseconds, the `heapUsed` difference in bytes, and
+ *   the value `fn` resolved to.
+ */
+export async function profileRun<T>(
+  fn: () => T | Promise<T>
+): Promise<{ durationMs: number; memoryDeltaBytes: number; result: T }> {
+  // Heap is sampled outside the timed window so the sampling cost is not timed.
+  const heapAtStart = process.memoryUsage().heapUsed
+  const startedAt = performance.now()
+
+  const result = await fn()
+
+  const durationMs = performance.now() - startedAt
+  const memoryDeltaBytes = process.memoryUsage().heapUsed - heapAtStart
+
+  return { durationMs, memoryDeltaBytes, result }
+}
+
+/**
+ * Run a benchmark with warm-up and measurement phases.
+ *
+ * `fn` is first invoked `warmupRuns` times with its results discarded, then
+ * `measurementRuns` times under `profileRun`. The measured runs are reduced to
+ * mean / min / max / population standard deviation of duration and the mean
+ * memory delta, and the two means are passed to `checkBudget`.
+ *
+ * @param name - Label copied into the returned result.
+ * @param fn - Function under test; may be synchronous or return a promise.
+ * @param options - Run counts, profile and budget name. Defaults: 1 warm-up
+ *   run, 5 measured runs, `DEFAULT_PROFILES.dev`, budget 'e2e'.
+ * @returns Aggregate statistics, the budget check, and the raw per-run data.
+ * @throws RangeError if `measurementRuns` is not a finite number greater
+ *   than 0, since no meaningful statistics could be computed.
+ */
+export async function runBenchmark<T>(
+  name: string,
+  fn: () => T | Promise<T>,
+  options: BenchmarkOptions = {}
+): Promise<BenchmarkResult> {
+  const warmupRuns = options.warmupRuns ?? 1
+  const measurementRuns = options.measurementRuns ?? 5
+  const profile = options.profile ?? DEFAULT_PROFILES.dev
+  const budgetName = options.budgetName ?? 'e2e'
+
+  // With zero measured runs the mean would be 0/0 = NaN and min/max would be
+  // +/-Infinity, and those values would then be handed to the budget check.
+  // An infinite count would never terminate. Fail fast before doing any work.
+  if (!Number.isFinite(measurementRuns) || measurementRuns <= 0) {
+    throw new RangeError(
+      `runBenchmark("${name}"): measurementRuns must be a finite number greater than 0, received ${measurementRuns}`
+    )
+  }
+
+  // Warm-up phase: discard results
+  for (let i = 0; i < warmupRuns; i++) {
+    await fn()
+  }
+
+  // Measurement phase
+  const runs: RunMeasurement[] = []
+  for (let i = 0; i < measurementRuns; i++) {
+    const { durationMs, memoryDeltaBytes } = await profileRun(fn)
+    runs.push({ durationMs, memoryDeltaBytes })
+  }
+
+  // Single pass for sum / min / max; avoids spreading the whole array into
+  // Math.min / Math.max argument lists.
+  let durationSum = 0
+  let memorySum = 0
+  let minDurationMs = Infinity
+  let maxDurationMs = -Infinity
+  for (const run of runs) {
+    durationSum += run.durationMs
+    memorySum += run.memoryDeltaBytes
+    if (run.durationMs < minDurationMs) minDurationMs = run.durationMs
+    if (run.durationMs > maxDurationMs) maxDurationMs = run.durationMs
+  }
+  const meanDurationMs = durationSum / runs.length
+  const meanMemoryDeltaBytes = memorySum / runs.length
+
+  // Population variance: squared deviations divided by N (not N - 1).
+  const variance =
+    runs.reduce((sum, r) => sum + (r.durationMs - meanDurationMs) ** 2, 0) /
+    runs.length
+  const stdDevDurationMs = Math.sqrt(variance)
+
+  const budgetCheck = checkBudget(
+    budgetName,
+    meanDurationMs,
+    meanMemoryDeltaBytes,
+    profile
+  )
+
+  return {
+    name,
+    meanDurationMs,
+    minDurationMs,
+    maxDurationMs,
+    stdDevDurationMs,
+    meanMemoryDeltaBytes,
+    budgetCheck,
+    runs,
+  }
+}
+
+/**
+ * Construction-time settings for a `BenchmarkSuite`.
+ */
+export interface BenchmarkSuiteOptions {
+  /** Global benchmark options applied to every benchmark; per-benchmark options passed to `add` are spread on top of these */
+  defaults?: BenchmarkOptions
+  /** Promise pool options for parallel execution; `runParallel` reads `maxConcurrency` (fallback 4) and `taskTimeoutMs` (fallback 30000) */
+  poolOptions?: PromisePoolOptions
+}
+
+/**
+ * Outcome of running every benchmark registered on a `BenchmarkSuite`.
+ */
+export interface BenchmarkSuiteResult {
+  /** Name the suite was constructed with */
+  suiteName: string
+  /** One entry per benchmark that was run */
+  results: BenchmarkResult[]
+  /** Elapsed milliseconds for the whole suite run, measured with `performance.now()` */
+  totalDurationMs: number
+  /** True when every result either has no `budgetCheck` or has one whose `passed` is truthy */
+  allBudgetsPassed: boolean
+}
+
+/**
+ * A named collection of benchmarks that can be executed together.
+ *
+ * Benchmarks are registered with `add` and executed either one after another
+ * (`runSequential`) or through a `PromisePool` (`runParallel`). Both paths
+ * hand each benchmark to `runBenchmark` with the suite defaults overlaid by
+ * the benchmark's own options.
+ */
+export class BenchmarkSuite {
+  private benchmarks: Array<{
+    name: string
+    fn: () => unknown | Promise<unknown>
+    options?: BenchmarkOptions
+  }> = []
+
+  constructor(private suiteName: string, private options: BenchmarkSuiteOptions = {}) {}
+
+  /**
+   * Register a benchmark function.
+   *
+   * @param name - Label for the benchmark; also used as the pool task id in `runParallel`.
+   * @param fn - Function to benchmark.
+   * @param options - Overrides spread on top of the suite-level defaults.
+   * @returns This suite, so calls can be chained.
+   */
+  add<T>(
+    name: string,
+    fn: () => T | Promise<T>,
+    options?: BenchmarkOptions
+  ): this {
+    this.benchmarks.push({ name, fn, options })
+    return this
+  }
+
+  /** Run all benchmarks sequentially, in registration order */
+  async runSequential(): Promise<BenchmarkSuiteResult> {
+    const collected: BenchmarkResult[] = []
+    const startedAt = performance.now()
+
+    for (const entry of this.benchmarks) {
+      const effective = this.resolveOptions(entry.options)
+      collected.push(await runBenchmark(entry.name, entry.fn, effective))
+    }
+
+    const elapsedMs = performance.now() - startedAt
+    return this.summarize(collected, elapsedMs)
+  }
+
+  /**
+   * Run all benchmarks in parallel with controlled concurrency.
+   * The pool is shut down afterwards whether or not execution succeeded.
+   */
+  async runParallel(): Promise<BenchmarkSuiteResult> {
+    const configured = this.options.poolOptions
+    const poolOptions: PromisePoolOptions = {
+      maxConcurrency: configured?.maxConcurrency ?? 4,
+      taskTimeoutMs: configured?.taskTimeoutMs ?? 30000,
+    }
+    const pool = new PromisePool(poolOptions)
+    const startedAt = performance.now()
+
+    try {
+      const tasks = this.benchmarks.map(entry => ({
+        id: entry.name,
+        input: undefined,
+        // Options are resolved when the task actually runs, not when it is queued.
+        fn: async () =>
+          runBenchmark(entry.name, entry.fn, this.resolveOptions(entry.options)),
+      }))
+
+      const collected = (await pool.executeAll(tasks)) as BenchmarkResult[]
+      const elapsedMs = performance.now() - startedAt
+
+      return this.summarize(collected, elapsedMs)
+    } finally {
+      await pool.shutdown()
+    }
+  }
+
+  /** Overlay one benchmark's options on the suite-level defaults. */
+  private resolveOptions(overrides?: BenchmarkOptions): BenchmarkOptions {
+    return { ...this.options.defaults, ...overrides }
+  }
+
+  /** Assemble the suite result from the collected results and elapsed time. */
+  private summarize(
+    results: BenchmarkResult[],
+    totalDurationMs: number
+  ): BenchmarkSuiteResult {
+    // A budget counts as failed only when a check exists and did not pass.
+    const anyBudgetFailed = results.some(
+      r => r.budgetCheck && !r.budgetCheck.passed
+    )
+    return {
+      suiteName: this.suiteName,
+      results,
+      totalDurationMs,
+      allBudgetsPassed: !anyBudgetFailed,
+    }
+  }
+}