Imhotep/packages/imhotep-bench/src/benchmark.ts

// benchmark.ts - Benchmark runner for Imhotep bench harness
// Measures execution time, memory, and CPU for extraction and solving paths.

import { performance } from 'node:perf_hooks'
import {
  type PerformanceProfile,
  type BudgetCheck,
  DEFAULT_PROFILES,
  checkBudget,
} from './profiles.js'
import { type CompilationCache, type ExtractionCache } from './cache.js'
import { PromisePool, type PromisePoolOptions } from './parallel.js'

/**
 * Per-benchmark configuration accepted by `runBenchmark` and `BenchmarkSuite.add`.
 * Every field is optional; the defaults noted below are the ones `runBenchmark`
 * applies when a field is omitted.
 */
export interface BenchmarkOptions {
  /** Number of unmeasured warm-up invocations before measurement starts (default: 1) */
  warmupRuns?: number
  /** Number of measured invocations that feed the statistics (default: 5) */
  measurementRuns?: number
  /** Performance profile to check budgets against (default: `DEFAULT_PROFILES.dev`) */
  profile?: PerformanceProfile
  /** Budget category name (compile | extract | solve | e2e); default: 'e2e' */
  budgetName?: string
  /**
   * Optional compilation cache injected by caller.
   * NOTE(review): not read by any code visible in this file — confirm who consumes it.
   */
  compilationCache?: CompilationCache<unknown>
  /**
   * Optional extraction cache injected by caller.
   * NOTE(review): not read by any code visible in this file — confirm who consumes it.
   */
  extractionCache?: ExtractionCache<unknown>
}

/**
 * Aggregated outcome of one `runBenchmark` call: summary statistics over the
 * measured runs plus the raw per-run samples they were computed from.
 */
export interface BenchmarkResult {
  /** Benchmark name, copied verbatim from the `name` argument */
  name: string
  /** Arithmetic mean of the measured run durations, in milliseconds */
  meanDurationMs: number
  /** Shortest measured run duration, in milliseconds */
  minDurationMs: number
  /** Longest measured run duration, in milliseconds */
  maxDurationMs: number
  /** Population standard deviation of the durations (variance divides by the run count, not count - 1) */
  stdDevDurationMs: number
  /** Mean of the per-run `heapUsed` deltas, in bytes; negative when `heapUsed` shrank on average */
  meanMemoryDeltaBytes: number
  /**
   * Result of `checkBudget` for the mean duration and mean memory delta.
   * `runBenchmark` always performs this check: when no budget name is supplied
   * it falls back to 'e2e', and when no profile is supplied to `DEFAULT_PROFILES.dev`.
   */
  budgetCheck?: BudgetCheck
  /** Per-run raw measurements, in execution order (warm-up runs are not included) */
  runs: RunMeasurement[]
}

/** Raw sample recorded for a single measured invocation of the benchmarked function. */
export interface RunMeasurement {
  /** Elapsed time of the invocation in milliseconds (difference of two `performance.now()` readings) */
  durationMs: number
  /** `process.memoryUsage().heapUsed` after the invocation minus the value before it, in bytes */
  memoryDeltaBytes: number
}

/**
 * Execute `fn` once and report how long it took and how much the heap moved.
 *
 * The heap reading (`process.memoryUsage().heapUsed`) is taken before the
 * clock starts and again after the clock stops, so the two memory probes are
 * not part of the timed window. `fn` is awaited, so both synchronous and
 * promise-returning callbacks are supported. There is no error handling here:
 * if `fn` throws or rejects, the returned promise rejects with that error and
 * no measurement is produced.
 *
 * @param fn - Function to execute and measure.
 * @returns Elapsed milliseconds, `heapUsed` delta in bytes, and the resolved
 *   value of `fn`.
 */
export async function profileRun<T>(
  fn: () => T | Promise<T>
): Promise<{ durationMs: number; memoryDeltaBytes: number; result: T }> {
  const heapAtStart = process.memoryUsage().heapUsed
  const clockAtStart = performance.now()

  const result = await fn()

  // Stop the clock first, then sample the heap, mirroring the start sequence.
  const durationMs = performance.now() - clockAtStart
  const memoryDeltaBytes = process.memoryUsage().heapUsed - heapAtStart

  return { durationMs, memoryDeltaBytes, result }
}

/**
 * Run a benchmark with warm-up and measurement phases.
 *
 * `fn` is first invoked `warmupRuns` times with the results discarded, then
 * `measurementRuns` times through `profileRun`. The measured samples are
 * reduced to mean / min / max / population standard deviation of the
 * duration and the mean heap delta, and the means are checked against the
 * selected budget via `checkBudget`.
 *
 * Defaults: `warmupRuns` 1, `measurementRuns` 5, `profile`
 * `DEFAULT_PROFILES.dev`, `budgetName` 'e2e'. The `compilationCache` and
 * `extractionCache` options are not read by this function.
 *
 * @param name - Label copied into the returned result.
 * @param fn - Function under test; may return a value or a promise.
 * @param options - Run counts and budget selection.
 * @returns Summary statistics, the budget check, and the raw per-run samples.
 * @throws RangeError if `measurementRuns` is not greater than zero (including
 *   NaN). Without at least one sample every statistic would be NaN or
 *   +/-Infinity and the budget check would be meaningless.
 */
export async function runBenchmark<T>(
  name: string,
  fn: () => T | Promise<T>,
  options: BenchmarkOptions = {}
): Promise<BenchmarkResult> {
  const warmupRuns = options.warmupRuns ?? 1
  const measurementRuns = options.measurementRuns ?? 5
  const profile = options.profile ?? DEFAULT_PROFILES.dev
  const budgetName = options.budgetName ?? 'e2e'

  // Written as a negated comparison so that NaN is rejected as well.
  // Fail before any work is done rather than after the warm-up has run.
  if (!(measurementRuns > 0)) {
    throw new RangeError(
      `runBenchmark("${name}"): measurementRuns must be greater than 0, got ${measurementRuns}`
    )
  }

  // Warm-up phase: discard results
  for (let i = 0; i < warmupRuns; i++) {
    await fn()
  }

  // Measurement phase
  const runs: RunMeasurement[] = []
  for (let i = 0; i < measurementRuns; i++) {
    const { durationMs, memoryDeltaBytes } = await profileRun(fn)
    runs.push({ durationMs, memoryDeltaBytes })
  }

  // Single accumulation pass. Folding min/max pairwise instead of spreading
  // the whole array into Math.min/Math.max avoids the engine's argument-count
  // limit when the number of measured runs is very large.
  let durationSum = 0
  let memoryDeltaSum = 0
  let minDurationMs = Infinity
  let maxDurationMs = -Infinity
  for (const run of runs) {
    durationSum += run.durationMs
    memoryDeltaSum += run.memoryDeltaBytes
    minDurationMs = Math.min(minDurationMs, run.durationMs)
    maxDurationMs = Math.max(maxDurationMs, run.durationMs)
  }
  const meanDurationMs = durationSum / runs.length
  const meanMemoryDeltaBytes = memoryDeltaSum / runs.length

  // Population variance: squared deviations from the mean divided by N.
  let squaredDeviationSum = 0
  for (const run of runs) {
    squaredDeviationSum += (run.durationMs - meanDurationMs) ** 2
  }
  const stdDevDurationMs = Math.sqrt(squaredDeviationSum / runs.length)

  const budgetCheck = checkBudget(
    budgetName,
    meanDurationMs,
    meanMemoryDeltaBytes,
    profile
  )

  return {
    name,
    meanDurationMs,
    minDurationMs,
    maxDurationMs,
    stdDevDurationMs,
    meanMemoryDeltaBytes,
    budgetCheck,
    runs,
  }
}

/** Suite-wide configuration passed to the `BenchmarkSuite` constructor. */
export interface BenchmarkSuiteOptions {
  /**
   * Global benchmark options applied to every benchmark.
   * Options given to `add` are spread after these, so they override the defaults key by key.
   */
  defaults?: BenchmarkOptions
  /**
   * Promise pool options for parallel execution.
   * Only read by `runParallel`, which uses `maxConcurrency` (default 4) and
   * `taskTimeoutMs` (default 30000) from this object.
   */
  poolOptions?: PromisePoolOptions
}

/** Outcome of running every benchmark registered on a `BenchmarkSuite`. */
export interface BenchmarkSuiteResult {
  /** Name given to the suite at construction time */
  suiteName: string
  /** One result per executed benchmark */
  results: BenchmarkResult[]
  /** Elapsed milliseconds for the whole suite run, measured with `performance.now()` around the execution */
  totalDurationMs: number
  /** True when every result either has no `budgetCheck` or has a truthy `budgetCheck.passed` */
  allBudgetsPassed: boolean
}

/**
 * A named collection of benchmarks that can be executed as a group.
 *
 * Benchmarks are registered with `add` and then executed either one after
 * another (`runSequential`) or through a `PromisePool` with bounded
 * concurrency (`runParallel`). Both modes hand each benchmark to
 * `runBenchmark` with the suite defaults overlaid by the benchmark's own
 * options, and both report the elapsed time of the whole run together with
 * an aggregate budget verdict.
 */
export class BenchmarkSuite {
  private benchmarks: Array<{
    name: string
    fn: () => unknown | Promise<unknown>
    options?: BenchmarkOptions
  }> = []

  constructor(private suiteName: string, private options: BenchmarkSuiteOptions = {}) {}

  /**
   * Queue a benchmark for later execution.
   *
   * @param name - Label reported in the result (also used as the pool task id).
   * @param fn - Function under test.
   * @param options - Per-benchmark overrides layered on top of the suite defaults.
   * @returns This suite, so registrations can be chained.
   */
  add<T>(
    name: string,
    fn: () => T | Promise<T>,
    options?: BenchmarkOptions
  ): this {
    this.benchmarks.push({ name, fn, options })
    return this
  }

  /** Execute the registered benchmarks one at a time, in registration order. */
  async runSequential(): Promise<BenchmarkSuiteResult> {
    const collected: BenchmarkResult[] = []
    const startedAt = performance.now()

    for (const entry of this.benchmarks) {
      const effective = this.withSuiteDefaults(entry.options)
      collected.push(await runBenchmark(entry.name, entry.fn, effective))
    }

    const elapsedMs = performance.now() - startedAt
    return this.toSuiteResult(collected, elapsedMs)
  }

  /**
   * Execute the registered benchmarks through a `PromisePool`.
   *
   * Concurrency defaults to 4 and the per-task timeout to 30000 ms unless
   * overridden via `poolOptions`. The pool is shut down in a `finally` block,
   * so it is released whether the run succeeds or throws.
   */
  async runParallel(): Promise<BenchmarkSuiteResult> {
    const configured = this.options.poolOptions
    const poolOptions: PromisePoolOptions = {
      maxConcurrency: configured?.maxConcurrency ?? 4,
      taskTimeoutMs: configured?.taskTimeoutMs ?? 30000,
    }
    const pool = new PromisePool(poolOptions)
    const startedAt = performance.now()

    try {
      const tasks = this.benchmarks.map(({ name, fn, options }) => ({
        id: name,
        input: undefined,
        // Options are merged when the task actually runs, not when it is queued.
        fn: async () => runBenchmark(name, fn, this.withSuiteDefaults(options)),
      }))

      const results = (await pool.executeAll(tasks)) as BenchmarkResult[]
      const elapsedMs = performance.now() - startedAt

      return this.toSuiteResult(results, elapsedMs)
    } finally {
      await pool.shutdown()
    }
  }

  /** Overlay a benchmark's own options on the suite-wide defaults (own keys win). */
  private withSuiteDefaults(own?: BenchmarkOptions): BenchmarkOptions {
    return { ...this.options.defaults, ...own }
  }

  /** Assemble the suite-level result; budgets pass unless some result has a failed check. */
  private toSuiteResult(
    results: BenchmarkResult[],
    totalDurationMs: number
  ): BenchmarkSuiteResult {
    const anyBudgetFailed = results.some(
      r => r.budgetCheck && !r.budgetCheck.passed
    )

    return {
      suiteName: this.suiteName,
      results,
      totalDurationMs,
      allBudgetsPassed: !anyBudgetFailed,
    }
  }
}