v1.1.0: pooled runtime, 959 tests, production hardening (0 squash)

2025-08-15 10:00:00 -07:00
commit 92deb689cd
321 changed files with 79170 additions and 0 deletions
@@ -0,0 +1,415 @@
+/**
+ * Extraction plan generation from execution IR.
+ *
+ * This is the main orchestrator for WBS 3: Fact Planning and Extraction Planning.
+ * It consumes execution IR + semantic IR and produces deduplicated, batched
+ * extraction requests that tell the runtime exactly which browser facts to capture.
+ */
+
+import type {
+  ExecutionIr,
+  SemanticIr,
+  Environment,
+  StateSnapshot,
+  TimelineSnapshot,
+} from 'imhotep-core'
+import type {
+  FactRequirementRegistry,
+  RequiredFacts,
+  Diagnostic,
+} from './requirements.js'
+import { analyzeRequiredFacts, DiagnosticCollector } from './requirements.js'
+import type { SelectorPlan, SelectorResolutionPlan } from './selector.js'
+import { planSelectorResolution } from './selector.js'
+import type { ExtractionRequest, ExtractionBatch } from './batching.js'
+import { deduplicateRequests, batchRequests } from './batching.js'
+
+// ---------------------------------------------------------------------------
+// Execution Context
+// ---------------------------------------------------------------------------
+
+/**
+ * Bundles the flattened execution IR with the lookup tables needed to map
+ * numeric indices back to semantic IR entities.
+ *
+ * The compiler (WBS 2) produces both IRs and these index-to-id arrays so
+ * the planner never has to guess insertion order.
+ *
+ * Lookups are two-step: the per-clause array in the execution IR yields a
+ * numeric index, and that index selects an id from the matching array below,
+ * e.g. `envGuardIds[executionIr.clauseEnvGuard[i]]` for clause `i`. The
+ * planner functions in this file skip a clause whose lookup yields no id
+ * (emitting a diagnostic for env guards and states, silently for timelines).
+ */
+export interface ExecutionContext {
+  /** Flattened IR; the planner walks clause indices 0..clauseCount-1. */
+  executionIr: ExecutionIr
+  /** Semantic IR; its envGuards, states and timelines are looked up by id. */
+  semanticIr: SemanticIr
+  /** subjectIds[executionIr.clauseSubject[i]] -> subject id (not read in this file; presumably the same two-step lookup — confirm). */
+  subjectIds: string[]
+  /** frameIds[executionIr.clauseFrame[i]] -> frame id (not read in this file; presumably the same two-step lookup — confirm). */
+  frameIds: string[]
+  /** stateIds[executionIr.clauseState[i]] -> state id */
+  stateIds: string[]
+  /** timelineIds[executionIr.clauseTimeline[i]] -> timeline id */
+  timelineIds: string[]
+  /** envGuardIds[executionIr.clauseEnvGuard[i]] -> envGuard id */
+  envGuardIds: string[]
+  /** toleranceIds[executionIr.clauseTolerance[i]] -> tolerance id (not read in this file; presumably the same two-step lookup — confirm). */
+  toleranceIds: string[]
+}
+
+// ---------------------------------------------------------------------------
+// Planner Options
+// ---------------------------------------------------------------------------
+
+/**
+ * Configuration supplied to the ExtractionPlanner constructor.
+ *
+ * Only `factRegistry` is required; the three defaults are fallbacks that are
+ * consulted when the clauses themselves contribute nothing.
+ */
+export interface PlannerOptions {
+  /** Registry that knows which facts each clause family needs. */
+  factRegistry: FactRequirementRegistry
+  /**
+   * Environment used when the clauses yield no environment case.
+   * When omitted, the planner still emits a single request with an empty env.
+   */
+  defaultEnvironment?: Partial<Environment>
+  /**
+   * State snapshots used only when no snapshot at all was collected from the
+   * clauses; they are not merged into a non-empty result.
+   */
+  defaultStateSnapshots?: Array<{ id: string; kind: string; name?: string }>
+  /**
+   * Default timeline mode. It is forwarded to timeline planning, which in this
+   * file always answers 'static' and does not read the value.
+   */
+  defaultTimelineMode?: string
+}
+
+// ---------------------------------------------------------------------------
+// Extraction Plan Output
+// ---------------------------------------------------------------------------
+
+/**
+ * The final output of the planning phase.
+ *
+ * Contains every extraction request needed to satisfy the execution IR,
+ * grouped into environment-compatible batches, plus any diagnostics.
+ */
+export interface ExtractionPlan {
+  /** `plan_<n>`, where n comes from a module-level counter incremented per plan. */
+  planId: string
+  /** Requests after deduplication (the output of deduplicateRequests). */
+  requests: ExtractionRequest[]
+  /** The deduplicated requests grouped by batchRequests. */
+  batches: ExtractionBatch[]
+  /** Everything emitted to the planner's DiagnosticCollector while building the plan. */
+  diagnostics: Diagnostic[]
+}
+
+// ---------------------------------------------------------------------------
+// Planner
+// ---------------------------------------------------------------------------
+
+// Module-wide sequence used to mint plan ids ("plan_1", "plan_2", ...).
+// It is shared by every ExtractionPlanner instance and never reset in this file,
+// so plan ids depend on how many plans this module has produced so far.
+let _planCounter = 0
+
+/**
+ * Orchestrates extraction planning: gathers what must be captured, under which
+ * environments, states and timeline, and packages that into batched requests.
+ */
+export class ExtractionPlanner {
+  constructor(private options: PlannerOptions) {}
+
+  /**
+   * Build an extraction plan from an execution context.
+   *
+   * Pipeline (diagnostics are collected throughout, in this order):
+   *   1. Analyze required facts across all clauses.
+   *   2. Build the selector resolution plan and forward its diagnostics,
+   *      re-tagged with the 'resolution-error' category.
+   *   3. Expand the environment matrix from envGuards.
+   *   4. Collect state snapshots.
+   *   5. Collect the timeline plan.
+   *   6. Build one request per env case; if there is none, build a single
+   *      'req_default' request using the default environment (or `{}`).
+   *   7. Deduplicate the requests.
+   *   8. Batch the deduplicated requests.
+   *
+   * Every request shares the same stateSnapshots, timeline, subjects and
+   * requiredFacts values; only the id and environment differ.
+   *
+   * @param context Execution IR, semantic IR and the index-to-id tables.
+   * @returns The plan, with a fresh `plan_<n>` id from the module counter.
+   */
+  createPlan(context: ExecutionContext): ExtractionPlan {
+    const collector = new DiagnosticCollector()
+    const execIr = context.executionIr
+    const semIr = context.semanticIr
+
+    // 1. Facts required by the clauses.
+    const facts = analyzeRequiredFacts(
+      execIr,
+      this.options.factRegistry,
+      collector
+    )
+
+    // 2. Selector resolution; its diagnostics are folded into ours.
+    const selectors = planSelectorResolution(semIr)
+    for (const { code, severity, message } of selectors.diagnostics) {
+      collector.emit({ code, severity, message, category: 'resolution-error' })
+    }
+
+    // 3-5. Environment cases, state snapshots, timeline.
+    const envCases = expandEnvironmentMatrix(
+      execIr,
+      context,
+      semIr,
+      this.options.defaultEnvironment,
+      collector
+    )
+    const snapshots = planStateSnapshots(
+      execIr,
+      context,
+      semIr,
+      this.options.defaultStateSnapshots,
+      collector
+    )
+    const timelinePlan = planTimeline(
+      execIr,
+      context,
+      semIr,
+      this.options.defaultTimelineMode,
+      collector
+    )
+
+    // 6. Single factory so the per-case and fallback requests cannot drift apart.
+    const toRequest = (
+      requestId: string,
+      envCaseId: string,
+      env: Partial<Environment>
+    ): ExtractionRequest => ({
+      requestId,
+      envCaseId,
+      env,
+      stateSnapshots: snapshots,
+      timeline: timelinePlan,
+      subjects: selectors.subjects,
+      requiredFacts: facts,
+      diagnostics: [],
+    })
+
+    const requests: ExtractionRequest[] = []
+    envCases.forEach((entry, position) => {
+      requests.push(toRequest(`req_${position + 1}`, entry.id, entry.env))
+    })
+    if (requests.length === 0) {
+      // No env case at all: still produce one request with defaults.
+      requests.push(
+        toRequest(
+          'req_default',
+          'env_default',
+          this.options.defaultEnvironment ?? {}
+        )
+      )
+    }
+
+    // 7-8. Deduplicate, then batch by compatible environment.
+    const uniqueRequests = deduplicateRequests(requests)
+    const batches = batchRequests(uniqueRequests)
+
+    _planCounter += 1
+    return {
+      planId: `plan_${_planCounter}`,
+      requests: uniqueRequests,
+      batches,
+      diagnostics: collector.diagnostics,
+    }
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Environment Matrix Expansion
+// ---------------------------------------------------------------------------
+
+/** One environment case to extract under, paired with its generated id. */
+interface EnvCaseEntry {
+  /** `env_<n>` in discovery order, or `env_default` for the fallback case. */
+  id: string
+  /** Environment settings for this case; any subset of Environment may be set. */
+  env: Partial<Environment>
+}
+
+/**
+ * Serialize an environment case into a key that does not depend on the order
+ * in which its properties were written.
+ *
+ * Plain `JSON.stringify` emits keys in insertion order, so two cases with the
+ * same settings but different key order would otherwise count as distinct.
+ * The replacer rebuilds every non-array object with its keys sorted; all other
+ * `JSON.stringify` semantics (dropped `undefined`, `toJSON`, arrays kept in
+ * order) are left as they are.
+ */
+function canonicalEnvKey(env: unknown): string {
+  return JSON.stringify(env, (_key: string, value: unknown) => {
+    if (value === null || typeof value !== 'object' || Array.isArray(value)) {
+      return value
+    }
+    const source = value as Record<string, unknown>
+    const ordered: Record<string, unknown> = {}
+    for (const name of Object.keys(source).sort()) {
+      ordered[name] = source[name]
+    }
+    return ordered
+  })
+}
+
+/**
+ * Collect every distinct environment case referenced by any clause in the
+ * execution IR.
+ *
+ * For each clause, `executionIr.clauseEnvGuard[i]` is mapped through
+ * `context.envGuardIds` to a guard id, the guard is fetched from
+ * `semanticIr.envGuards`, and its `normalizedCases` are unioned into the
+ * result. Cases are deduplicated structurally (property order is ignored) and
+ * numbered `env_1`, `env_2`, ... in discovery order; the first object seen for
+ * a given case is the one kept.
+ *
+ * Guards that carry no `normalizedCases` array (for example the shape that
+ * only has a condition string) contribute no cases and produce no diagnostic.
+ *
+ * @param executionIr Supplies `clauseCount` and `clauseEnvGuard`.
+ * @param context Supplies the `envGuardIds` index-to-id table.
+ * @param semanticIr Supplies the `envGuards` lookup.
+ * @param defaultEnv Used as the single `env_default` case when nothing was
+ *   collected. When it is undefined the result is an empty array.
+ * @param diagnostics Receives a warning per clause whose guard index has no id
+ *   mapping and an error per clause whose guard id is not in the semantic IR.
+ * @returns The distinct environment cases, possibly empty.
+ */
+function expandEnvironmentMatrix(
+  executionIr: ExecutionIr,
+  context: ExecutionContext,
+  semanticIr: SemanticIr,
+  defaultEnv: Partial<Environment> | undefined,
+  diagnostics: DiagnosticCollector
+): EnvCaseEntry[] {
+  const seen = new Set<string>()
+  // Guards whose cases are already merged; many clauses share one guard and
+  // re-serializing its cases for each of them cannot add anything new.
+  const expandedGuards = new Set<string>()
+  const cases: EnvCaseEntry[] = []
+
+  for (let i = 0; i < executionIr.clauseCount; i++) {
+    const guardIndex = executionIr.clauseEnvGuard[i]
+    const guardId = context.envGuardIds[guardIndex]
+    if (!guardId) {
+      diagnostics.emit({
+        code: 'IMH_EXTRACTOR_MISSING_ENV_GUARD',
+        severity: 'warning',
+        category: 'extraction-error',
+        message: `Clause ${i} references envGuard index ${guardIndex} which has no id mapping`,
+        clauseIndex: i,
+      })
+      continue
+    }
+
+    if (expandedGuards.has(guardId)) continue
+
+    const guard = semanticIr.envGuards.get(guardId)
+    if (!guard) {
+      // Deliberately not cached: the error is reported for every clause that
+      // references the unresolved guard, each with its own clauseIndex.
+      diagnostics.emit({
+        code: 'IMH_EXTRACTOR_UNRESOLVED_ENV_GUARD',
+        severity: 'error',
+        category: 'resolution-error',
+        message: `EnvGuard ${guardId} not found in semantic IR`,
+        clauseIndex: i,
+      })
+      continue
+    }
+    expandedGuards.add(guardId)
+
+    // The guard type is not guaranteed to declare normalizedCases (one shape
+    // has it, another only has a condition string), so it is read untyped and
+    // checked at runtime before use.
+    const normalizedCases = (guard as unknown as Record<string, unknown>)
+      .normalizedCases as Array<Partial<Environment>> | undefined
+    if (!Array.isArray(normalizedCases)) continue
+
+    for (const env of normalizedCases) {
+      const key = canonicalEnvKey(env)
+      if (seen.has(key)) continue
+      seen.add(key)
+      cases.push({
+        id: `env_${cases.length + 1}`,
+        env,
+      })
+    }
+  }
+
+  if (cases.length === 0 && defaultEnv) {
+    cases.push({ id: 'env_default', env: defaultEnv })
+  }
+
+  return cases
+}
+
+// ---------------------------------------------------------------------------
+// State Snapshot Planning
+// ---------------------------------------------------------------------------
+
+/**
+ * Collect the distinct state snapshots referenced by the clauses.
+ *
+ * For each clause, `executionIr.clauseState[i]` is mapped through
+ * `context.stateIds` to a state id and the state is fetched from
+ * `semanticIr.states`. Each state id is considered once, in first-reference
+ * order. A state without a `kind` is treated as 'default'.
+ *
+ * Kinds recognised without a warning: default, hover, focus, focusVisible,
+ * active, disabled, visited. Any other kind triggers a warning diagnostic but
+ * the snapshot is still included so the solver can decide what to do.
+ *
+ * @param executionIr Supplies `clauseCount` and `clauseState`.
+ * @param context Supplies the `stateIds` index-to-id table.
+ * @param semanticIr Supplies the `states` lookup.
+ * @param defaults Fallback snapshots, appended (deduplicated by id) only when
+ *   no snapshot was collected from the clauses.
+ * @param diagnostics Receives a warning for an unmapped state index, an error
+ *   for a state id missing from the semantic IR, and a warning for an
+ *   unrecognised kind.
+ * @returns The snapshots to capture, possibly empty.
+ */
+function planStateSnapshots(
+  executionIr: ExecutionIr,
+  context: ExecutionContext,
+  semanticIr: SemanticIr,
+  defaults: Array<{ id: string; kind: string; name?: string }> | undefined,
+  diagnostics: DiagnosticCollector
+): Array<{ id: string; kind: string; name?: string }> {
+  const recognisedKinds = new Set([
+    'default',
+    'hover',
+    'focus',
+    'focusVisible',
+    'active',
+    'disabled',
+    'visited',
+  ])
+  const visitedIds = new Set<string>()
+  const collected: Array<{ id: string; kind: string; name?: string }> = []
+
+  for (let clause = 0; clause < executionIr.clauseCount; clause += 1) {
+    const stateIndex = executionIr.clauseState[clause]
+    const stateId = context.stateIds[stateIndex]
+
+    if (!stateId) {
+      diagnostics.emit({
+        code: 'IMH_EXTRACTOR_MISSING_STATE',
+        severity: 'warning',
+        category: 'extraction-error',
+        message: `Clause ${clause} references state index ${stateIndex} which has no id mapping`,
+        clauseIndex: clause,
+      })
+    } else if (!visitedIds.has(stateId)) {
+      // Marked before the lookup so an unresolved id is reported only once.
+      visitedIds.add(stateId)
+
+      const state = semanticIr.states.get(stateId)
+      if (state) {
+        const kind = state.kind ?? 'default'
+        if (!recognisedKinds.has(kind)) {
+          diagnostics.emit({
+            code: 'IMH_EXTRACTOR_UNSUPPORTED_STATE',
+            severity: 'warning',
+            category: 'extraction-error',
+            message: `State kind "${kind}" is not fully supported in V1`,
+            clauseIndex: clause,
+          })
+        }
+        collected.push({
+          id: stateId,
+          kind,
+          // `name` is read untyped because the state type may not declare it.
+          name: (state as unknown as Record<string, unknown>).name as string | undefined,
+        })
+      } else {
+        diagnostics.emit({
+          code: 'IMH_EXTRACTOR_UNRESOLVED_STATE',
+          severity: 'error',
+          category: 'resolution-error',
+          message: `State ${stateId} not found in semantic IR`,
+          clauseIndex: clause,
+        })
+      }
+    }
+  }
+
+  if (collected.length === 0 && defaults) {
+    // NOTE(review): visitedIds still holds ids that failed to resolve above, so
+    // a default sharing such an id is skipped too — confirm this is intended.
+    for (const fallback of defaults) {
+      if (visitedIds.has(fallback.id)) continue
+      visitedIds.add(fallback.id)
+      collected.push(fallback)
+    }
+  }
+
+  return collected
+}
+
+// ---------------------------------------------------------------------------
+// Timeline Planning
+// ---------------------------------------------------------------------------
+
+/**
+ * Determine the timeline sampling plan.
+ *
+ * V1 is static-only: the result is always `{ mode: 'static' }`. The clauses
+ * are still inspected so that a single warning can be emitted when any of them
+ * references a timeline whose mode is not 'static'; the warning lists every
+ * mode encountered (including 'static') in first-seen order.
+ *
+ * Clauses whose timeline index has no id mapping, or whose id is not in the
+ * semantic IR, are skipped without a diagnostic. A timeline without a `mode`
+ * counts as 'static'.
+ *
+ * @param executionIr Supplies `clauseCount` and `clauseTimeline`.
+ * @param context Supplies the `timelineIds` index-to-id table.
+ * @param semanticIr Supplies the `timelines` lookup.
+ * @param defaultMode Accepted for signature compatibility; not read here.
+ * @param diagnostics Receives the non-static fallback warning, if any.
+ * @returns Always `{ mode: 'static' }`.
+ */
+function planTimeline(
+  executionIr: ExecutionIr,
+  context: ExecutionContext,
+  semanticIr: SemanticIr,
+  defaultMode: string | undefined,
+  diagnostics: DiagnosticCollector
+): { mode: string; samples?: number[] } {
+  const modes = new Set<string>()
+
+  for (let clause = 0; clause < executionIr.clauseCount; clause += 1) {
+    const timelineId = context.timelineIds[executionIr.clauseTimeline[clause]]
+    const timeline = timelineId
+      ? semanticIr.timelines.get(timelineId)
+      : undefined
+    if (timeline) {
+      modes.add(timeline.mode ?? 'static')
+    }
+  }
+
+  const observed = Array.from(modes)
+  if (observed.some((mode) => mode !== 'static')) {
+    const headline =
+      'Non-static timelines detected; V1 falls back to static extraction. '
+    diagnostics.emit({
+      code: 'IMH_EXTRACTOR_NON_STATIC_TIMELINE',
+      severity: 'warning',
+      category: 'extraction-error',
+      message: headline + `Modes seen: ${observed.join(', ')}`,
+    })
+  }
+
+  // Static is the only mode V1 can extract, whatever the clauses asked for.
+  return { mode: 'static' }
+}