/**
 * Extraction plan generation from execution IR.
 *
 * This is the main orchestrator for WBS 3: Fact Planning and Extraction Planning.
 * It consumes execution IR + semantic IR and produces deduplicated, batched
 * extraction requests that tell the runtime exactly which browser facts to capture.
 *
 * NOTE(review): the generic type arguments in this file (`Partial<Environment>`,
 * `Record<string, unknown>`, `Set<string>`) were reconstructed after they had
 * been stripped from the source text — confirm them against `imhotep-core`.
 */

import type {
  ExecutionIr,
  SemanticIr,
  Environment,
  StateSnapshot,
  TimelineSnapshot,
} from 'imhotep-core'
import type {
  FactRequirementRegistry,
  RequiredFacts,
  Diagnostic,
} from './requirements.js'
import { analyzeRequiredFacts, DiagnosticCollector } from './requirements.js'
import type { SelectorPlan, SelectorResolutionPlan } from './selector.js'
import { planSelectorResolution } from './selector.js'
import type { ExtractionRequest, ExtractionBatch } from './batching.js'
import { deduplicateRequests, batchRequests } from './batching.js'

// ---------------------------------------------------------------------------
// Execution Context
// ---------------------------------------------------------------------------

/**
 * Bundles the flattened execution IR with the lookup tables needed to map
 * numeric indices back to semantic IR entities.
 *
 * The compiler (WBS 2) produces both IRs and these index-to-id arrays so
 * the planner never has to guess insertion order.
 */
export interface ExecutionContext {
  executionIr: ExecutionIr
  semanticIr: SemanticIr
  /** executionIr.clauseSubject[i] -> subject id */
  subjectIds: string[]
  /** executionIr.clauseFrame[i] -> frame id */
  frameIds: string[]
  /** executionIr.clauseState[i] -> state id */
  stateIds: string[]
  /** executionIr.clauseTimeline[i] -> timeline id */
  timelineIds: string[]
  /** executionIr.clauseEnvGuard[i] -> envGuard id */
  envGuardIds: string[]
  /** executionIr.clauseTolerance[i] -> tolerance id */
  toleranceIds: string[]
}

// ---------------------------------------------------------------------------
// Planner Options
// ---------------------------------------------------------------------------

export interface PlannerOptions {
  /** Registry that knows which facts each clause family needs. */
  factRegistry: FactRequirementRegistry
  /** Default environment when no envGuards are active. */
  defaultEnvironment?: Partial<Environment>
  /** Default state snapshots when no state refs are active. */
  defaultStateSnapshots?: Array<{ id: string; kind: string; name?: string }>
  /**
   * Default timeline mode when no timeline refs are active.
   * Currently forwarded to timeline planning but not consulted there, because
   * V1 always plans a static timeline.
   */
  defaultTimelineMode?: string
}

// ---------------------------------------------------------------------------
// Extraction Plan Output
// ---------------------------------------------------------------------------

/**
 * The final output of the planning phase.
 *
 * Contains every extraction request needed to satisfy the execution IR,
 * grouped into environment-compatible batches, plus any diagnostics.
 */
export interface ExtractionPlan {
  planId: string
  requests: ExtractionRequest[]
  batches: ExtractionBatch[]
  diagnostics: Diagnostic[]
}

// ---------------------------------------------------------------------------
// Planner
// ---------------------------------------------------------------------------

/** Module-wide counter used to mint `plan_<n>` ids; shared by all planners. */
let _planCounter = 0

export class ExtractionPlanner {
  constructor(private readonly options: PlannerOptions) {}

  /**
   * Build an extraction plan from an execution context.
   *
   * Steps:
   * 1. Analyze required facts across all clauses.
   * 2. Build selector resolution plan (its diagnostics are forwarded).
   * 3. Expand environment matrix from envGuards.
   * 4. Collect state snapshots.
   * 5. Collect timeline plan.
   * 6. Build one request per env case (or a single default request).
   * 7. Deduplicate requests.
   * 8. Batch the deduplicated requests.
   *
   * Diagnostics are accumulated throughout and returned on the plan.
   *
   * @param context Execution IR, semantic IR and the index-to-id tables.
   * @returns The plan with its requests, batches and collected diagnostics.
   */
  createPlan(context: ExecutionContext): ExtractionPlan {
    const diagnostics = new DiagnosticCollector()
    const { executionIr, semanticIr } = context

    // Step 1: Union all required facts.
    const requiredFacts = analyzeRequiredFacts(
      executionIr,
      this.options.factRegistry,
      diagnostics
    )

    // Step 2: Selector resolution plan.
    const selectorPlan = planSelectorResolution(semanticIr)
    for (const d of selectorPlan.diagnostics) {
      // Selector diagnostics are re-emitted under the resolution-error category.
      diagnostics.emit({
        code: d.code,
        severity: d.severity,
        message: d.message,
        category: 'resolution-error',
      })
    }

    // Step 3: Environment matrix expansion.
    const envCases = expandEnvironmentMatrix(
      executionIr,
      context,
      semanticIr,
      this.options.defaultEnvironment,
      diagnostics
    )

    // Step 4: State snapshot planning.
    const stateSnapshots = planStateSnapshots(
      executionIr,
      context,
      semanticIr,
      this.options.defaultStateSnapshots,
      diagnostics
    )

    // Step 5: Timeline planning.
    const timeline = planTimeline(
      executionIr,
      context,
      semanticIr,
      this.options.defaultTimelineMode,
      diagnostics
    )

    // Step 6: Build one extraction request per env case.
    // Every request shares the same snapshots, timeline, subjects and facts.
    const requests: ExtractionRequest[] = envCases.map((envCase, idx) => ({
      requestId: `req_${idx + 1}`,
      envCaseId: envCase.id,
      env: envCase.env,
      stateSnapshots,
      timeline,
      subjects: selectorPlan.subjects,
      requiredFacts,
      diagnostics: [],
    }))

    // If there are no env cases, still produce one request with defaults.
    if (requests.length === 0) {
      requests.push({
        requestId: 'req_default',
        envCaseId: 'env_default',
        env: this.options.defaultEnvironment ?? {},
        stateSnapshots,
        timeline,
        subjects: selectorPlan.subjects,
        requiredFacts,
        diagnostics: [],
      })
    }

    // Step 7: Deduplicate.
    const deduped = deduplicateRequests(requests)

    // Step 8: Batch by compatible environment.
    const batches = batchRequests(deduped)

    return {
      planId: `plan_${++_planCounter}`,
      requests: deduped,
      batches,
      diagnostics: diagnostics.diagnostics,
    }
  }
}

// ---------------------------------------------------------------------------
// Environment Matrix Expansion
// ---------------------------------------------------------------------------

interface EnvCaseEntry {
  id: string
  env: Partial<Environment>
}

/**
 * Serialize an env case to a key that does not depend on property insertion
 * order, so `{ a, b }` and `{ b, a }` are recognised as the same case.
 * Plain objects are re-emitted with sorted keys at every nesting level;
 * arrays and primitives are serialized as `JSON.stringify` normally would.
 */
function canonicalEnvKey(env: unknown): string {
  return JSON.stringify(env, (_key: string, value: unknown): unknown => {
    if (value === null || typeof value !== 'object' || Array.isArray(value)) {
      return value
    }
    const source = value as Record<string, unknown>
    const sorted: Record<string, unknown> = {}
    for (const name of Object.keys(source).sort()) {
      sorted[name] = source[name]
    }
    return sorted
  })
}

/**
 * Collect every environment case referenced by any clause in the execution IR.
 *
 * For V1 this walks clauseEnvGuard indices, looks them up in the semantic IR,
 * and unions all normalizedCases, dropping structurally equal duplicates.
 * Cases are numbered `env_1`, `env_2`, ... in first-seen order.
 *
 * If no case was collected and `defaultEnv` is provided, a single
 * `env_default` case is returned; without a default the result is empty and
 * the caller is expected to supply its own fallback.
 *
 * @param executionIr Flattened IR providing clauseCount and clauseEnvGuard.
 * @param context Supplies the envGuard index-to-id table.
 * @param semanticIr Supplies the envGuards lookup.
 * @param defaultEnv Environment used when no guard contributes a case.
 * @param diagnostics Receives a warning for an index without an id mapping
 *   and an error for an id missing from the semantic IR (once per clause).
 * @returns The deduplicated env cases.
 */
function expandEnvironmentMatrix(
  executionIr: ExecutionIr,
  context: ExecutionContext,
  semanticIr: SemanticIr,
  defaultEnv: Partial<Environment> | undefined,
  diagnostics: DiagnosticCollector
): EnvCaseEntry[] {
  const seen = new Set<string>()
  // Guards already expanded; re-walking them could only yield known cases.
  const expandedGuards = new Set<string>()
  const cases: EnvCaseEntry[] = []

  for (let i = 0; i < executionIr.clauseCount; i++) {
    const guardIndex = executionIr.clauseEnvGuard[i]
    const guardId = context.envGuardIds[guardIndex]

    if (!guardId) {
      diagnostics.emit({
        code: 'IMH_EXTRACTOR_MISSING_ENV_GUARD',
        severity: 'warning',
        category: 'extraction-error',
        message: `Clause ${i} references envGuard index ${guardIndex} which has no id mapping`,
        clauseIndex: i,
      })
      continue
    }

    if (expandedGuards.has(guardId)) continue

    const guard = semanticIr.envGuards.get(guardId)
    if (!guard) {
      // Not recorded as expanded, so each referencing clause is still reported.
      diagnostics.emit({
        code: 'IMH_EXTRACTOR_UNRESOLVED_ENV_GUARD',
        severity: 'error',
        category: 'resolution-error',
        message: `EnvGuard ${guardId} not found in semantic IR`,
        clauseIndex: i,
      })
      continue
    }
    expandedGuards.add(guardId)

    // The semantic IR envGuard may have normalizedCases (from ir.ts) or
    // a condition string (from index.ts). Only normalizedCases contributes
    // env cases here; a guard without that array adds nothing.
    const rawCases = (guard as unknown as Record<string, unknown>)
      .normalizedCases
    if (!Array.isArray(rawCases)) continue

    for (const env of rawCases as Array<Partial<Environment>>) {
      const key = canonicalEnvKey(env)
      if (seen.has(key)) continue
      seen.add(key)
      cases.push({
        id: `env_${cases.length + 1}`,
        env,
      })
    }
  }

  if (cases.length === 0 && defaultEnv) {
    cases.push({ id: 'env_default', env: defaultEnv })
  }

  return cases
}

// ---------------------------------------------------------------------------
// State Snapshot Planning
// ---------------------------------------------------------------------------

/**
 * Collect all state snapshots referenced by clauses.
 *
 * State kinds accepted without a warning: default, hover, focus,
 * focusVisible, active, disabled, visited.
 * If a clause references any other state kind, a diagnostic is emitted
 * but the state is still included so the solver can decide what to do.
 *
 * Each state id is examined once, at the first clause that references it.
 * The `defaults` are used only when no clause contributed a snapshot.
 *
 * @param executionIr Flattened IR providing clauseCount and clauseState.
 * @param context Supplies the state index-to-id table.
 * @param semanticIr Supplies the states lookup.
 * @param defaults Fallback snapshots, skipping ids already encountered.
 * @param diagnostics Receives missing/unresolved/unsupported state reports.
 * @returns The snapshots in first-seen order.
 */
function planStateSnapshots(
  executionIr: ExecutionIr,
  context: ExecutionContext,
  semanticIr: SemanticIr,
  defaults: Array<{ id: string; kind: string; name?: string }> | undefined,
  diagnostics: DiagnosticCollector
): Array<{ id: string; kind: string; name?: string }> {
  const seen = new Set<string>()
  const snapshots: Array<{ id: string; kind: string; name?: string }> = []

  const supportedStates = new Set<string>([
    'default',
    'hover',
    'focus',
    'focusVisible',
    'active',
    'disabled',
    'visited',
  ])

  for (let i = 0; i < executionIr.clauseCount; i++) {
    const stateIndex = executionIr.clauseState[i]
    const stateId = context.stateIds[stateIndex]

    if (!stateId) {
      diagnostics.emit({
        code: 'IMH_EXTRACTOR_MISSING_STATE',
        severity: 'warning',
        category: 'extraction-error',
        message: `Clause ${i} references state index ${stateIndex} which has no id mapping`,
        clauseIndex: i,
      })
      continue
    }

    // Marked before the lookup so an unresolved id is reported only once.
    if (seen.has(stateId)) continue
    seen.add(stateId)

    const state = semanticIr.states.get(stateId)
    if (!state) {
      diagnostics.emit({
        code: 'IMH_EXTRACTOR_UNRESOLVED_STATE',
        severity: 'error',
        category: 'resolution-error',
        message: `State ${stateId} not found in semantic IR`,
        clauseIndex: i,
      })
      continue
    }

    const kind = state.kind ?? 'default'
    if (!supportedStates.has(kind)) {
      diagnostics.emit({
        code: 'IMH_EXTRACTOR_UNSUPPORTED_STATE',
        severity: 'warning',
        category: 'extraction-error',
        message: `State kind "${kind}" is not fully supported in V1`,
        clauseIndex: i,
      })
    }

    // `name` is read through an untyped view of the state, so it is only
    // propagated when it really is a string.
    const rawName = (state as unknown as Record<string, unknown>).name
    snapshots.push({
      id: stateId,
      kind,
      name: typeof rawName === 'string' ? rawName : undefined,
    })
  }

  if (snapshots.length === 0 && defaults) {
    for (const d of defaults) {
      if (!seen.has(d.id)) {
        seen.add(d.id)
        snapshots.push(d)
      }
    }
  }

  return snapshots
}

// ---------------------------------------------------------------------------
// Timeline Planning
// ---------------------------------------------------------------------------

/**
 * Determine the timeline sampling plan.
 *
 * V1 is static-only. If any clause references a non-static timeline, a
 * single warning listing every mode seen is emitted and the timeline mode
 * falls back to 'static'. Clauses whose timeline index has no id mapping,
 * or whose id is absent from the semantic IR, are skipped silently.
 *
 * @param executionIr Flattened IR providing clauseCount and clauseTimeline.
 * @param context Supplies the timeline index-to-id table.
 * @param semanticIr Supplies the timelines lookup.
 * @param defaultMode Accepted for interface symmetry; not read in V1.
 * @param diagnostics Receives the non-static fallback warning.
 * @returns Always `{ mode: 'static' }`.
 */
function planTimeline(
  executionIr: ExecutionIr,
  context: ExecutionContext,
  semanticIr: SemanticIr,
  defaultMode: string | undefined,
  diagnostics: DiagnosticCollector
): { mode: string; samples?: number[] } {
  const seenModes = new Set<string>()
  let hasNonStatic = false

  for (let i = 0; i < executionIr.clauseCount; i++) {
    const timelineIndex = executionIr.clauseTimeline[i]
    const timelineId = context.timelineIds[timelineIndex]
    if (!timelineId) continue

    const timeline = semanticIr.timelines.get(timelineId)
    if (!timeline) continue

    const mode = timeline.mode ?? 'static'
    seenModes.add(mode)
    if (mode !== 'static') {
      hasNonStatic = true
    }
  }

  if (hasNonStatic) {
    diagnostics.emit({
      code: 'IMH_EXTRACTOR_NON_STATIC_TIMELINE',
      severity: 'warning',
      category: 'extraction-error',
      message:
        'Non-static timelines detected; V1 falls back to static extraction. ' +
        `Modes seen: ${Array.from(seenModes).join(', ')}`,
    })
  }

  // For V1 we always return static regardless of what the clauses asked for.
  return { mode: 'static' }
}