v1.1.0: pooled runtime, 959 tests, production hardening (0 squash)

This commit is contained in:
John Dvorak
2025-08-15 10:00:00 -07:00
commit 92deb689cd
321 changed files with 79170 additions and 0 deletions
+415
View File
@@ -0,0 +1,415 @@
/**
* Extraction plan generation from execution IR.
*
* This is the main orchestrator for WBS 3: Fact Planning and Extraction Planning.
* It consumes execution IR + semantic IR and produces deduplicated, batched
* extraction requests that tell the runtime exactly which browser facts to capture.
*/
import type {
ExecutionIr,
SemanticIr,
Environment,
StateSnapshot,
TimelineSnapshot,
} from 'imhotep-core'
import type {
FactRequirementRegistry,
RequiredFacts,
Diagnostic,
} from './requirements.js'
import { analyzeRequiredFacts, DiagnosticCollector } from './requirements.js'
import type { SelectorPlan, SelectorResolutionPlan } from './selector.js'
import { planSelectorResolution } from './selector.js'
import type { ExtractionRequest, ExtractionBatch } from './batching.js'
import { deduplicateRequests, batchRequests } from './batching.js'
// ---------------------------------------------------------------------------
// Execution Context
// ---------------------------------------------------------------------------
/**
* Bundles the flattened execution IR with the lookup tables needed to map
* numeric indices back to semantic IR entities.
*
* The compiler (WBS 2) produces both IRs and these index-to-id arrays so
* the planner never has to guess insertion order.
*
* Lookup is two-step: the per-clause array on the execution IR yields an
* index, and that index selects an id from the matching array here, e.g.
* `envGuardIds[executionIr.clauseEnvGuard[i]]`. Within this file only
* `envGuardIds`, `stateIds` and `timelineIds` are read; a clause whose index
* has no entry is skipped for that planning step.
*/
export interface ExecutionContext {
/** Flattened execution IR whose clauses the planner walks by index. */
executionIr: ExecutionIr
/** Semantic IR used to resolve ids into envGuard, state and timeline entities. */
semanticIr: SemanticIr
/** Subject ids, selected by the values of executionIr.clauseSubject (presumably; not read in this file). */
subjectIds: string[]
/** Frame ids, selected by the values of executionIr.clauseFrame (presumably; not read in this file). */
frameIds: string[]
/** State ids, selected by the values of executionIr.clauseState. */
stateIds: string[]
/** Timeline ids, selected by the values of executionIr.clauseTimeline. */
timelineIds: string[]
/** EnvGuard ids, selected by the values of executionIr.clauseEnvGuard. */
envGuardIds: string[]
/** Tolerance ids, selected by the values of executionIr.clauseTolerance (presumably; not read in this file). */
toleranceIds: string[]
}
// ---------------------------------------------------------------------------
// Planner Options
// ---------------------------------------------------------------------------
/**
* Configuration handed to the ExtractionPlanner constructor.
*/
export interface PlannerOptions {
/** Registry that knows which facts each clause family needs. */
factRegistry: FactRequirementRegistry
/**
* Environment used as the single `env_default` case when no environment
* case was collected from envGuards. When omitted, the planner's fallback
* request uses an empty environment object instead.
*/
defaultEnvironment?: Partial<Environment>
/** State snapshots used when no clause contributed a state snapshot. */
defaultStateSnapshots?: Array<{ id: string; kind: string; name?: string }>
/**
* Default timeline mode. It is forwarded to timeline planning, which
* currently always returns the 'static' mode and does not consult it.
*/
defaultTimelineMode?: string
}
// ---------------------------------------------------------------------------
// Extraction Plan Output
// ---------------------------------------------------------------------------
/**
* The final output of the planning phase.
*
* Contains every extraction request needed to satisfy the execution IR,
* grouped into environment-compatible batches, plus any diagnostics.
*/
export interface ExtractionPlan {
/** Identifier of the form `plan_<n>`, numbered by a module-level counter. */
planId: string
/** Extraction requests after deduplication. */
requests: ExtractionRequest[]
/** Batches built from the deduplicated requests. */
batches: ExtractionBatch[]
/** Diagnostics collected while the plan was being built. */
diagnostics: Diagnostic[]
}
// ---------------------------------------------------------------------------
// Planner
// ---------------------------------------------------------------------------
/**
* Module-level counter used to number plans. `createPlan` pre-increments it
* to build `planId` (`plan_1`, `plan_2`, ...); nothing in the visible code
* resets it.
*/
let _planCounter = 0
export class ExtractionPlanner {
  constructor(private options: PlannerOptions) {}

  /**
   * Build an extraction plan from an execution context.
   *
   * Pipeline, in the order the diagnostics are produced:
   * 1. Analyze the facts required across all clauses.
   * 2. Plan selector resolution and forward its diagnostics under the
   *    'resolution-error' category.
   * 3. Expand the environment matrix from envGuards.
   * 4. Collect state snapshots.
   * 5. Determine the timeline plan.
   * 6. Build one request per environment case, or a single default request
   *    when there are no cases.
   * 7. Deduplicate the requests.
   * 8. Batch the deduplicated requests.
   *
   * Every request shares the same state snapshots, timeline, subjects and
   * required facts; only the environment case differs.
   *
   * @param context Execution IR, semantic IR and the index-to-id tables.
   * @returns The plan with deduplicated requests, batches and diagnostics.
   */
  createPlan(context: ExecutionContext): ExtractionPlan {
    const collector = new DiagnosticCollector()
    const { executionIr, semanticIr } = context
    const {
      factRegistry,
      defaultEnvironment,
      defaultStateSnapshots,
      defaultTimelineMode,
    } = this.options

    // Step 1: facts needed by any clause.
    const requiredFacts = analyzeRequiredFacts(executionIr, factRegistry, collector)

    // Step 2: selector resolution; its diagnostics are re-emitted here.
    const selectorPlan = planSelectorResolution(semanticIr)
    for (const { code, severity, message } of selectorPlan.diagnostics) {
      collector.emit({ code, severity, message, category: 'resolution-error' })
    }

    // Steps 3-5: environment cases, state snapshots, timeline.
    const envCases = expandEnvironmentMatrix(
      executionIr,
      context,
      semanticIr,
      defaultEnvironment,
      collector
    )
    const stateSnapshots = planStateSnapshots(
      executionIr,
      context,
      semanticIr,
      defaultStateSnapshots,
      collector
    )
    const timeline = planTimeline(
      executionIr,
      context,
      semanticIr,
      defaultTimelineMode,
      collector
    )

    // Step 6: every request differs only in its id and environment case.
    const buildRequest = (
      requestId: string,
      envCaseId: string,
      env: ExtractionRequest['env']
    ): ExtractionRequest => ({
      requestId,
      envCaseId,
      env,
      stateSnapshots,
      timeline,
      subjects: selectorPlan.subjects,
      requiredFacts,
      diagnostics: [],
    })

    const requests: ExtractionRequest[] = []
    envCases.forEach((envCase, position) => {
      requests.push(buildRequest(`req_${position + 1}`, envCase.id, envCase.env))
    })
    if (requests.length === 0) {
      // No environment case at all: still produce one request with defaults.
      requests.push(buildRequest('req_default', 'env_default', defaultEnvironment ?? {}))
    }

    // Step 7: deduplicate. Step 8: batch by compatible environment.
    const uniqueRequests = deduplicateRequests(requests)
    const batches = batchRequests(uniqueRequests)

    _planCounter += 1
    return {
      planId: `plan_${_planCounter}`,
      requests: uniqueRequests,
      batches,
      diagnostics: collector.diagnostics,
    }
  }
}
// ---------------------------------------------------------------------------
// Environment Matrix Expansion
// ---------------------------------------------------------------------------
/**
* One environment case produced by environment matrix expansion.
*/
interface EnvCaseEntry {
/** Case identifier: `env_<n>` for collected cases, `env_default` for the fallback. */
id: string
/** Environment values for this case. */
env: Partial<Environment>
}
/**
 * Serialize a value to JSON with object keys sorted at every nesting level,
 * so structurally equal objects yield the same string regardless of the
 * order in which their properties were inserted.
 */
function stableEnvKey(value: unknown): string {
  return JSON.stringify(value, (_key: string, nested: unknown) => {
    if (nested === null || typeof nested !== 'object' || Array.isArray(nested)) {
      return nested
    }
    const source = nested as Record<string, unknown>
    const ordered: Record<string, unknown> = {}
    for (const name of Object.keys(source).sort()) {
      ordered[name] = source[name]
    }
    return ordered
  })
}

/**
 * Collect every distinct environment case referenced by any clause in the
 * execution IR.
 *
 * For each clause, the envGuard index is mapped to an id through
 * `context.envGuardIds` and the guard is looked up in `semanticIr.envGuards`.
 * The guard's `normalizedCases` are unioned across clauses; two cases with
 * the same content count as one even if their properties appear in a
 * different order. Guards without a non-empty `normalizedCases` array
 * contribute nothing (a condition-string-only guard is not expanded here).
 *
 * @param executionIr Supplies `clauseCount` and the `clauseEnvGuard` indices.
 * @param context Supplies the `envGuardIds` index-to-id table.
 * @param semanticIr Supplies the `envGuards` map.
 * @param defaultEnv Used as the single `env_default` case when nothing was
 *   collected; when it is undefined and nothing was collected, the result is
 *   empty.
 * @param diagnostics Receives a warning for an index with no id mapping and
 *   an error for an id missing from the semantic IR.
 * @returns Cases in first-seen order, with ids `env_1`, `env_2`, ...
 */
function expandEnvironmentMatrix(
  executionIr: ExecutionIr,
  context: ExecutionContext,
  semanticIr: SemanticIr,
  defaultEnv: Partial<Environment> | undefined,
  diagnostics: DiagnosticCollector
): EnvCaseEntry[] {
  const seenKeys = new Set<string>()
  const cases: EnvCaseEntry[] = []

  for (let i = 0; i < executionIr.clauseCount; i++) {
    const guardIndex = executionIr.clauseEnvGuard[i]
    const guardId = context.envGuardIds[guardIndex]
    if (!guardId) {
      diagnostics.emit({
        code: 'IMH_EXTRACTOR_MISSING_ENV_GUARD',
        severity: 'warning',
        category: 'extraction-error',
        message: `Clause ${i} references envGuard index ${guardIndex} which has no id mapping`,
        clauseIndex: i,
      })
      continue
    }

    const guard = semanticIr.envGuards.get(guardId)
    if (!guard) {
      diagnostics.emit({
        code: 'IMH_EXTRACTOR_UNRESOLVED_ENV_GUARD',
        severity: 'error',
        category: 'resolution-error',
        message: `EnvGuard ${guardId} not found in semantic IR`,
        clauseIndex: i,
      })
      continue
    }

    // NOTE(review): the cast suggests the guard type visible here does not
    // declare `normalizedCases` - confirm and consider adding it to the type.
    const rawCases = (guard as unknown as Record<string, unknown>).normalizedCases
    if (!Array.isArray(rawCases)) continue

    for (const env of rawCases as Array<Partial<Environment>>) {
      // Key-order-insensitive identity, so reordered duplicates collapse.
      const key = stableEnvKey(env)
      if (seenKeys.has(key)) continue
      seenKeys.add(key)
      cases.push({
        id: `env_${cases.length + 1}`,
        env,
      })
    }
  }

  if (cases.length === 0 && defaultEnv) {
    cases.push({ id: 'env_default', env: defaultEnv })
  }
  return cases
}
// ---------------------------------------------------------------------------
// State Snapshot Planning
// ---------------------------------------------------------------------------
/**
 * Gather the state snapshots that clauses refer to, one per distinct state id.
 *
 * Kinds treated as supported: default, hover, focus, focusVisible, active,
 * disabled, visited. A state with any other kind produces a warning but is
 * still included in the result. A state without a kind is planned as
 * 'default'.
 *
 * @param executionIr Supplies `clauseCount` and the `clauseState` indices.
 * @param context Supplies the `stateIds` index-to-id table.
 * @param semanticIr Supplies the `states` map.
 * @param defaults Snapshots used only when no clause contributed one; entries
 *   whose id was already encountered are skipped.
 * @param diagnostics Receives a warning for an index with no id mapping, an
 *   error for an id missing from the semantic IR, and a warning for an
 *   unsupported kind.
 * @returns Snapshots in first-seen order.
 */
function planStateSnapshots(
  executionIr: ExecutionIr,
  context: ExecutionContext,
  semanticIr: SemanticIr,
  defaults: Array<{ id: string; kind: string; name?: string }> | undefined,
  diagnostics: DiagnosticCollector
): Array<{ id: string; kind: string; name?: string }> {
  const knownKinds = new Set([
    'default',
    'hover',
    'focus',
    'focusVisible',
    'active',
    'disabled',
    'visited',
  ])
  const visitedIds = new Set<string>()
  const planned: Array<{ id: string; kind: string; name?: string }> = []

  for (let clause = 0; clause < executionIr.clauseCount; clause += 1) {
    const stateIndex = executionIr.clauseState[clause]
    const stateId = context.stateIds[stateIndex]

    if (!stateId) {
      diagnostics.emit({
        code: 'IMH_EXTRACTOR_MISSING_STATE',
        severity: 'warning',
        category: 'extraction-error',
        message: `Clause ${clause} references state index ${stateIndex} which has no id mapping`,
        clauseIndex: clause,
      })
    } else if (!visitedIds.has(stateId)) {
      // Marked before resolution so an unresolved id is reported only once.
      visitedIds.add(stateId)
      const state = semanticIr.states.get(stateId)

      if (state) {
        const kind = state.kind ?? 'default'
        if (!knownKinds.has(kind)) {
          diagnostics.emit({
            code: 'IMH_EXTRACTOR_UNSUPPORTED_STATE',
            severity: 'warning',
            category: 'extraction-error',
            message: `State kind "${kind}" is not fully supported in V1`,
            clauseIndex: clause,
          })
        }
        planned.push({
          id: stateId,
          kind,
          name: (state as unknown as Record<string, unknown>).name as string | undefined,
        })
      } else {
        diagnostics.emit({
          code: 'IMH_EXTRACTOR_UNRESOLVED_STATE',
          severity: 'error',
          category: 'resolution-error',
          message: `State ${stateId} not found in semantic IR`,
          clauseIndex: clause,
        })
      }
    }
  }

  if (planned.length > 0 || !defaults) {
    return planned
  }

  // Nothing came from the clauses: fall back to the configured defaults.
  for (const fallback of defaults) {
    if (visitedIds.has(fallback.id)) continue
    visitedIds.add(fallback.id)
    planned.push(fallback)
  }
  return planned
}
// ---------------------------------------------------------------------------
// Timeline Planning
// ---------------------------------------------------------------------------
/**
 * Decide the timeline sampling plan.
 *
 * V1 is static-only: the result is always `{ mode: 'static' }`. The clauses
 * are still inspected so that a single warning can be emitted when any
 * resolved timeline has a mode other than 'static'; the warning lists every
 * mode encountered, 'static' included. A timeline without a mode counts as
 * 'static'.
 *
 * Clauses whose timeline index has no id mapping, or whose id is absent from
 * the semantic IR, are skipped without a diagnostic.
 *
 * @param executionIr Supplies `clauseCount` and the `clauseTimeline` indices.
 * @param context Supplies the `timelineIds` index-to-id table.
 * @param semanticIr Supplies the `timelines` map.
 * @param defaultMode Accepted but not consulted by the current implementation.
 * @param diagnostics Receives the non-static warning, if any.
 * @returns The static timeline plan.
 */
function planTimeline(
  executionIr: ExecutionIr,
  context: ExecutionContext,
  semanticIr: SemanticIr,
  defaultMode: string | undefined,
  diagnostics: DiagnosticCollector
): { mode: string; samples?: number[] } {
  const modes = new Set<string>()

  for (let clause = 0; clause < executionIr.clauseCount; clause += 1) {
    const timelineId = context.timelineIds[executionIr.clauseTimeline[clause]]
    const timeline = timelineId ? semanticIr.timelines.get(timelineId) : undefined
    if (timeline) {
      modes.add(timeline.mode ?? 'static')
    }
  }

  const observed = Array.from(modes)
  if (observed.some((mode) => mode !== 'static')) {
    diagnostics.emit({
      code: 'IMH_EXTRACTOR_NON_STATIC_TIMELINE',
      severity: 'warning',
      category: 'extraction-error',
      message:
        'Non-static timelines detected; V1 falls back to static extraction. ' +
        `Modes seen: ${observed.join(', ')}`,
    })
  }

  // V1 always extracts statically, whatever the clauses asked for.
  return { mode: 'static' }
}