/**
 * Extraction plan generation from execution IR.
 *
 * This is the main orchestrator for WBS 3: Fact Planning and Extraction Planning.
 * It consumes execution IR + semantic IR and produces deduplicated, batched
 * extraction requests that tell the runtime exactly which browser facts to capture.
 */
|
||
|
|
|
||
|
|
import type {
|
||
|
|
ExecutionIr,
|
||
|
|
SemanticIr,
|
||
|
|
Environment,
|
||
|
|
StateSnapshot,
|
||
|
|
TimelineSnapshot,
|
||
|
|
} from 'imhotep-core'
|
||
|
|
import type {
|
||
|
|
FactRequirementRegistry,
|
||
|
|
RequiredFacts,
|
||
|
|
Diagnostic,
|
||
|
|
} from './requirements.js'
|
||
|
|
import { analyzeRequiredFacts, DiagnosticCollector } from './requirements.js'
|
||
|
|
import type { SelectorPlan, SelectorResolutionPlan } from './selector.js'
|
||
|
|
import { planSelectorResolution } from './selector.js'
|
||
|
|
import type { ExtractionRequest, ExtractionBatch } from './batching.js'
|
||
|
|
import { deduplicateRequests, batchRequests } from './batching.js'
|
||
|
|
|
||
|
|
// ---------------------------------------------------------------------------
|
||
|
|
// Execution Context
|
||
|
|
// ---------------------------------------------------------------------------
|
||
|
|
|
||
|
|
/**
 * Input to the planner: the flattened execution IR, the semantic IR, and the
 * index-to-id lookup arrays that connect the two.
 *
 * Each `clause*` array of the execution IR holds a numeric index per clause;
 * the matching array below translates that index into the id of the
 * corresponding semantic IR entity. Per the original design notes, the
 * compiler (WBS 2) emits these arrays together with both IRs so the planner
 * never has to guess insertion order.
 */
export interface ExecutionContext {
  /** Flattened, index-based IR. */
  executionIr: ExecutionIr

  /** Entity-level IR that the ids below refer to. */
  semanticIr: SemanticIr

  /** Subject id for each value found in `executionIr.clauseSubject`. */
  subjectIds: string[]

  /** Frame id for each value found in `executionIr.clauseFrame`. */
  frameIds: string[]

  /** State id for each value found in `executionIr.clauseState`. */
  stateIds: string[]

  /** Timeline id for each value found in `executionIr.clauseTimeline`. */
  timelineIds: string[]

  /** EnvGuard id for each value found in `executionIr.clauseEnvGuard`. */
  envGuardIds: string[]

  /** Tolerance id for each value found in `executionIr.clauseTolerance`. */
  toleranceIds: string[]
}
|
||
|
|
|
||
|
|
// ---------------------------------------------------------------------------
|
||
|
|
// Planner Options
|
||
|
|
// ---------------------------------------------------------------------------
|
||
|
|
|
||
|
|
/**
 * Configuration accepted by `ExtractionPlanner`.
 *
 * Only `factRegistry` is mandatory; the `default*` members are fallbacks.
 */
export interface PlannerOptions {
  /** Registry describing which facts each clause family needs. */
  factRegistry: FactRequirementRegistry

  /** Environment to fall back on when no envGuards are active. */
  defaultEnvironment?: Partial<Environment>

  /** State snapshots to fall back on when no state refs are active. */
  defaultStateSnapshots?: Array<{ id: string; kind: string; name?: string }>

  /** Timeline mode to fall back on when no timeline refs are active. */
  defaultTimelineMode?: string
}
|
||
|
|
|
||
|
|
// ---------------------------------------------------------------------------
|
||
|
|
// Extraction Plan Output
|
||
|
|
// ---------------------------------------------------------------------------
|
||
|
|
|
||
|
|
/**
 * Result of the planning phase, as returned by `ExtractionPlanner.createPlan`.
 *
 * Carries the extraction requests needed to satisfy the execution IR, the
 * same requests grouped into batches, and the diagnostics gathered while
 * planning.
 */
export interface ExtractionPlan {
  /** Identifier assigned to this plan by the planner. */
  planId: string

  /** Extraction requests after deduplication. */
  requests: ExtractionRequest[]

  /** Batches built from `requests`. */
  batches: ExtractionBatch[]

  /** Diagnostics collected during planning. */
  diagnostics: Diagnostic[]
}
|
||
|
|
|
||
|
|
// ---------------------------------------------------------------------------
|
||
|
|
// Planner
|
||
|
|
// ---------------------------------------------------------------------------
|
||
|
|
|
||
|
|
/** Module-wide counter behind `plan_<n>` ids; bumped once per plan returned. */
let _planCounter = 0

/**
 * Builds extraction plans from an execution context using the configured
 * fact registry and fallback defaults.
 */
export class ExtractionPlanner {
  constructor(private options: PlannerOptions) {}

  /**
   * Build an extraction plan from an execution context.
   *
   * Pipeline:
   *   1. Analyze the facts required across all clauses.
   *   2. Plan selector resolution and forward its diagnostics
   *      (re-emitted under the 'resolution-error' category).
   *   3. Expand the environment matrix from envGuards.
   *   4. Collect state snapshots.
   *   5. Determine the timeline plan.
   *   6. Build one request per environment case, or a single
   *      `req_default` / `env_default` request when there are none.
   *   7. Deduplicate the requests.
   *   8. Batch the deduplicated requests.
   *
   * @param context Execution IR, semantic IR and the index-to-id tables.
   * @returns The plan: deduplicated requests, their batches, and every
   *   diagnostic gathered along the way.
   */
  createPlan(context: ExecutionContext): ExtractionPlan {
    const collector = new DiagnosticCollector()
    const { executionIr, semanticIr } = context
    const {
      factRegistry,
      defaultEnvironment,
      defaultStateSnapshots,
      defaultTimelineMode,
    } = this.options

    // 1. Required facts, unioned over all clauses.
    const requiredFacts = analyzeRequiredFacts(executionIr, factRegistry, collector)

    // 2. Selector resolution; its diagnostics join the shared collector.
    const selectorPlan = planSelectorResolution(semanticIr)
    for (const { code, severity, message } of selectorPlan.diagnostics) {
      collector.emit({ code, severity, message, category: 'resolution-error' })
    }

    // 3-5. Environment cases, state snapshots and timeline.
    const envCases = expandEnvironmentMatrix(
      executionIr,
      context,
      semanticIr,
      defaultEnvironment,
      collector
    )
    const stateSnapshots = planStateSnapshots(
      executionIr,
      context,
      semanticIr,
      defaultStateSnapshots,
      collector
    )
    const timeline = planTimeline(
      executionIr,
      context,
      semanticIr,
      defaultTimelineMode,
      collector
    )

    // 6. Every request shares the same snapshots, timeline, subjects and
    //    facts; only the ids and the environment differ.
    const makeRequest = (
      requestId: string,
      envCaseId: string,
      env: Partial<Environment>
    ): ExtractionRequest => ({
      requestId,
      envCaseId,
      env,
      stateSnapshots,
      timeline,
      subjects: selectorPlan.subjects,
      requiredFacts,
      diagnostics: [],
    })

    const requests: ExtractionRequest[] =
      envCases.length > 0
        ? envCases.map((envCase, position) =>
            makeRequest(`req_${position + 1}`, envCase.id, envCase.env)
          )
        : // No env cases at all: still produce one request with defaults.
          [makeRequest('req_default', 'env_default', defaultEnvironment ?? {})]

    // 7. Deduplicate, then 8. batch.
    const uniqueRequests = deduplicateRequests(requests)
    const batches = batchRequests(uniqueRequests)

    _planCounter += 1
    return {
      planId: `plan_${_planCounter}`,
      requests: uniqueRequests,
      batches,
      diagnostics: collector.diagnostics,
    }
  }
}
|
||
|
|
|
||
|
|
// ---------------------------------------------------------------------------
|
||
|
|
// Environment Matrix Expansion
|
||
|
|
// ---------------------------------------------------------------------------
|
||
|
|
|
||
|
|
/** One environment case produced by environment matrix expansion. */
interface EnvCaseEntry {
  /** Case identifier: `env_<n>` for guard-derived cases, `env_default` for the fallback. */
  id: string

  /** Environment values (possibly partial) that apply to this case. */
  env: Partial<Environment>
}
|
||
|
|
|
||
|
|
/**
 * Build a dedupe key for an environment case that does not depend on the
 * order in which object properties were inserted.
 */
function canonicalEnvKey(env: unknown): string {
  return JSON.stringify(env, (_key, value: unknown) => {
    if (value === null || typeof value !== 'object' || Array.isArray(value)) {
      return value
    }
    // Re-emit plain objects with sorted keys so {a, b} and {b, a} match.
    const source = value as Record<string, unknown>
    const ordered: Record<string, unknown> = {}
    for (const name of Object.keys(source).sort()) {
      ordered[name] = source[name]
    }
    return ordered
  })
}

/**
 * Collect the distinct environment cases referenced by the clauses of the
 * execution IR.
 *
 * For each clause, `executionIr.clauseEnvGuard[i]` is translated through
 * `context.envGuardIds` and looked up in `semanticIr.envGuards`. The guard's
 * `normalizedCases` are unioned across all clauses; structurally equal cases
 * (regardless of property order) are kept once, in first-seen order, and are
 * numbered `env_1`, `env_2`, ...
 *
 * Diagnostics:
 * - `IMH_EXTRACTOR_MISSING_ENV_GUARD` (warning) when a clause's guard index
 *   has no id mapping.
 * - `IMH_EXTRACTOR_UNRESOLVED_ENV_GUARD` (error) when the id is not present
 *   in the semantic IR.
 *
 * @param executionIr Flattened IR supplying `clauseCount` and `clauseEnvGuard`.
 * @param context     Supplies the `envGuardIds` lookup table.
 * @param semanticIr  Supplies the `envGuards` map.
 * @param defaultEnv  Fallback environment; used only when no case was collected.
 * @param diagnostics Collector receiving the diagnostics listed above.
 * @returns The collected cases. When none were collected, a single
 *   `env_default` case is returned if `defaultEnv` is provided, otherwise an
 *   empty array.
 */
function expandEnvironmentMatrix(
  executionIr: ExecutionIr,
  context: ExecutionContext,
  semanticIr: SemanticIr,
  defaultEnv: Partial<Environment> | undefined,
  diagnostics: DiagnosticCollector
): EnvCaseEntry[] {
  const seen = new Set<string>()
  const cases: EnvCaseEntry[] = []

  for (let i = 0; i < executionIr.clauseCount; i++) {
    const guardIndex = executionIr.clauseEnvGuard[i]
    const guardId = context.envGuardIds[guardIndex]
    if (!guardId) {
      diagnostics.emit({
        code: 'IMH_EXTRACTOR_MISSING_ENV_GUARD',
        severity: 'warning',
        category: 'extraction-error',
        message: `Clause ${i} references envGuard index ${guardIndex} which has no id mapping`,
        clauseIndex: i,
      })
      continue
    }

    const guard = semanticIr.envGuards.get(guardId)
    if (!guard) {
      diagnostics.emit({
        code: 'IMH_EXTRACTOR_UNRESOLVED_ENV_GUARD',
        severity: 'error',
        category: 'resolution-error',
        message: `EnvGuard ${guardId} not found in semantic IR`,
        clauseIndex: i,
      })
      continue
    }

    // The guard's declared type does not expose `normalizedCases`, so it is
    // read dynamically and validated at runtime. Guards that carry no
    // `normalizedCases` array (e.g. the condition-string shape mentioned in
    // the original notes) contribute no cases.
    const rawCases = (guard as unknown as Record<string, unknown>).normalizedCases
    if (!Array.isArray(rawCases)) continue

    for (const env of rawCases as Array<Partial<Environment>>) {
      const key = canonicalEnvKey(env)
      if (seen.has(key)) continue
      seen.add(key)
      cases.push({
        id: `env_${cases.length + 1}`,
        env,
      })
    }
  }

  if (cases.length === 0 && defaultEnv) {
    cases.push({ id: 'env_default', env: defaultEnv })
  }

  return cases
}
|
||
|
|
|
||
|
|
// ---------------------------------------------------------------------------
|
||
|
|
// State Snapshot Planning
|
||
|
|
// ---------------------------------------------------------------------------
|
||
|
|
|
||
|
|
/**
 * Collect the state snapshots referenced by the clauses of the execution IR.
 *
 * For each clause, `executionIr.clauseState[i]` is translated through
 * `context.stateIds` and looked up in `semanticIr.states`. Every state id
 * yields at most one snapshot, in first-reference order. A state without a
 * `kind` is treated as kind `'default'`.
 *
 * Kinds accepted without a warning: default, hover, focus, focusVisible,
 * active, disabled, visited. Any other kind produces a warning, but the
 * snapshot is still returned so downstream stages can decide what to do.
 *
 * Diagnostics:
 * - `IMH_EXTRACTOR_MISSING_STATE` (warning) when a clause's state index has
 *   no id mapping.
 * - `IMH_EXTRACTOR_UNRESOLVED_STATE` (error) when the id is not present in
 *   the semantic IR; reported once per id.
 * - `IMH_EXTRACTOR_UNSUPPORTED_STATE` (warning) for kinds outside the list
 *   above.
 *
 * @param executionIr Flattened IR supplying `clauseCount` and `clauseState`.
 * @param context     Supplies the `stateIds` lookup table.
 * @param semanticIr  Supplies the `states` map.
 * @param defaults    Fallback snapshots; used only when no snapshot was
 *   collected from the clauses. Entries are copied, never aliased.
 * @param diagnostics Collector receiving the diagnostics listed above.
 * @returns The collected snapshots, or copies of `defaults` when none were
 *   collected (possibly empty).
 */
function planStateSnapshots(
  executionIr: ExecutionIr,
  context: ExecutionContext,
  semanticIr: SemanticIr,
  defaults: Array<{ id: string; kind: string; name?: string }> | undefined,
  diagnostics: DiagnosticCollector
): Array<{ id: string; kind: string; name?: string }> {
  const seen = new Set<string>()
  const snapshots: Array<{ id: string; kind: string; name?: string }> = []

  const supportedStates = new Set<string>([
    'default',
    'hover',
    'focus',
    'focusVisible',
    'active',
    'disabled',
    'visited',
  ])

  for (let i = 0; i < executionIr.clauseCount; i++) {
    const stateIndex = executionIr.clauseState[i]
    const stateId = context.stateIds[stateIndex]
    if (!stateId) {
      diagnostics.emit({
        code: 'IMH_EXTRACTOR_MISSING_STATE',
        severity: 'warning',
        category: 'extraction-error',
        message: `Clause ${i} references state index ${stateIndex} which has no id mapping`,
        clauseIndex: i,
      })
      continue
    }

    // Marked as seen before resolution so an unresolvable id is reported
    // only for the first clause that references it.
    if (seen.has(stateId)) continue
    seen.add(stateId)

    const state = semanticIr.states.get(stateId)
    if (!state) {
      diagnostics.emit({
        code: 'IMH_EXTRACTOR_UNRESOLVED_STATE',
        severity: 'error',
        category: 'resolution-error',
        message: `State ${stateId} not found in semantic IR`,
        clauseIndex: i,
      })
      continue
    }

    const kind = state.kind ?? 'default'
    if (!supportedStates.has(kind)) {
      diagnostics.emit({
        code: 'IMH_EXTRACTOR_UNSUPPORTED_STATE',
        severity: 'warning',
        category: 'extraction-error',
        message: `State kind "${kind}" is not fully supported in V1`,
        clauseIndex: i,
      })
    }

    // The state's declared type does not expose `name`, so it is read
    // dynamically and only kept when it really is a string.
    const rawName = (state as unknown as Record<string, unknown>).name
    snapshots.push({
      id: stateId,
      kind,
      name: typeof rawName === 'string' ? rawName : undefined,
    })
  }

  if (snapshots.length === 0 && defaults) {
    for (const fallback of defaults) {
      // Ids already encountered while scanning clauses (including ones that
      // failed to resolve) and repeated default ids are skipped.
      if (seen.has(fallback.id)) continue
      seen.add(fallback.id)
      // Copy so later edits to the plan cannot reach the caller's options.
      snapshots.push({ ...fallback })
    }
  }

  return snapshots
}
|
||
|
|
|
||
|
|
// ---------------------------------------------------------------------------
|
||
|
|
// Timeline Planning
|
||
|
|
// ---------------------------------------------------------------------------
|
||
|
|
|
||
|
|
/**
 * Determine the timeline sampling plan.
 *
 * V1 is static-only: the returned plan is always `{ mode: 'static' }`.
 *
 * For each clause, `executionIr.clauseTimeline[i]` is translated through
 * `context.timelineIds` and looked up in `semanticIr.timelines`; a timeline
 * without a `mode` counts as `'static'`. Clauses whose index has no id
 * mapping, or whose id is absent from the semantic IR, are skipped without a
 * diagnostic. When no clause resolves to a timeline, `defaultMode` (if set)
 * is treated as the requested mode.
 *
 * If any requested mode is not `'static'`, a single
 * `IMH_EXTRACTOR_NON_STATIC_TIMELINE` warning listing the modes seen is
 * emitted, so the fallback to static extraction is never silent.
 *
 * @param executionIr Flattened IR supplying `clauseCount` and `clauseTimeline`.
 * @param context     Supplies the `timelineIds` lookup table.
 * @param semanticIr  Supplies the `timelines` map.
 * @param defaultMode Mode assumed when no clause resolves to a timeline.
 * @param diagnostics Collector receiving the fallback warning.
 * @returns Always `{ mode: 'static' }` in V1.
 */
function planTimeline(
  executionIr: ExecutionIr,
  context: ExecutionContext,
  semanticIr: SemanticIr,
  defaultMode: string | undefined,
  diagnostics: DiagnosticCollector
): { mode: string; samples?: number[] } {
  const seenModes = new Set<string>()

  for (let i = 0; i < executionIr.clauseCount; i++) {
    const timelineIndex = executionIr.clauseTimeline[i]
    const timelineId = context.timelineIds[timelineIndex]
    if (!timelineId) continue

    const timeline = semanticIr.timelines.get(timelineId)
    if (!timeline) continue

    seenModes.add(timeline.mode ?? 'static')
  }

  // The configured default only applies when no clause named a timeline.
  if (seenModes.size === 0 && defaultMode) {
    seenModes.add(defaultMode)
  }

  const modes = Array.from(seenModes)
  if (modes.some((mode) => mode !== 'static')) {
    diagnostics.emit({
      code: 'IMH_EXTRACTOR_NON_STATIC_TIMELINE',
      severity: 'warning',
      category: 'extraction-error',
      message:
        'Non-static timelines detected; V1 falls back to static extraction. ' +
        `Modes seen: ${modes.join(', ')}`,
    })
  }

  // For V1 we always return static regardless of what was requested.
  return { mode: 'static' }
}
|