v1.1.0: pooled runtime, 959 tests, production hardening (0 squash)
This commit is contained in:
@@ -0,0 +1,144 @@
|
||||
/**
|
||||
* Plan deduplication and batching.
|
||||
*
|
||||
* Multiple environment cases, state snapshots, or timeline modes may produce
|
||||
* identical extraction needs. This module collapses duplicate plans and groups
|
||||
* compatible requests into batches so the runtime can issue bulk calls.
|
||||
*/
|
||||
|
||||
import type { Environment, StateSnapshot, TimelineSnapshot } from 'imhotep-core'
|
||||
import type { RequiredFacts, Diagnostic } from './requirements.js'
|
||||
import type { SelectorPlan } from './selector.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Extraction Request Shape
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * One unit of work handed to the extractor runtime.
 *
 * Each request pairs a single env case with one state plan and one timeline
 * plan. The runtime may still merge several requests into a single browser
 * session when their env and subject sets are compatible.
 */
export interface ExtractionRequest {
  /** Identifier that is unique to this request. */
  requestId: string
  /** Id of the environment case this request belongs to. */
  envCaseId: string
  /** Environment parameters (viewport, colorScheme, etc). */
  env: Partial<Environment>
  /** State snapshots that must be captured under this environment. */
  stateSnapshots: { id: string; kind: string; name?: string }[]
  /** Timeline sampling plan: a mode plus an optional list of samples. */
  timeline: { mode: string; samples?: number[] }
  /** Subjects whose selectors have to be resolved. */
  subjects: SelectorPlan[]
  /** Union of every fact needed by the active clauses in this context. */
  requiredFacts: RequiredFacts
  /** Diagnostics gathered while this request was being built. */
  diagnostics: Diagnostic[]
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Deduplication
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Collapse extraction requests that describe the same work.
 *
 * Requests are compared through the string produced by `requestKey`. When
 * several requests share a key, only the first one (and therefore its
 * requestId) survives. The relative order of the surviving requests matches
 * the input order, and the input array itself is left untouched.
 *
 * @param requests Requests to filter.
 * @returns A new array holding the first request seen for each distinct key.
 */
export function deduplicateRequests(requests: ExtractionRequest[]): ExtractionRequest[] {
  const seenKeys = new Set<string>()

  return requests.filter((candidate) => {
    const key = requestKey(candidate)
    if (seenKeys.has(key)) return false
    seenKeys.add(key)
    return true
  })
}
|
||||
|
||||
/**
 * Build a deterministic string key for an extraction request.
 *
 * The key covers the env, the sorted state snapshot ids, the timeline mode,
 * the sorted subject selectors and the required facts. `requestId`,
 * `envCaseId` and `diagnostics` are deliberately left out so that requests
 * differing only in those fields compare equal.
 *
 * The env is canonicalized before serialization: JSON.stringify emits
 * properties in insertion order, so without this step two envs holding the
 * same values under a different key order would produce different keys and
 * escape deduplication.
 *
 * NOTE(review): `timeline.samples` is not part of the key, so requests that
 * share a mode but differ in samples are treated as duplicates. Confirm that
 * this is intended.
 */
function requestKey(req: ExtractionRequest): string {
  return JSON.stringify({
    env: canonicalizeForKey(req.env),
    // `map` returns a fresh array, so sorting never mutates the request.
    stateSnapshotIds: req.stateSnapshots.map((s) => s.id).sort(),
    timelineMode: req.timeline.mode,
    subjectSelectors: req.subjects.map((s) => s.selector).sort(),
    requiredFacts: serializeRequiredFacts(req.requiredFacts),
  })
}

/**
 * Return a copy of `value` in which every plain object has its keys in sorted
 * order (recursively), so that JSON.stringify output no longer depends on
 * property insertion order. Arrays keep their element order; primitives are
 * returned as-is.
 */
function canonicalizeForKey(value: unknown): unknown {
  if (Array.isArray(value)) return value.map((item) => canonicalizeForKey(item))
  if (value === null || typeof value !== 'object') return value
  // Objects that define their own JSON form (e.g. Date) are left to JSON.stringify.
  if (typeof (value as { toJSON?: unknown }).toJSON === 'function') return value

  const source = value as Record<string, unknown>
  const ordered: Record<string, unknown> = {}
  for (const key of Object.keys(source).sort()) {
    ordered[key] = canonicalizeForKey(source[key])
  }
  return ordered
}

/**
 * Serialize RequiredFacts into a plain object suitable for JSON.stringify.
 *
 * `styles` and `topology` are iterated into arrays and sorted so their
 * iteration order does not leak into the key; every other field is passed
 * through unchanged.
 */
function serializeRequiredFacts(facts: RequiredFacts): Record<string, unknown> {
  return {
    geometry: facts.geometry,
    fragments: facts.fragments,
    styles: Array.from(facts.styles).sort(),
    topology: Array.from(facts.topology).sort(),
    text: facts.text,
    scroll: facts.scroll,
    clipping: facts.clipping,
    paint: facts.paint,
    visibility: facts.visibility,
    transforms: facts.transforms,
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Batching
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * A set of extraction requests that may run in one browser session because
 * their environments are compatible.
 *
 * The V1 batching rule is intentionally simple: only requests with the exact
 * same env (viewport, colorScheme, pointer, etc) are batched together.
 */
export interface ExtractionBatch {
  /** Identifier of the batch. */
  batchId: string
  /** Environment shared by every request in the batch. */
  env: Partial<Environment>
  /** The requests that were grouped into this batch. */
  requests: ExtractionRequest[]
}
|
||||
|
||||
/**
 * Serialize an env into a grouping key that does not depend on property
 * insertion order: plain-object keys are sorted recursively before
 * JSON.stringify runs. Arrays keep their element order.
 */
function stableEnvKey(env: unknown): string {
  const normalize = (value: unknown): unknown => {
    if (Array.isArray(value)) return value.map((item) => normalize(item))
    if (value === null || typeof value !== 'object') return value
    // Objects that define their own JSON form (e.g. Date) are left to JSON.stringify.
    if (typeof (value as { toJSON?: unknown }).toJSON === 'function') return value

    const source = value as Record<string, unknown>
    const ordered: Record<string, unknown> = {}
    for (const key of Object.keys(source).sort()) {
      ordered[key] = normalize(source[key])
    }
    return ordered
  }

  return JSON.stringify(normalize(env))
}

/**
 * Group deduplicated requests into batches by environment compatibility.
 *
 * Requests whose env holds the same values land in the same batch, regardless
 * of the order in which the env properties were written. Batches are returned
 * in order of first appearance and are numbered `batch_1`, `batch_2`, ...;
 * requests keep their input order inside each batch.
 *
 * @param requests Requests to group (normally already deduplicated).
 * @returns One batch per distinct env; an empty array for empty input.
 */
export function batchRequests(requests: ExtractionRequest[]): ExtractionBatch[] {
  const groups = new Map<string, ExtractionRequest[]>()

  for (const req of requests) {
    const envKey = stableEnvKey(req.env)
    const group = groups.get(envKey)
    if (group) {
      group.push(req)
    } else {
      groups.set(envKey, [req])
    }
  }

  // Map iteration follows insertion order, so the index doubles as a batch counter.
  return Array.from(groups.entries(), ([envKey, group], index) => ({
    batchId: `batch_${index + 1}`,
    // The key is our own serialization of the env, so parsing it yields a detached copy.
    env: JSON.parse(envKey) as Partial<Environment>,
    requests: group,
  }))
}
|
||||
@@ -0,0 +1,43 @@
|
||||
/**
|
||||
* imhotep-extractor — Extraction planning system.
|
||||
*
|
||||
* Compiles execution IR into precise browser fact requirements.
|
||||
*
|
||||
* Public surface:
|
||||
* - ExtractionPlanner (planner.ts)
|
||||
* - FactRequirementRegistry (requirements.ts)
|
||||
* - planSelectorResolution (selector.ts)
|
||||
* - deduplicateRequests, batchRequests (batching.ts)
|
||||
*/
|
||||
|
||||
export {
|
||||
ExtractionPlanner,
|
||||
type ExecutionContext,
|
||||
type PlannerOptions,
|
||||
type ExtractionPlan,
|
||||
} from './planner.js'
|
||||
|
||||
export {
|
||||
FactRequirementRegistry,
|
||||
analyzeRequiredFacts,
|
||||
createEmptyRequiredFacts,
|
||||
DiagnosticCollector,
|
||||
FACT_FAMILY,
|
||||
type FactRequirement,
|
||||
type RequiredFacts,
|
||||
type FactFamily,
|
||||
type Diagnostic,
|
||||
} from './requirements.js'
|
||||
|
||||
export {
|
||||
planSelectorResolution,
|
||||
type SelectorPlan,
|
||||
type SelectorResolutionPlan,
|
||||
} from './selector.js'
|
||||
|
||||
export {
|
||||
deduplicateRequests,
|
||||
batchRequests,
|
||||
type ExtractionRequest,
|
||||
type ExtractionBatch,
|
||||
} from './batching.js'
|
||||
@@ -0,0 +1,581 @@
|
||||
/**
|
||||
* Unit tests for the extraction planning system.
|
||||
*
|
||||
* Covers:
|
||||
* - Fact requirement analysis and registry
|
||||
* - Plan deduplication and batching
|
||||
* - Environment matrix expansion
|
||||
* - Unsupported fact diagnostics
|
||||
*/
|
||||
|
||||
import { describe, it } from 'node:test'
|
||||
import assert from 'node:assert'
|
||||
|
||||
import {
|
||||
FactRequirementRegistry,
|
||||
analyzeRequiredFacts,
|
||||
createEmptyRequiredFacts,
|
||||
DiagnosticCollector,
|
||||
FACT_FAMILY,
|
||||
} from './requirements.js'
|
||||
|
||||
import { deduplicateRequests, batchRequests } from './batching.js'
|
||||
import type { ExtractionRequest } from './batching.js'
|
||||
|
||||
import { ExtractionPlanner } from './planner.js'
|
||||
import type { ExecutionContext } from './planner.js'
|
||||
|
||||
import type { ExecutionIr, SemanticIr } from 'imhotep-core'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Create a bare-bones ExecutionIr for tests.
 *
 * `clauseType` is filled from the given list; every other per-clause column is
 * a zero-initialised typed array with one slot per clause.
 */
function makeExecutionIr(clauseTypes: number[]): ExecutionIr {
  const clauseCount = clauseTypes.length
  const zeroedU32 = (): Uint32Array => new Uint32Array(clauseCount)
  const zeroedF64 = (): Float64Array => new Float64Array(clauseCount)

  return {
    clauseCount,
    clauseType: Uint16Array.from(clauseTypes),
    clauseSubject: zeroedU32(),
    clauseReference: zeroedU32(),
    clauseFrame: zeroedU32(),
    clauseState: zeroedU32(),
    clauseTimeline: zeroedU32(),
    clauseTolerance: zeroedU32(),
    clauseEnvGuard: zeroedU32(),
    clauseArg0: zeroedF64(),
    clauseArg1: zeroedF64(),
    clauseFlags: zeroedU32(),
    clauseOrigin: zeroedU32(),
  }
}
|
||||
|
||||
/**
 * Create a bare-bones SemanticIr for tests.
 *
 * All tables start out as empty Maps. The optional subjects, envGuards, states
 * and timelines are inserted into their tables keyed by `id`; the loosely
 * typed fixtures are cast to the core entity types without any conversion.
 */
function makeSemanticIr(opts: {
  envGuards?: Array<{ id: string; normalizedCases?: Array<Record<string, unknown>> }>
  states?: Array<{ id: string; kind: string; name?: string }>
  timelines?: Array<{ id: string; mode: string }>
  subjects?: Array<{ id: string; selector: string; kind?: string }>
}): SemanticIr {
  const ir: SemanticIr = {
    subjects: new Map(),
    frames: new Map(),
    states: new Map(),
    timelines: new Map(),
    tolerances: new Map(),
    envGuards: new Map(),
    clauses: new Map(),
    groups: new Map(),
    diagnosticMetadata: new Map(),
  }

  ;(opts.subjects ?? []).forEach((subject) => {
    ir.subjects.set(subject.id, subject as unknown as import('imhotep-core').SemanticSubject)
  })

  ;(opts.envGuards ?? []).forEach((guard) => {
    ir.envGuards.set(guard.id, guard as unknown as import('imhotep-core').SemanticEnvGuard)
  })

  ;(opts.states ?? []).forEach((state) => {
    ir.states.set(state.id, state as unknown as import('imhotep-core').SemanticState)
  })

  ;(opts.timelines ?? []).forEach((timeline) => {
    ir.timelines.set(timeline.id, timeline as unknown as import('imhotep-core').SemanticTimeline)
  })

  return ir
}
|
||||
|
||||
/**
 * Wrap the given IRs in an ExecutionContext for tests.
 *
 * Any index-to-id table that is not supplied through `indexMaps` defaults to
 * an array of empty strings with one entry per clause.
 */
function makeExecutionContext(
  executionIr: ExecutionIr,
  semanticIr: SemanticIr,
  indexMaps?: Partial<{
    subjectIds: string[]
    frameIds: string[]
    stateIds: string[]
    timelineIds: string[]
    envGuardIds: string[]
    toleranceIds: string[]
  }>
): ExecutionContext {
  // A fresh array per table, so the defaults are never shared between fields.
  const blankIds = (): string[] => new Array<string>(executionIr.clauseCount).fill('')

  return {
    executionIr,
    semanticIr,
    subjectIds: indexMaps?.subjectIds ?? blankIds(),
    frameIds: indexMaps?.frameIds ?? blankIds(),
    stateIds: indexMaps?.stateIds ?? blankIds(),
    timelineIds: indexMaps?.timelineIds ?? blankIds(),
    envGuardIds: indexMaps?.envGuardIds ?? blankIds(),
    toleranceIds: indexMaps?.toleranceIds ?? blankIds(),
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Fact Requirement Analysis
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('analyzeRequiredFacts', () => {
  /**
   * Run analyzeRequiredFacts for the given clause types against a fresh
   * registry (optionally populated by `populate`) and a fresh collector.
   */
  function analyze(
    clauseTypes: number[],
    populate: (registry: FactRequirementRegistry) => void = () => {}
  ) {
    const registry = new FactRequirementRegistry()
    populate(registry)
    const collector = new DiagnosticCollector()
    const facts = analyzeRequiredFacts(makeExecutionIr(clauseTypes), registry, collector)
    return { facts, reported: collector.diagnostics }
  }

  it('returns empty facts when there are no clauses', () => {
    const { facts, reported } = analyze([])

    assert.strictEqual(facts.geometry, false)
    assert.strictEqual(facts.text, false)
    assert.strictEqual(reported.length, 0)
  })

  it('resolves facts for a single registered clause type', () => {
    const { facts, reported } = analyze([1], (registry) => {
      registry.register(1, [
        { family: FACT_FAMILY.GEOMETRY },
        { family: FACT_FAMILY.STYLES, properties: ['position'] },
      ])
    })

    assert.strictEqual(facts.geometry, true)
    assert.strictEqual(facts.styles.has('position'), true)
    assert.strictEqual(reported.length, 0)
  })

  it('unions facts across multiple clauses', () => {
    const { facts, reported } = analyze([1, 2], (registry) => {
      registry.register(1, [{ family: FACT_FAMILY.GEOMETRY }])
      registry.register(2, [
        { family: FACT_FAMILY.TEXT },
        { family: FACT_FAMILY.STYLES, properties: ['z-index'] },
      ])
    })

    assert.strictEqual(facts.geometry, true)
    assert.strictEqual(facts.text, true)
    assert.strictEqual(facts.styles.has('z-index'), true)
    assert.strictEqual(reported.length, 0)
  })

  it('deduplicates style properties across clauses', () => {
    // The same clause type appears twice, so its style list is merged twice.
    const { facts } = analyze([1, 1], (registry) => {
      registry.register(1, [
        { family: FACT_FAMILY.STYLES, properties: ['position', 'overflow'] },
      ])
    })

    assert.strictEqual(facts.styles.size, 2)
    assert.strictEqual(facts.styles.has('position'), true)
    assert.strictEqual(facts.styles.has('overflow'), true)
  })

  it('emits a diagnostic for unsupported clause types', () => {
    // Nothing is registered, so clause type 99 is unknown to the registry.
    const { facts, reported } = analyze([99])

    assert.strictEqual(facts.geometry, false)
    assert.strictEqual(reported.length, 1)
    assert.strictEqual(reported[0].code, 'IMH_EXTRACTOR_UNSUPPORTED_CLAUSE_TYPE')
    assert.strictEqual(reported[0].clauseIndex, 0)
  })

  it('collects multiple unsupported clause diagnostics', () => {
    const { reported } = analyze([99, 100])

    assert.strictEqual(reported.length, 2)
    assert.strictEqual(reported[0].clauseIndex, 0)
    assert.strictEqual(reported[1].clauseIndex, 1)
  })
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Plan Deduplication
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('deduplicateRequests', () => {
  /** Baseline request used by every case; `overrides` replaces individual fields. */
  const requestFixture = (overrides: Partial<ExtractionRequest> = {}): ExtractionRequest => ({
    requestId: 'r1',
    envCaseId: 'e1',
    env: { viewportWidth: 1000 },
    stateSnapshots: [{ id: 's1', kind: 'default' }],
    timeline: { mode: 'static' },
    subjects: [{ id: 'sub1', selector: '.a', kind: 'element' }],
    requiredFacts: createEmptyRequiredFacts(),
    diagnostics: [],
    ...overrides,
  })

  it('returns identical requests unchanged when there is only one', () => {
    const only = requestFixture()

    const result = deduplicateRequests([only])

    assert.strictEqual(result.length, 1)
    assert.strictEqual(result[0].requestId, 'r1')
  })

  it('removes exact duplicate requests', () => {
    const original = requestFixture()
    // Only the ids differ; they are not expected to affect duplicate detection.
    const copy: ExtractionRequest = { ...original, requestId: 'r2', envCaseId: 'e2' }

    const result = deduplicateRequests([original, copy])

    assert.strictEqual(result.length, 1)
    assert.strictEqual(result[0].requestId, 'r1')
  })

  it('keeps requests that differ in env', () => {
    const narrow = requestFixture()
    const wide: ExtractionRequest = {
      ...narrow,
      requestId: 'r2',
      envCaseId: 'e2',
      env: { viewportWidth: 2000 },
    }

    const result = deduplicateRequests([narrow, wide])

    assert.strictEqual(result.length, 2)
  })

  it('keeps requests that differ in required facts', () => {
    const needsGeometry = requestFixture({
      requiredFacts: { ...createEmptyRequiredFacts(), geometry: true },
    })
    const needsText: ExtractionRequest = {
      ...needsGeometry,
      requestId: 'r2',
      requiredFacts: { ...createEmptyRequiredFacts(), text: true },
    }

    const result = deduplicateRequests([needsGeometry, needsText])

    assert.strictEqual(result.length, 2)
  })
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Batching
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('batchRequests', () => {
  /** Request with no snapshots or subjects; `overrides` replaces individual fields. */
  const requestFixture = (overrides: Partial<ExtractionRequest> = {}): ExtractionRequest => ({
    requestId: 'r1',
    envCaseId: 'e1',
    env: { viewportWidth: 1000 },
    stateSnapshots: [],
    timeline: { mode: 'static' },
    subjects: [],
    requiredFacts: createEmptyRequiredFacts(),
    diagnostics: [],
    ...overrides,
  })

  it('puts a single request into one batch', () => {
    const batches = batchRequests([requestFixture()])

    assert.strictEqual(batches.length, 1)
    assert.strictEqual(batches[0].requests.length, 1)
  })

  it('groups requests with identical env into one batch', () => {
    // Both requests reference the very same env object.
    const sharedEnv = { viewportWidth: 1000, colorScheme: 'light' as const }
    const first = requestFixture({ env: sharedEnv })
    const second = requestFixture({
      requestId: 'r2',
      envCaseId: 'e2',
      env: sharedEnv,
      stateSnapshots: [{ id: 's2', kind: 'hover' }],
    })

    const batches = batchRequests([first, second])

    assert.strictEqual(batches.length, 1)
    assert.strictEqual(batches[0].requests.length, 2)
  })

  it('splits requests with different env into separate batches', () => {
    const narrow = requestFixture()
    const wide = requestFixture({
      requestId: 'r2',
      envCaseId: 'e2',
      env: { viewportWidth: 2000 },
    })

    const batches = batchRequests([narrow, wide])

    assert.strictEqual(batches.length, 2)
  })
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// ExtractionPlanner — Integration
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('ExtractionPlanner', () => {
  /** Registry in which clause type 1 requires the geometry fact family. */
  const geometryRegistry = (): FactRequirementRegistry => {
    const registry = new FactRequirementRegistry()
    registry.register(1, [{ family: FACT_FAMILY.GEOMETRY }])
    return registry
  }

  /** Planner with no defaults configured, backed by `geometryRegistry()`. */
  const geometryPlanner = (): ExtractionPlanner =>
    new ExtractionPlanner({ factRegistry: geometryRegistry() })

  it('produces a single request with defaults when IR is empty', () => {
    const planner = new ExtractionPlanner({
      factRegistry: new FactRequirementRegistry(),
      defaultEnvironment: { viewportWidth: 1280, viewportHeight: 720 },
      defaultStateSnapshots: [{ id: 'default', kind: 'default' }],
    })
    const ctx = makeExecutionContext(makeExecutionIr([]), makeSemanticIr({}))

    const plan = planner.createPlan(ctx)

    assert.strictEqual(plan.requests.length, 1)
    assert.strictEqual(plan.requests[0].envCaseId, 'env_default')
    assert.strictEqual(plan.requests[0].env.viewportWidth, 1280)
    assert.strictEqual(plan.batches.length, 1)
    assert.strictEqual(plan.diagnostics.length, 0)
  })

  it('expands environment matrix from envGuards', () => {
    const planner = new ExtractionPlanner({
      factRegistry: geometryRegistry(),
      defaultEnvironment: { viewportWidth: 1280 },
    })

    const executionIr = makeExecutionIr([1])
    // Clause 0 points at envGuard index 0, which maps to 'guard_1'.
    executionIr.clauseEnvGuard[0] = 0

    const semanticIr = makeSemanticIr({
      envGuards: [
        {
          id: 'guard_1',
          normalizedCases: [
            { viewportWidth: 320, colorScheme: 'light' },
            { viewportWidth: 768, colorScheme: 'dark' },
          ],
        },
      ],
    })
    const ctx = makeExecutionContext(executionIr, semanticIr, {
      envGuardIds: ['guard_1'],
    })

    const plan = planner.createPlan(ctx)

    assert.strictEqual(plan.requests.length, 2)
    assert.strictEqual(plan.requests[0].env.viewportWidth, 320)
    assert.strictEqual(plan.requests[1].env.viewportWidth, 768)
    assert.strictEqual(plan.batches.length, 2)
  })

  it('deduplicates identical env cases from multiple guards', () => {
    const planner = geometryPlanner()

    const executionIr = makeExecutionIr([1, 1])
    executionIr.clauseEnvGuard[0] = 0
    executionIr.clauseEnvGuard[1] = 1

    const sameCaseGuard = (id: string) => ({
      id,
      normalizedCases: [{ viewportWidth: 1000 }],
    })
    const semanticIr = makeSemanticIr({
      envGuards: [sameCaseGuard('guard_a'), sameCaseGuard('guard_b')],
    })
    const ctx = makeExecutionContext(executionIr, semanticIr, {
      envGuardIds: ['guard_a', 'guard_b'],
    })

    const plan = planner.createPlan(ctx)

    // Both guards contribute the same env case, so a single request remains.
    assert.strictEqual(plan.requests.length, 1)
    assert.strictEqual(plan.batches.length, 1)
  })

  it('emits diagnostics for unsupported clause types', () => {
    const planner = new ExtractionPlanner({
      factRegistry: new FactRequirementRegistry(),
      defaultEnvironment: { viewportWidth: 1280 },
    })
    const ctx = makeExecutionContext(makeExecutionIr([42]), makeSemanticIr({}))

    const plan = planner.createPlan(ctx)

    assert.ok(
      plan.diagnostics.some(
        (d) => d.code === 'IMH_EXTRACTOR_UNSUPPORTED_CLAUSE_TYPE'
      )
    )
  })

  it('plans state snapshots from semantic IR', () => {
    const planner = geometryPlanner()

    const executionIr = makeExecutionIr([1])
    executionIr.clauseState[0] = 0

    const semanticIr = makeSemanticIr({
      states: [
        { id: 'state_hover', kind: 'hover', name: 'hover' },
        { id: 'state_default', kind: 'default' },
      ],
    })
    const ctx = makeExecutionContext(executionIr, semanticIr, {
      stateIds: ['state_hover'],
    })

    const plan = planner.createPlan(ctx)

    assert.strictEqual(plan.requests[0].stateSnapshots.length, 1)
    assert.strictEqual(plan.requests[0].stateSnapshots[0].kind, 'hover')
  })

  it('warns on non-static timelines but falls back to static', () => {
    const planner = geometryPlanner()

    const executionIr = makeExecutionIr([1])
    executionIr.clauseTimeline[0] = 0

    const semanticIr = makeSemanticIr({
      timelines: [{ id: 'tl_keyframes', mode: 'keyframes' }],
    })
    const ctx = makeExecutionContext(executionIr, semanticIr, {
      timelineIds: ['tl_keyframes'],
    })

    const plan = planner.createPlan(ctx)

    assert.ok(
      plan.diagnostics.some(
        (d) => d.code === 'IMH_EXTRACTOR_NON_STATIC_TIMELINE'
      )
    )
    assert.strictEqual(plan.requests[0].timeline.mode, 'static')
  })

  it('includes selector plans from semantic IR subjects', () => {
    const planner = geometryPlanner()

    const semanticIr = makeSemanticIr({
      subjects: [
        { id: 'sub_1', selector: '.button', kind: 'element' },
        { id: 'sub_2', selector: '.modal', kind: 'element' },
      ],
    })
    const ctx = makeExecutionContext(makeExecutionIr([1]), semanticIr)

    const plan = planner.createPlan(ctx)

    assert.strictEqual(plan.requests[0].subjects.length, 2)
    assert.strictEqual(plan.requests[0].subjects[0].selector, '.button')
    assert.strictEqual(plan.requests[0].subjects[1].selector, '.modal')
  })
})
|
||||
@@ -0,0 +1,415 @@
|
||||
/**
|
||||
* Extraction plan generation from execution IR.
|
||||
*
|
||||
* This is the main orchestrator for WBS 3: Fact Planning and Extraction Planning.
|
||||
* It consumes execution IR + semantic IR and produces deduplicated, batched
|
||||
* extraction requests that tell the runtime exactly which browser facts to capture.
|
||||
*/
|
||||
|
||||
import type {
|
||||
ExecutionIr,
|
||||
SemanticIr,
|
||||
Environment,
|
||||
StateSnapshot,
|
||||
TimelineSnapshot,
|
||||
} from 'imhotep-core'
|
||||
import type {
|
||||
FactRequirementRegistry,
|
||||
RequiredFacts,
|
||||
Diagnostic,
|
||||
} from './requirements.js'
|
||||
import { analyzeRequiredFacts, DiagnosticCollector } from './requirements.js'
|
||||
import type { SelectorPlan, SelectorResolutionPlan } from './selector.js'
|
||||
import { planSelectorResolution } from './selector.js'
|
||||
import type { ExtractionRequest, ExtractionBatch } from './batching.js'
|
||||
import { deduplicateRequests, batchRequests } from './batching.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Execution Context
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * The flattened execution IR together with the lookup tables that translate
 * its numeric indices back into semantic IR entity ids.
 *
 * Both IRs and the index-to-id arrays come from the compiler (WBS 2), so the
 * planner never has to guess insertion order.
 */
export interface ExecutionContext {
  /** Flattened, column-oriented clause data. */
  executionIr: ExecutionIr
  /** Semantic entities that the id tables below refer to. */
  semanticIr: SemanticIr
  /** Maps executionIr.clauseSubject[i] to a subject id. */
  subjectIds: string[]
  /** Maps executionIr.clauseFrame[i] to a frame id. */
  frameIds: string[]
  /** Maps executionIr.clauseState[i] to a state id. */
  stateIds: string[]
  /** Maps executionIr.clauseTimeline[i] to a timeline id. */
  timelineIds: string[]
  /** Maps executionIr.clauseEnvGuard[i] to an envGuard id. */
  envGuardIds: string[]
  /** Maps executionIr.clauseTolerance[i] to a tolerance id. */
  toleranceIds: string[]
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Planner Options
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Configuration accepted by the ExtractionPlanner constructor. */
export interface PlannerOptions {
  /** Registry describing which facts each clause family requires. */
  factRegistry: FactRequirementRegistry
  /** Environment to fall back on when no envGuards are active. */
  defaultEnvironment?: Partial<Environment>
  /** State snapshots to fall back on when no state refs are active. */
  defaultStateSnapshots?: { id: string; kind: string; name?: string }[]
  /** Timeline mode to fall back on when no timeline refs are active. */
  defaultTimelineMode?: string
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Extraction Plan Output
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Result of the planning phase.
 *
 * Holds every extraction request needed to satisfy the execution IR, the same
 * requests grouped into environment-compatible batches, and any diagnostics
 * produced along the way.
 */
export interface ExtractionPlan {
  /** Identifier of this plan. */
  planId: string
  /** Extraction requests after deduplication. */
  requests: ExtractionRequest[]
  /** The requests grouped by compatible environment. */
  batches: ExtractionBatch[]
  /** Diagnostics emitted while planning. */
  diagnostics: Diagnostic[]
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Planner
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Module-level sequence number for plan ids. `createPlan` pre-increments it to
// build `plan_<n>`, so it is shared by every planner instance in this module.
let _planCounter = 0
|
||||
|
||||
export class ExtractionPlanner {
|
||||
constructor(private options: PlannerOptions) {}
|
||||
|
||||
/**
|
||||
* Build an extraction plan from an execution context.
|
||||
*
|
||||
* Steps:
|
||||
* 1. Analyze required facts across all clauses.
|
||||
* 2. Build selector resolution plan.
|
||||
* 3. Expand environment matrix from envGuards.
|
||||
* 4. Collect state snapshots.
|
||||
* 5. Collect timeline plan.
|
||||
* 6. Build one request per env case.
|
||||
* 7. Deduplicate and batch.
|
||||
* 8. Emit diagnostics for unsupported configurations.
|
||||
*/
|
||||
createPlan(context: ExecutionContext): ExtractionPlan {
|
||||
const diagnostics = new DiagnosticCollector()
|
||||
const { executionIr, semanticIr } = context
|
||||
|
||||
// Step 1: Union all required facts.
|
||||
const requiredFacts = analyzeRequiredFacts(
|
||||
executionIr,
|
||||
this.options.factRegistry,
|
||||
diagnostics
|
||||
)
|
||||
|
||||
// Step 2: Selector resolution plan.
|
||||
const selectorPlan = planSelectorResolution(semanticIr)
|
||||
for (const d of selectorPlan.diagnostics) {
|
||||
diagnostics.emit({
|
||||
code: d.code,
|
||||
severity: d.severity,
|
||||
message: d.message,
|
||||
category: 'resolution-error',
|
||||
})
|
||||
}
|
||||
|
||||
// Step 3: Environment matrix expansion.
|
||||
const envCases = expandEnvironmentMatrix(
|
||||
executionIr,
|
||||
context,
|
||||
semanticIr,
|
||||
this.options.defaultEnvironment,
|
||||
diagnostics
|
||||
)
|
||||
|
||||
// Step 4: State snapshot planning.
|
||||
const stateSnapshots = planStateSnapshots(
|
||||
executionIr,
|
||||
context,
|
||||
semanticIr,
|
||||
this.options.defaultStateSnapshots,
|
||||
diagnostics
|
||||
)
|
||||
|
||||
// Step 5: Timeline planning.
|
||||
const timeline = planTimeline(
|
||||
executionIr,
|
||||
context,
|
||||
semanticIr,
|
||||
this.options.defaultTimelineMode,
|
||||
diagnostics
|
||||
)
|
||||
|
||||
// Step 6: Build one extraction request per env case.
|
||||
const requests: ExtractionRequest[] = envCases.map((envCase, idx) => ({
|
||||
requestId: `req_${idx + 1}`,
|
||||
envCaseId: envCase.id,
|
||||
env: envCase.env,
|
||||
stateSnapshots,
|
||||
timeline,
|
||||
subjects: selectorPlan.subjects,
|
||||
requiredFacts,
|
||||
diagnostics: [],
|
||||
}))
|
||||
|
||||
// If there are no env cases, still produce one request with defaults.
|
||||
if (requests.length === 0) {
|
||||
requests.push({
|
||||
requestId: 'req_default',
|
||||
envCaseId: 'env_default',
|
||||
env: this.options.defaultEnvironment ?? {},
|
||||
stateSnapshots,
|
||||
timeline,
|
||||
subjects: selectorPlan.subjects,
|
||||
requiredFacts,
|
||||
diagnostics: [],
|
||||
})
|
||||
}
|
||||
|
||||
// Step 7: Deduplicate.
|
||||
const deduped = deduplicateRequests(requests)
|
||||
|
||||
// Step 8: Batch by compatible environment.
|
||||
const batches = batchRequests(deduped)
|
||||
|
||||
return {
|
||||
planId: `plan_${++_planCounter}`,
|
||||
requests: deduped,
|
||||
batches,
|
||||
diagnostics: diagnostics.diagnostics,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Environment Matrix Expansion
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** One environment case produced by the environment matrix expansion. */
interface EnvCaseEntry {
  /** Case id, e.g. `env_1` or `env_default`. */
  id: string
  /** Environment parameters for this case. */
  env: Partial<Environment>
}
|
||||
|
||||
/**
 * Serialize a value into a key that does not depend on object key order.
 *
 * Object keys are sorted and `undefined`-valued properties are dropped, so
 * `{ a: 1, b: 2 }` and `{ b: 2, a: 1 }` produce the same key.
 */
function canonicalEnvKey(value: unknown): string {
  if (Array.isArray(value)) {
    return `[${value.map((item) => canonicalEnvKey(item)).join(',')}]`
  }
  if (value !== null && typeof value === 'object') {
    const record = value as Record<string, unknown>
    const parts = Object.keys(record)
      .sort()
      .filter((key) => record[key] !== undefined)
      .map((key) => `${JSON.stringify(key)}:${canonicalEnvKey(record[key])}`)
    return `{${parts.join(',')}}`
  }
  // JSON.stringify yields undefined for values it cannot encode.
  return JSON.stringify(value) ?? 'null'
}

/**
 * Collect every environment case referenced by any clause in the execution IR.
 *
 * For V1 this walks clauseEnvGuard indices, looks them up in the semantic IR,
 * and unions all normalizedCases, dropping structurally equal duplicates.
 * Guards without a `normalizedCases` array contribute no cases.
 *
 * If no case was collected and `defaultEnv` is provided, a single
 * `env_default` case is returned; without a default the result is empty.
 *
 * @param executionIr Supplies `clauseCount` and the per-clause `clauseEnvGuard` indices.
 * @param context Supplies `envGuardIds`, mapping guard indices to guard ids.
 * @param semanticIr Supplies the `envGuards` lookup by id.
 * @param defaultEnv Fallback environment used when no case was collected.
 * @param diagnostics Receives a warning for unmapped guard indices and an
 *   error for guard ids missing from the semantic IR.
 * @returns Unique environment cases in first-seen order, with ids `env_1`, `env_2`, ...
 */
function expandEnvironmentMatrix(
  executionIr: ExecutionIr,
  context: ExecutionContext,
  semanticIr: SemanticIr,
  defaultEnv: Partial<Environment> | undefined,
  diagnostics: DiagnosticCollector
): EnvCaseEntry[] {
  const seenKeys = new Set<string>()
  const cases: EnvCaseEntry[] = []

  for (let clauseIndex = 0; clauseIndex < executionIr.clauseCount; clauseIndex++) {
    const guardIndex = executionIr.clauseEnvGuard[clauseIndex]
    const guardId = context.envGuardIds[guardIndex]
    if (!guardId) {
      diagnostics.emit({
        code: 'IMH_EXTRACTOR_MISSING_ENV_GUARD',
        severity: 'warning',
        category: 'extraction-error',
        message: `Clause ${clauseIndex} references envGuard index ${guardIndex} which has no id mapping`,
        clauseIndex,
      })
      continue
    }

    const guard = semanticIr.envGuards.get(guardId)
    if (!guard) {
      diagnostics.emit({
        code: 'IMH_EXTRACTOR_UNRESOLVED_ENV_GUARD',
        severity: 'error',
        category: 'resolution-error',
        message: `EnvGuard ${guardId} not found in semantic IR`,
        clauseIndex,
      })
      continue
    }

    // The guard may or may not carry normalizedCases; read the field
    // loosely and verify its shape at runtime before iterating.
    const normalizedCases = (guard as unknown as Record<string, unknown>).normalizedCases
    if (!Array.isArray(normalizedCases)) continue

    for (const env of normalizedCases as Array<Partial<Environment>>) {
      const key = canonicalEnvKey(env)
      if (seenKeys.has(key)) continue
      seenKeys.add(key)
      cases.push({ id: `env_${cases.length + 1}`, env })
    }
  }

  if (cases.length === 0 && defaultEnv) {
    cases.push({ id: 'env_default', env: defaultEnv })
  }

  return cases
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// State Snapshot Planning
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** State kinds that do not trigger an "unsupported state" warning. */
const SUPPORTED_STATE_KINDS: ReadonlySet<string> = new Set([
  'default',
  'hover',
  'focus',
  'focusVisible',
  'active',
  'disabled',
  'visited',
])

/**
 * Collect all state snapshots referenced by clauses.
 *
 * Each referenced state id is processed once. If a clause references a state
 * kind outside the supported set (default, hover, focus, focusVisible,
 * active, disabled, visited), a warning is emitted but the state is still
 * included so the solver can decide what to do.
 *
 * When no snapshot could be collected from the clauses, the provided defaults
 * are returned instead (deduplicated by id).
 *
 * @param executionIr Supplies `clauseCount` and the per-clause `clauseState` indices.
 * @param context Supplies `stateIds`, mapping state indices to state ids.
 * @param semanticIr Supplies the `states` lookup by id.
 * @param defaults Fallback snapshots used when no clause yields a snapshot.
 * @param diagnostics Receives warnings for unmapped indices and unsupported
 *   kinds, and errors for state ids missing from the semantic IR.
 * @returns Snapshots in first-seen order.
 */
function planStateSnapshots(
  executionIr: ExecutionIr,
  context: ExecutionContext,
  semanticIr: SemanticIr,
  defaults: Array<{ id: string; kind: string; name?: string }> | undefined,
  diagnostics: DiagnosticCollector
): Array<{ id: string; kind: string; name?: string }> {
  const visitedIds = new Set<string>()
  const snapshots: Array<{ id: string; kind: string; name?: string }> = []

  for (let clauseIndex = 0; clauseIndex < executionIr.clauseCount; clauseIndex++) {
    const stateIndex = executionIr.clauseState[clauseIndex]
    const stateId = context.stateIds[stateIndex]
    if (!stateId) {
      diagnostics.emit({
        code: 'IMH_EXTRACTOR_MISSING_STATE',
        severity: 'warning',
        category: 'extraction-error',
        message: `Clause ${clauseIndex} references state index ${stateIndex} which has no id mapping`,
        clauseIndex,
      })
      continue
    }

    // Marking the id before lookup also limits the "unresolved" error to one
    // report per state id.
    if (visitedIds.has(stateId)) continue
    visitedIds.add(stateId)

    const state = semanticIr.states.get(stateId)
    if (!state) {
      diagnostics.emit({
        code: 'IMH_EXTRACTOR_UNRESOLVED_STATE',
        severity: 'error',
        category: 'resolution-error',
        message: `State ${stateId} not found in semantic IR`,
        clauseIndex,
      })
      continue
    }

    const kind = state.kind ?? 'default'
    if (!SUPPORTED_STATE_KINDS.has(kind)) {
      diagnostics.emit({
        code: 'IMH_EXTRACTOR_UNSUPPORTED_STATE',
        severity: 'warning',
        category: 'extraction-error',
        message: `State kind "${kind}" is not fully supported in V1`,
        clauseIndex,
      })
    }

    // `name` is read loosely and only forwarded when it is a string.
    const rawName = (state as unknown as Record<string, unknown>).name
    snapshots.push({
      id: stateId,
      kind,
      name: typeof rawName === 'string' ? rawName : undefined,
    })
  }

  if (snapshots.length === 0 && defaults) {
    // Deduplicate defaults among themselves only: ids that were referenced by
    // clauses but failed to resolve must not suppress a matching default.
    const defaultIds = new Set<string>()
    for (const snapshot of defaults) {
      if (defaultIds.has(snapshot.id)) continue
      defaultIds.add(snapshot.id)
      snapshots.push(snapshot)
    }
  }

  return snapshots
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Timeline Planning
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Determine the timeline sampling plan.
 *
 * V1 is static-only. If any clause references a non-static timeline, a
 * single warning listing every mode seen is emitted and the timeline mode
 * falls back to 'static'. Clauses whose timeline index has no id mapping, or
 * whose timeline id is absent from the semantic IR, are skipped silently.
 *
 * @param executionIr Supplies `clauseCount` and the per-clause `clauseTimeline` indices.
 * @param context Supplies `timelineIds`, mapping timeline indices to ids.
 * @param semanticIr Supplies the `timelines` lookup by id.
 * @param defaultMode Accepted for interface compatibility; currently ignored
 *   because the result is always static.
 * @param diagnostics Receives the non-static fallback warning.
 * @returns Always `{ mode: 'static' }`.
 */
function planTimeline(
  executionIr: ExecutionIr,
  context: ExecutionContext,
  semanticIr: SemanticIr,
  defaultMode: string | undefined,
  diagnostics: DiagnosticCollector
): { mode: string; samples?: number[] } {
  const seenModes = new Set<string>()

  for (let clauseIndex = 0; clauseIndex < executionIr.clauseCount; clauseIndex++) {
    const timelineIndex = executionIr.clauseTimeline[clauseIndex]
    const timelineId = context.timelineIds[timelineIndex]
    if (!timelineId) continue

    const timeline = semanticIr.timelines.get(timelineId)
    if (!timeline) continue

    // A timeline without an explicit mode counts as static.
    seenModes.add(timeline.mode ?? 'static')
  }

  const modes = Array.from(seenModes)
  if (modes.some((mode) => mode !== 'static')) {
    diagnostics.emit({
      code: 'IMH_EXTRACTOR_NON_STATIC_TIMELINE',
      severity: 'warning',
      category: 'extraction-error',
      message:
        'Non-static timelines detected; V1 falls back to static extraction. ' +
        `Modes seen: ${modes.join(', ')}`,
    })
  }

  // For V1 we always return static regardless of what the clauses asked for.
  return { mode: 'static' }
}
|
||||
@@ -0,0 +1,203 @@
|
||||
/**
|
||||
* Fact requirement analysis and registry.
|
||||
*
|
||||
* Maps clause families to the browser facts they need.
|
||||
* Analyzes execution IR to compute the union of required facts.
|
||||
*/
|
||||
|
||||
import type { ExecutionIr } from 'imhotep-core'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Fact Families
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Well-known fact families the extractor can provide. */
export const FACT_FAMILY = {
  GEOMETRY: 'geometry',
  FRAGMENTS: 'fragments',
  STYLES: 'styles',
  TOPOLOGY: 'topology',
  TEXT: 'text',
  SCROLL: 'scroll',
  CLIPPING: 'clipping',
  PAINT: 'paint',
  VISIBILITY: 'visibility',
  TRANSFORMS: 'transforms',
} as const

/** Union of the string values of {@link FACT_FAMILY}. */
export type FactFamily = (typeof FACT_FAMILY)[keyof typeof FACT_FAMILY]
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Requirement Descriptors
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** A single fact requirement for a clause family. */
export interface FactRequirement {
  /** Fact family this requirement refers to. */
  family: FactFamily
  /** For style or topology families, which specific properties. */
  properties?: string[]
}
|
||||
|
||||
/**
 * Union of all required facts across an execution plan.
 *
 * Boolean fields flag a whole fact family as needed; `styles` and `topology`
 * hold the specific property names requested for those families.
 */
export interface RequiredFacts {
  geometry: boolean
  fragments: boolean
  /** Style property names that must be extracted. */
  styles: Set<string>
  /** Topology property names that must be extracted. */
  topology: Set<string>
  text: boolean
  scroll: boolean
  clipping: boolean
  paint: boolean
  visibility: boolean
  transforms: boolean
}
|
||||
|
||||
/**
 * Create an empty RequiredFacts set.
 *
 * @returns A fresh accumulator with every boolean family `false` and new,
 *   empty `styles` / `topology` sets (not shared between calls).
 */
export function createEmptyRequiredFacts(): RequiredFacts {
  return {
    geometry: false,
    fragments: false,
    styles: new Set<string>(),
    topology: new Set<string>(),
    text: false,
    scroll: false,
    clipping: false,
    paint: false,
    visibility: false,
    transforms: false,
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Diagnostic Shape
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** A single diagnostic message produced during planning. */
export interface Diagnostic {
  /** Machine-readable code, e.g. `IMH_EXTRACTOR_UNSUPPORTED_CLAUSE_TYPE`. */
  code: string
  severity: 'error' | 'warning' | 'info'
  /** Human-readable description. */
  message: string
  /** Optional grouping label, e.g. `extraction-error` or `resolution-error`. */
  category?: string
  /** Index of the clause the diagnostic refers to, when applicable. */
  clauseIndex?: number
  clauseId?: string
}
|
||||
|
||||
/** Mutable collector for diagnostics emitted during planning. */
export class DiagnosticCollector {
  /** Diagnostics in the order they were emitted. */
  diagnostics: Diagnostic[] = []

  /** Append a diagnostic to the collection. */
  emit(d: Diagnostic): void {
    this.diagnostics.push(d)
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Fact Requirement Registry
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Registry that maps numeric clause types to their required facts.
 *
 * This is populated at system init time by registering each clause family
 * (e.g. relation.leftOf, size.width, topology.clipChain).
 */
export class FactRequirementRegistry {
  private readonly entries = new Map<number, FactRequirement[]>()

  /**
   * Declare the facts needed by a given clause type id.
   * Registering the same clause type again replaces the earlier entry.
   */
  register(clauseType: number, requirements: FactRequirement[]): void {
    this.entries.set(clauseType, requirements)
  }

  /** Look up requirements for a clause type, or undefined if unsupported. */
  resolve(clauseType: number): FactRequirement[] | undefined {
    return this.entries.get(clauseType)
  }

  /** True if the clause type has been registered. */
  has(clauseType: number): boolean {
    return this.entries.has(clauseType)
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Fact Analysis
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Walk every clause in the execution IR and union all required facts.
 *
 * If a clause type is not registered, an extraction-error diagnostic is
 * emitted and the clause is skipped.
 *
 * @param executionIr Supplies `clauseCount` and the per-clause `clauseType` ids.
 * @param registry Maps clause type ids to their fact requirements.
 * @param diagnostics Receives one error per clause with an unregistered type.
 * @returns The accumulated fact requirements of all registered clauses.
 */
export function analyzeRequiredFacts(
  executionIr: ExecutionIr,
  registry: FactRequirementRegistry,
  diagnostics: DiagnosticCollector
): RequiredFacts {
  const facts = createEmptyRequiredFacts()

  for (let clauseIndex = 0; clauseIndex < executionIr.clauseCount; clauseIndex++) {
    const clauseType = executionIr.clauseType[clauseIndex]
    const requirements = registry.resolve(clauseType)

    if (!requirements) {
      diagnostics.emit({
        code: 'IMH_EXTRACTOR_UNSUPPORTED_CLAUSE_TYPE',
        severity: 'error',
        category: 'extraction-error',
        message: `Clause type ${clauseType} has no registered fact requirements`,
        clauseIndex,
      })
      continue
    }

    for (const requirement of requirements) {
      mergeRequirement(facts, requirement)
    }
  }

  return facts
}
|
||||
|
||||
/**
 * Merge a single FactRequirement into a RequiredFacts accumulator.
 *
 * Style and topology requirements add their listed properties to the
 * matching set (nothing is recorded when no properties are listed); every
 * other known family sets its boolean flag. Unknown families are ignored.
 */
function mergeRequirement(acc: RequiredFacts, req: FactRequirement): void {
  switch (req.family) {
    case FACT_FAMILY.STYLES:
      for (const property of req.properties ?? []) acc.styles.add(property)
      break
    case FACT_FAMILY.TOPOLOGY:
      for (const property of req.properties ?? []) acc.topology.add(property)
      break
    case FACT_FAMILY.GEOMETRY:
    case FACT_FAMILY.FRAGMENTS:
    case FACT_FAMILY.TEXT:
    case FACT_FAMILY.SCROLL:
    case FACT_FAMILY.CLIPPING:
    case FACT_FAMILY.PAINT:
    case FACT_FAMILY.VISIBILITY:
    case FACT_FAMILY.TRANSFORMS:
      // Each of these family values is also the name of a boolean flag on
      // RequiredFacts, so the family string indexes the accumulator directly.
      acc[req.family] = true
      break
    default:
      // Unknown fact families are ignored; caller may choose to warn.
      break
  }
}
|
||||
@@ -0,0 +1,85 @@
|
||||
/**
|
||||
* Selector resolution planning.
|
||||
*
|
||||
* Turns semantic IR subject definitions into a flat list of selector plans
|
||||
* that the extractor runtime will resolve against the DOM.
|
||||
*/
|
||||
|
||||
import type { SemanticIr } from 'imhotep-core'
|
||||
|
||||
/** Fields of a subject definition that selector planning reads. */
interface SubjectDef {
  id: string
  /** Selector string; must be non-blank to pass validation. */
  selector: string
  /** Subject kind; planning falls back to `element` when absent. */
  kind?: string
  quantifier?: string
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Selector Plan Types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** A planned selector resolution for a single subject. */
export interface SelectorPlan {
  /** Stable subject id from semantic IR. */
  id: string
  /** CSS selector string to resolve. */
  selector: string
  /** Subject kind (element, pseudoElement, textRange, etc). */
  kind: string
}
|
||||
|
||||
/** Result of planning selector resolution for an entire semantic IR. */
export interface SelectorResolutionPlan {
  /** One entry per subject that passed validation. */
  subjects: SelectorPlan[]
  /** Problems found while validating subjects. */
  diagnostics: Array<{
    code: string
    severity: 'error' | 'warning'
    message: string
    /** Id of the subject the diagnostic refers to, when known. */
    subjectId?: string
  }>
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Selector Resolution Planner
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Build a selector resolution plan from semantic IR subjects.
 *
 * Every subject in the semantic IR that passes validation becomes one
 * SelectorPlan entry; subjects that fail validation are reported in
 * `diagnostics` and left out of `subjects`.
 * Duplicate selectors are preserved with distinct ids so that the
 * solver can still distinguish them by reference.
 *
 * @param semanticIr Supplies the `subjects` collection, iterated as `[id, subject]` pairs.
 * @returns Planned subjects in iteration order plus validation diagnostics.
 */
export function planSelectorResolution(semanticIr: SemanticIr): SelectorResolutionPlan {
  const subjects: SelectorPlan[] = []
  const diagnostics: SelectorResolutionPlan['diagnostics'] = []

  for (const [id, subject] of semanticIr.subjects) {
    const problem = validateSubject(subject)
    if (problem) {
      diagnostics.push(problem)
      continue
    }

    // Subjects without an explicit kind are planned as plain elements.
    subjects.push({ id, selector: subject.selector, kind: subject.kind ?? 'element' })
  }

  return { subjects, diagnostics }
}
|
||||
|
||||
/**
 * Validate a single subject definition. Returns a diagnostic if invalid.
 *
 * A subject is invalid when its selector is missing, is not a string, or
 * contains only whitespace.
 *
 * @param subject Subject definition to check.
 * @returns An `IMH_EXTRACTOR_EMPTY_SELECTOR` error diagnostic, or `undefined`
 *   when the subject is valid.
 */
function validateSubject(subject: SubjectDef): SelectorResolutionPlan['diagnostics'][number] | undefined {
  // Read loosely: the value comes from IR data, so a non-string selector is
  // reported as a diagnostic instead of throwing on `.trim()`.
  const selector: unknown = subject.selector
  if (typeof selector === 'string' && selector.trim().length > 0) {
    return undefined
  }

  return {
    code: 'IMH_EXTRACTOR_EMPTY_SELECTOR',
    severity: 'error',
    message: `Subject ${subject.id} has an empty selector`,
    subjectId: subject.id,
  }
}
|
||||
Reference in New Issue
Block a user