v1.1.0: pooled runtime, 959 tests, production hardening (0 squash)

This commit is contained in:
John Dvorak
2025-08-15 10:00:00 -07:00
commit 92deb689cd
321 changed files with 79170 additions and 0 deletions
+31
View File
@@ -0,0 +1,31 @@
{
"name": "imhotep-extractor",
"version": "1.0.0",
"type": "module",
"license": "MIT",
"repository": {
"type": "git",
"url": "https://github.com/anomalyco/imhotep.git"
},
"engines": {
"node": ">=18.0.0"
},
"main": "dist/index.js",
"types": "dist/index.d.ts",
"files": [
"dist"
],
"scripts": {
"build": "tsc",
"test": "node --test dist/**/*.test.js"
},
"exports": {
".": {
"types": "./dist/index.d.ts",
"default": "./dist/index.js"
}
},
"dependencies": {
"imhotep-core": "^1.0.0"
}
}
+144
View File
@@ -0,0 +1,144 @@
/**
* Plan deduplication and batching.
*
* Multiple environment cases, state snapshots, or timeline modes may produce
* identical extraction needs. This module collapses duplicate plans and groups
* compatible requests into batches so the runtime can issue bulk calls.
*/
import type { Environment, StateSnapshot, TimelineSnapshot } from 'imhotep-core'
import type { RequiredFacts, Diagnostic } from './requirements.js'
import type { SelectorPlan } from './selector.js'
// ---------------------------------------------------------------------------
// Extraction Request Shape
// ---------------------------------------------------------------------------
/**
* A single unit of work for the extractor runtime.
*
* One request corresponds to one env case + one state plan + one timeline plan.
* The runtime may still merge multiple requests into a single browser session
* if their env and subject sets are compatible.
*
* Duplicate detection (`deduplicateRequests` via `requestKey`) compares `env`,
* the ids in `stateSnapshots`, `timeline.mode`, the selectors in `subjects`,
* and `requiredFacts`. `requestId`, `envCaseId`, `timeline.samples`, and
* `diagnostics` are not compared.
*/
export interface ExtractionRequest {
/** Unique id for this request. Not part of the duplicate-detection key. */
requestId: string
/** Environment case id. Not part of the duplicate-detection key. */
envCaseId: string
/** Environment parameters (viewport, colorScheme, etc). Also the grouping criterion used by `batchRequests`. */
env: Partial<Environment>
/** State snapshots to capture under this environment. Only the `id` of each entry is compared during deduplication. */
stateSnapshots: Array<{ id: string; kind: string; name?: string }>
/** Timeline sampling plan. Only `mode` is compared during deduplication; `samples` is ignored there. */
timeline: { mode: string; samples?: number[] }
/** Subjects whose selectors must be resolved. Only the `selector` of each entry is compared during deduplication. */
subjects: SelectorPlan[]
/** Union of all facts needed by active clauses in this context. */
requiredFacts: RequiredFacts
/** Diagnostics accumulated while building this request. */
diagnostics: Diagnostic[]
}
// ---------------------------------------------------------------------------
// Deduplication
// ---------------------------------------------------------------------------
/**
 * Collapse extraction requests that describe the same work.
 *
 * Requests are compared through `requestKey`, i.e. by env, state snapshot ids,
 * timeline mode, subject selectors, and required facts. Of each group of
 * duplicates only the first request (and therefore its requestId) survives;
 * the relative order of the survivors matches the input order.
 *
 * @param requests Requests to filter; the array is not modified.
 * @returns A new array holding the first request seen for every distinct key.
 */
export function deduplicateRequests(requests: ExtractionRequest[]): ExtractionRequest[] {
  // A Map iterates in insertion order, so its values are the first
  // occurrences in the order they appeared in `requests`.
  const firstByKey = new Map<string, ExtractionRequest>()
  for (const candidate of requests) {
    const fingerprint = requestKey(candidate)
    if (!firstByKey.has(fingerprint)) {
      firstByKey.set(fingerprint, candidate)
    }
  }
  return Array.from(firstByKey.values())
}
/**
 * Build a deterministic string key for an extraction request.
 *
 * The key covers the env, the sorted state snapshot ids, the timeline mode,
 * the sorted subject selectors, and the serialized required facts.
 * `requestId`, `envCaseId`, `timeline.samples`, and `diagnostics` are not part
 * of the key.
 *
 * Object properties are written in sorted key order. Plain `JSON.stringify`
 * follows property insertion order, which would give two envs with the same
 * values but differently ordered properties different keys and keep them
 * from being recognized as duplicates.
 *
 * @param req Request to fingerprint; it is not modified.
 * @returns A string that is equal for requests considered duplicates.
 */
function requestKey(req: ExtractionRequest): string {
  // JSON.stringify replacer: rebuild every plain object with sorted keys.
  // Arrays and primitives pass through untouched so element order is kept.
  const withSortedKeys = (_key: string, value: unknown): unknown => {
    if (value === null || typeof value !== 'object' || Array.isArray(value)) {
      return value
    }
    const source = value as Record<string, unknown>
    const ordered: Record<string, unknown> = {}
    for (const name of Object.keys(source).sort()) {
      ordered[name] = source[name]
    }
    return ordered
  }
  return JSON.stringify(
    {
      env: req.env,
      // map() returns fresh arrays, so sorting does not touch the request.
      stateSnapshotIds: req.stateSnapshots.map((s) => s.id).sort(),
      timelineMode: req.timeline.mode,
      subjectSelectors: req.subjects.map((s) => s.selector).sort(),
      requiredFacts: serializeRequiredFacts(req.requiredFacts),
    },
    withSortedKeys
  )
}
/**
 * Convert RequiredFacts into a plain object that can be JSON-serialized.
 *
 * The `styles` and `topology` collections are copied into sorted arrays so
 * the result does not depend on their iteration order; every other listed
 * field is copied through unchanged.
 *
 * @param facts Facts to serialize; not modified.
 * @returns A plain object with a fixed property order.
 */
function serializeRequiredFacts(facts: RequiredFacts): Record<string, unknown> {
  const { geometry, fragments } = facts
  const sortedStyles = Array.from(facts.styles).sort()
  const sortedTopology = Array.from(facts.topology).sort()
  const { text, scroll, clipping, paint, visibility, transforms } = facts
  // Property order is kept stable because callers stringify this object.
  return {
    geometry,
    fragments,
    styles: sortedStyles,
    topology: sortedTopology,
    text,
    scroll,
    clipping,
    paint,
    visibility,
    transforms,
  }
}
// ---------------------------------------------------------------------------
// Batching
// ---------------------------------------------------------------------------
/**
* A batch is a group of extraction requests that can share a single browser
* session because their environments are compatible.
*
* For V1 the batching rule is simple: requests with the exact same env
* (viewport, colorScheme, pointer, etc) can be batched.
*
* Batches are produced by `batchRequests`, which compares environments by
* their JSON serialization.
*/
export interface ExtractionBatch {
/** Id for the batch. `batchRequests` assigns `batch_1`, `batch_2`, ... in the order each env first appears. */
batchId: string
/** Shared environment for every request in the batch. `batchRequests` rebuilds it from the JSON form of the env, so it is a copy rather than a request's own object. */
env: Partial<Environment>
/** Requests grouped into this batch, in input order. */
requests: ExtractionRequest[]
}
/**
 * Group deduplicated requests into batches by environment compatibility.
 *
 * Requests whose env objects hold the same values end up in the same batch.
 * Envs are compared through a JSON key written with sorted property names,
 * so the order in which an env's properties were inserted does not matter
 * (plain `JSON.stringify` follows insertion order and would split such
 * requests into separate batches).
 *
 * Batches are numbered `batch_1`, `batch_2`, ... in the order in which each
 * distinct env first appears; requests keep their input order inside a batch.
 *
 * @param requests Requests to group; the array and its elements are not modified.
 * @returns One batch per distinct env. `env` on a batch is a copy rebuilt
 *   from the canonical JSON key, not a reference to a request's env object.
 */
export function batchRequests(requests: ExtractionRequest[]): ExtractionBatch[] {
  // JSON.stringify replacer: rebuild every plain object with sorted keys.
  const withSortedKeys = (_key: string, value: unknown): unknown => {
    if (value === null || typeof value !== 'object' || Array.isArray(value)) {
      return value
    }
    const source = value as Record<string, unknown>
    const ordered: Record<string, unknown> = {}
    for (const name of Object.keys(source).sort()) {
      ordered[name] = source[name]
    }
    return ordered
  }

  const groups = new Map<string, ExtractionRequest[]>()
  for (const req of requests) {
    const envKey = JSON.stringify(req.env, withSortedKeys)
    const group = groups.get(envKey)
    if (group) {
      group.push(req)
    } else {
      groups.set(envKey, [req])
    }
  }

  // Map iteration follows insertion order, so batch numbering is stable.
  return Array.from(groups, ([envKey, group], position) => ({
    batchId: `batch_${position + 1}`,
    env: JSON.parse(envKey) as Partial<Environment>,
    requests: group,
  }))
}
+43
View File
@@ -0,0 +1,43 @@
/**
* imhotep-extractor — Extraction planning system.
*
* Compiles execution IR into precise browser fact requirements.
*
* Public surface:
* - ExtractionPlanner (planner.ts)
* - FactRequirementRegistry (requirements.ts)
* - planSelectorResolution (selector.ts)
* - deduplicateRequests, batchRequests (batching.ts)
*/
export {
ExtractionPlanner,
type ExecutionContext,
type PlannerOptions,
type ExtractionPlan,
} from './planner.js'
export {
FactRequirementRegistry,
analyzeRequiredFacts,
createEmptyRequiredFacts,
DiagnosticCollector,
FACT_FAMILY,
type FactRequirement,
type RequiredFacts,
type FactFamily,
type Diagnostic,
} from './requirements.js'
export {
planSelectorResolution,
type SelectorPlan,
type SelectorResolutionPlan,
} from './selector.js'
export {
deduplicateRequests,
batchRequests,
type ExtractionRequest,
type ExtractionBatch,
} from './batching.js'
@@ -0,0 +1,581 @@
/**
* Unit tests for the extraction planning system.
*
* Covers:
* - Fact requirement analysis and registry
* - Plan deduplication and batching
* - Environment matrix expansion
* - Unsupported fact diagnostics
*/
import { describe, it } from 'node:test'
import assert from 'node:assert'
import {
FactRequirementRegistry,
analyzeRequiredFacts,
createEmptyRequiredFacts,
DiagnosticCollector,
FACT_FAMILY,
} from './requirements.js'
import { deduplicateRequests, batchRequests } from './batching.js'
import type { ExtractionRequest } from './batching.js'
import { ExtractionPlanner } from './planner.js'
import type { ExecutionContext } from './planner.js'
import type { ExecutionIr, SemanticIr } from 'imhotep-core'
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
 * Create a minimal ExecutionIr for tests.
 *
 * `clauseType` is filled from `clauseTypes`; every other column is a
 * zero-initialized typed array with one slot per clause.
 */
function makeExecutionIr(clauseTypes: number[]): ExecutionIr {
  const clauseCount = clauseTypes.length
  // Each column needs its own buffer, hence factories instead of shared instances.
  const zeroedU32 = (): Uint32Array => new Uint32Array(clauseCount)
  const zeroedF64 = (): Float64Array => new Float64Array(clauseCount)
  return {
    clauseCount,
    clauseType: Uint16Array.from(clauseTypes),
    clauseSubject: zeroedU32(),
    clauseReference: zeroedU32(),
    clauseFrame: zeroedU32(),
    clauseState: zeroedU32(),
    clauseTimeline: zeroedU32(),
    clauseTolerance: zeroedU32(),
    clauseEnvGuard: zeroedU32(),
    clauseArg0: zeroedF64(),
    clauseArg1: zeroedF64(),
    clauseFlags: zeroedU32(),
    clauseOrigin: zeroedU32(),
  }
}
/**
 * Create a minimal SemanticIr for tests.
 *
 * All collections start out as empty maps. The optional `subjects`,
 * `envGuards`, `states`, and `timelines` entries are stored under their `id`
 * as-is (cast to the core types), so fixtures only need the fields a test
 * actually reads.
 */
function makeSemanticIr(opts: {
  envGuards?: Array<{ id: string; normalizedCases?: Array<Record<string, unknown>> }>
  states?: Array<{ id: string; kind: string; name?: string }>
  timelines?: Array<{ id: string; mode: string }>
  subjects?: Array<{ id: string; selector: string; kind?: string }>
}): SemanticIr {
  const ir: SemanticIr = {
    subjects: new Map(),
    frames: new Map(),
    states: new Map(),
    timelines: new Map(),
    tolerances: new Map(),
    envGuards: new Map(),
    clauses: new Map(),
    groups: new Map(),
    diagnosticMetadata: new Map(),
  }
  ;(opts.subjects ?? []).forEach((subject) => {
    ir.subjects.set(subject.id, subject as unknown as import('imhotep-core').SemanticSubject)
  })
  ;(opts.envGuards ?? []).forEach((guard) => {
    ir.envGuards.set(guard.id, guard as unknown as import('imhotep-core').SemanticEnvGuard)
  })
  ;(opts.states ?? []).forEach((state) => {
    ir.states.set(state.id, state as unknown as import('imhotep-core').SemanticState)
  })
  ;(opts.timelines ?? []).forEach((timeline) => {
    ir.timelines.set(timeline.id, timeline as unknown as import('imhotep-core').SemanticTimeline)
  })
  return ir
}
/**
 * Wrap the given IRs in an ExecutionContext for tests.
 *
 * Any id table missing from `indexMaps` defaults to an array of empty
 * strings with one entry per clause.
 */
function makeExecutionContext(
  executionIr: ExecutionIr,
  semanticIr: SemanticIr,
  indexMaps?: Partial<{
    subjectIds: string[]
    frameIds: string[]
    stateIds: string[]
    timelineIds: string[]
    envGuardIds: string[]
    toleranceIds: string[]
  }>
): ExecutionContext {
  // A fresh array per table so the defaults never alias each other.
  const blankIds = () => Array(executionIr.clauseCount).fill('')
  const given = indexMaps
  return {
    executionIr,
    semanticIr,
    subjectIds: given?.subjectIds ?? blankIds(),
    frameIds: given?.frameIds ?? blankIds(),
    stateIds: given?.stateIds ?? blankIds(),
    timelineIds: given?.timelineIds ?? blankIds(),
    envGuardIds: given?.envGuardIds ?? blankIds(),
    toleranceIds: given?.toleranceIds ?? blankIds(),
  }
}
// ---------------------------------------------------------------------------
// Fact Requirement Analysis
// ---------------------------------------------------------------------------
describe('analyzeRequiredFacts', () => {
  /**
   * Runs the analysis over clauses of the given types against a fresh
   * registry, optionally prepared by `configure`, and returns the facts
   * together with the diagnostics that were reported.
   */
  const analyze = (
    clauseTypes: number[],
    configure: (registry: FactRequirementRegistry) => void = () => {}
  ) => {
    const registry = new FactRequirementRegistry()
    configure(registry)
    const collector = new DiagnosticCollector()
    const facts = analyzeRequiredFacts(makeExecutionIr(clauseTypes), registry, collector)
    return { facts, reported: collector.diagnostics }
  }

  it('returns empty facts when there are no clauses', () => {
    const { facts, reported } = analyze([])

    assert.strictEqual(facts.geometry, false)
    assert.strictEqual(facts.text, false)
    assert.strictEqual(reported.length, 0)
  })

  it('resolves facts for a single registered clause type', () => {
    const { facts, reported } = analyze([1], (registry) => {
      registry.register(1, [
        { family: FACT_FAMILY.GEOMETRY },
        { family: FACT_FAMILY.STYLES, properties: ['position'] },
      ])
    })

    assert.strictEqual(facts.geometry, true)
    assert.strictEqual(facts.styles.has('position'), true)
    assert.strictEqual(reported.length, 0)
  })

  it('unions facts across multiple clauses', () => {
    const { facts, reported } = analyze([1, 2], (registry) => {
      registry.register(1, [{ family: FACT_FAMILY.GEOMETRY }])
      registry.register(2, [
        { family: FACT_FAMILY.TEXT },
        { family: FACT_FAMILY.STYLES, properties: ['z-index'] },
      ])
    })

    assert.strictEqual(facts.geometry, true)
    assert.strictEqual(facts.text, true)
    assert.strictEqual(facts.styles.has('z-index'), true)
    assert.strictEqual(reported.length, 0)
  })

  it('deduplicates style properties across clauses', () => {
    // The same clause type twice must not double-count its style properties.
    const { facts } = analyze([1, 1], (registry) => {
      registry.register(1, [
        { family: FACT_FAMILY.STYLES, properties: ['position', 'overflow'] },
      ])
    })

    assert.strictEqual(facts.styles.size, 2)
    assert.strictEqual(facts.styles.has('position'), true)
    assert.strictEqual(facts.styles.has('overflow'), true)
  })

  it('emits a diagnostic for unsupported clause types', () => {
    // Clause type 99 is never registered.
    const { facts, reported } = analyze([99])

    assert.strictEqual(facts.geometry, false)
    assert.strictEqual(reported.length, 1)
    assert.strictEqual(reported[0].code, 'IMH_EXTRACTOR_UNSUPPORTED_CLAUSE_TYPE')
    assert.strictEqual(reported[0].clauseIndex, 0)
  })

  it('collects multiple unsupported clause diagnostics', () => {
    const { reported } = analyze([99, 100])

    assert.strictEqual(reported.length, 2)
    assert.strictEqual(reported[0].clauseIndex, 0)
    assert.strictEqual(reported[1].clauseIndex, 1)
  })
})
// ---------------------------------------------------------------------------
// Plan Deduplication
// ---------------------------------------------------------------------------
describe('deduplicateRequests', () => {
  /** Baseline request; individual tests override only the fields they vary. */
  const request = (overrides: Partial<ExtractionRequest> = {}): ExtractionRequest => ({
    requestId: 'r1',
    envCaseId: 'e1',
    env: { viewportWidth: 1000 },
    stateSnapshots: [{ id: 's1', kind: 'default' }],
    timeline: { mode: 'static' },
    subjects: [{ id: 'sub1', selector: '.a', kind: 'element' }],
    requiredFacts: createEmptyRequiredFacts(),
    diagnostics: [],
    ...overrides,
  })

  it('returns identical requests unchanged when there is only one', () => {
    const result = deduplicateRequests([request()])

    assert.strictEqual(result.length, 1)
    assert.strictEqual(result[0].requestId, 'r1')
  })

  it('removes exact duplicate requests', () => {
    // Only the ids differ, and ids are not part of the comparison.
    const original = request()
    const duplicate = request({ requestId: 'r2', envCaseId: 'e2' })

    const result = deduplicateRequests([original, duplicate])

    assert.strictEqual(result.length, 1)
    assert.strictEqual(result[0].requestId, 'r1')
  })

  it('keeps requests that differ in env', () => {
    const narrow = request()
    const wide = request({
      requestId: 'r2',
      envCaseId: 'e2',
      env: { viewportWidth: 2000 },
    })

    const result = deduplicateRequests([narrow, wide])

    assert.strictEqual(result.length, 2)
  })

  it('keeps requests that differ in required facts', () => {
    const needsGeometry = request({
      requiredFacts: { ...createEmptyRequiredFacts(), geometry: true },
    })
    const needsText = request({
      requestId: 'r2',
      requiredFacts: { ...createEmptyRequiredFacts(), text: true },
    })

    const result = deduplicateRequests([needsGeometry, needsText])

    assert.strictEqual(result.length, 2)
  })
})
// ---------------------------------------------------------------------------
// Batching
// ---------------------------------------------------------------------------
describe('batchRequests', () => {
  /** Request with empty snapshots and subjects; tests override what they vary. */
  const request = (overrides: Partial<ExtractionRequest> = {}): ExtractionRequest => ({
    requestId: 'r1',
    envCaseId: 'e1',
    env: { viewportWidth: 1000 },
    stateSnapshots: [],
    timeline: { mode: 'static' },
    subjects: [],
    requiredFacts: createEmptyRequiredFacts(),
    diagnostics: [],
    ...overrides,
  })

  it('puts a single request into one batch', () => {
    const batches = batchRequests([request()])

    assert.strictEqual(batches.length, 1)
    assert.strictEqual(batches[0].requests.length, 1)
  })

  it('groups requests with identical env into one batch', () => {
    const env = { viewportWidth: 1000, colorScheme: 'light' as const }
    const first = request({ env })
    // Differs in state snapshots only; the env is what decides the batch.
    const second = request({
      requestId: 'r2',
      envCaseId: 'e2',
      env,
      stateSnapshots: [{ id: 's2', kind: 'hover' }],
    })

    const batches = batchRequests([first, second])

    assert.strictEqual(batches.length, 1)
    assert.strictEqual(batches[0].requests.length, 2)
  })

  it('splits requests with different env into separate batches', () => {
    const narrow = request()
    const wide = request({
      requestId: 'r2',
      envCaseId: 'e2',
      env: { viewportWidth: 2000 },
    })

    const batches = batchRequests([narrow, wide])

    assert.strictEqual(batches.length, 2)
  })
})
// ---------------------------------------------------------------------------
// ExtractionPlanner — Integration
// ---------------------------------------------------------------------------
describe('ExtractionPlanner', () => {
  /** Registry in which clause type 1 requires geometry facts. */
  const geometryRegistry = (): FactRequirementRegistry => {
    const registry = new FactRequirementRegistry()
    registry.register(1, [{ family: FACT_FAMILY.GEOMETRY }])
    return registry
  }

  /** Wraps the IRs in an execution context and runs the planner on it. */
  const planFor = (
    planner: ExtractionPlanner,
    executionIr: ExecutionIr,
    semantic: Parameters<typeof makeSemanticIr>[0] = {},
    indexMaps?: Parameters<typeof makeExecutionContext>[2]
  ) =>
    planner.createPlan(
      makeExecutionContext(executionIr, makeSemanticIr(semantic), indexMaps)
    )

  it('produces a single request with defaults when IR is empty', () => {
    const planner = new ExtractionPlanner({
      factRegistry: new FactRequirementRegistry(),
      defaultEnvironment: { viewportWidth: 1280, viewportHeight: 720 },
      defaultStateSnapshots: [{ id: 'default', kind: 'default' }],
    })

    const plan = planFor(planner, makeExecutionIr([]))

    assert.strictEqual(plan.requests.length, 1)
    assert.strictEqual(plan.requests[0].envCaseId, 'env_default')
    assert.strictEqual(plan.requests[0].env.viewportWidth, 1280)
    assert.strictEqual(plan.batches.length, 1)
    assert.strictEqual(plan.diagnostics.length, 0)
  })

  it('expands environment matrix from envGuards', () => {
    const planner = new ExtractionPlanner({
      factRegistry: geometryRegistry(),
      defaultEnvironment: { viewportWidth: 1280 },
    })
    const executionIr = makeExecutionIr([1])
    // Clause 0 points at entry 0 of envGuardIds, i.e. 'guard_1'.
    executionIr.clauseEnvGuard[0] = 0

    const plan = planFor(
      planner,
      executionIr,
      {
        envGuards: [
          {
            id: 'guard_1',
            normalizedCases: [
              { viewportWidth: 320, colorScheme: 'light' },
              { viewportWidth: 768, colorScheme: 'dark' },
            ],
          },
        ],
      },
      { envGuardIds: ['guard_1'] }
    )

    assert.strictEqual(plan.requests.length, 2)
    assert.strictEqual(plan.requests[0].env.viewportWidth, 320)
    assert.strictEqual(plan.requests[1].env.viewportWidth, 768)
    assert.strictEqual(plan.batches.length, 2)
  })

  it('deduplicates identical env cases from multiple guards', () => {
    const planner = new ExtractionPlanner({ factRegistry: geometryRegistry() })
    const executionIr = makeExecutionIr([1, 1])
    executionIr.clauseEnvGuard[0] = 0
    executionIr.clauseEnvGuard[1] = 1

    const plan = planFor(
      planner,
      executionIr,
      {
        envGuards: [
          { id: 'guard_a', normalizedCases: [{ viewportWidth: 1000 }] },
          { id: 'guard_b', normalizedCases: [{ viewportWidth: 1000 }] },
        ],
      },
      { envGuardIds: ['guard_a', 'guard_b'] }
    )

    // Both guards describe the same env case, so a single request remains.
    assert.strictEqual(plan.requests.length, 1)
    assert.strictEqual(plan.batches.length, 1)
  })

  it('emits diagnostics for unsupported clause types', () => {
    const planner = new ExtractionPlanner({
      factRegistry: new FactRequirementRegistry(),
      defaultEnvironment: { viewportWidth: 1280 },
    })

    // Clause type 42 is not registered.
    const plan = planFor(planner, makeExecutionIr([42]))

    const codes = plan.diagnostics.map((d) => d.code)
    assert.ok(codes.includes('IMH_EXTRACTOR_UNSUPPORTED_CLAUSE_TYPE'))
  })

  it('plans state snapshots from semantic IR', () => {
    const planner = new ExtractionPlanner({ factRegistry: geometryRegistry() })
    const executionIr = makeExecutionIr([1])
    executionIr.clauseState[0] = 0

    const plan = planFor(
      planner,
      executionIr,
      {
        states: [
          { id: 'state_hover', kind: 'hover', name: 'hover' },
          { id: 'state_default', kind: 'default' },
        ],
      },
      { stateIds: ['state_hover'] }
    )

    // Only the state referenced by a clause is planned.
    assert.strictEqual(plan.requests[0].stateSnapshots.length, 1)
    assert.strictEqual(plan.requests[0].stateSnapshots[0].kind, 'hover')
  })

  it('warns on non-static timelines but falls back to static', () => {
    const planner = new ExtractionPlanner({ factRegistry: geometryRegistry() })
    const executionIr = makeExecutionIr([1])
    executionIr.clauseTimeline[0] = 0

    const plan = planFor(
      planner,
      executionIr,
      { timelines: [{ id: 'tl_keyframes', mode: 'keyframes' }] },
      { timelineIds: ['tl_keyframes'] }
    )

    const codes = plan.diagnostics.map((d) => d.code)
    assert.ok(codes.includes('IMH_EXTRACTOR_NON_STATIC_TIMELINE'))
    assert.strictEqual(plan.requests[0].timeline.mode, 'static')
  })

  it('includes selector plans from semantic IR subjects', () => {
    const planner = new ExtractionPlanner({ factRegistry: geometryRegistry() })

    const plan = planFor(planner, makeExecutionIr([1]), {
      subjects: [
        { id: 'sub_1', selector: '.button', kind: 'element' },
        { id: 'sub_2', selector: '.modal', kind: 'element' },
      ],
    })

    const [request] = plan.requests
    assert.strictEqual(request.subjects.length, 2)
    assert.strictEqual(request.subjects[0].selector, '.button')
    assert.strictEqual(request.subjects[1].selector, '.modal')
  })
})
+415
View File
@@ -0,0 +1,415 @@
/**
* Extraction plan generation from execution IR.
*
* This is the main orchestrator for WBS 3: Fact Planning and Extraction Planning.
* It consumes execution IR + semantic IR and produces deduplicated, batched
* extraction requests that tell the runtime exactly which browser facts to capture.
*/
import type {
ExecutionIr,
SemanticIr,
Environment,
StateSnapshot,
TimelineSnapshot,
} from 'imhotep-core'
import type {
FactRequirementRegistry,
RequiredFacts,
Diagnostic,
} from './requirements.js'
import { analyzeRequiredFacts, DiagnosticCollector } from './requirements.js'
import type { SelectorPlan, SelectorResolutionPlan } from './selector.js'
import { planSelectorResolution } from './selector.js'
import type { ExtractionRequest, ExtractionBatch } from './batching.js'
import { deduplicateRequests, batchRequests } from './batching.js'
// ---------------------------------------------------------------------------
// Execution Context
// ---------------------------------------------------------------------------
/**
* Bundles the flattened execution IR with the lookup tables needed to map
* numeric indices back to semantic IR entities.
*
* The compiler (WBS 2) produces both IRs and these index-to-id arrays so
* the planner never has to guess insertion order.
*
* The planner uses each table as `table[column[i]]`: the number stored in the
* clause column for clause `i` is the position in the table, and the entry
* found there is the key into the matching semantic IR map. An entry that is
* missing or an empty string is treated as "no id mapping".
*/
export interface ExecutionContext {
executionIr: ExecutionIr
semanticIr: SemanticIr
/** executionIr.clauseSubject[i] -> subject id (NOTE(review): not read by the planning code in this file; presumably indexed like the tables below — confirm) */
subjectIds: string[]
/** executionIr.clauseFrame[i] -> frame id (NOTE(review): not read by the planning code in this file — confirm indexing) */
frameIds: string[]
/** executionIr.clauseState[i] -> state id; read as stateIds[executionIr.clauseState[i]] */
stateIds: string[]
/** executionIr.clauseTimeline[i] -> timeline id; read as timelineIds[executionIr.clauseTimeline[i]] */
timelineIds: string[]
/** executionIr.clauseEnvGuard[i] -> envGuard id; read as envGuardIds[executionIr.clauseEnvGuard[i]] */
envGuardIds: string[]
/** executionIr.clauseTolerance[i] -> tolerance id (NOTE(review): not read by the planning code in this file — confirm indexing) */
toleranceIds: string[]
}
// ---------------------------------------------------------------------------
// Planner Options
// ---------------------------------------------------------------------------
/**
* Configuration passed to the ExtractionPlanner constructor.
*
* Only `factRegistry` is required; the defaults are fallbacks used when the
* execution IR does not yield env cases or state snapshots of its own.
*/
export interface PlannerOptions {
/** Registry that knows which facts each clause family needs. */
factRegistry: FactRequirementRegistry
/** Default environment when no envGuards are active. If omitted and no env case is found, the single fallback request uses an empty env (`{}`). */
defaultEnvironment?: Partial<Environment>
/** Default state snapshots when no state refs are active. Used only when no snapshot could be collected from the clauses. */
defaultStateSnapshots?: Array<{ id: string; kind: string; name?: string }>
/** Default timeline mode when no timeline refs are active. NOTE: timeline planning currently always yields mode 'static' and does not read this value. */
defaultTimelineMode?: string
}
// ---------------------------------------------------------------------------
// Extraction Plan Output
// ---------------------------------------------------------------------------
/**
* The final output of the planning phase.
*
* Contains every extraction request needed to satisfy the execution IR,
* grouped into environment-compatible batches, plus any diagnostics.
*/
export interface ExtractionPlan {
/** Plan id of the form `plan_<n>`, where `n` comes from a module-level counter incremented on every createPlan call. */
planId: string
/** Requests after deduplication. */
requests: ExtractionRequest[]
/** The same deduplicated requests, grouped by environment. */
batches: ExtractionBatch[]
/** Diagnostics collected during fact analysis, selector, environment, state, and timeline planning. */
diagnostics: Diagnostic[]
}
// ---------------------------------------------------------------------------
// Planner
// ---------------------------------------------------------------------------
/** Module-wide sequence used to number plans (`plan_1`, `plan_2`, ...). */
let _planCounter = 0

/** Turns an execution context into a deduplicated, batched extraction plan. */
export class ExtractionPlanner {
  constructor(private options: PlannerOptions) {}

  /**
   * Build an extraction plan from an execution context.
   *
   * The work happens in this order (diagnostics are reported in the same order):
   * 1. Union the facts required by all clauses.
   * 2. Plan selector resolution; its diagnostics are re-emitted under the
   *    'resolution-error' category.
   * 3. Expand the environment matrix from envGuards.
   * 4. Collect state snapshots.
   * 5. Determine the timeline plan.
   * 6. Create one request per env case (`req_1`, `req_2`, ...), or a single
   *    `req_default` / `env_default` request when there is no env case.
   * 7. Deduplicate the requests, then batch them by environment.
   *
   * @param context Execution IR, semantic IR, and the index-to-id tables.
   * @returns The plan with its deduplicated requests, batches, and diagnostics.
   */
  createPlan(context: ExecutionContext): ExtractionPlan {
    const { executionIr, semanticIr } = context
    const collector = new DiagnosticCollector()

    const requiredFacts = analyzeRequiredFacts(
      executionIr,
      this.options.factRegistry,
      collector
    )

    const selectorPlan = planSelectorResolution(semanticIr)
    for (const { code, severity, message } of selectorPlan.diagnostics) {
      collector.emit({ code, severity, message, category: 'resolution-error' })
    }

    const envCases = expandEnvironmentMatrix(
      executionIr,
      context,
      semanticIr,
      this.options.defaultEnvironment,
      collector
    )
    const stateSnapshots = planStateSnapshots(
      executionIr,
      context,
      semanticIr,
      this.options.defaultStateSnapshots,
      collector
    )
    const timeline = planTimeline(
      executionIr,
      context,
      semanticIr,
      this.options.defaultTimelineMode,
      collector
    )

    // Every request shares the same snapshots, timeline, subjects, and facts;
    // only the ids and the env differ.
    const buildRequest = (
      requestId: string,
      envCaseId: string,
      env: Partial<Environment>
    ): ExtractionRequest => ({
      requestId,
      envCaseId,
      env,
      stateSnapshots,
      timeline,
      subjects: selectorPlan.subjects,
      requiredFacts,
      diagnostics: [],
    })

    const requests: ExtractionRequest[] =
      envCases.length > 0
        ? envCases.map((envCase, position) =>
            buildRequest(`req_${position + 1}`, envCase.id, envCase.env)
          )
        : // No env case at all: still plan one request with the defaults.
          [buildRequest('req_default', 'env_default', this.options.defaultEnvironment ?? {})]

    const uniqueRequests = deduplicateRequests(requests)
    const batches = batchRequests(uniqueRequests)

    _planCounter += 1
    return {
      planId: `plan_${_planCounter}`,
      requests: uniqueRequests,
      batches,
      diagnostics: collector.diagnostics,
    }
  }
}
// ---------------------------------------------------------------------------
// Environment Matrix Expansion
// ---------------------------------------------------------------------------
/** One environment case produced by the environment matrix expansion. */
interface EnvCaseEntry {
/** Case id: `env_<n>` (1-based, in order of discovery) or `env_default` for the fallback case. */
id: string
/** Environment parameters of this case. */
env: Partial<Environment>
}
/**
 * Collect every distinct environment case referenced by any clause in the
 * execution IR.
 *
 * For each clause the value in `clauseEnvGuard[i]` is looked up in
 * `context.envGuardIds`, the resulting id is resolved in
 * `semanticIr.envGuards`, and the guard's `normalizedCases` are appended in
 * first-seen order as `env_1`, `env_2`, ...
 *
 * Cases are compared by value: the comparison key is JSON written with sorted
 * property names, so two cases holding the same values are merged even when
 * their properties were inserted in a different order.
 *
 * Diagnostics:
 * - `IMH_EXTRACTOR_MISSING_ENV_GUARD` (warning) when the index has no id
 *   mapping (missing entry or empty string).
 * - `IMH_EXTRACTOR_UNRESOLVED_ENV_GUARD` (error) when the id is not present
 *   in the semantic IR.
 *
 * @param executionIr Flattened clauses; `clauseCount` and `clauseEnvGuard` are read.
 * @param context Supplies the `envGuardIds` lookup table.
 * @param semanticIr Supplies the `envGuards` map.
 * @param defaultEnv Fallback env used when no case was collected.
 * @param diagnostics Collector that receives the diagnostics listed above.
 * @returns The distinct cases. If none was collected, a single `env_default`
 *   case when `defaultEnv` is given, otherwise an empty array.
 */
function expandEnvironmentMatrix(
  executionIr: ExecutionIr,
  context: ExecutionContext,
  semanticIr: SemanticIr,
  defaultEnv: Partial<Environment> | undefined,
  diagnostics: DiagnosticCollector
): EnvCaseEntry[] {
  // JSON.stringify replacer: rebuild every plain object with sorted keys so
  // the comparison key does not depend on property insertion order.
  const withSortedKeys = (_key: string, value: unknown): unknown => {
    if (value === null || typeof value !== 'object' || Array.isArray(value)) {
      return value
    }
    const source = value as Record<string, unknown>
    const ordered: Record<string, unknown> = {}
    for (const name of Object.keys(source).sort()) {
      ordered[name] = source[name]
    }
    return ordered
  }

  const seen = new Set<string>()
  const cases: EnvCaseEntry[] = []

  for (let i = 0; i < executionIr.clauseCount; i++) {
    const guardIndex = executionIr.clauseEnvGuard[i]
    const guardId = context.envGuardIds[guardIndex]
    if (!guardId) {
      diagnostics.emit({
        code: 'IMH_EXTRACTOR_MISSING_ENV_GUARD',
        severity: 'warning',
        category: 'extraction-error',
        message: `Clause ${i} references envGuard index ${guardIndex} which has no id mapping`,
        clauseIndex: i,
      })
      continue
    }

    const guard = semanticIr.envGuards.get(guardId)
    if (!guard) {
      diagnostics.emit({
        code: 'IMH_EXTRACTOR_UNRESOLVED_ENV_GUARD',
        severity: 'error',
        category: 'resolution-error',
        message: `EnvGuard ${guardId} not found in semantic IR`,
        clauseIndex: i,
      })
      continue
    }

    // Only the normalizedCases shape of an envGuard is expanded here. A guard
    // without normalizedCases (e.g. one that only carries a condition string)
    // or with an empty list contributes no cases and no diagnostic.
    const normalizedCases = (guard as unknown as Record<string, unknown>)
      .normalizedCases as Array<Partial<Environment>> | undefined
    if (!normalizedCases || normalizedCases.length === 0) continue

    for (const env of normalizedCases) {
      const key = JSON.stringify(env, withSortedKeys)
      if (seen.has(key)) continue
      seen.add(key)
      // The first-seen env object itself is kept, not the canonical copy.
      cases.push({
        id: `env_${cases.length + 1}`,
        env,
      })
    }
  }

  if (cases.length === 0 && defaultEnv) {
    cases.push({ id: 'env_default', env: defaultEnv })
  }
  return cases
}
// ---------------------------------------------------------------------------
// State Snapshot Planning
// ---------------------------------------------------------------------------
/**
 * Collect all state snapshots referenced by clauses.
 *
 * For each clause the value in `clauseState[i]` is looked up in
 * `context.stateIds` and the resulting id is resolved in `semanticIr.states`.
 * Every state id is processed once, in first-seen order.
 *
 * State kinds treated as supported: default, hover, focus, focusVisible,
 * active, disabled, visited. If a clause references any other kind, a
 * diagnostic is emitted but the state is still included so the solver can
 * decide what to do.
 *
 * Diagnostics:
 * - `IMH_EXTRACTOR_MISSING_STATE` (warning): index without id mapping.
 * - `IMH_EXTRACTOR_UNRESOLVED_STATE` (error): id not found in the semantic IR.
 * - `IMH_EXTRACTOR_UNSUPPORTED_STATE` (warning): kind outside the list above.
 *
 * @param executionIr Flattened clauses; `clauseCount` and `clauseState` are read.
 * @param context Supplies the `stateIds` lookup table.
 * @param semanticIr Supplies the `states` map.
 * @param defaults Snapshots to fall back to when none could be collected.
 * @param diagnostics Collector that receives the diagnostics listed above.
 * @returns The collected snapshots, or the defaults (deduplicated by id) when
 *   no snapshot was collected from the clauses.
 */
function planStateSnapshots(
  executionIr: ExecutionIr,
  context: ExecutionContext,
  semanticIr: SemanticIr,
  defaults: Array<{ id: string; kind: string; name?: string }> | undefined,
  diagnostics: DiagnosticCollector
): Array<{ id: string; kind: string; name?: string }> {
  const seen = new Set<string>()
  const snapshots: Array<{ id: string; kind: string; name?: string }> = []
  const supportedStates = new Set([
    'default',
    'hover',
    'focus',
    'focusVisible',
    'active',
    'disabled',
    'visited',
  ])

  for (let i = 0; i < executionIr.clauseCount; i++) {
    const stateIndex = executionIr.clauseState[i]
    const stateId = context.stateIds[stateIndex]
    if (!stateId) {
      diagnostics.emit({
        code: 'IMH_EXTRACTOR_MISSING_STATE',
        severity: 'warning',
        category: 'extraction-error',
        message: `Clause ${i} references state index ${stateIndex} which has no id mapping`,
        clauseIndex: i,
      })
      continue
    }

    // Marked before resolution so an unresolved id is reported only once.
    if (seen.has(stateId)) continue
    seen.add(stateId)

    const state = semanticIr.states.get(stateId)
    if (!state) {
      diagnostics.emit({
        code: 'IMH_EXTRACTOR_UNRESOLVED_STATE',
        severity: 'error',
        category: 'resolution-error',
        message: `State ${stateId} not found in semantic IR`,
        clauseIndex: i,
      })
      continue
    }

    const kind = state.kind ?? 'default'
    if (!supportedStates.has(kind)) {
      diagnostics.emit({
        code: 'IMH_EXTRACTOR_UNSUPPORTED_STATE',
        severity: 'warning',
        category: 'extraction-error',
        message: `State kind "${kind}" is not fully supported in V1`,
        clauseIndex: i,
      })
    }

    snapshots.push({
      id: stateId,
      kind,
      name: (state as unknown as Record<string, unknown>).name as string | undefined,
    })
  }

  if (snapshots.length === 0 && defaults) {
    // Deduplicate the defaults among themselves only. `seen` also holds ids
    // of states that failed to resolve; reusing it here would drop a default
    // that happens to share such an id and leave the plan without snapshots.
    const defaultIds = new Set<string>()
    for (const d of defaults) {
      if (defaultIds.has(d.id)) continue
      defaultIds.add(d.id)
      snapshots.push(d)
    }
  }
  return snapshots
}
// ---------------------------------------------------------------------------
// Timeline Planning
// ---------------------------------------------------------------------------
/**
 * Determine the timeline sampling plan.
 *
 * V1 is static-only, so the result is always `{ mode: 'static' }`. The
 * clauses' timelines are inspected solely to warn when any of them asked for
 * a mode other than 'static'. A timeline without a mode counts as 'static'.
 * Clauses whose timeline index has no id, or whose id is absent from the
 * semantic IR, are skipped without a diagnostic.
 *
 * @param executionIr - Supplies `clauseCount` and the per-clause `clauseTimeline` indices.
 * @param context - Supplies `timelineIds`, looked up by timeline index.
 * @param semanticIr - Supplies the `timelines` map, keyed by timeline id.
 * @param defaultMode - Not consulted by this implementation.
 * @param diagnostics - Receives one warning when a non-static mode was seen.
 * @returns Always `{ mode: 'static' }`.
 */
function planTimeline(
  executionIr: ExecutionIr,
  context: ExecutionContext,
  semanticIr: SemanticIr,
  defaultMode: string | undefined,
  diagnostics: DiagnosticCollector
): { mode: string; samples?: number[] } {
  // Distinct modes in first-seen order; the order shows up in the warning text.
  const modes = new Set<string>()

  for (let clause = 0; clause < executionIr.clauseCount; clause++) {
    const id = context.timelineIds[executionIr.clauseTimeline[clause]]
    const timeline = id ? semanticIr.timelines.get(id) : undefined
    if (timeline) {
      modes.add(timeline.mode ?? 'static')
    }
  }

  const modeList = Array.from(modes)
  const requestedNonStatic = modeList.some((m) => m !== 'static')
  if (requestedNonStatic) {
    diagnostics.emit({
      code: 'IMH_EXTRACTOR_NON_STATIC_TIMELINE',
      severity: 'warning',
      category: 'extraction-error',
      message:
        'Non-static timelines detected; V1 falls back to static extraction. ' +
        `Modes seen: ${modeList.join(', ')}`,
    })
  }

  // V1 ignores whatever the clauses requested and always extracts statically.
  return { mode: 'static' }
}
@@ -0,0 +1,203 @@
/**
* Fact requirement analysis and registry.
*
* Maps clause families to the browser facts they need.
* Analyzes execution IR to compute the union of required facts.
*/
import type { ExecutionIr } from 'imhotep-core'
// ---------------------------------------------------------------------------
// Fact Families
// ---------------------------------------------------------------------------
/**
 * Well-known fact families the extractor can provide.
 *
 * Keys are the symbolic names used in code; values are the string tags that
 * appear in `FactRequirement.family`. `as const` keeps every value as a
 * string-literal type so that `FactFamily` is the union of exactly these tags.
 */
export const FACT_FAMILY = {
GEOMETRY: 'geometry',
FRAGMENTS: 'fragments',
STYLES: 'styles',
TOPOLOGY: 'topology',
TEXT: 'text',
SCROLL: 'scroll',
CLIPPING: 'clipping',
PAINT: 'paint',
VISIBILITY: 'visibility',
TRANSFORMS: 'transforms',
} as const
/** Union of the string values of `FACT_FAMILY` ('geometry' | 'fragments' | ...). */
export type FactFamily = (typeof FACT_FAMILY)[keyof typeof FACT_FAMILY]
// ---------------------------------------------------------------------------
// Requirement Descriptors
// ---------------------------------------------------------------------------
/**
 * A single fact requirement for a clause family.
 *
 * Requirements are registered per clause type in `FactRequirementRegistry`
 * and folded into a `RequiredFacts` accumulator by `mergeRequirement`.
 */
export interface FactRequirement {
/** The fact family this requirement asks for. */
family: FactFamily
/**
 * For style or topology families, which specific properties.
 * `mergeRequirement` reads this only for those two families; a styles or
 * topology requirement that omits it contributes nothing.
 */
properties?: string[]
}
/**
 * Union of all required facts across an execution plan.
 *
 * Each boolean is a per-family flag that `mergeRequirement` switches on when
 * a requirement names that family. `styles` and `topology` instead collect
 * the individual property names requested for those families.
 */
export interface RequiredFacts {
geometry: boolean
fragments: boolean
/** Style property names requested by at least one requirement. */
styles: Set<string>
/** Topology property names requested by at least one requirement. */
topology: Set<string>
text: boolean
scroll: boolean
clipping: boolean
paint: boolean
visibility: boolean
transforms: boolean
}
/**
 * Build a `RequiredFacts` accumulator that requires nothing yet.
 *
 * Every boolean flag starts out false and both property sets start out empty.
 * Each call allocates fresh `Set` instances, so accumulators never share state.
 */
export function createEmptyRequiredFacts(): RequiredFacts {
  const styles = new Set<string>()
  const topology = new Set<string>()
  const empty: RequiredFacts = {
    geometry: false,
    fragments: false,
    styles,
    topology,
    text: false,
    scroll: false,
    clipping: false,
    paint: false,
    visibility: false,
    transforms: false,
  }
  return empty
}
// ---------------------------------------------------------------------------
// Diagnostic Shape
// ---------------------------------------------------------------------------
/** A single problem report produced while planning extraction. */
export interface Diagnostic {
/** Machine-readable identifier, e.g. 'IMH_EXTRACTOR_UNSUPPORTED_CLAUSE_TYPE'. */
code: string
severity: 'error' | 'warning' | 'info'
/** Human-readable description of the problem. */
message: string
/** Coarse grouping; emitters in this package use e.g. 'extraction-error' and 'resolution-error'. */
category?: string
/** Index of the offending clause in the execution IR, when the diagnostic is clause-specific. */
clauseIndex?: number
/** NOTE(review): presumably the stable id of the offending clause — no emitter in view sets it; confirm. */
clauseId?: string
}
/**
 * Mutable collector for diagnostics emitted during planning.
 *
 * Diagnostics are kept in the order they were emitted.
 */
export class DiagnosticCollector {
  /** Everything emitted so far, oldest first. */
  diagnostics: Diagnostic[]

  constructor() {
    this.diagnostics = []
  }

  /** Append `d` to the collected diagnostics. */
  emit(d: Diagnostic): void {
    const { diagnostics } = this
    diagnostics.push(d)
  }
}
// ---------------------------------------------------------------------------
// Fact Requirement Registry
// ---------------------------------------------------------------------------
/**
 * Lookup table from numeric clause type id to the facts that clause type needs.
 *
 * Intended to be filled at system init time by registering each clause family
 * (e.g. relation.leftOf, size.width, topology.clipChain).
 */
export class FactRequirementRegistry {
  private readonly requirementsByType: Map<number, FactRequirement[]> = new Map()

  /**
   * Declare the facts needed by a given clause type id.
   * A later registration for the same id replaces the earlier one.
   */
  register(clauseType: number, requirements: FactRequirement[]): void {
    this.requirementsByType.set(clauseType, requirements)
  }

  /** Return the requirements registered for `clauseType`, or undefined when there are none. */
  resolve(clauseType: number): FactRequirement[] | undefined {
    return this.requirementsByType.get(clauseType)
  }

  /** Report whether `clauseType` has been registered. */
  has(clauseType: number): boolean {
    return this.requirementsByType.has(clauseType)
  }
}
// ---------------------------------------------------------------------------
// Fact Analysis
// ---------------------------------------------------------------------------
/**
 * Compute the union of facts required by every clause in the execution IR.
 *
 * Clauses are visited in index order. A clause whose type has no entry in the
 * registry contributes nothing and is reported through an
 * IMH_EXTRACTOR_UNSUPPORTED_CLAUSE_TYPE error diagnostic.
 *
 * @param executionIr - Supplies `clauseCount` and the per-clause `clauseType` ids.
 * @param registry - Maps clause type ids to their fact requirements.
 * @param diagnostics - Receives one error per clause with an unregistered type.
 * @returns A fresh accumulator holding the merged requirements.
 */
export function analyzeRequiredFacts(
  executionIr: ExecutionIr,
  registry: FactRequirementRegistry,
  diagnostics: DiagnosticCollector
): RequiredFacts {
  const facts = createEmptyRequiredFacts()

  for (let clauseIndex = 0; clauseIndex < executionIr.clauseCount; clauseIndex++) {
    const typeId = executionIr.clauseType[clauseIndex]
    const needed = registry.resolve(typeId)

    if (needed) {
      for (const requirement of needed) {
        mergeRequirement(facts, requirement)
      }
    } else {
      diagnostics.emit({
        code: 'IMH_EXTRACTOR_UNSUPPORTED_CLAUSE_TYPE',
        severity: 'error',
        category: 'extraction-error',
        message: `Clause type ${typeId} has no registered fact requirements`,
        clauseIndex,
      })
    }
  }

  return facts
}
/**
 * Fold one `FactRequirement` into a `RequiredFacts` accumulator, in place.
 *
 * Styles and topology requirements add their property names to the matching
 * set (and add nothing when `properties` is absent). Every other known family
 * switches its boolean flag on. Unknown families are ignored; the caller may
 * choose to warn.
 */
function mergeRequirement(acc: RequiredFacts, req: FactRequirement): void {
  const family = req.family

  // Property-carrying families: accumulate names rather than set a flag.
  if (family === FACT_FAMILY.STYLES || family === FACT_FAMILY.TOPOLOGY) {
    const names = req.properties
    if (names) {
      const bucket = family === FACT_FAMILY.STYLES ? acc.styles : acc.topology
      for (const name of names) bucket.add(name)
    }
    return
  }

  // Flag families: one boolean per family.
  if (family === FACT_FAMILY.GEOMETRY) {
    acc.geometry = true
  } else if (family === FACT_FAMILY.FRAGMENTS) {
    acc.fragments = true
  } else if (family === FACT_FAMILY.TEXT) {
    acc.text = true
  } else if (family === FACT_FAMILY.SCROLL) {
    acc.scroll = true
  } else if (family === FACT_FAMILY.CLIPPING) {
    acc.clipping = true
  } else if (family === FACT_FAMILY.PAINT) {
    acc.paint = true
  } else if (family === FACT_FAMILY.VISIBILITY) {
    acc.visibility = true
  } else if (family === FACT_FAMILY.TRANSFORMS) {
    acc.transforms = true
  }
  // Anything else is an unknown family and is deliberately left unrecorded.
}
@@ -0,0 +1,85 @@
/**
* Selector resolution planning.
*
* Turns semantic IR subject definitions into a flat list of selector plans
* that the extractor runtime will resolve against the DOM.
*/
import type { SemanticIr } from 'imhotep-core'
/**
 * Module-local view of a subject definition, as consumed by `validateSubject`
 * and `planSelectorResolution`.
 */
interface SubjectDef {
/** Subject id; used in the empty-selector diagnostic. */
id: string
/** Selector string; blank or whitespace-only values are rejected by `validateSubject`. */
selector: string
/** Subject kind; `planSelectorResolution` substitutes 'element' when absent. */
kind?: string
/** Not read anywhere in this module. */
quantifier?: string
}
// ---------------------------------------------------------------------------
// Selector Plan Types
// ---------------------------------------------------------------------------
/**
 * A planned selector resolution for a single subject.
 *
 * Produced by `planSelectorResolution`, one entry per subject that passed
 * validation.
 */
export interface SelectorPlan {
/** Stable subject id from semantic IR (the key of the subject in `semanticIr.subjects`). */
id: string
/** CSS selector string to resolve. */
selector: string
/** Subject kind (element, pseudoElement, textRange, etc). Defaults to 'element' when the subject declares none. */
kind: string
}
/** Result of planning selector resolution for an entire semantic IR. */
export interface SelectorResolutionPlan {
/** Plans for the subjects that passed validation, in semantic IR iteration order. */
subjects: SelectorPlan[]
/** One entry per subject that failed validation; such subjects have no plan in `subjects`. */
diagnostics: Array<{
code: string
severity: 'error' | 'warning'
message: string
/** Id of the subject the diagnostic refers to, when known. */
subjectId?: string
}>
}
// ---------------------------------------------------------------------------
// Selector Resolution Planner
// ---------------------------------------------------------------------------
/**
 * Build a selector resolution plan from semantic IR subjects.
 *
 * Each subject that passes validation becomes one SelectorPlan entry, keyed
 * by its id in `semanticIr.subjects`. A subject that fails validation is left
 * out of the plan and its diagnostic is recorded instead.
 * Duplicate selectors are preserved with distinct ids so that the solver can
 * still distinguish them by reference.
 */
export function planSelectorResolution(semanticIr: SemanticIr): SelectorResolutionPlan {
  const plan: SelectorResolutionPlan = { subjects: [], diagnostics: [] }

  for (const [subjectId, subject] of semanticIr.subjects) {
    const problem = validateSubject(subject)
    if (problem) {
      plan.diagnostics.push(problem)
    } else {
      plan.subjects.push({
        id: subjectId,
        selector: subject.selector,
        // Subjects that do not declare a kind are planned as plain elements.
        kind: subject.kind ?? 'element',
      })
    }
  }

  return plan
}
/**
 * Check a single subject definition.
 *
 * @returns An IMH_EXTRACTOR_EMPTY_SELECTOR error diagnostic when the selector
 *   is missing, empty, or whitespace-only; otherwise undefined.
 */
function validateSubject(subject: SubjectDef): SelectorResolutionPlan['diagnostics'][number] | undefined {
  const selector = subject.selector
  const hasSelector = selector ? selector.trim() !== '' : false
  if (hasSelector) {
    return undefined
  }

  return {
    code: 'IMH_EXTRACTOR_EMPTY_SELECTOR',
    severity: 'error',
    message: `Subject ${subject.id} has an empty selector`,
    subjectId: subject.id,
  }
}
+13
View File
@@ -0,0 +1,13 @@
{
"extends": "../../tsconfig.json",
"compilerOptions": {
"outDir": "./dist",
"rootDir": "./src",
"paths": {},
"composite": false,
"noEmitOnError": false
},
"include": [
"src/**/*"
]
}