v1.1.0: pooled runtime, 959 tests, production hardening (0 squash)

2025-08-15 10:00:00 -07:00
commit 92deb689cd
321 changed files with 79170 additions and 0 deletions
@@ -0,0 +1,31 @@
+{
+  "name": "imhotep-extractor",
+  "version": "1.0.0",
+  "type": "module",
+  "license": "MIT",
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/anomalyco/imhotep.git"
+  },
+  "engines": {
+    "node": ">=18.0.0"
+  },
+  "main": "dist/index.js",
+  "types": "dist/index.d.ts",
+  "files": [
+    "dist"
+  ],
+  "scripts": {
+    "build": "tsc",
+    "test": "node --test dist/**/*.test.js"
+  },
+  "exports": {
+    ".": {
+      "types": "./dist/index.d.ts",
+      "default": "./dist/index.js"
+    }
+  },
+  "dependencies": {
+    "imhotep-core": "^1.0.0"
+  }
+}
@@ -0,0 +1,144 @@
+/**
+ * Plan deduplication and batching.
+ *
+ * Multiple environment cases, state snapshots, or timeline modes may produce
+ * identical extraction needs. This module collapses duplicate plans and groups
+ * compatible requests into batches so the runtime can issue bulk calls.
+ */
+
+import type { Environment, StateSnapshot, TimelineSnapshot } from 'imhotep-core'
+import type { RequiredFacts, Diagnostic } from './requirements.js'
+import type { SelectorPlan } from './selector.js'
+
+// ---------------------------------------------------------------------------
+// Extraction Request Shape
+// ---------------------------------------------------------------------------
+
+/**
+ * A single unit of work for the extractor runtime.
+ *
+ * One request corresponds to one env case + one state plan + one timeline plan.
+ * The runtime may still merge multiple requests into a single browser session
+ * if their env and subject sets are compatible.
+ *
+ * Deduplication compares only env, state snapshot ids, timeline mode, subject
+ * selectors, and required facts; the remaining fields are carried along
+ * unchanged from whichever request is kept.
+ */
+export interface ExtractionRequest {
+  /** Unique id for this request. Not compared during deduplication. */
+  requestId: string
+  /** Environment case id. Not compared during deduplication. */
+  envCaseId: string
+  /**
+   * Environment parameters (viewport, colorScheme, etc).
+   * Compared by value for deduplication and used as the batching key.
+   */
+  env: Partial<Environment>
+  /**
+   * State snapshots to capture under this environment.
+   * Only each snapshot's `id` takes part in deduplication.
+   */
+  stateSnapshots: Array<{ id: string; kind: string; name?: string }>
+  /**
+   * Timeline sampling plan.
+   * Only `mode` takes part in deduplication; `samples` is not compared.
+   */
+  timeline: { mode: string; samples?: number[] }
+  /**
+   * Subjects whose selectors must be resolved.
+   * Only each subject's `selector` takes part in deduplication.
+   */
+  subjects: SelectorPlan[]
+  /** Union of all facts needed by active clauses in this context. */
+  requiredFacts: RequiredFacts
+  /** Diagnostics accumulated while building this request. Not compared during deduplication. */
+  diagnostics: Diagnostic[]
+}
+
+// ---------------------------------------------------------------------------
+// Deduplication
+// ---------------------------------------------------------------------------
+
+/**
+ * Collapse extraction requests that describe the same work.
+ *
+ * Requests count as duplicates when `requestKey` yields the same string for
+ * them, i.e. when env, state snapshot ids, timeline mode, subject selectors,
+ * and required facts all match. The earliest occurrence is retained, so its
+ * requestId survives and the relative order of the input is preserved.
+ *
+ * @param requests - Candidate requests; the array itself is left untouched.
+ * @returns A new array containing the first request seen for each distinct key.
+ */
+export function deduplicateRequests(requests: ExtractionRequest[]): ExtractionRequest[] {
+  // Only membership is ever queried, so the keys alone are tracked.
+  const seenKeys = new Set<string>()
+
+  return requests.filter((candidate) => {
+    const key = requestKey(candidate)
+    if (seenKeys.has(key)) {
+      return false
+    }
+    seenKeys.add(key)
+    return true
+  })
+}
+
+/**
+ * Build a deterministic string key for an extraction request.
+ *
+ * The key covers env, state snapshot ids, timeline mode, subject selectors,
+ * and required facts. Snapshot ids and selectors are sorted, and object
+ * properties are emitted in sorted-name order at every depth, so two requests
+ * that differ only in array order or in the insertion order of their `env`
+ * properties produce the same key.
+ *
+ * @param req - Request to fingerprint; it is not mutated.
+ * @returns A JSON string that is equal for requests considered duplicates.
+ */
+function requestKey(req: ExtractionRequest): string {
+  // JSON.stringify walks properties in insertion order, so { a, b } and
+  // { b, a } would otherwise serialize differently. Rebuilding every plain
+  // object with sorted property names removes that dependence.
+  const sortObjectKeys = (_key: string, value: unknown): unknown => {
+    if (value === null || typeof value !== 'object' || Array.isArray(value)) {
+      return value
+    }
+    const source = value as Record<string, unknown>
+    const sorted: Record<string, unknown> = {}
+    for (const name of Object.keys(source).sort()) {
+      sorted[name] = source[name]
+    }
+    return sorted
+  }
+
+  return JSON.stringify(
+    {
+      env: req.env,
+      // map() returns fresh arrays, so sorting them leaves the request intact.
+      stateSnapshotIds: req.stateSnapshots.map((s) => s.id).sort(),
+      timelineMode: req.timeline.mode,
+      subjectSelectors: req.subjects.map((s) => s.selector).sort(),
+      requiredFacts: serializeRequiredFacts(req.requiredFacts),
+    },
+    sortObjectKeys
+  )
+}
+
+/**
+ * Flatten RequiredFacts into a plain object that can be JSON-serialized.
+ *
+ * `styles` and `topology` are copied into sorted arrays so that their
+ * iteration order does not influence the result; every other field is passed
+ * through unchanged.
+ */
+function serializeRequiredFacts(facts: RequiredFacts): Record<string, unknown> {
+  const { geometry, fragments, text, scroll, clipping, paint, visibility, transforms } = facts
+  const styles = Array.from(facts.styles).sort()
+  const topology = Array.from(facts.topology).sort()
+
+  return {
+    geometry,
+    fragments,
+    styles,
+    topology,
+    text,
+    scroll,
+    clipping,
+    paint,
+    visibility,
+    transforms,
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Batching
+// ---------------------------------------------------------------------------
+
+/**
+ * A batch is a group of extraction requests that can share a single browser
+ * session because their environments are compatible.
+ *
+ * For V1 the batching rule is simple: requests with the exact same env
+ * (viewport, colorScheme, pointer, etc) can be batched. Nothing else about a
+ * request (states, timeline, subjects, facts) influences which batch it joins.
+ */
+export interface ExtractionBatch {
+  /** Id for the batch; `batchRequests` numbers them `batch_1`, `batch_2`, … */
+  batchId: string
+  /**
+   * Shared environment for every request in the batch.
+   * `batchRequests` fills this with a JSON round-tripped copy rather than a
+   * reference to any request's own `env` object.
+   */
+  env: Partial<Environment>
+  /** Requests grouped into this batch, in the order they were supplied. */
+  requests: ExtractionRequest[]
+}
+
+/**
+ * Group deduplicated requests into batches by environment compatibility.
+ *
+ * Requests whose env objects are equal by value end up in the same batch,
+ * regardless of the order in which the env properties were assigned. Batches
+ * are returned in order of first appearance and numbered `batch_1`,
+ * `batch_2`, …; requests keep their input order within a batch.
+ *
+ * @param requests - Requests to group; neither the array nor its items are mutated.
+ * @returns One batch per distinct env. Each batch's `env` is a JSON
+ *   round-tripped copy of the env of the first request placed in that batch.
+ */
+export function batchRequests(requests: ExtractionRequest[]): ExtractionBatch[] {
+  // JSON.stringify follows property insertion order, which would split
+  // { a, b } and { b, a } into different batches. Rebuilding every plain
+  // object with sorted property names makes the grouping key order-independent.
+  const sortObjectKeys = (_key: string, value: unknown): unknown => {
+    if (value === null || typeof value !== 'object' || Array.isArray(value)) {
+      return value
+    }
+    const source = value as Record<string, unknown>
+    const sorted: Record<string, unknown> = {}
+    for (const name of Object.keys(source).sort()) {
+      sorted[name] = source[name]
+    }
+    return sorted
+  }
+
+  // Map iteration follows insertion order, so batches come out in the order
+  // their env was first encountered.
+  const batchesByEnv = new Map<string, ExtractionBatch>()
+
+  for (const req of requests) {
+    const envKey = JSON.stringify(req.env, sortObjectKeys)
+    const existing = batchesByEnv.get(envKey)
+    if (existing) {
+      existing.requests.push(req)
+    } else {
+      batchesByEnv.set(envKey, {
+        batchId: `batch_${batchesByEnv.size + 1}`,
+        // Detached copy in the first request's own property order.
+        env: JSON.parse(JSON.stringify(req.env)) as Partial<Environment>,
+        requests: [req],
+      })
+    }
+  }
+
+  return Array.from(batchesByEnv.values())
+}
@@ -0,0 +1,43 @@
+/**
+ * imhotep-extractor — Extraction planning system.
+ *
+ * Compiles execution IR into precise browser fact requirements.
+ *
+ * Public surface:
+ *   - ExtractionPlanner    (planner.ts)
+ *   - FactRequirementRegistry, analyzeRequiredFacts, createEmptyRequiredFacts,
+ *     DiagnosticCollector, FACT_FAMILY  (requirements.ts)
+ *   - planSelectorResolution   (selector.ts)
+ *   - deduplicateRequests, batchRequests  (batching.ts)
+ */
+
+export {
+  ExtractionPlanner,
+  type ExecutionContext,
+  type PlannerOptions,
+  type ExtractionPlan,
+} from './planner.js'
+
+export {
+  FactRequirementRegistry,
+  analyzeRequiredFacts,
+  createEmptyRequiredFacts,
+  DiagnosticCollector,
+  FACT_FAMILY,
+  type FactRequirement,
+  type RequiredFacts,
+  type FactFamily,
+  type Diagnostic,
+} from './requirements.js'
+
+export {
+  planSelectorResolution,
+  type SelectorPlan,
+  type SelectorResolutionPlan,
+} from './selector.js'
+
+export {
+  deduplicateRequests,
+  batchRequests,
+  type ExtractionRequest,
+  type ExtractionBatch,
+} from './batching.js'
@@ -0,0 +1,581 @@
+/**
+ * Unit tests for the extraction planning system.
+ *
+ * Covers:
+ *   - Fact requirement analysis and registry
+ *   - Plan deduplication and batching
+ *   - Environment matrix expansion
+ *   - Unsupported fact diagnostics
+ *   - State snapshot, timeline fallback, and selector planning
+ */
+
+import { describe, it } from 'node:test'
+import assert from 'node:assert'
+
+import {
+  FactRequirementRegistry,
+  analyzeRequiredFacts,
+  createEmptyRequiredFacts,
+  DiagnosticCollector,
+  FACT_FAMILY,
+} from './requirements.js'
+
+import { deduplicateRequests, batchRequests } from './batching.js'
+import type { ExtractionRequest } from './batching.js'
+
+import { ExtractionPlanner } from './planner.js'
+import type { ExecutionContext } from './planner.js'
+
+import type { ExecutionIr, SemanticIr } from 'imhotep-core'
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/**
+ * Create a bare-bones ExecutionIr whose clause-type column is `clauseTypes`.
+ *
+ * Every other per-clause column is a freshly allocated typed array of the
+ * same length; typed arrays start out zero-filled.
+ */
+function makeExecutionIr(clauseTypes: number[]): ExecutionIr {
+  const clauseCount = clauseTypes.length
+  const u32Column = (): Uint32Array => new Uint32Array(clauseCount)
+  const f64Column = (): Float64Array => new Float64Array(clauseCount)
+
+  return {
+    clauseCount,
+    clauseType: new Uint16Array(clauseTypes),
+    clauseSubject: u32Column(),
+    clauseReference: u32Column(),
+    clauseFrame: u32Column(),
+    clauseState: u32Column(),
+    clauseTimeline: u32Column(),
+    clauseTolerance: u32Column(),
+    clauseEnvGuard: u32Column(),
+    clauseArg0: f64Column(),
+    clauseArg1: f64Column(),
+    clauseFlags: u32Column(),
+    clauseOrigin: u32Column(),
+  }
+}
+
+/**
+ * Assemble a SemanticIr fixture in which every collection starts empty and
+ * only the entries supplied through `opts` are inserted, keyed by their `id`.
+ *
+ * The entries are loose test shapes force-cast to the core types, so they
+ * carry only the fields a test chooses to provide.
+ */
+function makeSemanticIr(opts: {
+  envGuards?: Array<{ id: string; normalizedCases?: Array<Record<string, unknown>> }>
+  states?: Array<{ id: string; kind: string; name?: string }>
+  timelines?: Array<{ id: string; mode: string }>
+  subjects?: Array<{ id: string; selector: string; kind?: string }>
+}): SemanticIr {
+  const semanticIr: SemanticIr = {
+    subjects: new Map(),
+    frames: new Map(),
+    states: new Map(),
+    timelines: new Map(),
+    tolerances: new Map(),
+    envGuards: new Map(),
+    clauses: new Map(),
+    groups: new Map(),
+    diagnosticMetadata: new Map(),
+  }
+
+  const subjectEntries = opts.subjects ?? []
+  subjectEntries.forEach((subject) => {
+    semanticIr.subjects.set(
+      subject.id,
+      subject as unknown as import('imhotep-core').SemanticSubject
+    )
+  })
+
+  const guardEntries = opts.envGuards ?? []
+  guardEntries.forEach((guard) => {
+    semanticIr.envGuards.set(
+      guard.id,
+      guard as unknown as import('imhotep-core').SemanticEnvGuard
+    )
+  })
+
+  const stateEntries = opts.states ?? []
+  stateEntries.forEach((state) => {
+    semanticIr.states.set(
+      state.id,
+      state as unknown as import('imhotep-core').SemanticState
+    )
+  })
+
+  const timelineEntries = opts.timelines ?? []
+  timelineEntries.forEach((timeline) => {
+    semanticIr.timelines.set(
+      timeline.id,
+      timeline as unknown as import('imhotep-core').SemanticTimeline
+    )
+  })
+
+  return semanticIr
+}
+
+/**
+ * Wrap the given IRs in an ExecutionContext.
+ *
+ * Any id table missing from `indexMaps` defaults to an array of empty
+ * strings with one slot per clause in `executionIr`.
+ */
+function makeExecutionContext(
+  executionIr: ExecutionIr,
+  semanticIr: SemanticIr,
+  indexMaps?: Partial<{
+    subjectIds: string[]
+    frameIds: string[]
+    stateIds: string[]
+    timelineIds: string[]
+    envGuardIds: string[]
+    toleranceIds: string[]
+  }>
+): ExecutionContext {
+  // Each call yields its own array so the defaulted tables never alias.
+  const blankIds = () => Array(executionIr.clauseCount).fill('')
+
+  const subjectIds = indexMaps?.subjectIds ?? blankIds()
+  const frameIds = indexMaps?.frameIds ?? blankIds()
+  const stateIds = indexMaps?.stateIds ?? blankIds()
+  const timelineIds = indexMaps?.timelineIds ?? blankIds()
+  const envGuardIds = indexMaps?.envGuardIds ?? blankIds()
+  const toleranceIds = indexMaps?.toleranceIds ?? blankIds()
+
+  return {
+    executionIr,
+    semanticIr,
+    subjectIds,
+    frameIds,
+    stateIds,
+    timelineIds,
+    envGuardIds,
+    toleranceIds,
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Fact Requirement Analysis
+// ---------------------------------------------------------------------------
+
+describe('analyzeRequiredFacts', () => { // each test builds its own IR, registry, and collector
+  it('returns empty facts when there are no clauses', () => {
+    const ir = makeExecutionIr([]) // zero clauses
+    const registry = new FactRequirementRegistry()
+    const diagnostics = new DiagnosticCollector()
+
+    const facts = analyzeRequiredFacts(ir, registry, diagnostics)
+
+    assert.strictEqual(facts.geometry, false)
+    assert.strictEqual(facts.text, false)
+    assert.strictEqual(diagnostics.diagnostics.length, 0)
+  })
+
+  it('resolves facts for a single registered clause type', () => {
+    const ir = makeExecutionIr([1])
+    const registry = new FactRequirementRegistry()
+    registry.register(1, [ // clause type 1 needs geometry plus one style property
+      { family: FACT_FAMILY.GEOMETRY },
+      { family: FACT_FAMILY.STYLES, properties: ['position'] },
+    ])
+    const diagnostics = new DiagnosticCollector()
+
+    const facts = analyzeRequiredFacts(ir, registry, diagnostics)
+
+    assert.strictEqual(facts.geometry, true)
+    assert.strictEqual(facts.styles.has('position'), true)
+    assert.strictEqual(diagnostics.diagnostics.length, 0)
+  })
+
+  it('unions facts across multiple clauses', () => {
+    const ir = makeExecutionIr([1, 2]) // two clauses with different types
+    const registry = new FactRequirementRegistry()
+    registry.register(1, [{ family: FACT_FAMILY.GEOMETRY }])
+    registry.register(2, [
+      { family: FACT_FAMILY.TEXT },
+      { family: FACT_FAMILY.STYLES, properties: ['z-index'] },
+    ])
+    const diagnostics = new DiagnosticCollector()
+
+    const facts = analyzeRequiredFacts(ir, registry, diagnostics)
+
+    assert.strictEqual(facts.geometry, true)
+    assert.strictEqual(facts.text, true)
+    assert.strictEqual(facts.styles.has('z-index'), true)
+    assert.strictEqual(diagnostics.diagnostics.length, 0)
+  })
+
+  it('deduplicates style properties across clauses', () => {
+    const ir = makeExecutionIr([1, 1]) // same clause type twice
+    const registry = new FactRequirementRegistry()
+    registry.register(1, [
+      { family: FACT_FAMILY.STYLES, properties: ['position', 'overflow'] },
+    ])
+    const diagnostics = new DiagnosticCollector()
+
+    const facts = analyzeRequiredFacts(ir, registry, diagnostics)
+
+    assert.strictEqual(facts.styles.size, 2) // two distinct properties despite two clauses
+    assert.strictEqual(facts.styles.has('position'), true)
+    assert.strictEqual(facts.styles.has('overflow'), true)
+  })
+
+  it('emits a diagnostic for unsupported clause types', () => {
+    const ir = makeExecutionIr([99]) // type 99 is never registered below
+    const registry = new FactRequirementRegistry()
+    const diagnostics = new DiagnosticCollector()
+
+    const facts = analyzeRequiredFacts(ir, registry, diagnostics)
+
+    assert.strictEqual(facts.geometry, false)
+    assert.strictEqual(diagnostics.diagnostics.length, 1)
+    assert.strictEqual(
+      diagnostics.diagnostics[0].code,
+      'IMH_EXTRACTOR_UNSUPPORTED_CLAUSE_TYPE'
+    )
+    assert.strictEqual(diagnostics.diagnostics[0].clauseIndex, 0) // position of the offending clause
+  })
+
+  it('collects multiple unsupported clause diagnostics', () => {
+    const ir = makeExecutionIr([99, 100]) // neither type is registered
+    const registry = new FactRequirementRegistry()
+    const diagnostics = new DiagnosticCollector()
+
+    analyzeRequiredFacts(ir, registry, diagnostics)
+
+    assert.strictEqual(diagnostics.diagnostics.length, 2)
+    assert.strictEqual(diagnostics.diagnostics[0].clauseIndex, 0)
+    assert.strictEqual(diagnostics.diagnostics[1].clauseIndex, 1)
+  })
+})
+
+// ---------------------------------------------------------------------------
+// Plan Deduplication
+// ---------------------------------------------------------------------------
+
+describe('deduplicateRequests', () => { // fixtures are plain ExtractionRequest literals
+  it('returns identical requests unchanged when there is only one', () => {
+    const req: ExtractionRequest = {
+      requestId: 'r1',
+      envCaseId: 'e1',
+      env: { viewportWidth: 1000 },
+      stateSnapshots: [{ id: 's1', kind: 'default' }],
+      timeline: { mode: 'static' },
+      subjects: [{ id: 'sub1', selector: '.a', kind: 'element' }],
+      requiredFacts: createEmptyRequiredFacts(),
+      diagnostics: [],
+    }
+
+    const result = deduplicateRequests([req])
+    assert.strictEqual(result.length, 1)
+    assert.strictEqual(result[0].requestId, 'r1')
+  })
+
+  it('removes exact duplicate requests', () => {
+    const base: ExtractionRequest = {
+      requestId: 'r1',
+      envCaseId: 'e1',
+      env: { viewportWidth: 1000 },
+      stateSnapshots: [{ id: 's1', kind: 'default' }],
+      timeline: { mode: 'static' },
+      subjects: [{ id: 'sub1', selector: '.a', kind: 'element' }],
+      requiredFacts: createEmptyRequiredFacts(),
+      diagnostics: [],
+    }
+
+    const dup: ExtractionRequest = {
+      ...base, // same env, snapshots, timeline, subjects, and facts as base
+      requestId: 'r2', // ids differ, but they are not part of the dedup key
+      envCaseId: 'e2',
+    }
+
+    const result = deduplicateRequests([base, dup])
+    assert.strictEqual(result.length, 1)
+    assert.strictEqual(result[0].requestId, 'r1') // first occurrence wins
+  })
+
+  it('keeps requests that differ in env', () => {
+    const r1: ExtractionRequest = {
+      requestId: 'r1',
+      envCaseId: 'e1',
+      env: { viewportWidth: 1000 },
+      stateSnapshots: [{ id: 's1', kind: 'default' }],
+      timeline: { mode: 'static' },
+      subjects: [{ id: 'sub1', selector: '.a', kind: 'element' }],
+      requiredFacts: createEmptyRequiredFacts(),
+      diagnostics: [],
+    }
+    const r2: ExtractionRequest = {
+      ...r1,
+      requestId: 'r2',
+      envCaseId: 'e2',
+      env: { viewportWidth: 2000 }, // the only key-relevant difference from r1
+    }
+
+    const result = deduplicateRequests([r1, r2])
+    assert.strictEqual(result.length, 2)
+  })
+
+  it('keeps requests that differ in required facts', () => {
+    const r1: ExtractionRequest = {
+      requestId: 'r1',
+      envCaseId: 'e1',
+      env: { viewportWidth: 1000 },
+      stateSnapshots: [{ id: 's1', kind: 'default' }],
+      timeline: { mode: 'static' },
+      subjects: [{ id: 'sub1', selector: '.a', kind: 'element' }],
+      requiredFacts: { ...createEmptyRequiredFacts(), geometry: true },
+      diagnostics: [],
+    }
+    const r2: ExtractionRequest = {
+      ...r1,
+      requestId: 'r2',
+      requiredFacts: { ...createEmptyRequiredFacts(), text: true }, // text instead of geometry
+    }
+
+    const result = deduplicateRequests([r1, r2])
+    assert.strictEqual(result.length, 2)
+  })
+})
+
+// ---------------------------------------------------------------------------
+// Batching
+// ---------------------------------------------------------------------------
+
+describe('batchRequests', () => { // only `env` varies in a way that matters to these tests
+  it('puts a single request into one batch', () => {
+    const req: ExtractionRequest = {
+      requestId: 'r1',
+      envCaseId: 'e1',
+      env: { viewportWidth: 1000 },
+      stateSnapshots: [],
+      timeline: { mode: 'static' },
+      subjects: [],
+      requiredFacts: createEmptyRequiredFacts(),
+      diagnostics: [],
+    }
+
+    const batches = batchRequests([req])
+    assert.strictEqual(batches.length, 1)
+    assert.strictEqual(batches[0].requests.length, 1)
+  })
+
+  it('groups requests with identical env into one batch', () => {
+    const env = { viewportWidth: 1000, colorScheme: 'light' as const } // one object shared by r1 and r2
+    const r1: ExtractionRequest = {
+      requestId: 'r1',
+      envCaseId: 'e1',
+      env,
+      stateSnapshots: [],
+      timeline: { mode: 'static' },
+      subjects: [],
+      requiredFacts: createEmptyRequiredFacts(),
+      diagnostics: [],
+    }
+    const r2: ExtractionRequest = {
+      requestId: 'r2',
+      envCaseId: 'e2',
+      env,
+      stateSnapshots: [{ id: 's2', kind: 'hover' }], // differing snapshots must not split the batch
+      timeline: { mode: 'static' },
+      subjects: [],
+      requiredFacts: createEmptyRequiredFacts(),
+      diagnostics: [],
+    }
+
+    const batches = batchRequests([r1, r2])
+    assert.strictEqual(batches.length, 1)
+    assert.strictEqual(batches[0].requests.length, 2)
+  })
+
+  it('splits requests with different env into separate batches', () => {
+    const r1: ExtractionRequest = {
+      requestId: 'r1',
+      envCaseId: 'e1',
+      env: { viewportWidth: 1000 },
+      stateSnapshots: [],
+      timeline: { mode: 'static' },
+      subjects: [],
+      requiredFacts: createEmptyRequiredFacts(),
+      diagnostics: [],
+    }
+    const r2: ExtractionRequest = {
+      requestId: 'r2',
+      envCaseId: 'e2',
+      env: { viewportWidth: 2000 }, // differs from r1 only in viewport width
+      stateSnapshots: [],
+      timeline: { mode: 'static' },
+      subjects: [],
+      requiredFacts: createEmptyRequiredFacts(),
+      diagnostics: [],
+    }
+
+    const batches = batchRequests([r1, r2])
+    assert.strictEqual(batches.length, 2)
+  })
+})
+
+// ---------------------------------------------------------------------------
+// ExtractionPlanner — Integration
+// ---------------------------------------------------------------------------
+
+describe('ExtractionPlanner', () => { // end-to-end: createPlan over fixture IRs
+  it('produces a single request with defaults when IR is empty', () => {
+    const registry = new FactRequirementRegistry()
+    const planner = new ExtractionPlanner({
+      factRegistry: registry,
+      defaultEnvironment: { viewportWidth: 1280, viewportHeight: 720 },
+      defaultStateSnapshots: [{ id: 'default', kind: 'default' }],
+    })
+
+    const executionIr = makeExecutionIr([]) // no clauses at all
+    const semanticIr = makeSemanticIr({})
+    const ctx = makeExecutionContext(executionIr, semanticIr)
+
+    const plan = planner.createPlan(ctx)
+
+    assert.strictEqual(plan.requests.length, 1)
+    assert.strictEqual(plan.requests[0].envCaseId, 'env_default')
+    assert.strictEqual(plan.requests[0].env.viewportWidth, 1280) // taken from defaultEnvironment
+    assert.strictEqual(plan.batches.length, 1)
+    assert.strictEqual(plan.diagnostics.length, 0)
+  })
+
+  it('expands environment matrix from envGuards', () => {
+    const registry = new FactRequirementRegistry()
+    registry.register(1, [{ family: FACT_FAMILY.GEOMETRY }])
+
+    const planner = new ExtractionPlanner({
+      factRegistry: registry,
+      defaultEnvironment: { viewportWidth: 1280 },
+    })
+
+    const executionIr = makeExecutionIr([1])
+    // clause 0 references envGuard at index 0 = 'guard_1'
+    executionIr.clauseEnvGuard[0] = 0 // already 0 from makeExecutionIr; written out for clarity
+
+    const semanticIr = makeSemanticIr({
+      envGuards: [
+        {
+          id: 'guard_1',
+          normalizedCases: [ // two cases -> two requests expected below
+            { viewportWidth: 320, colorScheme: 'light' },
+            { viewportWidth: 768, colorScheme: 'dark' },
+          ],
+        },
+      ],
+    })
+
+    const ctx = makeExecutionContext(executionIr, semanticIr, {
+      envGuardIds: ['guard_1'],
+    })
+
+    const plan = planner.createPlan(ctx)
+
+    assert.strictEqual(plan.requests.length, 2)
+    assert.strictEqual(plan.requests[0].env.viewportWidth, 320)
+    assert.strictEqual(plan.requests[1].env.viewportWidth, 768)
+    assert.strictEqual(plan.batches.length, 2) // distinct envs never share a batch
+  })
+
+  it('deduplicates identical env cases from multiple guards', () => {
+    const registry = new FactRequirementRegistry()
+    registry.register(1, [{ family: FACT_FAMILY.GEOMETRY }])
+
+    const planner = new ExtractionPlanner({
+      factRegistry: registry,
+    })
+
+    const executionIr = makeExecutionIr([1, 1])
+    executionIr.clauseEnvGuard[0] = 0 // clause 0 -> envGuardIds[0] = 'guard_a'
+    executionIr.clauseEnvGuard[1] = 1 // clause 1 -> envGuardIds[1] = 'guard_b'
+
+    const semanticIr = makeSemanticIr({
+      envGuards: [
+        {
+          id: 'guard_a',
+          normalizedCases: [{ viewportWidth: 1000 }],
+        },
+        {
+          id: 'guard_b',
+          normalizedCases: [{ viewportWidth: 1000 }], // same case as guard_a
+        },
+      ],
+    })
+
+    const ctx = makeExecutionContext(executionIr, semanticIr, {
+      envGuardIds: ['guard_a', 'guard_b'],
+    })
+
+    const plan = planner.createPlan(ctx)
+
+    // Two env cases but identical, so deduplicated to 1 request
+    assert.strictEqual(plan.requests.length, 1)
+    assert.strictEqual(plan.batches.length, 1)
+  })
+
+  it('emits diagnostics for unsupported clause types', () => {
+    const registry = new FactRequirementRegistry() // nothing registered
+    const planner = new ExtractionPlanner({
+      factRegistry: registry,
+      defaultEnvironment: { viewportWidth: 1280 },
+    })
+
+    const executionIr = makeExecutionIr([42]) // type 42 is unknown to the registry
+    const semanticIr = makeSemanticIr({})
+    const ctx = makeExecutionContext(executionIr, semanticIr)
+
+    const plan = planner.createPlan(ctx)
+
+    assert.ok(
+      plan.diagnostics.some(
+        (d) => d.code === 'IMH_EXTRACTOR_UNSUPPORTED_CLAUSE_TYPE'
+      )
+    )
+  })
+
+  it('plans state snapshots from semantic IR', () => {
+    const registry = new FactRequirementRegistry()
+    registry.register(1, [{ family: FACT_FAMILY.GEOMETRY }])
+
+    const planner = new ExtractionPlanner({
+      factRegistry: registry,
+    })
+
+    const executionIr = makeExecutionIr([1])
+    executionIr.clauseState[0] = 0 // clause 0 -> stateIds[0] = 'state_hover'
+
+    const semanticIr = makeSemanticIr({
+      states: [
+        { id: 'state_hover', kind: 'hover', name: 'hover' },
+        { id: 'state_default', kind: 'default' }, // present in the IR but absent from stateIds
+      ],
+    })
+
+    const ctx = makeExecutionContext(executionIr, semanticIr, {
+      stateIds: ['state_hover'],
+    })
+
+    const plan = planner.createPlan(ctx)
+
+    assert.strictEqual(plan.requests[0].stateSnapshots.length, 1) // only the referenced state
+    assert.strictEqual(plan.requests[0].stateSnapshots[0].kind, 'hover')
+  })
+
+  it('warns on non-static timelines but falls back to static', () => {
+    const registry = new FactRequirementRegistry()
+    registry.register(1, [{ family: FACT_FAMILY.GEOMETRY }])
+
+    const planner = new ExtractionPlanner({
+      factRegistry: registry,
+    })
+
+    const executionIr = makeExecutionIr([1])
+    executionIr.clauseTimeline[0] = 0 // clause 0 -> timelineIds[0] = 'tl_keyframes'
+
+    const semanticIr = makeSemanticIr({
+      timelines: [{ id: 'tl_keyframes', mode: 'keyframes' }], // any mode other than 'static'
+    })
+
+    const ctx = makeExecutionContext(executionIr, semanticIr, {
+      timelineIds: ['tl_keyframes'],
+    })
+
+    const plan = planner.createPlan(ctx)
+
+    assert.ok(
+      plan.diagnostics.some(
+        (d) => d.code === 'IMH_EXTRACTOR_NON_STATIC_TIMELINE'
+      )
+    )
+    assert.strictEqual(plan.requests[0].timeline.mode, 'static') // request still uses the static plan
+  })
+
+  it('includes selector plans from semantic IR subjects', () => {
+    const registry = new FactRequirementRegistry()
+    registry.register(1, [{ family: FACT_FAMILY.GEOMETRY }])
+
+    const planner = new ExtractionPlanner({
+      factRegistry: registry,
+    })
+
+    const executionIr = makeExecutionIr([1])
+    const semanticIr = makeSemanticIr({
+      subjects: [
+        { id: 'sub_1', selector: '.button', kind: 'element' },
+        { id: 'sub_2', selector: '.modal', kind: 'element' },
+      ],
+    })
+    const ctx = makeExecutionContext(executionIr, semanticIr) // no subjectIds table supplied
+
+    const plan = planner.createPlan(ctx)
+
+    assert.strictEqual(plan.requests[0].subjects.length, 2)
+    assert.strictEqual(plan.requests[0].subjects[0].selector, '.button') // insertion order preserved
+    assert.strictEqual(plan.requests[0].subjects[1].selector, '.modal')
+  })
+})
@@ -0,0 +1,415 @@
+/**
+ * Extraction plan generation from execution IR.
+ *
+ * This is the main orchestrator for WBS 3: Fact Planning and Extraction Planning.
+ * It consumes execution IR + semantic IR and produces deduplicated, batched
+ * extraction requests that tell the runtime exactly which browser facts to capture.
+ */
+
+import type {
+  ExecutionIr,
+  SemanticIr,
+  Environment,
+  StateSnapshot,
+  TimelineSnapshot,
+} from 'imhotep-core'
+import type {
+  FactRequirementRegistry,
+  RequiredFacts,
+  Diagnostic,
+} from './requirements.js'
+import { analyzeRequiredFacts, DiagnosticCollector } from './requirements.js'
+import type { SelectorPlan, SelectorResolutionPlan } from './selector.js'
+import { planSelectorResolution } from './selector.js'
+import type { ExtractionRequest, ExtractionBatch } from './batching.js'
+import { deduplicateRequests, batchRequests } from './batching.js'
+
+// ---------------------------------------------------------------------------
+// Execution Context
+// ---------------------------------------------------------------------------
+
+/**
+ * Bundles the flattened execution IR with the lookup tables needed to map
+ * numeric indices back to semantic IR entities.
+ *
+ * The compiler (WBS 2) produces both IRs and these index-to-id arrays so
+ * the planner never has to guess insertion order.
+ *
+ * Each `*Ids` array is indexed by the numeric value stored in the matching
+ * per-clause column, not by the clause number itself. For example the planner
+ * reads `stateIds[executionIr.clauseState[i]]` to find the state id of
+ * clause `i`. An index with no entry is treated as "no id mapping".
+ */
+export interface ExecutionContext {
+  executionIr: ExecutionIr
+  semanticIr: SemanticIr
+  /** executionIr.clauseSubject[i] -> subject id */
+  subjectIds: string[]
+  /** executionIr.clauseFrame[i] -> frame id */
+  frameIds: string[]
+  /** executionIr.clauseState[i] -> state id */
+  stateIds: string[]
+  /** executionIr.clauseTimeline[i] -> timeline id */
+  timelineIds: string[]
+  /** executionIr.clauseEnvGuard[i] -> envGuard id */
+  envGuardIds: string[]
+  /** executionIr.clauseTolerance[i] -> tolerance id */
+  toleranceIds: string[]
+}
+
+// ---------------------------------------------------------------------------
+// Planner Options
+// ---------------------------------------------------------------------------
+
+/** Construction-time configuration for the extraction planner. */
+export interface PlannerOptions {
+  /** Registry that knows which facts each clause family needs. */
+  factRegistry: FactRequirementRegistry
+  /**
+   * Default environment when no envGuards are active.
+   * Used as the only env case when no envGuard contributes one; when omitted,
+   * the fallback request carries an empty environment object.
+   */
+  defaultEnvironment?: Partial<Environment>
+  /**
+   * Default state snapshots when no state refs are active.
+   * Only consulted when no clause yields a resolvable state snapshot.
+   */
+  defaultStateSnapshots?: Array<{ id: string; kind: string; name?: string }>
+  /**
+   * Default timeline mode when no timeline refs are active.
+   * V1 planning always produces a 'static' timeline regardless of this value.
+   */
+  defaultTimelineMode?: string
+}
+
+// ---------------------------------------------------------------------------
+// Extraction Plan Output
+// ---------------------------------------------------------------------------
+
+/**
+ * The final output of the planning phase.
+ *
+ * Contains every extraction request needed to satisfy the execution IR,
+ * grouped into environment-compatible batches, plus any diagnostics.
+ */
+export interface ExtractionPlan {
+  /** Identifier of the form `plan_<n>`, minted from a module-level counter. */
+  planId: string
+  /** Requests remaining after deduplication. */
+  requests: ExtractionRequest[]
+  /** Batches computed from the deduplicated requests. */
+  batches: ExtractionBatch[]
+  /** Every diagnostic collected while building the plan, in emission order. */
+  diagnostics: Diagnostic[]
+}
+
+// ---------------------------------------------------------------------------
+// Planner
+// ---------------------------------------------------------------------------
+
+/** Module-wide sequence behind `plan_<n>` ids; shared by every planner instance. */
+let _planCounter = 0
+
+/**
+ * Orchestrates fact analysis, selector planning, and env/state/timeline
+ * planning, then assembles the results into extraction requests.
+ */
+export class ExtractionPlanner {
+  constructor(private options: PlannerOptions) {}
+
+  /**
+   * Build an extraction plan from an execution context.
+   *
+   * Steps, in execution order:
+   *   1. Union the facts required by all clauses.
+   *   2. Plan selector resolution and forward its diagnostics.
+   *   3. Expand the environment matrix from envGuards.
+   *   4. Collect state snapshots.
+   *   5. Determine the timeline plan.
+   *   6. Build one request per env case (or a single default request).
+   *   7. Deduplicate the requests.
+   *   8. Batch the deduplicated requests.
+   *
+   * Diagnostics are gathered throughout and returned on the plan. Selector
+   * diagnostics are re-emitted under the 'resolution-error' category.
+   *
+   * @param context Execution IR, semantic IR and the index-to-id lookup tables.
+   * @returns The plan; its id advances the module-level plan counter.
+   */
+  createPlan(context: ExecutionContext): ExtractionPlan {
+    const { executionIr, semanticIr } = context
+    const collector = new DiagnosticCollector()
+
+    // 1. Facts needed by any clause.
+    const requiredFacts = analyzeRequiredFacts(
+      executionIr,
+      this.options.factRegistry,
+      collector
+    )
+
+    // 2. Subjects to resolve; surface selector problems as planner diagnostics.
+    const selectorPlan = planSelectorResolution(semanticIr)
+    selectorPlan.diagnostics.forEach(({ code, severity, message }) => {
+      collector.emit({ code, severity, message, category: 'resolution-error' })
+    })
+
+    // 3-5. Environment cases, state snapshots and timeline, in that order so
+    // diagnostics keep their emission sequence.
+    const envCases = expandEnvironmentMatrix(
+      executionIr,
+      context,
+      semanticIr,
+      this.options.defaultEnvironment,
+      collector
+    )
+    const stateSnapshots = planStateSnapshots(
+      executionIr,
+      context,
+      semanticIr,
+      this.options.defaultStateSnapshots,
+      collector
+    )
+    const timeline = planTimeline(
+      executionIr,
+      context,
+      semanticIr,
+      this.options.defaultTimelineMode,
+      collector
+    )
+
+    // 6. Every request shares the same snapshots, timeline, subjects and facts;
+    // only the id and environment differ. Each gets its own diagnostics array.
+    const toRequest = (
+      requestId: string,
+      envCaseId: string,
+      env: Partial<Environment>
+    ): ExtractionRequest => ({
+      requestId,
+      envCaseId,
+      env,
+      stateSnapshots,
+      timeline,
+      subjects: selectorPlan.subjects,
+      requiredFacts,
+      diagnostics: [],
+    })
+
+    const requests: ExtractionRequest[] = envCases.map((envCase, position) =>
+      toRequest(`req_${position + 1}`, envCase.id, envCase.env)
+    )
+
+    // With no env cases at all, fall back to a single default request.
+    if (requests.length === 0) {
+      requests.push(
+        toRequest(
+          'req_default',
+          'env_default',
+          this.options.defaultEnvironment ?? {}
+        )
+      )
+    }
+
+    // 7-8. Collapse duplicates, then group what is left.
+    const deduped = deduplicateRequests(requests)
+    const batches = batchRequests(deduped)
+
+    return {
+      planId: `plan_${++_planCounter}`,
+      requests: deduped,
+      batches,
+      diagnostics: collector.diagnostics,
+    }
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Environment Matrix Expansion
+// ---------------------------------------------------------------------------
+
+/** One environment case selected for extraction. */
+interface EnvCaseEntry {
+  /** `env_<n>` in discovery order, or 'env_default' for the fallback case. */
+  id: string
+  /** Environment parameters for this case. */
+  env: Partial<Environment>
+}
+
+/**
+ * Serialize an environment case into a key that does not depend on property
+ * insertion order, so structurally equal cases collapse to one entry.
+ */
+function canonicalEnvKey(env: unknown): string {
+  return JSON.stringify(env, (_key, value: unknown) => {
+    // Primitives and arrays serialize as-is; array order is significant.
+    if (value === null || typeof value !== 'object' || Array.isArray(value)) {
+      return value
+    }
+    // Rebuild plain objects with sorted keys. JSON.stringify applies this
+    // replacer to nested values too, so sorting happens at every depth.
+    const source = value as Record<string, unknown>
+    const sorted: Record<string, unknown> = {}
+    for (const name of Object.keys(source).sort()) {
+      sorted[name] = source[name]
+    }
+    return sorted
+  })
+}
+
+/**
+ * Collect every environment case referenced by any clause in the execution IR.
+ *
+ * For V1 this walks clauseEnvGuard indices, looks them up in the semantic IR,
+ * and unions all normalizedCases. Cases are deduplicated structurally, so two
+ * cases that differ only in property order count as one; the first is kept.
+ *
+ * If no case is collected, a single 'env_default' case is returned only when
+ * `defaultEnv` is provided. Otherwise the result is empty and the caller is
+ * expected to supply its own fallback.
+ *
+ * @param executionIr Supplies clauseCount and the clauseEnvGuard column.
+ * @param context     Supplies envGuardIds, indexed by clauseEnvGuard values.
+ * @param semanticIr  Supplies the envGuards lookup by id.
+ * @param defaultEnv  Optional environment used when no case was collected.
+ * @param diagnostics Receives a warning for an unmapped guard index and an
+ *                    error for a guard id missing from the semantic IR.
+ * @returns Distinct env cases in discovery order, with ids `env_1`, `env_2`, ...
+ */
+function expandEnvironmentMatrix(
+  executionIr: ExecutionIr,
+  context: ExecutionContext,
+  semanticIr: SemanticIr,
+  defaultEnv: Partial<Environment> | undefined,
+  diagnostics: DiagnosticCollector
+): EnvCaseEntry[] {
+  const seen = new Set<string>()
+  const cases: EnvCaseEntry[] = []
+
+  for (let i = 0; i < executionIr.clauseCount; i++) {
+    const guardIndex = executionIr.clauseEnvGuard[i]
+    const guardId = context.envGuardIds[guardIndex]
+    if (!guardId) {
+      diagnostics.emit({
+        code: 'IMH_EXTRACTOR_MISSING_ENV_GUARD',
+        severity: 'warning',
+        category: 'extraction-error',
+        message: `Clause ${i} references envGuard index ${guardIndex} which has no id mapping`,
+        clauseIndex: i,
+      })
+      continue
+    }
+
+    const guard = semanticIr.envGuards.get(guardId)
+    if (!guard) {
+      diagnostics.emit({
+        code: 'IMH_EXTRACTOR_UNRESOLVED_ENV_GUARD',
+        severity: 'error',
+        category: 'resolution-error',
+        message: `EnvGuard ${guardId} not found in semantic IR`,
+        clauseIndex: i,
+      })
+      continue
+    }
+
+    // The semantic IR envGuard may have normalizedCases (from ir.ts) or
+    // a condition string (from index.ts). Only normalizedCases contribute
+    // env cases; a guard without them adds nothing.
+    const normalizedCases = (guard as unknown as Record<string, unknown>)
+      .normalizedCases as Array<Partial<Environment>> | undefined
+
+    if (normalizedCases && normalizedCases.length > 0) {
+      for (const env of normalizedCases) {
+        // Order-insensitive key: {a, b} and {b, a} are the same case.
+        const key = canonicalEnvKey(env)
+        if (seen.has(key)) continue
+        seen.add(key)
+        cases.push({
+          id: `env_${cases.length + 1}`,
+          env,
+        })
+      }
+    }
+  }
+
+  if (cases.length === 0 && defaultEnv) {
+    cases.push({ id: 'env_default', env: defaultEnv })
+  }
+
+  return cases
+}
+
+// ---------------------------------------------------------------------------
+// State Snapshot Planning
+// ---------------------------------------------------------------------------
+
+/**
+ * Gather the distinct state snapshots that clauses refer to.
+ *
+ * Kinds treated as supported: default, hover, focus, focusVisible, active,
+ * disabled, visited. Any other kind triggers a warning, but the snapshot is
+ * still included so the solver can decide what to do with it.
+ *
+ * @param executionIr Supplies clauseCount and the clauseState column.
+ * @param context     Supplies stateIds, indexed by clauseState values.
+ * @param semanticIr  Supplies the states lookup by id.
+ * @param defaults    Snapshots used only when no clause produced one.
+ * @param diagnostics Receives warnings for unmapped indices and unsupported
+ *                    kinds, and errors for ids missing from the semantic IR.
+ * @returns Snapshots in first-reference order, one per state id.
+ */
+function planStateSnapshots(
+  executionIr: ExecutionIr,
+  context: ExecutionContext,
+  semanticIr: SemanticIr,
+  defaults: Array<{ id: string; kind: string; name?: string }> | undefined,
+  diagnostics: DiagnosticCollector
+): Array<{ id: string; kind: string; name?: string }> {
+  const knownKinds = new Set([
+    'default',
+    'hover',
+    'focus',
+    'focusVisible',
+    'active',
+    'disabled',
+    'visited',
+  ])
+  const visitedIds = new Set<string>()
+  const collected: Array<{ id: string; kind: string; name?: string }> = []
+
+  for (let clause = 0; clause < executionIr.clauseCount; clause++) {
+    const stateIndex = executionIr.clauseState[clause]
+    const stateId = context.stateIds[stateIndex]
+
+    if (!stateId) {
+      diagnostics.emit({
+        code: 'IMH_EXTRACTOR_MISSING_STATE',
+        severity: 'warning',
+        category: 'extraction-error',
+        message: `Clause ${clause} references state index ${stateIndex} which has no id mapping`,
+        clauseIndex: clause,
+      })
+      continue
+    }
+
+    // Each state id is handled once, even when it later fails to resolve.
+    if (visitedIds.has(stateId)) {
+      continue
+    }
+    visitedIds.add(stateId)
+
+    const state = semanticIr.states.get(stateId)
+    if (!state) {
+      diagnostics.emit({
+        code: 'IMH_EXTRACTOR_UNRESOLVED_STATE',
+        severity: 'error',
+        category: 'resolution-error',
+        message: `State ${stateId} not found in semantic IR`,
+        clauseIndex: clause,
+      })
+      continue
+    }
+
+    const kind = state.kind ?? 'default'
+    if (!knownKinds.has(kind)) {
+      diagnostics.emit({
+        code: 'IMH_EXTRACTOR_UNSUPPORTED_STATE',
+        severity: 'warning',
+        category: 'extraction-error',
+        message: `State kind "${kind}" is not fully supported in V1`,
+        clauseIndex: clause,
+      })
+    }
+
+    // `name` is read through a loose cast, i.e. it is not on the declared type.
+    const name = (state as unknown as Record<string, unknown>).name as
+      | string
+      | undefined
+    collected.push({ id: stateId, kind, name })
+  }
+
+  // Defaults apply only when nothing was collected from the clauses.
+  if (collected.length > 0 || !defaults) {
+    return collected
+  }
+
+  for (const fallback of defaults) {
+    if (visitedIds.has(fallback.id)) continue
+    visitedIds.add(fallback.id)
+    collected.push(fallback)
+  }
+
+  return collected
+}
+
+// ---------------------------------------------------------------------------
+// Timeline Planning
+// ---------------------------------------------------------------------------
+
+/**
+ * Determine the timeline sampling plan.
+ *
+ * V1 is static-only. If any clause references a non-static timeline, a
+ * diagnostic is emitted and the timeline mode falls back to 'static'.
+ * The same warning is emitted when no clause resolves a timeline and the
+ * configured default mode is non-static, so the fallback is never silent.
+ *
+ * Clauses whose timeline index has no id mapping, or whose id is missing from
+ * the semantic IR, are skipped without a diagnostic.
+ *
+ * @param executionIr Supplies clauseCount and the clauseTimeline column.
+ * @param context     Supplies timelineIds, indexed by clauseTimeline values.
+ * @param semanticIr  Supplies the timelines lookup by id.
+ * @param defaultMode Mode that applies when no clause resolves a timeline.
+ * @param diagnostics Receives the non-static fallback warning.
+ * @returns Always `{ mode: 'static' }` in V1.
+ */
+function planTimeline(
+  executionIr: ExecutionIr,
+  context: ExecutionContext,
+  semanticIr: SemanticIr,
+  defaultMode: string | undefined,
+  diagnostics: DiagnosticCollector
+): { mode: string; samples?: number[] } {
+  const seenModes = new Set<string>()
+
+  for (let i = 0; i < executionIr.clauseCount; i++) {
+    const timelineIndex = executionIr.clauseTimeline[i]
+    const timelineId = context.timelineIds[timelineIndex]
+    if (!timelineId) continue
+
+    const timeline = semanticIr.timelines.get(timelineId)
+    if (!timeline) continue
+
+    // A timeline without an explicit mode counts as static.
+    seenModes.add(timeline.mode ?? 'static')
+  }
+
+  // The configured default only applies when no clause resolved a timeline.
+  if (seenModes.size === 0 && defaultMode !== undefined) {
+    seenModes.add(defaultMode)
+  }
+
+  const hasNonStatic = Array.from(seenModes).some((mode) => mode !== 'static')
+
+  if (hasNonStatic) {
+    diagnostics.emit({
+      code: 'IMH_EXTRACTOR_NON_STATIC_TIMELINE',
+      severity: 'warning',
+      category: 'extraction-error',
+      message:
+        'Non-static timelines detected; V1 falls back to static extraction. ' +
+        `Modes seen: ${Array.from(seenModes).join(', ')}`,
+    })
+  }
+
+  // For V1 we always return static regardless of what was requested.
+  return { mode: 'static' }
+}
@@ -0,0 +1,203 @@
+/**
+ * Fact requirement analysis and registry.
+ *
+ * Maps clause families to the browser facts they need.
+ * Analyzes execution IR to compute the union of required facts.
+ */
+
+import type { ExecutionIr } from 'imhotep-core'
+
+// ---------------------------------------------------------------------------
+// Fact Families
+// ---------------------------------------------------------------------------
+
+/**
+ * Well-known fact families the extractor can provide.
+ *
+ * Declared `as const` so each value keeps its string-literal type.
+ * Every value matches the same-named field on `RequiredFacts`.
+ */
+export const FACT_FAMILY = {
+  GEOMETRY: 'geometry',
+  FRAGMENTS: 'fragments',
+  STYLES: 'styles',
+  TOPOLOGY: 'topology',
+  TEXT: 'text',
+  SCROLL: 'scroll',
+  CLIPPING: 'clipping',
+  PAINT: 'paint',
+  VISIBILITY: 'visibility',
+  TRANSFORMS: 'transforms',
+} as const
+
+/** Union of the string literal values in `FACT_FAMILY`. */
+export type FactFamily = (typeof FACT_FAMILY)[keyof typeof FACT_FAMILY]
+
+// ---------------------------------------------------------------------------
+// Requirement Descriptors
+// ---------------------------------------------------------------------------
+
+/** A single fact requirement for a clause family. */
+export interface FactRequirement {
+  /** Which fact family is needed. */
+  family: FactFamily
+  /**
+   * For style or topology families, which specific properties.
+   * When omitted for those families, nothing is added to the union.
+   * Not read for the other families.
+   */
+  properties?: string[]
+}
+
+/**
+ * Union of all required facts across an execution plan.
+ *
+ * Boolean fields are on/off flags for a whole family. `styles` and `topology`
+ * hold the individual property names requested for those families.
+ */
+export interface RequiredFacts {
+  geometry: boolean
+  fragments: boolean
+  /** Property names requested by styles requirements. */
+  styles: Set<string>
+  /** Property names requested by topology requirements. */
+  topology: Set<string>
+  text: boolean
+  scroll: boolean
+  clipping: boolean
+  paint: boolean
+  visibility: boolean
+  transforms: boolean
+}
+
+/**
+ * Build a RequiredFacts accumulator with nothing requested: every flag is
+ * false and both property sets are fresh, empty, and unshared.
+ */
+export function createEmptyRequiredFacts(): RequiredFacts {
+  const styles = new Set<string>()
+  const topology = new Set<string>()
+
+  const empty: RequiredFacts = {
+    geometry: false,
+    fragments: false,
+    styles,
+    topology,
+    text: false,
+    scroll: false,
+    clipping: false,
+    paint: false,
+    visibility: false,
+    transforms: false,
+  }
+  return empty
+}
+
+// ---------------------------------------------------------------------------
+// Diagnostic Shape
+// ---------------------------------------------------------------------------
+
+/** A problem or notice reported during extraction planning. */
+export interface Diagnostic {
+  /** Machine-readable identifier, e.g. 'IMH_EXTRACTOR_UNSUPPORTED_CLAUSE_TYPE'. */
+  code: string
+  severity: 'error' | 'warning' | 'info'
+  /** Human-readable description. */
+  message: string
+  /** Grouping label, e.g. 'extraction-error' or 'resolution-error'. */
+  category?: string
+  /** Index of the clause the diagnostic relates to, when there is one. */
+  clauseIndex?: number
+  clauseId?: string
+}
+
+/**
+ * Mutable collector for diagnostics emitted during planning.
+ *
+ * Diagnostics are kept in emission order and are not deduplicated.
+ */
+export class DiagnosticCollector {
+  /** Everything emitted so far; exposed directly for callers to read. */
+  diagnostics: Diagnostic[] = []
+
+  /** Append one diagnostic to the collection. */
+  emit(d: Diagnostic): void {
+    this.diagnostics.push(d)
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Fact Requirement Registry
+// ---------------------------------------------------------------------------
+
+/**
+ * Lookup table from numeric clause type id to the facts that clause needs.
+ *
+ * Intended to be filled once at system init, with one `register` call per
+ * clause family (e.g. relation.leftOf, size.width, topology.clipChain).
+ */
+export class FactRequirementRegistry {
+  private readonly requirementsByType: Map<number, FactRequirement[]> = new Map()
+
+  /**
+   * Record the facts needed by `clauseType`.
+   * A later registration for the same id replaces the earlier one.
+   */
+  register(clauseType: number, requirements: FactRequirement[]): void {
+    this.requirementsByType.set(clauseType, requirements)
+  }
+
+  /** Return the registered requirements, or undefined for an unknown type. */
+  resolve(clauseType: number): FactRequirement[] | undefined {
+    return this.requirementsByType.get(clauseType)
+  }
+
+  /** Report whether `clauseType` has a registration. */
+  has(clauseType: number): boolean {
+    return this.requirementsByType.has(clauseType)
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Fact Analysis
+// ---------------------------------------------------------------------------
+
+/**
+ * Compute the union of facts required by every clause in the execution IR.
+ *
+ * A clause whose type has no registry entry contributes nothing. It is
+ * reported with an 'extraction-error' diagnostic of severity 'error'.
+ *
+ * @param executionIr Supplies clauseCount and the clauseType column.
+ * @param registry    Maps clause type ids to their fact requirements.
+ * @param diagnostics Receives one error per unregistered clause.
+ * @returns A freshly created accumulator holding the merged requirements.
+ */
+export function analyzeRequiredFacts(
+  executionIr: ExecutionIr,
+  registry: FactRequirementRegistry,
+  diagnostics: DiagnosticCollector
+): RequiredFacts {
+  const facts = createEmptyRequiredFacts()
+
+  for (let clauseIndex = 0; clauseIndex < executionIr.clauseCount; clauseIndex += 1) {
+    const clauseType = executionIr.clauseType[clauseIndex]
+    const requirements = registry.resolve(clauseType)
+
+    if (requirements) {
+      requirements.forEach((requirement) => mergeRequirement(facts, requirement))
+      continue
+    }
+
+    // Unregistered clause type: report it and move on.
+    diagnostics.emit({
+      code: 'IMH_EXTRACTOR_UNSUPPORTED_CLAUSE_TYPE',
+      severity: 'error',
+      category: 'extraction-error',
+      message: `Clause type ${clauseType} has no registered fact requirements`,
+      clauseIndex,
+    })
+  }
+
+  return facts
+}
+
+/**
+ * Fold one FactRequirement into a RequiredFacts accumulator, in place.
+ *
+ * Styles and topology requirements add their listed properties to the
+ * matching set, and add nothing when no properties are given. Every other
+ * known family turns its flag on. Unknown families are ignored.
+ */
+function mergeRequirement(acc: RequiredFacts, req: FactRequirement): void {
+  const { family, properties } = req
+
+  // Property-bearing families: collect the named properties, if any.
+  if (family === FACT_FAMILY.STYLES || family === FACT_FAMILY.TOPOLOGY) {
+    if (!properties) return
+    const bucket = family === FACT_FAMILY.STYLES ? acc.styles : acc.topology
+    for (const property of properties) bucket.add(property)
+    return
+  }
+
+  // Flag families: the presence of the requirement is all that matters.
+  if (family === FACT_FAMILY.GEOMETRY) {
+    acc.geometry = true
+  } else if (family === FACT_FAMILY.FRAGMENTS) {
+    acc.fragments = true
+  } else if (family === FACT_FAMILY.TEXT) {
+    acc.text = true
+  } else if (family === FACT_FAMILY.SCROLL) {
+    acc.scroll = true
+  } else if (family === FACT_FAMILY.CLIPPING) {
+    acc.clipping = true
+  } else if (family === FACT_FAMILY.PAINT) {
+    acc.paint = true
+  } else if (family === FACT_FAMILY.VISIBILITY) {
+    acc.visibility = true
+  } else if (family === FACT_FAMILY.TRANSFORMS) {
+    acc.transforms = true
+  }
+  // Unknown fact families are ignored; caller may choose to warn.
+}
@@ -0,0 +1,85 @@
+/**
+ * Selector resolution planning.
+ *
+ * Turns semantic IR subject definitions into a flat list of selector plans
+ * that the extractor runtime will resolve against the DOM.
+ */
+
+import type { SemanticIr } from 'imhotep-core'
+
+/** The subset of a semantic IR subject that selector validation reads. */
+interface SubjectDef {
+  /** Subject id; used in validation messages. */
+  id: string
+  /** CSS selector text; must contain non-whitespace characters to be valid. */
+  selector: string
+  kind?: string
+  quantifier?: string
+}
+
+// ---------------------------------------------------------------------------
+// Selector Plan Types
+// ---------------------------------------------------------------------------
+
+/** A planned selector resolution for a single subject. */
+export interface SelectorPlan {
+  /** Stable subject id from semantic IR. */
+  id: string
+  /** CSS selector string to resolve. */
+  selector: string
+  /**
+   * Subject kind (element, pseudoElement, textRange, etc).
+   * The planner fills in 'element' when the subject declares no kind.
+   */
+  kind: string
+}
+
+/** Result of planning selector resolution for an entire semantic IR. */
+export interface SelectorResolutionPlan {
+  /** One entry per subject that passed validation. */
+  subjects: SelectorPlan[]
+  /** One entry per subject that failed validation. */
+  diagnostics: Array<{
+    code: string
+    severity: 'error' | 'warning'
+    message: string
+    /** Id of the offending subject, when known. */
+    subjectId?: string
+  }>
+}
+
+// ---------------------------------------------------------------------------
+// Selector Resolution Planner
+// ---------------------------------------------------------------------------
+
+/**
+ * Derive the selector resolution plan for all subjects in the semantic IR.
+ *
+ * Each subject that passes validation yields one SelectorPlan, keyed by its
+ * id in the IR and defaulting to kind 'element'. A subject that fails
+ * validation yields a diagnostic instead and is left out of the plan.
+ * Subjects sharing a selector are not merged; each keeps its own id, so
+ * the solver can still tell them apart by reference.
+ */
+export function planSelectorResolution(semanticIr: SemanticIr): SelectorResolutionPlan {
+  const plan: SelectorResolutionPlan = { subjects: [], diagnostics: [] }
+
+  for (const [subjectId, definition] of semanticIr.subjects) {
+    const problem = validateSubject(definition)
+
+    if (!problem) {
+      plan.subjects.push({
+        id: subjectId,
+        selector: definition.selector,
+        kind: definition.kind ?? 'element',
+      })
+    } else {
+      plan.diagnostics.push(problem)
+    }
+  }
+
+  return { subjects: plan.subjects, diagnostics: plan.diagnostics }
+}
+
+/**
+ * Check one subject definition.
+ *
+ * @returns undefined when the selector contains non-whitespace text,
+ *          otherwise an IMH_EXTRACTOR_EMPTY_SELECTOR error diagnostic.
+ */
+function validateSubject(subject: SubjectDef): SelectorResolutionPlan['diagnostics'][number] | undefined {
+  const hasSelectorText =
+    Boolean(subject.selector) && subject.selector.trim().length > 0
+
+  if (hasSelectorText) {
+    return undefined
+  }
+
+  return {
+    code: 'IMH_EXTRACTOR_EMPTY_SELECTOR',
+    severity: 'error',
+    message: `Subject ${subject.id} has an empty selector`,
+    subjectId: subject.id,
+  }
+}
@@ -0,0 +1,13 @@
+{
+  "extends": "../../tsconfig.json",
+  "compilerOptions": {
+    "outDir": "./dist",
+    "rootDir": "./src",
+    "paths": {},
+    "composite": false,
+    "noEmitOnError": false
+  },
+  "include": [
+    "src/**/*"
+  ]
+}