// Source: Imhotep/packages/imhotep-core/src/pipeline.ts (1370 lines, 43 KiB, TypeScript)

/**
* End-to-end integration pipeline for Imhotep.
*
* Orchestrates the full evaluation flow:
* Execution IR → Extraction Plan → Browser Facts → Geometry World
* → Topology Derivation → Clause Evaluation → Proof Generation
* → Witness Shrinking → Diagnostic Formatting
*
* All stage implementations are injected via PipelineConfig so the
* pipeline remains testable and backend-agnostic.
*/
// Local type aliases for cross-package contracts (avoids circular project references)
/**
* Compiler output consumed by the pipeline (local mirror of the cross-package type).
*
* `executionIr` is a flattened clause table: executionIrToClauseDescriptors reads
* element `i` of each `clause*` array for every `i` below `clauseCount`.
* The four `*Ids` arrays are lookup tables indexed by the matching
* clauseState / clauseTimeline / clauseEnvGuard / clauseTolerance columns.
*/
interface ExecutionContext {
executionIr: {
// Number of clauses to decode.
clauseCount: number
// Numeric clause type code; mapped to a clause kind through CODE_TO_CLAUSE_KIND.
clauseType: number[]
// Numeric subject reference of each clause.
clauseSubject: number[]
// Numeric reference target of each clause; a falsy value (0) is decoded as "absent".
clauseReference: number[]
// Numeric frame reference of each clause; a falsy value (0) is decoded as "absent".
clauseFrame: number[]
// Lower bound of each clause; Number.NEGATIVE_INFINITY is decoded as "no lower bound".
clauseArg0: number[]
// Upper bound of each clause; Number.POSITIVE_INFINITY is decoded as "no upper bound".
clauseArg1: number[]
// Bit flags: 1 = quantifier 'all', 2 = 'any', 4 = 'none', 8 = inStackingContext.
clauseFlags: number[]
// Index into stateIds.
clauseState: number[]
// Index into timelineIds.
clauseTimeline: number[]
// Index into envGuardIds.
clauseEnvGuard: number[]
// Index into toleranceIds.
clauseTolerance: number[]
}
// Lookup tables for the index columns above; an empty-string entry is decoded as "absent".
stateIds: string[]
timelineIds: string[]
envGuardIds: string[]
toleranceIds: string[]
}
/**
* Turns an execution context into an extraction plan.
* runPipeline calls createPlan once, as its first stage.
*/
interface ExtractionPlanner {
createPlan(context: ExecutionContext): ExtractionPlan
}
/**
* Planner output. runPipeline walks `batches` in order and issues every request
* of each batch; `diagnostics` are converted with plannerDiagnosticToCore and
* added to the pipeline's diagnostic list.
*/
interface ExtractionPlan {
batches: Array<{ requests: ExtractionRequest[] }>
diagnostics: Array<{ code: string; severity: string; message: string; category?: string }>
}
/**
* One planned extraction, as handed to PipelineExtractorAdapter.extract.
*/
interface ExtractionRequest {
// Copied onto every WorldEntry produced for this request and quoted in diagnostics.
requestId: string
// Environment case this request belongs to; copied onto WorldEntry and diagnostics.
envCaseId: string
// Selectors passed to the state materializer when a non-default state is captured.
subjects: Array<{ selector: string }>
// One extraction is performed per entry; when empty, runPipeline extracts once
// using a synthetic `{ id: 'default', kind: 'default' }` snapshot.
stateSnapshots: Array<{ id: string; kind: string }>
}
/**
* Result of one SolverAdapter.evaluate call. runPipeline also assembles a value
* of this shape from the accumulated results of all worlds and passes it to
* ReporterAdapter.formatDiagnostics.
*/
interface EvaluationOutput {
clauseResults: ClauseResult[]
groupResults: GroupResult[]
proofs: Proof[]
diagnostics: Array<{ code: string; severity: string; category: string; message: string; clauseId?: string }>
trace: Array<{ phase: string; at: number; clauseId?: string }>
}
/**
* Object form of one execution IR clause, as produced by
* executionIrToClauseDescriptors and consumed by SolverAdapter.evaluate.
*/
interface ClauseDescriptor {
// `clause_<index>`, where index is the clause's position in the execution IR.
clauseId: string
// Name from CODE_TO_CLAUSE_KIND, or 'unknown' for an unmapped type code.
clauseKind: string
// Always 1 in descriptors built by this file.
version: number
subjectRef?: number
referenceRef?: number
frameRef?: number
stateRef?: string
timelineRef?: string
envGuardRef?: string
toleranceRef?: string
// minGap/maxGap for 'relation.*' and 'alignment.*' kinds, min/max for 'size.*' kinds.
bounds?: Record<string, number>
// Decoded flags (quantifier, inStackingContext), or { unsupported, rawType } for unknown kinds.
options?: Record<string, unknown>
}
/**
* Failure witness handed to ReporterAdapter.shrinkWitness.
* runPipeline builds one per failing proof.
*/
interface Witness {
// Copy of the failing proof's identifying fields.
proof: {
proofId: string
clauseId: string
outcome: 'pass' | 'fail'
truth: string
failedPredicate?: unknown
witness?: unknown
}
// envCaseId of every captured world (one entry per world, duplicates possible).
envCases: string[]
// snapshotId of every captured world (one entry per world, duplicates possible).
snapshots: string[]
// Distinct subjectRef values across all clause descriptors.
subjects: number[]
// Populated from the proof's usedFacts.
facts?: unknown
}
/**
* Outcome of ReporterAdapter.shrinkWitness. runPipeline collects these and
* records both fields in a 'witness-shrunk' trace event.
*/
interface ShrinkResult {
// NOTE(review): presumably true when the witness was made smaller — confirm against the reporter.
reduced: boolean
// NOTE(review): presumably the witness dimensions that were shrunk — confirm against the reporter.
axes: string[]
}
import type {
ImhotepId,
Proof,
ClauseResult,
GroupResult,
Environment,
} from './types.js'
import type {
Diagnostic,
TraceEvent,
EvaluationTrace,
} from './diagnostics.js'
import { createDiagnostic } from './diagnostics.js'
// Adapter interfaces - implementations injected at runtime
/**
* Extractor-side request shape.
* NOTE(review): nothing in the visible part of this file references this
* interface; PipelineExtractorAdapter.extract takes the planner's
* ExtractionRequest instead — confirm whether it is still needed.
*/
interface ExtractorRequest {
requestId: string
sceneTarget: { pageRef: string; url?: string }
env: Environment
statePlan: { snapshots: string[] }
timelinePlan: { mode: string }
subjects: Array<{ id: string; selector: string }>
requiredFacts: Record<string, unknown>
}
/**
* Raw result of PipelineExtractorAdapter.extract.
*/
interface ExtractorResponse {
requestId: string
// Copied onto the WorldEntry; 'partial' additionally yields an IMH_EXTRACT_PARTIAL warning.
status: 'ok' | 'partial' | 'error'
// runPipeline normalizes only the first element.
snapshots: GeometryWorldSnapshot[]
// Converted with cdpDiagnosticToCore and surfaced in the pipeline diagnostics.
diagnostics: Diagnostic[]
extractionTrace?: unknown
}
/**
* Where and when a snapshot was captured.
*/
interface CDPSource {
url: string
browserName: string
browserVersion: string
// convertSnapshotToWorld passes this to `new Date(...)` and stores the resulting epoch milliseconds.
extractedAt: string | number
}
/**
* One DOM node row of a snapshot. convertSnapshotToWorld transposes the rows
* into one typed-array column per field.
*/
interface CDPDomNode {
nodeId: number
backendNodeId: number
parentNodeId: number
firstChildIndex: number
childCount: number
// Encoded by convertSnapshotToWorld as 'open' -> 1, 'closed' -> 2, anything else -> 0.
shadowRootKind: string
tagNameStringId: number
roleStringId: number
ariaNameStringId: number
}
/**
* Subjects table in plain-array form; convertSnapshotToWorld copies each column
* into a typed array (Uint32Array, except Uint16Array for subjectKind,
* fragmentCount and textRunCount).
*/
interface CDPSubjects {
ids: number[]
domNodeId: number[]
subjectKind: number[]
primaryBoxId: number[]
firstFragmentId: number[]
fragmentCount: number[]
firstTextRunId: number[]
textRunCount: number[]
}
/**
* Frames table in plain-array form; convertSnapshotToWorld copies each column
* into a typed array (originX/originY as Float64Array, frameKind/writingMode as
* Uint16Array, the rest as Uint32Array).
*/
interface CDPFrames {
frameId: number[]
frameKind: number[]
ownerSubjectId: number[]
parentFrameId: number[]
originX: number[]
originY: number[]
axisMatrixStart: number[]
clipRectId: number[]
scrollContainerId: number[]
writingMode: number[]
}
/**
* Flat pool of matrix values; copied into a Float64Array by convertSnapshotToWorld.
* NOTE(review): presumably addressed via axisMatrixStart / matrixStart offsets — confirm.
*/
interface CDPMatrices {
values: number[]
}
/**
* Rectangles table in plain-array form; convertSnapshotToWorld copies rectId
* into a Uint32Array and the four edges into Float64Arrays.
*/
interface CDPRects {
rectId: number[]
left: number[]
top: number[]
right: number[]
bottom: number[]
}
/**
* Boxes table in plain-array form; convertSnapshotToWorld copies the id columns
* into Uint32Arrays and every border/padding/content edge into a Float64Array.
* The converted table is exposed on the world as both `boxes` and `visualBoxes`.
*/
interface CDPBoxes {
boxId: number[]
subjectId: number[]
frameId: number[]
borderLeft: number[]
borderTop: number[]
borderRight: number[]
borderBottom: number[]
paddingLeft: number[]
paddingTop: number[]
paddingRight: number[]
paddingBottom: number[]
contentLeft: number[]
contentTop: number[]
contentRight: number[]
contentBottom: number[]
}
/**
* Fragments table in plain-array form; convertSnapshotToWorld copies each column
* into a typed array. lineIndex and flowIndex are stored as signed Int32Array.
*/
interface CDPFragments {
fragmentId: number[]
subjectId: number[]
fragmentKind: number[]
boxLeft: number[]
boxTop: number[]
boxRight: number[]
boxBottom: number[]
lineIndex: number[]
flowIndex: number[]
parentFragmentId: number[]
}
/**
* Transforms table in plain-array form; convertSnapshotToWorld copies each
* column into a typed array (matrixLength as Uint16Array, origins as Float64Array).
*/
interface CDPTransforms {
transformId: number[]
subjectId: number[]
matrixStart: number[]
matrixLength: number[]
originX: number[]
originY: number[]
}
/**
* Computed-style table in plain-array form; convertSnapshotToWorld copies each
* column into a typed array. zIndexValue is stored signed (Int32Array) and
* opacity in single precision (Float32Array).
*/
interface CDPStyles {
subjectId: number[]
display: number[]
position: number[]
zIndexKind: number[]
zIndexValue: number[]
overflowX: number[]
overflowY: number[]
opacity: number[]
visibility: number[]
containFlags: number[]
pointerEvents: number[]
lineHeight: number[]
fontFamilyStringId: number[]
fontSize: number[]
fontWeight: number[]
}
/**
* Topology table in plain-array form; convertSnapshotToWorld copies each column
* into a typed array (paintOrderBucket as Uint16Array, the rest as Uint32Array).
* When no TopologyAdapter is configured this extracted topology is used as-is.
*/
interface CDPTopology {
containingBlockOf: number[]
nearestPositionedAncestorOf: number[]
scrollContainerOf: number[]
stackingContextOf: number[]
formattingContextOf: number[]
clippingRootOf: number[]
paintOrderBucket: number[]
paintOrderIndex: number[]
}
/**
* One scroll-container row of a snapshot. convertSnapshotToWorld transposes the
* rows into typed-array columns (containerId as Uint32Array, the rest as Float64Array).
*/
interface CDPScrollEntry {
containerId: number
scrollLeft: number
scrollTop: number
scrollWidth: number
scrollHeight: number
clientWidth: number
clientHeight: number
}
/**
* One clip-node row of a snapshot. convertSnapshotToWorld transposes the rows
* into typed-array columns (clip edges as Float64Array, clipKind as Uint16Array,
* ids as Uint32Array).
*/
interface CDPClippingEntry {
clipNodeId: number
subjectId: number
clipKind: number
clipLeft: number
clipTop: number
clipRight: number
clipBottom: number
parentClipNodeId: number
}
/**
* One provenance row of a snapshot. convertSnapshotToWorld transposes the rows
* into typed-array columns (sourceKind as Uint16Array, the rest as Uint32Array).
*/
interface CDPProvenanceEntry {
factId: number
extractionStepId: number
sourceKind: number
sourceRef: number
}
/**
* One confidence row of a snapshot. convertSnapshotToWorld transposes the rows
* into typed-array columns; `confidence` is stored in single precision (Float32Array).
*/
interface CDPConfidenceEntry {
factId: number
confidence: number
reasonCode: number
}
/**
* Snapshot returned by the CDP extractor, in plain arrays and row objects.
* NormalizerAdapter.normalize (or convertSnapshotToWorld) turns it into a core
* GeometryWorld.
*/
interface GeometryWorldSnapshot {
sceneId: string
snapshotId: string
env: Environment
source: CDPSource
// Passed through to the world by reference, not copied.
strings: StringTable
subjects: CDPSubjects
dom: { nodes: CDPDomNode[] }
frames: CDPFrames
matrices: CDPMatrices
rects: CDPRects
boxes: CDPBoxes
fragments: CDPFragments
transforms: CDPTransforms
styles: CDPStyles
// Not read by convertSnapshotToWorld, which emits an empty text table.
text: unknown
topology: CDPTopology
scroll: CDPScrollEntry[]
clipping: CDPClippingEntry[]
// Not read by convertSnapshotToWorld, which emits an empty paint table.
paint: unknown
// Not read by convertSnapshotToWorld, which emits an empty visibility table.
visibility: unknown
provenance: CDPProvenanceEntry[]
confidence: CDPConfidenceEntry[]
}
import type {
GeometryWorld,
StringTable,
WorldEnvironment,
WorldSource,
SubjectsTable,
DomTable,
FramesTable,
MatricesTable,
RectsTable,
BoxesTable,
FragmentsTable,
TransformsTable,
StylesTable,
TextTable,
TopologyTable,
ScrollTable,
ClippingTable,
PaintTable,
VisibilityTable,
ProvenanceTable,
ConfidenceTable,
} from './world.js'
// ---------------------------------------------------------------------------
// Adapter Interfaces (Dependency Injection)
// ---------------------------------------------------------------------------
/**
* Adapts the CDP extractor to the pipeline.
* Receives a planner request and runtime handle, returns raw extraction response.
*
* runPipeline calls `extract` once per state snapshot of a request (once in
* total when the request lists no state snapshots), passing the same request
* object each time. A rejected promise is reported as an
* IMH_EXTRACT_PROTOCOL_ERROR diagnostic and does not abort the pipeline.
*/
export interface PipelineExtractorAdapter<TRuntime = unknown> {
extract(runtime: TRuntime, request: ExtractionRequest): Promise<ExtractorResponse>
}
/**
* Converts a CDP geometry world snapshot into the canonical core GeometryWorld.
*
* runPipeline passes the first snapshot of each extractor response. An exception
* thrown here is caught and reported as an extraction failure for that request.
*/
export interface NormalizerAdapter {
normalize(snapshot: GeometryWorldSnapshot): GeometryWorld
}
/**
* Derives additional topology facts from a geometry world.
* Optional: if omitted, topology from extraction is used as-is.
*
* The world returned by `derive` replaces the normalized world in the resulting
* WorldEntry.
*/
export interface TopologyAdapter {
derive(world: GeometryWorld): GeometryWorld
}
/**
* Evaluates clauses against a geometry world.
*
* runPipeline calls `evaluate` once per successfully extracted world with the
* full clause list, and again with a single clause from the witness-shrinking
* oracle.
*/
export interface SolverAdapter {
evaluate(world: GeometryWorld, clauses: ClauseDescriptor[]): EvaluationOutput
}
/**
* Shrinks failing witnesses and formats diagnostics.
*
* - shrinkWitness: `stillFails` is an oracle supplied by runPipeline; it re-runs
*   the failing clause on the worlds named by the candidate witness and returns
*   true when the clause still fails on at least one of them. An exception thrown
*   by shrinkWitness is downgraded to a warning diagnostic.
* - formatDiagnostics: the returned diagnostics are appended to the pipeline's
*   diagnostic list.
*/
export interface ReporterAdapter {
shrinkWitness(witness: Witness, stillFails: (w: Witness) => boolean): ShrinkResult
formatDiagnostics(evaluation: EvaluationOutput, proofs: Proof[]): Diagnostic[]
}
/**
* Materializes UI states (hover, focus, etc.) on the runtime before extraction.
* Optional: if omitted, only default state is captured.
*
* runPipeline awaits `materialize` once per subject selector of a request, and
* only for state snapshots whose kind is not 'default'.
*/
export interface StateMaterializerAdapter<TRuntime = unknown> {
materialize(runtime: TRuntime, stateKind: string, selector: string): Promise<void>
}
// ---------------------------------------------------------------------------
// Pipeline Configuration and Input
// ---------------------------------------------------------------------------
/**
* Stage implementations injected into runPipeline.
* `topologyDeriver` and `stateMaterializer` are optional; every other stage is required.
*/
export interface PipelineConfig<TRuntime = unknown> {
planner: ExtractionPlanner
extractor: PipelineExtractorAdapter<TRuntime>
normalizer: NormalizerAdapter
// When absent, the normalized world is used without further derivation.
topologyDeriver?: TopologyAdapter
solver: SolverAdapter
reporter: ReporterAdapter
// When absent, no state is materialized before extraction.
stateMaterializer?: StateMaterializerAdapter<TRuntime>
}
/**
* Per-run input to runPipeline.
*/
export interface PipelineInput<TRuntime = unknown> {
// Compiled clauses and lookup tables to plan for and evaluate.
executionContext: ExecutionContext
// Opaque handle forwarded unchanged to the extractor and state materializer.
runtime: TRuntime
}
// ---------------------------------------------------------------------------
// Pipeline Output
// ---------------------------------------------------------------------------
/**
* One captured world together with the request and state it was captured for.
*/
export interface WorldEntry {
// Normalized (and optionally topology-derived) world; an empty world when extraction threw.
world: GeometryWorld
envCaseId: string
// Id of the state snapshot the world was captured for ('default' when the request listed none).
snapshotId: string
requestId: string
// Extractor-reported status, or 'error' when extraction threw.
extractionStatus: 'ok' | 'partial' | 'error'
// Extractor diagnostics converted to core form; empty when extraction threw.
extractionDiagnostics: Diagnostic[]
}
/**
* Structured result of runPipeline.
*/
export interface PipelineResult {
plan: ExtractionPlan
worlds: WorldEntry[]
// Clause results accumulated over every world.
clauseResults: ClauseResult[]
// NOTE(review): typed as ClauseResult[] although the solver reports GroupResult[] and
// runPipeline stores those values here through an `any` cast — confirm the intended type.
groupResults: ClauseResult[]
proofs: Proof[]
// One entry per failing proof whose witness was shrunk without an exception.
shrunkWitnesses: ShrinkResult[]
diagnostics: Diagnostic[]
// Aggregate outcome; precedence is 'error', then 'fail', then 'partial', then 'pass'.
status: 'pass' | 'fail' | 'error' | 'partial'
trace: EvaluationTrace
}
// ---------------------------------------------------------------------------
// Clause Kind Mapping
// ---------------------------------------------------------------------------
/**
 * Lookup from an execution IR clause type code to the solver's clause kind name.
 * Must stay in step with the relationCodes table in imhotep-dsl/compiler.ts.
 * Codes without an entry are treated as unsupported by the IR decoder.
 */
const CODE_TO_CLAUSE_KIND: Record<number, string> = Object.fromEntries([
  [1, 'relation.leftOf'],
  [2, 'relation.rightOf'],
  [3, 'relation.above'],
  [4, 'relation.below'],
  [5, 'alignment.alignedWith'],
  [6, 'alignment.leftAlignedWith'],
  [7, 'alignment.rightAlignedWith'],
  [8, 'alignment.topAlignedWith'],
  [9, 'alignment.bottomAlignedWith'],
  [10, 'alignment.centeredWithin'],
  [11, 'relation.inside'],
  [12, 'relation.contains'],
  [13, 'relation.overlaps'],
  [14, 'relation.separatedFrom'],
  [15, 'size.aspectRatio'],
])
// ---------------------------------------------------------------------------
// Main Pipeline Orchestrator
// ---------------------------------------------------------------------------
// Module-level counter behind nextTraceId(); shared by every pipeline run in this module instance.
let _traceId = 0

/** Returns the next trace event id, `trace_1`, `trace_2`, ... in call order. */
function nextTraceId(): string {
  _traceId += 1
  return 'trace_' + String(_traceId)
}
/** Current wall-clock time in epoch milliseconds; used to timestamp trace events. */
function now(): number {
  return new Date().getTime()
}
/**
 * Run the full Imhotep evaluation pipeline.
 *
 * Stages, in execution order:
 *   1. Plan extraction requirements from the execution context.
 *   2. For each planned request and state snapshot: materialize the state,
 *      extract, normalize, and (optionally) derive topology.
 *   3. Convert the execution IR into clause descriptors.
 *   4. Evaluate every clause against every captured world, collecting clause
 *      results, group results and proofs.
 *   5. Shrink the witness of every failing proof.
 *   6. Format diagnostics through the reporter.
 *   7. Aggregate the overall status and return the structured result.
 *
 * Failure handling:
 *   - An exception during materialize/extract/normalize/derive, or an extractor
 *     response without any snapshot, is reported as an
 *     IMH_EXTRACT_PROTOCOL_ERROR diagnostic and recorded as an empty world with
 *     extraction status 'error'; the remaining requests still run.
 *   - Worlds with extraction status 'error' are not sent to the solver; every
 *     clause gets an indeterminate 'error' result for them instead.
 *   - An exception while shrinking a witness becomes a warning diagnostic.
 *   - Exceptions from the planner, the solver's main evaluation and the
 *     reporter's formatter are not caught and reject the returned promise.
 *
 * Status precedence: 'error' (any clause result or extraction in error), then
 * 'fail', then 'partial' (any partial extraction), then 'pass'.
 *
 * @param input  Execution context plus the runtime handle forwarded to the adapters.
 * @param config Injected stage implementations.
 * @returns Plan, captured worlds, accumulated results, diagnostics, status and trace.
 */
export async function runPipeline<TRuntime>(
  input: PipelineInput<TRuntime>,
  config: PipelineConfig<TRuntime>,
): Promise<PipelineResult> {
  const traceEvents: TraceEvent[] = []
  const allDiagnostics: Diagnostic[] = []

  function trace(phase: TraceEvent['phase'], refs: TraceEvent['refs'], payload: Record<string, unknown> = {}): void {
    traceEvents.push({
      traceEventId: nextTraceId(),
      phase,
      at: now(),
      refs,
      payload,
    })
  }

  // --- Step 1: Plan extraction ---
  trace('fact-requirements-computed', {})
  const plan = config.planner.createPlan(input.executionContext)
  allDiagnostics.push(...plan.diagnostics.map(d => plannerDiagnosticToCore(d)))

  // --- Step 2: Extract worlds ---
  trace('extraction-started', {})
  const worlds: WorldEntry[] = []
  for (const batch of plan.batches) {
    for (const request of batch.requests) {
      // For each state snapshot in the request, materialize and extract.
      // If no state snapshots, extract once in default state.
      const snapshots = request.stateSnapshots.length > 0
        ? request.stateSnapshots
        : [{ id: 'default', kind: 'default' }]
      for (const stateSnap of snapshots) {
        try {
          // Materialize state if adapter provided and not default
          if (config.stateMaterializer && stateSnap.kind !== 'default') {
            for (const subject of request.subjects) {
              await config.stateMaterializer.materialize(input.runtime, stateSnap.kind, subject.selector)
            }
          }

          // Extract facts
          const response = await config.extractor.extract(input.runtime, request)
          const extractionDiagnostics = response.diagnostics.map((d: any) => cdpDiagnosticToCore(d, request.requestId))

          // Only the first snapshot of a response is consumed. A response with no
          // snapshot cannot be normalized: record it as a failed extraction, but
          // keep the extractor's own diagnostics, which usually explain why.
          const snapshot = response.snapshots[0]
          if (snapshot === undefined) {
            allDiagnostics.push(...extractionDiagnostics)
            allDiagnostics.push(createDiagnostic(
              'IMH_EXTRACT_PROTOCOL_ERROR',
              'extraction-error',
              'error',
              `Extraction failed for request ${request.requestId}: extractor returned no snapshots (status: ${response.status})`,
              {
                source: 'imhotep-pipeline',
                envCaseId: request.envCaseId,
                snapshotId: stateSnap.id,
              },
            ))
            worlds.push({
              world: createEmptyWorld(request.requestId, stateSnap.id),
              envCaseId: request.envCaseId,
              snapshotId: stateSnap.id,
              requestId: request.requestId,
              extractionStatus: 'error',
              extractionDiagnostics,
            })
            continue
          }

          // Normalize to geometry world
          const world = config.normalizer.normalize(snapshot)

          // Derive topology if adapter provided
          const finalWorld = config.topologyDeriver
            ? config.topologyDeriver.derive(world)
            : world

          const entry: WorldEntry = {
            world: finalWorld,
            envCaseId: request.envCaseId,
            snapshotId: stateSnap.id,
            requestId: request.requestId,
            extractionStatus: response.status,
            extractionDiagnostics,
          }
          worlds.push(entry)

          trace('extraction-step-completed', {
            snapshotId: stateSnap.id,
          }, {
            requestId: request.requestId,
            status: response.status,
            diagnosticCount: response.diagnostics.length,
          })

          // Surface extraction diagnostics
          allDiagnostics.push(...entry.extractionDiagnostics)

          // Surface partial extraction as indeterminate diagnostic
          if (response.status === 'partial') {
            allDiagnostics.push(createDiagnostic(
              'IMH_EXTRACT_PARTIAL',
              'extraction-error',
              'warning',
              `Partial extraction for request ${request.requestId}; some facts may be missing.`,
              {
                source: 'imhotep-pipeline',
                envCaseId: request.envCaseId,
                snapshotId: stateSnap.id,
              },
            ))
          }
        } catch (err) {
          const message = err instanceof Error ? err.message : String(err)
          allDiagnostics.push(createDiagnostic(
            'IMH_EXTRACT_PROTOCOL_ERROR',
            'extraction-error',
            'error',
            `Extraction failed for request ${request.requestId}: ${message}`,
            {
              source: 'imhotep-pipeline',
              envCaseId: request.envCaseId,
              snapshotId: stateSnap.id,
            },
          ))
          // Push an empty world so downstream stages can still run
          worlds.push({
            world: createEmptyWorld(request.requestId, stateSnap.id),
            envCaseId: request.envCaseId,
            snapshotId: stateSnap.id,
            requestId: request.requestId,
            extractionStatus: 'error',
            extractionDiagnostics: [],
          })
        }
      }
    }
  }

  // --- Step 3: Convert Execution IR to Clause Descriptors ---
  const clauses = executionIrToClauseDescriptors(input.executionContext)

  // --- Step 4: Evaluate clauses against all worlds ---
  trace('world-normalized', {})
  const allClauseResults: ClauseResult[] = []
  const allGroupResults: ClauseResult[] = []
  const allProofs: Proof[] = []
  for (const entry of worlds) {
    if (entry.extractionStatus === 'error') {
      // Emit indeterminate results for all clauses when extraction failed
      for (const clause of clauses) {
        allClauseResults.push({
          clauseId: clause.clauseId,
          status: 'error',
          truth: 'indeterminate',
          metrics: {},
          witness: {
            subjectId: clause.subjectRef ?? 0,
            referenceId: clause.referenceRef,
            frameId: clause.frameRef ?? 0,
            envCaseId: entry.envCaseId,
            snapshotId: entry.snapshotId,
          },
          diagnostics: [],
        })
      }
      continue
    }

    const evaluation = config.solver.evaluate(entry.world, clauses)
    // Cast across package type boundaries (solver uses slightly different ClauseResult/Proof shapes)
    allClauseResults.push(...(evaluation.clauseResults as any))
    allGroupResults.push(...(evaluation.groupResults as any))
    allProofs.push(...(evaluation.proofs as any))

    for (const cr of evaluation.clauseResults) {
      trace('clause-evaluated', { clauseId: cr.clauseId }, {
        status: cr.status,
        truth: cr.truth,
        envCaseId: entry.envCaseId,
        snapshotId: entry.snapshotId,
      })
    }
    for (const proof of evaluation.proofs) {
      trace('proof-created', { proofId: proof.proofId, clauseId: proof.clauseId })
    }

    // Surface solver diagnostics
    allDiagnostics.push(...evaluation.diagnostics.map(d => solverDiagnosticToCore(d)))
  }

  // --- Step 5: Shrink failing witnesses ---
  const failingProofs = allProofs.filter(p => p.outcome === 'fail')
  const shrunkWitnesses: ShrinkResult[] = []
  for (const proof of failingProofs) {
    try {
      const witness: Witness = {
        proof: {
          proofId: proof.proofId,
          clauseId: proof.clauseId,
          outcome: proof.outcome as 'pass' | 'fail',
          truth: proof.truth,
          failedPredicate: proof.failedPredicate,
          witness: proof.witness,
        },
        envCases: worlds.map(w => w.envCaseId),
        snapshots: worlds.map(w => w.snapshotId),
        subjects: Array.from(new Set(clauses.map(c => c.subjectRef).filter((s): s is number => s !== undefined))),
        facts: proof.usedFacts,
      }

      // The clause under test does not change between oracle calls, so look it up once.
      const failingClause = clauses.find(c => c.clauseId === proof.clauseId)

      // Build a real re-evaluation oracle: re-run the failing clause against
      // every remaining world in the shrunk witness. Only accept the shrink
      // if the clause still fails somewhere.
      const shrinkOracle = (w: Witness): boolean => {
        if (!failingClause) {
          return false
        }
        const remainingWorlds = worlds.filter(world =>
          w.envCases.includes(world.envCaseId) && w.snapshots.includes(world.snapshotId)
        )
        for (const worldEntry of remainingWorlds) {
          // Failed extractions carry an empty placeholder world; nothing to evaluate.
          if (worldEntry.extractionStatus === 'error') {
            continue
          }
          try {
            const evaluation = config.solver.evaluate(worldEntry.world, [failingClause])
            const result = evaluation.clauseResults.find(r => r.clauseId === proof.clauseId)
            if (result && result.status === 'fail') {
              return true
            }
          } catch {
            // A solver exception means the failure cannot be confirmed: reject this shrink.
            return false
          }
        }
        return false
      }

      const shrinkResult = config.reporter.shrinkWitness(witness, shrinkOracle)
      shrunkWitnesses.push(shrinkResult)
      trace('witness-shrunk', { proofId: proof.proofId }, {
        reduced: shrinkResult.reduced,
        axes: shrinkResult.axes,
      })
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err)
      allDiagnostics.push(createDiagnostic(
        'IMH_INTERNAL_EVALUATION_EXCEPTION',
        'internal-error',
        'warning',
        `Witness shrinking failed for proof ${proof.proofId}: ${message}`,
        {
          source: 'imhotep-pipeline',
          clauseId: proof.clauseId,
        },
      ))
    }
  }

  // --- Step 6: Format diagnostics ---
  const evaluationOutput: EvaluationOutput = {
    clauseResults: allClauseResults as any,
    groupResults: allGroupResults as any,
    proofs: allProofs as any,
    diagnostics: allDiagnostics.map(d => coreDiagnosticToSolver(d)),
    trace: traceEvents.map(t => ({
      phase: t.phase,
      at: t.at,
      clauseId: t.refs.clauseId,
    })),
  }
  const formattedDiagnostics = config.reporter.formatDiagnostics(evaluationOutput, allProofs)
  allDiagnostics.push(...formattedDiagnostics)
  for (const d of formattedDiagnostics) {
    trace('diagnostic-emitted', {}, {
      code: d.code,
      severity: d.severity,
      message: d.message,
    })
  }

  // --- Step 7: Aggregate status ---
  // A failed extraction counts as an error even when there are no clauses to
  // carry an 'error' result for it; otherwise such a run would report 'pass'.
  const hasErrors =
    allClauseResults.some(r => r.status === 'error') ||
    worlds.some(w => w.extractionStatus === 'error')
  const hasFails = allClauseResults.some(r => r.status === 'fail')
  const hasPartial = worlds.some(w => w.extractionStatus === 'partial')
  let status: PipelineResult['status']
  if (hasErrors) {
    status = 'error'
  } else if (hasFails) {
    status = 'fail'
  } else if (hasPartial) {
    status = 'partial'
  } else {
    status = 'pass'
  }

  return {
    plan,
    worlds,
    clauseResults: allClauseResults,
    groupResults: allGroupResults,
    proofs: allProofs,
    shrunkWitnesses,
    diagnostics: allDiagnostics,
    status,
    trace: { events: traceEvents },
  }
}
// ---------------------------------------------------------------------------
// Execution IR → Clause Descriptor Conversion
// ---------------------------------------------------------------------------
/**
 * Convert flattened execution IR into solver ClauseDescriptor objects.
 *
 * This bridges the compiler's numeric array representation with the
 * solver's object-oriented clause descriptors. Clause `i` is read from
 * element `i` of every `clause*` array and is given the id `clause_<i>`.
 *
 * Decoding rules:
 *   - A type code missing from CODE_TO_CLAUSE_KIND yields a descriptor of kind
 *     'unknown' whose options are `{ unsupported: true, rawType }`.
 *   - arg0 / arg1 become `minGap` / `maxGap` for 'relation.*' and 'alignment.*'
 *     kinds, and `min` / `max` for 'size.*' kinds. A slot equal to the
 *     -Infinity / +Infinity sentinel, or a slot that is missing or not a
 *     number, contributes no bound.
 *   - Flag bits 1, 2 and 4 select the quantifier 'all', 'any' and 'none'
 *     (the highest set bit wins); bit 8 sets `inStackingContext`.
 *   - Falsy numeric references (0) and empty or missing id-table entries are
 *     decoded as `undefined`.
 *
 * @param context Execution context holding the IR and its id lookup tables.
 * @returns One descriptor per clause, in IR order.
 */
export function executionIrToClauseDescriptors(context: ExecutionContext): ClauseDescriptor[] {
  const { executionIr } = context
  const clauses: ClauseDescriptor[] = []

  // A slot carries a real bound only if it is present, numeric, and not the
  // "unbounded" sentinel. Arg arrays shorter than clauseCount would otherwise
  // leak `undefined` into `bounds` and make an unbounded clause look bounded.
  const isBound = (value: unknown, unbounded: number): value is number =>
    typeof value === 'number' && value !== unbounded

  for (let i = 0; i < executionIr.clauseCount; i++) {
    const clauseType = executionIr.clauseType[i]
    const clauseKind = CODE_TO_CLAUSE_KIND[clauseType]

    if (!clauseKind) {
      // Unsupported clause type — emit a diagnostic-skipped descriptor.
      // Note: subjectRef is passed through raw here (0 is kept), unlike the
      // supported path below.
      clauses.push({
        clauseId: `clause_${i}`,
        clauseKind: 'unknown',
        version: 1,
        subjectRef: executionIr.clauseSubject[i],
        referenceRef: executionIr.clauseReference[i] || undefined,
        frameRef: executionIr.clauseFrame[i] || undefined,
        options: { unsupported: true, rawType: clauseType },
      })
      continue
    }

    // Reconstruct bounds from packed arg0/arg1
    const bounds: Record<string, number> = {}
    const arg0: unknown = executionIr.clauseArg0[i]
    const arg1: unknown = executionIr.clauseArg1[i]
    if (clauseKind.startsWith('relation.') || clauseKind.startsWith('alignment.')) {
      if (isBound(arg0, Number.NEGATIVE_INFINITY)) bounds.minGap = arg0
      if (isBound(arg1, Number.POSITIVE_INFINITY)) bounds.maxGap = arg1
    } else if (clauseKind.startsWith('size.')) {
      if (isBound(arg0, Number.NEGATIVE_INFINITY)) bounds.min = arg0
      if (isBound(arg1, Number.POSITIVE_INFINITY)) bounds.max = arg1
    }

    // Decode flags into options. The quantifier bits are checked in ascending
    // order, so when several are set the highest one determines the value.
    const flags = executionIr.clauseFlags[i]
    const options: Record<string, unknown> = {}
    if (flags & 1) options.quantifier = 'all'
    if (flags & 2) options.quantifier = 'any'
    if (flags & 4) options.quantifier = 'none'
    if (flags & 8) options.inStackingContext = true

    const subjectRef = executionIr.clauseSubject[i]
    const referenceRef = executionIr.clauseReference[i]

    clauses.push({
      clauseId: `clause_${i}`,
      clauseKind,
      version: 1,
      subjectRef: subjectRef || undefined,
      referenceRef: referenceRef || undefined,
      frameRef: executionIr.clauseFrame[i] || undefined,
      stateRef: context.stateIds[executionIr.clauseState[i]] || undefined,
      timelineRef: context.timelineIds[executionIr.clauseTimeline[i]] || undefined,
      envGuardRef: context.envGuardIds[executionIr.clauseEnvGuard[i]] || undefined,
      toleranceRef: context.toleranceIds[executionIr.clauseTolerance[i]] || undefined,
      bounds: Object.keys(bounds).length > 0 ? bounds : undefined,
      options: Object.keys(options).length > 0 ? options : undefined,
    })
  }

  return clauses
}
// ---------------------------------------------------------------------------
// CDP Snapshot → Core GeometryWorld Conversion
// ---------------------------------------------------------------------------
/**
 * Default normalizer: turns a CDP GeometryWorldSnapshot (plain number arrays and
 * row objects) into a core GeometryWorld backed by typed arrays.
 *
 * - Column tables (subjects, frames, rects, ...) are copied column by column.
 * - Row lists (dom.nodes, scroll, clipping, provenance, confidence) are
 *   transposed into one typed-array column per field.
 * - `text`, `paint` and `visibility` are emitted as empty tables; the snapshot's
 *   own values for them are not read (not yet provided by the CDP extractor).
 * - `strings` is passed through by reference and `visualBoxes` aliases `boxes`.
 *
 * @param snapshot Snapshot produced by the CDP extractor.
 * @returns The equivalent GeometryWorld, with `source.engine` set to 'chromium-cdp'
 *          and `source.extractedAt` converted to epoch milliseconds.
 */
export function convertSnapshotToWorld(snapshot: GeometryWorldSnapshot): GeometryWorld {
  // Shorthand typed-array builders, one per element type used by the world tables.
  const u8 = (column: ArrayLike<number>) => new Uint8Array(column)
  const u16 = (column: ArrayLike<number>) => new Uint16Array(column)
  const u32 = (column: ArrayLike<number>) => new Uint32Array(column)
  const i32 = (column: ArrayLike<number>) => new Int32Array(column)
  const f32 = (column: ArrayLike<number>) => new Float32Array(column)
  const f64 = (column: ArrayLike<number>) => new Float64Array(column)

  // Collects one field from every row of a row-oriented list.
  const pluck = <TRow, TKey extends keyof TRow>(rows: TRow[], key: TKey): Array<TRow[TKey]> =>
    rows.map(row => row[key])

  // Numeric encoding of a DOM node's shadow root kind.
  const encodeShadowRootKind = (kind: string): number => {
    switch (kind) {
      case 'open':
        return 1
      case 'closed':
        return 2
      default:
        return 0
    }
  }

  const strings: StringTable = snapshot.strings

  const {
    viewportWidth,
    viewportHeight,
    deviceScaleFactor,
    colorScheme,
    pointer,
    hover,
    reducedMotion,
    locale,
    writingMode,
  } = snapshot.env
  const env: WorldEnvironment = {
    viewportWidth,
    viewportHeight,
    deviceScaleFactor,
    colorScheme,
    pointer,
    hover,
    reducedMotion,
    locale,
    writingMode,
  }

  const { url, browserName, browserVersion, extractedAt } = snapshot.source
  const source: WorldSource = {
    url,
    browserName,
    browserVersion,
    engine: 'chromium-cdp',
    extractedAt: new Date(extractedAt).getTime(),
  }

  // Row lists: transpose into columns.
  const nodeRows = snapshot.dom.nodes
  const dom: DomTable = {
    nodeId: u32(pluck(nodeRows, 'nodeId')),
    backendNodeId: u32(pluck(nodeRows, 'backendNodeId')),
    parentNodeId: u32(pluck(nodeRows, 'parentNodeId')),
    firstChildIndex: u32(pluck(nodeRows, 'firstChildIndex')),
    childCount: u16(pluck(nodeRows, 'childCount')),
    shadowRootKind: u8(nodeRows.map(node => encodeShadowRootKind(node.shadowRootKind))),
    tagNameStringId: u32(pluck(nodeRows, 'tagNameStringId')),
    roleStringId: u32(pluck(nodeRows, 'roleStringId')),
    ariaNameStringId: u32(pluck(nodeRows, 'ariaNameStringId')),
  }

  // Column tables: copy each plain array into its typed counterpart.
  const subjectCols = snapshot.subjects
  const subjects: SubjectsTable = {
    ids: u32(subjectCols.ids),
    domNodeId: u32(subjectCols.domNodeId),
    subjectKind: u16(subjectCols.subjectKind),
    primaryBoxId: u32(subjectCols.primaryBoxId),
    firstFragmentId: u32(subjectCols.firstFragmentId),
    fragmentCount: u16(subjectCols.fragmentCount),
    firstTextRunId: u32(subjectCols.firstTextRunId),
    textRunCount: u16(subjectCols.textRunCount),
  }

  const frameCols = snapshot.frames
  const frames: FramesTable = {
    frameId: u32(frameCols.frameId),
    frameKind: u16(frameCols.frameKind),
    ownerSubjectId: u32(frameCols.ownerSubjectId),
    parentFrameId: u32(frameCols.parentFrameId),
    originX: f64(frameCols.originX),
    originY: f64(frameCols.originY),
    axisMatrixStart: u32(frameCols.axisMatrixStart),
    clipRectId: u32(frameCols.clipRectId),
    scrollContainerId: u32(frameCols.scrollContainerId),
    writingMode: u16(frameCols.writingMode),
  }

  const matrices: MatricesTable = { values: f64(snapshot.matrices.values) }

  const rectCols = snapshot.rects
  const rects: RectsTable = {
    rectId: u32(rectCols.rectId),
    left: f64(rectCols.left),
    top: f64(rectCols.top),
    right: f64(rectCols.right),
    bottom: f64(rectCols.bottom),
  }

  const boxCols = snapshot.boxes
  const boxes: BoxesTable = {
    boxId: u32(boxCols.boxId),
    subjectId: u32(boxCols.subjectId),
    frameId: u32(boxCols.frameId),
    borderLeft: f64(boxCols.borderLeft),
    borderTop: f64(boxCols.borderTop),
    borderRight: f64(boxCols.borderRight),
    borderBottom: f64(boxCols.borderBottom),
    paddingLeft: f64(boxCols.paddingLeft),
    paddingTop: f64(boxCols.paddingTop),
    paddingRight: f64(boxCols.paddingRight),
    paddingBottom: f64(boxCols.paddingBottom),
    contentLeft: f64(boxCols.contentLeft),
    contentTop: f64(boxCols.contentTop),
    contentRight: f64(boxCols.contentRight),
    contentBottom: f64(boxCols.contentBottom),
  }

  const fragmentCols = snapshot.fragments
  const fragments: FragmentsTable = {
    fragmentId: u32(fragmentCols.fragmentId),
    subjectId: u32(fragmentCols.subjectId),
    fragmentKind: u16(fragmentCols.fragmentKind),
    boxLeft: f64(fragmentCols.boxLeft),
    boxTop: f64(fragmentCols.boxTop),
    boxRight: f64(fragmentCols.boxRight),
    boxBottom: f64(fragmentCols.boxBottom),
    lineIndex: i32(fragmentCols.lineIndex),
    flowIndex: i32(fragmentCols.flowIndex),
    parentFragmentId: u32(fragmentCols.parentFragmentId),
  }

  const transformCols = snapshot.transforms
  const transforms: TransformsTable = {
    transformId: u32(transformCols.transformId),
    subjectId: u32(transformCols.subjectId),
    matrixStart: u32(transformCols.matrixStart),
    matrixLength: u16(transformCols.matrixLength),
    originX: f64(transformCols.originX),
    originY: f64(transformCols.originY),
  }

  const styleCols = snapshot.styles
  const styles: StylesTable = {
    subjectId: u32(styleCols.subjectId),
    display: u16(styleCols.display),
    position: u16(styleCols.position),
    zIndexKind: u8(styleCols.zIndexKind),
    zIndexValue: i32(styleCols.zIndexValue),
    overflowX: u16(styleCols.overflowX),
    overflowY: u16(styleCols.overflowY),
    opacity: f32(styleCols.opacity),
    visibility: u16(styleCols.visibility),
    containFlags: u32(styleCols.containFlags),
    pointerEvents: u16(styleCols.pointerEvents),
    lineHeight: f64(styleCols.lineHeight),
    fontFamilyStringId: u32(styleCols.fontFamilyStringId),
    fontSize: f64(styleCols.fontSize),
    fontWeight: u16(styleCols.fontWeight),
  }

  const topologyCols = snapshot.topology
  const topology: TopologyTable = {
    containingBlockOf: u32(topologyCols.containingBlockOf),
    nearestPositionedAncestorOf: u32(topologyCols.nearestPositionedAncestorOf),
    scrollContainerOf: u32(topologyCols.scrollContainerOf),
    stackingContextOf: u32(topologyCols.stackingContextOf),
    formattingContextOf: u32(topologyCols.formattingContextOf),
    clippingRootOf: u32(topologyCols.clippingRootOf),
    paintOrderBucket: u16(topologyCols.paintOrderBucket),
    paintOrderIndex: u32(topologyCols.paintOrderIndex),
  }

  const scrollRows = snapshot.scroll
  const scroll: ScrollTable = {
    containerId: u32(pluck(scrollRows, 'containerId')),
    scrollLeft: f64(pluck(scrollRows, 'scrollLeft')),
    scrollTop: f64(pluck(scrollRows, 'scrollTop')),
    scrollWidth: f64(pluck(scrollRows, 'scrollWidth')),
    scrollHeight: f64(pluck(scrollRows, 'scrollHeight')),
    clientWidth: f64(pluck(scrollRows, 'clientWidth')),
    clientHeight: f64(pluck(scrollRows, 'clientHeight')),
  }

  const clipRows = snapshot.clipping
  const clipping: ClippingTable = {
    clipNodeId: u32(pluck(clipRows, 'clipNodeId')),
    subjectId: u32(pluck(clipRows, 'subjectId')),
    clipKind: u16(pluck(clipRows, 'clipKind')),
    clipLeft: f64(pluck(clipRows, 'clipLeft')),
    clipTop: f64(pluck(clipRows, 'clipTop')),
    clipRight: f64(pluck(clipRows, 'clipRight')),
    clipBottom: f64(pluck(clipRows, 'clipBottom')),
    parentClipNodeId: u32(pluck(clipRows, 'parentClipNodeId')),
  }

  // Empty tables for fields not yet provided by CDP extractor
  const text: TextTable = {
    runId: u32([]),
    subjectId: u32([]),
    contentStringId: u32([]),
    lineBoxId: u32([]),
    inkLeft: f64([]),
    inkTop: f64([]),
    inkRight: f64([]),
    inkBottom: f64([]),
    baselineY: f64([]),
    capHeight: f64([]),
    computedLineHeight: f64([]),
  }
  const paint: PaintTable = {
    paintNodeId: u32([]),
    subjectId: u32([]),
    stackingContextId: u32([]),
    bucket: u16([]),
    localPaintIndex: u32([]),
  }
  const visibility: VisibilityTable = {
    subjectId: u32([]),
    isRendered: u8([]),
    isVisible: u8([]),
    visibleArea: f64([]),
    clippedArea: f64([]),
  }

  const provenanceRows = snapshot.provenance
  const provenance: ProvenanceTable = {
    factId: u32(pluck(provenanceRows, 'factId')),
    extractionStepId: u32(pluck(provenanceRows, 'extractionStepId')),
    sourceKind: u16(pluck(provenanceRows, 'sourceKind')),
    sourceRef: u32(pluck(provenanceRows, 'sourceRef')),
  }

  const confidenceRows = snapshot.confidence
  const confidence: ConfidenceTable = {
    factId: u32(pluck(confidenceRows, 'factId')),
    confidence: f32(pluck(confidenceRows, 'confidence')),
    reasonCode: u16(pluck(confidenceRows, 'reasonCode')),
  }

  return {
    sceneId: snapshot.sceneId,
    snapshotId: snapshot.snapshotId,
    env,
    source,
    strings,
    subjects,
    dom,
    frames,
    matrices,
    rects,
    boxes,
    // The visual boxes table is the same object as `boxes`, not a copy.
    visualBoxes: boxes,
    fragments,
    transforms,
    styles,
    text,
    topology,
    scroll,
    clipping,
    paint,
    visibility,
    provenance,
    confidence,
  }
}
// ---------------------------------------------------------------------------
// Empty World Factory (for error fallback)
// ---------------------------------------------------------------------------
/**
 * Builds a GeometryWorld in which every table has zero rows.
 *
 * Serves as the error-fallback world: the scene and snapshot ids are the
 * caller's, the environment and source blocks carry fixed placeholder
 * values, and every column is a freshly allocated zero-length typed array.
 *
 * @param sceneId - Scene identifier copied verbatim into the result.
 * @param snapshotId - Snapshot identifier copied verbatim into the result.
 * @returns A fully populated but empty GeometryWorld.
 */
function createEmptyWorld(sceneId: ImhotepId, snapshotId: ImhotepId): GeometryWorld {
  // Zero-length column factories. Each call allocates a new array, so no two
  // columns in the returned world ever share the same instance.
  const u8 = () => new Uint8Array(0)
  const u16 = () => new Uint16Array(0)
  const u32 = () => new Uint32Array(0)
  const i32 = () => new Int32Array(0)
  const f32 = () => new Float32Array(0)
  const f64 = () => new Float64Array(0)

  // `boxes` and `visualBoxes` have the same column layout; each gets its own
  // independent table object built from this helper.
  const emptyBoxTable = () => ({
    boxId: u32(),
    subjectId: u32(),
    frameId: u32(),
    borderLeft: f64(),
    borderTop: f64(),
    borderRight: f64(),
    borderBottom: f64(),
    paddingLeft: f64(),
    paddingTop: f64(),
    paddingRight: f64(),
    paddingBottom: f64(),
    contentLeft: f64(),
    contentTop: f64(),
    contentRight: f64(),
    contentBottom: f64(),
  })

  return {
    sceneId,
    snapshotId,
    env: {
      viewportWidth: 0,
      viewportHeight: 0,
      deviceScaleFactor: 1,
      colorScheme: 'light',
      pointer: 'fine',
      hover: false,
      reducedMotion: 'no-preference',
      locale: 'en',
      writingMode: 'horizontal-tb',
    },
    source: {
      url: '',
      browserName: '',
      browserVersion: '',
      engine: 'chromium-cdp',
      extractedAt: 0,
    },
    strings: { values: [] },
    subjects: {
      ids: u32(),
      domNodeId: u32(),
      subjectKind: u16(),
      primaryBoxId: u32(),
      firstFragmentId: u32(),
      fragmentCount: u16(),
      firstTextRunId: u32(),
      textRunCount: u16(),
    },
    dom: {
      nodeId: u32(),
      backendNodeId: u32(),
      parentNodeId: u32(),
      firstChildIndex: u32(),
      childCount: u16(),
      shadowRootKind: u8(),
      tagNameStringId: u32(),
      roleStringId: u32(),
      ariaNameStringId: u32(),
    },
    frames: {
      frameId: u32(),
      frameKind: u16(),
      ownerSubjectId: u32(),
      parentFrameId: u32(),
      originX: f64(),
      originY: f64(),
      axisMatrixStart: u32(),
      clipRectId: u32(),
      scrollContainerId: u32(),
      writingMode: u16(),
    },
    matrices: { values: f64() },
    rects: {
      rectId: u32(),
      left: f64(),
      top: f64(),
      right: f64(),
      bottom: f64(),
    },
    boxes: emptyBoxTable(),
    visualBoxes: emptyBoxTable(),
    fragments: {
      fragmentId: u32(),
      subjectId: u32(),
      fragmentKind: u16(),
      boxLeft: f64(),
      boxTop: f64(),
      boxRight: f64(),
      boxBottom: f64(),
      lineIndex: i32(),
      flowIndex: i32(),
      parentFragmentId: u32(),
    },
    transforms: {
      transformId: u32(),
      subjectId: u32(),
      matrixStart: u32(),
      matrixLength: u16(),
      originX: f64(),
      originY: f64(),
    },
    styles: {
      subjectId: u32(),
      display: u16(),
      position: u16(),
      zIndexKind: u8(),
      zIndexValue: i32(),
      overflowX: u16(),
      overflowY: u16(),
      opacity: f32(),
      visibility: u16(),
      containFlags: u32(),
      pointerEvents: u16(),
      lineHeight: f64(),
      fontFamilyStringId: u32(),
      fontSize: f64(),
      fontWeight: u16(),
    },
    text: {
      runId: u32(),
      subjectId: u32(),
      contentStringId: u32(),
      lineBoxId: u32(),
      inkLeft: f64(),
      inkTop: f64(),
      inkRight: f64(),
      inkBottom: f64(),
      baselineY: f64(),
      capHeight: f64(),
      computedLineHeight: f64(),
    },
    topology: {
      containingBlockOf: u32(),
      nearestPositionedAncestorOf: u32(),
      scrollContainerOf: u32(),
      stackingContextOf: u32(),
      formattingContextOf: u32(),
      clippingRootOf: u32(),
      paintOrderBucket: u16(),
      paintOrderIndex: u32(),
    },
    scroll: {
      containerId: u32(),
      scrollLeft: f64(),
      scrollTop: f64(),
      scrollWidth: f64(),
      scrollHeight: f64(),
      clientWidth: f64(),
      clientHeight: f64(),
    },
    clipping: {
      clipNodeId: u32(),
      subjectId: u32(),
      clipKind: u16(),
      clipLeft: f64(),
      clipTop: f64(),
      clipRight: f64(),
      clipBottom: f64(),
      parentClipNodeId: u32(),
    },
    paint: {
      paintNodeId: u32(),
      subjectId: u32(),
      stackingContextId: u32(),
      bucket: u16(),
      localPaintIndex: u32(),
    },
    visibility: {
      subjectId: u32(),
      isRendered: u8(),
      isVisible: u8(),
      visibleArea: f64(),
      clippedArea: f64(),
    },
    provenance: {
      factId: u32(),
      extractionStepId: u32(),
      sourceKind: u16(),
      sourceRef: u32(),
    },
    confidence: {
      factId: u32(),
      confidence: f32(),
      reasonCode: u16(),
    },
  }
}
// ---------------------------------------------------------------------------
// Diagnostic Format Converters
// ---------------------------------------------------------------------------
/**
 * Converts an extraction-planner diagnostic into a core Diagnostic.
 *
 * The planner's category is used when present; otherwise the diagnostic is
 * filed under 'extraction-error'. The source is always 'imhotep-extractor'.
 *
 * @param d - Planner diagnostic with loosely typed (string) fields.
 * @returns The Diagnostic produced by createDiagnostic.
 */
function plannerDiagnosticToCore(d: { code: string; severity: string; message: string; category?: string }): Diagnostic {
  // Planner fields arrive as plain strings; narrow them to the core unions.
  const code = d.code as Diagnostic['code']
  const category = (d.category as Diagnostic['category']) ?? 'extraction-error'
  const severity = d.severity as Diagnostic['severity']

  return createDiagnostic(code, category, severity, d.message, { source: 'imhotep-extractor' })
}
/**
 * Converts a CDP extractor diagnostic into a core Diagnostic.
 *
 * The category is always 'extraction-error' and the source is always
 * 'imhotep-cdp'. The extraction request id is recorded as the diagnostic's
 * sceneId. The input's optional subjectId and selector are accepted but not
 * forwarded.
 *
 * @param d - CDP diagnostic with loosely typed (string) fields.
 * @param requestId - Extraction request id, stored in the sceneId field.
 * @returns The Diagnostic produced by createDiagnostic.
 */
function cdpDiagnosticToCore(d: { code: string; severity: string; message: string; subjectId?: string; selector?: string }, requestId: string): Diagnostic {
  // CDP fields arrive as plain strings; narrow them to the core unions.
  const code = d.code as Diagnostic['code']
  const severity = d.severity as Diagnostic['severity']

  return createDiagnostic(code, 'extraction-error', severity, d.message, {
    source: 'imhotep-cdp',
    sceneId: requestId,
  })
}
/**
 * Converts a solver diagnostic into a core Diagnostic.
 *
 * Code, category, severity and message are carried over unchanged (narrowed
 * to the core types). The source is always 'imhotep-solver', and the
 * solver's clauseId, possibly undefined, is passed through.
 *
 * @param d - Solver diagnostic with loosely typed (string) fields.
 * @returns The Diagnostic produced by createDiagnostic.
 */
function solverDiagnosticToCore(d: { code: string; severity: string; category: string; message: string; clauseId?: string }): Diagnostic {
  // Solver fields arrive as plain strings; narrow them to the core unions.
  const code = d.code as Diagnostic['code']
  const category = d.category as Diagnostic['category']
  const severity = d.severity as Diagnostic['severity']

  return createDiagnostic(code, category, severity, d.message, {
    source: 'imhotep-solver',
    clauseId: d.clauseId,
  })
}
/**
 * Projects a core Diagnostic onto the plain shape used for solver diagnostics.
 *
 * Only code, severity, category, message and clauseId are copied; any other
 * fields on the input are dropped. The clauseId key is always present in the
 * result, with value undefined when the input has none.
 *
 * @param d - Core diagnostic to convert.
 * @returns A new object holding the five copied fields.
 */
function coreDiagnosticToSolver(d: Diagnostic): { code: string; severity: 'error' | 'warning' | 'info'; category: string; message: string; clauseId?: string } {
  const { code, severity, category, message, clauseId } = d
  return { code, severity, category, message, clauseId }
}