// World materialization from extraction results
// Orchestrates normalization into an immutable geometry world

import {
  GeometryWorld,
  StringTable,
  Matrices,
  buildWorldIndex,
  WorldIndex,
} from './world.js'
import {
  RawExtractionResult,
  normalizeEnv,
  normalizeSource,
  normalizeSubjects,
  normalizeDom,
  normalizeFrames,
  normalizeRects,
  normalizeBoxes,
  normalizeFragments,
  normalizeTransforms,
  normalizeStyles,
  normalizeText,
  normalizeTopology,
  normalizeScroll,
  normalizeClipping,
  normalizePaint,
  normalizeVisibility,
  normalizeProvenance,
  normalizeConfidence,
} from './normalize.js'
import { IDENTITY_4X4 } from './transforms.js'

// ---------------------------------------------------------------------------
// Materialization
// ---------------------------------------------------------------------------

/**
 * Return the index of `value` in the string table, appending it first if it
 * is not already present.
 */
function internString(strings: StringTable, value: string): number {
  const existing = strings.values.indexOf(value)
  return existing >= 0 ? existing : strings.values.push(value) - 1
}

/**
 * Materialize a geometry world from raw extraction results.
 *
 * Runs every normalizer over its slice of `raw`, threading one shared string
 * table (and one shared matrix pool for frames and transforms) through them,
 * then assembles the world and populates its domain indexes
 * (`selectorIndex`, `ancestorIndex`, `lineBoxIndex`, `textRunIndex`).
 *
 * The returned world is meant to be treated as immutable; nothing in this
 * function freezes it.
 *
 * @param raw Raw extraction result to normalize.
 * @returns The assembled world with its domain indexes built.
 */
export function materializeWorld(raw: RawExtractionResult): GeometryWorld {
  const strings: StringTable = { values: [] }

  // The normalizers share `strings`, so their call order is kept stable.
  const env = normalizeEnv(raw.env, strings)
  const source = normalizeSource(raw.source, strings)
  const subjects = normalizeSubjects(raw.subjects)
  const dom = normalizeDom(raw.dom, strings)
  const rects = normalizeRects(raw.rects)
  const boxes = normalizeBoxes(raw.boxes)
  const fragments = normalizeFragments(raw.fragments)

  // Frames and transforms both contribute to the matrix pool; each call
  // returns the pool to use from then on.
  let matrices: Matrices = { values: new Float64Array(0) }
  const framesResult = normalizeFrames(raw.frames, strings, matrices)
  const frames = framesResult.frames
  matrices = framesResult.matrices
  const transformsResult = normalizeTransforms(raw.transforms, matrices)
  const transforms = transformsResult.transforms
  matrices = transformsResult.matrices

  const styles = normalizeStyles(raw.styles, strings)
  const text = normalizeText(raw.text, strings)
  const topology = normalizeTopology(raw.topology, raw.subjects.length)
  const scroll = normalizeScroll(raw.scroll)
  const clipping = normalizeClipping(raw.clipping, strings)
  const paint = normalizePaint(raw.paint)
  const visibility = normalizeVisibility(raw.visibility)
  const provenance = normalizeProvenance(raw.provenance, strings)
  const confidence = normalizeConfidence(raw.confidence, strings)

  // Resolve the scene/snapshot ids only after every normalizer has run, so a
  // string that any of them already added is reused instead of being
  // appended a second time.
  const sceneId = internString(strings, raw.sceneId)
  const snapshotId = internString(strings, raw.snapshotId)

  const world: GeometryWorld = {
    sceneId,
    snapshotId,
    env,
    source,
    strings,
    subjects,
    dom,
    frames,
    matrices,
    rects,
    boxes,
    fragments,
    transforms,
    styles,
    text,
    topology,
    scroll,
    clipping,
    paint,
    visibility,
    provenance,
    confidence,
    selectorIndex: new Map(),
    ancestorIndex: new Map(),
    lineBoxIndex: new Map(),
    textRunIndex: new Map(),
  }

  buildDomainIndexes(world)
  return world
}

// ---------------------------------------------------------------------------
// Domain index construction (V1.1)
// ---------------------------------------------------------------------------

/** Append `value` to the list stored under `key`, creating the list if needed. */
function appendToList<K, V>(index: Map<K, V[]>, key: K, value: V): void {
  const list = index.get(key)
  if (list === undefined) {
    index.set(key, [value])
  } else {
    list.push(value)
  }
}

/** Sort every id list of `index` ascending, in place, for deterministic enumeration. */
function sortIdLists<K>(index: Map<K, number[]>): void {
  for (const ids of index.values()) {
    ids.sort((a, b) => a - b)
  }
}

/**
 * Fill `world.selectorIndex` with lower-cased tag selectors (`div`) and class
 * selectors (`.name`) mapped to the sorted ids of element subjects.
 */
function buildSelectorIndex(world: GeometryWorld): void {
  const selectorIndex = world.selectorIndex

  // DOM node id -> row in the dom table. A repeated node id resolves to its
  // last row.
  const domRowByNodeId = new Map<number, number>()
  for (let i = 0; i < world.dom.nodeId.length; i++) {
    domRowByNodeId.set(world.dom.nodeId[i], i)
  }

  for (let i = 0; i < world.subjects.ids.length; i++) {
    const subjectId = world.subjects.ids[i]
    const kind = world.subjects.subjectKind[i]
    const domNodeId = world.subjects.domNodeId[i]
    // Only element subjects that point at a DOM node are indexed.
    if (kind !== 1 /* SubjectKind.Element */ || domNodeId === 0) continue

    const domIdx = domRowByNodeId.get(domNodeId)
    if (domIdx === undefined) continue

    // Tag selector
    const tagName = world.strings.values[world.dom.tagNameStringId[domIdx]]
    if (tagName) {
      appendToList(selectorIndex, tagName.toLowerCase(), subjectId)
    }

    // Class selectors
    // NOTE(review): class names are lower-cased, so classes differing only
    // in case share one entry (and a subject may then be listed twice) -
    // confirm this is intended.
    const classIds = world.dom.classNameStringIds[domIdx]
    for (let c = 0; c < classIds.length; c++) {
      const className = world.strings.values[classIds[c]]
      if (className) {
        appendToList(selectorIndex, '.' + className.toLowerCase(), subjectId)
      }
    }
  }

  sortIdLists(selectorIndex)
}

/**
 * Fill `world.ancestorIndex`: subject id -> sorted subject ids of its DOM
 * descendants. Every subject whose DOM node is present in the dom table gets
 * an entry, possibly an empty list.
 */
function buildAncestorIndex(world: GeometryWorld): void {
  const ancestorIndex = world.ancestorIndex

  // Parent DOM node id -> child DOM node ids. A parent id of 0 means "no parent".
  const childrenByParent = new Map<number, number[]>()
  for (let i = 0; i < world.dom.nodeId.length; i++) {
    const parentId = world.dom.parentNodeId[i]
    if (parentId !== 0) {
      appendToList(childrenByParent, parentId, world.dom.nodeId[i])
    }
  }

  // DOM node id -> subject id. When several subjects share a DOM node the
  // last one wins.
  const subjectByDomNodeId = new Map<number, number>()
  for (let i = 0; i < world.subjects.ids.length; i++) {
    const domNodeId = world.subjects.domNodeId[i]
    if (domNodeId !== 0) {
      subjectByDomNodeId.set(domNodeId, world.subjects.ids[i])
    }
  }

  // NOTE(review): the walk only descends through children that have a
  // subject, so nodes below a subject-less DOM node are not reached -
  // confirm this is the intended meaning of "descendant".
  function collectDescendants(domNodeId: number, out: number[]): void {
    const childDomIds = childrenByParent.get(domNodeId)
    if (!childDomIds) return
    for (const childDomId of childDomIds) {
      const childSubjectId = subjectByDomNodeId.get(childDomId)
      if (childSubjectId !== undefined) {
        out.push(childSubjectId)
        collectDescendants(childDomId, out)
      }
    }
  }

  for (let i = 0; i < world.dom.nodeId.length; i++) {
    const domNodeId = world.dom.nodeId[i]
    const subjectId = subjectByDomNodeId.get(domNodeId)
    if (subjectId !== undefined) {
      const descendants: number[] = []
      collectDescendants(domNodeId, descendants)
      descendants.sort((a, b) => a - b)
      ancestorIndex.set(subjectId, descendants)
    }
  }
}

/** Fill `world.lineBoxIndex`: subject id -> sorted ids of its line fragments. */
function buildLineBoxIndex(world: GeometryWorld): void {
  const lineBoxIndex = world.lineBoxIndex
  for (let i = 0; i < world.fragments.fragmentId.length; i++) {
    if (world.fragments.fragmentKind[i] === 1 /* FragmentKind.Line */) {
      appendToList(
        lineBoxIndex,
        world.fragments.subjectId[i],
        world.fragments.fragmentId[i],
      )
    }
  }
  sortIdLists(lineBoxIndex)
}

/** Fill `world.textRunIndex`: subject id -> sorted ids of its text runs. */
function buildTextRunIndex(world: GeometryWorld): void {
  const textRunIndex = world.textRunIndex
  for (let i = 0; i < world.text.runId.length; i++) {
    appendToList(textRunIndex, world.text.subjectId[i], world.text.runId[i])
  }
  sortIdLists(textRunIndex)
}

/**
 * Populate the four domain indexes of `world` in place. The index maps must
 * already exist on the world; entries are added to them, they are not
 * cleared first.
 */
function buildDomainIndexes(world: GeometryWorld): void {
  buildSelectorIndex(world)
  buildAncestorIndex(world)
  buildLineBoxIndex(world)
  buildTextRunIndex(world)
}

/**
 * Materialize a world and build its index in one call.
 *
 * @param raw Raw extraction result to normalize.
 * @returns The materialized world together with the result of
 *   `buildWorldIndex` for it.
 */
export function materializeWorldWithIndex(raw: RawExtractionResult): {
  world: GeometryWorld
  index: WorldIndex
} {
  const world = materializeWorld(raw)
  const index = buildWorldIndex(world)
  return { world, index }
}

// ---------------------------------------------------------------------------
// World validation
// ---------------------------------------------------------------------------

/** One structural problem reported by `validateWorld`. */
export interface WorldValidationError {
  /** Machine-readable error code, e.g. `INVALID_DOM_REF`. */
  code: string
  /** Human-readable description naming the offending ids. */
  message: string
  /** Name of the table that contains the offending row. */
  table: string
  /** Row index of the offending entry within `table`. */
  index: number
}

/**
 * Validate a geometry world for structural integrity.
 *
 * Checks, in this order:
 * - every subject with a non-zero `domNodeId` points at an existing DOM node
 *   (`INVALID_DOM_REF`);
 * - every box points at an existing subject (`INVALID_SUBJECT_REF`) and an
 *   existing frame (`INVALID_FRAME_REF`); a box `frameId` of 0 is not
 *   exempted;
 * - every frame with a non-zero `parentFrameId` points at an existing frame
 *   (`INVALID_PARENT_FRAME`).
 *
 * @param world World to check; it is not modified.
 * @returns A list of validation errors (empty if valid).
 */
export function validateWorld(world: GeometryWorld): WorldValidationError[] {
  const errors: WorldValidationError[] = []

  // Build each id set once so every reference check is a constant-time
  // lookup instead of a scan over the referenced table.
  const domNodeIds = new Set(world.dom.nodeId)
  const subjectIds = new Set(world.subjects.ids)
  const frameIds = new Set(world.frames.frameId)

  // Check that subjects have valid domNodeId references
  for (let i = 0; i < world.subjects.ids.length; i++) {
    const domNodeId = world.subjects.domNodeId[i]
    if (domNodeId === 0) continue // 0 means "no DOM node"
    if (!domNodeIds.has(domNodeId)) {
      errors.push({
        code: 'INVALID_DOM_REF',
        message: `Subject ${world.subjects.ids[i]} references missing domNodeId ${domNodeId}`,
        table: 'subjects',
        index: i,
      })
    }
  }

  // Check that boxes have valid subject and frame references
  for (let i = 0; i < world.boxes.boxId.length; i++) {
    const subjectId = world.boxes.subjectId[i]
    if (!subjectIds.has(subjectId)) {
      errors.push({
        code: 'INVALID_SUBJECT_REF',
        message: `Box ${world.boxes.boxId[i]} references missing subjectId ${subjectId}`,
        table: 'boxes',
        index: i,
      })
    }

    const frameId = world.boxes.frameId[i]
    if (!frameIds.has(frameId)) {
      errors.push({
        code: 'INVALID_FRAME_REF',
        message: `Box ${world.boxes.boxId[i]} references missing frameId ${frameId}`,
        table: 'boxes',
        index: i,
      })
    }
  }

  // Check that frames have valid parent references
  for (let i = 0; i < world.frames.frameId.length; i++) {
    const parentId = world.frames.parentFrameId[i]
    if (parentId === 0) continue // 0 means "no parent frame"
    if (!frameIds.has(parentId)) {
      errors.push({
        code: 'INVALID_PARENT_FRAME',
        message: `Frame ${world.frames.frameId[i]} references missing parentFrameId ${parentId}`,
        table: 'frames',
        index: i,
      })
    }
  }

  return errors
}