Files
Imhotep/packages/imhotep-geometry/src/materialize.ts
T

344 lines
10 KiB
TypeScript
Raw Normal View History

// World materialization from extraction results
// Orchestrates normalization into an immutable geometry world
import {
GeometryWorld,
StringTable,
Matrices,
buildWorldIndex,
WorldIndex,
} from './world.js'
import {
RawExtractionResult,
normalizeEnv,
normalizeSource,
normalizeSubjects,
normalizeDom,
normalizeFrames,
normalizeRects,
normalizeBoxes,
normalizeFragments,
normalizeTransforms,
normalizeStyles,
normalizeText,
normalizeTopology,
normalizeScroll,
normalizeClipping,
normalizePaint,
normalizeVisibility,
normalizeProvenance,
normalizeConfidence,
} from './normalize.js'
import { IDENTITY_4X4 } from './transforms.js'
// ---------------------------------------------------------------------------
// Materialization
// ---------------------------------------------------------------------------
/**
 * Materialize a geometry world from raw extraction results.
 *
 * Each raw table is passed through its normalizer. The normalizers that take
 * a string table all share one `StringTable`, and the frame and transform
 * normalizers thread one `Matrices` pool through each other. The domain
 * indexes are filled in by `buildDomainIndexes` before the world is returned.
 *
 * Nothing here freezes the result; callers are expected to treat it as
 * read-only.
 *
 * @param raw - Raw extraction result to normalize.
 * @returns The assembled geometry world with its domain indexes populated.
 */
export function materializeWorld(raw: RawExtractionResult): GeometryWorld {
  const strings: StringTable = { values: [] }

  // Keep this call order stable: string ids are positions in `strings.values`,
  // and the normalizers that receive `strings` presumably append to it.
  const env = normalizeEnv(raw.env, strings)
  const source = normalizeSource(raw.source, strings)
  const subjects = normalizeSubjects(raw.subjects)
  const dom = normalizeDom(raw.dom, strings)
  const rects = normalizeRects(raw.rects)
  const boxes = normalizeBoxes(raw.boxes)
  const fragments = normalizeFragments(raw.fragments)

  // Frames and transforms share one matrix pool: each normalizer receives the
  // pool produced so far and hands back the extended one.
  const emptyMatrices: Matrices = { values: new Float64Array(0) }
  const framesResult = normalizeFrames(raw.frames, strings, emptyMatrices)
  const transformsResult = normalizeTransforms(raw.transforms, framesResult.matrices)

  const styles = normalizeStyles(raw.styles, strings)
  const text = normalizeText(raw.text, strings)
  const topology = normalizeTopology(raw.topology, raw.subjects.length)
  const scroll = normalizeScroll(raw.scroll)
  const clipping = normalizeClipping(raw.clipping, strings)
  const paint = normalizePaint(raw.paint)
  const visibility = normalizeVisibility(raw.visibility)
  const provenance = normalizeProvenance(raw.provenance, strings)
  const confidence = normalizeConfidence(raw.confidence, strings)

  // Resolve the scene/snapshot ids only now, against the fully populated
  // string table. Looking them up before normalization misses any entry added
  // later and appends a duplicate; it also appends the same string twice when
  // sceneId and snapshotId are equal.
  const existingSceneId = strings.values.indexOf(raw.sceneId)
  const sceneId = existingSceneId >= 0 ? existingSceneId : strings.values.push(raw.sceneId) - 1
  const existingSnapshotId = strings.values.indexOf(raw.snapshotId)
  const snapshotId =
    existingSnapshotId >= 0 ? existingSnapshotId : strings.values.push(raw.snapshotId) - 1

  const world: GeometryWorld = {
    sceneId,
    snapshotId,
    env,
    source,
    strings,
    subjects,
    dom,
    frames: framesResult.frames,
    matrices: transformsResult.matrices,
    rects,
    boxes,
    fragments,
    transforms: transformsResult.transforms,
    styles,
    text,
    topology,
    scroll,
    clipping,
    paint,
    visibility,
    provenance,
    confidence,
    selectorIndex: new Map<string, number[]>(),
    ancestorIndex: new Map<number, number[]>(),
    lineBoxIndex: new Map<number, number[]>(),
    textRunIndex: new Map<number, number[]>(),
  }
  buildDomainIndexes(world)
  return world
}
// ---------------------------------------------------------------------------
// Domain index construction (V1.1)
// ---------------------------------------------------------------------------
/**
 * Populate the four domain indexes stored on the world, in place:
 * `selectorIndex`, `ancestorIndex`, `lineBoxIndex` and `textRunIndex`.
 * Every id list is left sorted ascending for deterministic enumeration.
 */
function buildDomainIndexes(world: GeometryWorld): void {
  indexSelectors(world)
  indexDescendants(world)
  indexLineBoxes(world)
  indexTextRuns(world)
}

/** Append `value` to the list stored under `key`, creating the list on first use. */
function appendToIndex<K>(index: Map<K, number[]>, key: K, value: number): void {
  const list = index.get(key)
  if (list === undefined) {
    index.set(key, [value])
  } else {
    list.push(value)
  }
}

/** Sort every id list of an index ascending, in place. */
function sortIndexLists<K>(index: Map<K, number[]>): void {
  for (const ids of index.values()) {
    ids.sort((a, b) => a - b)
  }
}

/**
 * Selector index: lower-cased tag name and `.`-prefixed lower-cased class name
 * -> subject ids of the element subjects carrying them.
 */
function indexSelectors(world: GeometryWorld): void {
  const { dom, subjects, strings, selectorIndex } = world

  const domRowByNodeId = new Map<number, number>()
  for (let row = 0; row < dom.nodeId.length; row++) {
    domRowByNodeId.set(dom.nodeId[row], row)
  }

  for (let i = 0; i < subjects.ids.length; i++) {
    const domNodeId = subjects.domNodeId[i]
    // Only element subjects that point at a DOM node take part; 0 means "no node".
    if (subjects.subjectKind[i] !== 1 /* SubjectKind.Element */ || domNodeId === 0) continue
    const domRow = domRowByNodeId.get(domNodeId)
    if (domRow === undefined) continue

    // Gather this element's selectors in a set first so a repeated class token
    // (or two tokens that differ only in case, since both are lower-cased)
    // registers the subject once instead of once per occurrence.
    const selectors = new Set<string>()
    const tagName = strings.values[dom.tagNameStringId[domRow]]
    if (tagName) {
      selectors.add(tagName.toLowerCase())
    }
    const classIds = dom.classNameStringIds[domRow]
    for (let c = 0; c < classIds.length; c++) {
      const className = strings.values[classIds[c]]
      if (className) {
        selectors.add('.' + className.toLowerCase())
      }
    }

    for (const selector of selectors) {
      appendToIndex(selectorIndex, selector, subjects.ids[i])
    }
  }

  sortIndexLists(selectorIndex)
}

/**
 * Ancestor index: subject id -> sorted subject ids of all transitive DOM
 * descendants of that subject's DOM node.
 */
function indexDescendants(world: GeometryWorld): void {
  const { dom, subjects, ancestorIndex } = world

  const childrenByParent = new Map<number, number[]>()
  for (let row = 0; row < dom.nodeId.length; row++) {
    const parentId = dom.parentNodeId[row]
    if (parentId !== 0) {
      appendToIndex(childrenByParent, parentId, dom.nodeId[row])
    }
  }

  // DOM node id -> subject id. When several subjects share a node the last wins.
  const subjectByDomNodeId = new Map<number, number>()
  for (let i = 0; i < subjects.ids.length; i++) {
    const domNodeId = subjects.domNodeId[i]
    if (domNodeId !== 0) {
      subjectByDomNodeId.set(domNodeId, subjects.ids[i])
    }
  }

  for (let row = 0; row < dom.nodeId.length; row++) {
    const rootDomId = dom.nodeId[row]
    const rootSubjectId = subjectByDomNodeId.get(rootDomId)
    if (rootSubjectId === undefined) continue

    // Explicit stack instead of recursion so a very deep tree cannot overflow
    // the call stack; `visited` makes a malformed parent cycle terminate.
    const descendants: number[] = []
    const visited = new Set<number>([rootDomId])
    const pending: number[] = [...(childrenByParent.get(rootDomId) ?? [])]
    while (pending.length > 0) {
      const domId = pending.pop()
      if (domId === undefined) break
      if (visited.has(domId)) continue
      visited.add(domId)

      const subjectId = subjectByDomNodeId.get(domId)
      if (subjectId !== undefined) {
        descendants.push(subjectId)
      }
      // Keep walking even when this node has no subject of its own: subjects
      // further down are still descendants of the root.
      const children = childrenByParent.get(domId)
      if (children !== undefined) {
        for (const childId of children) {
          pending.push(childId)
        }
      }
    }

    descendants.sort((a, b) => a - b)
    ancestorIndex.set(rootSubjectId, descendants)
  }
}

/** Line-box index: subject id -> sorted ids of its line fragments. */
function indexLineBoxes(world: GeometryWorld): void {
  const { fragments, lineBoxIndex } = world
  for (let i = 0; i < fragments.fragmentId.length; i++) {
    if (fragments.fragmentKind[i] === 1 /* FragmentKind.Line */) {
      appendToIndex(lineBoxIndex, fragments.subjectId[i], fragments.fragmentId[i])
    }
  }
  sortIndexLists(lineBoxIndex)
}

/** Text-run index: subject id -> sorted ids of its text runs. */
function indexTextRuns(world: GeometryWorld): void {
  const { text, textRunIndex } = world
  for (let i = 0; i < text.runId.length; i++) {
    appendToIndex(textRunIndex, text.subjectId[i], text.runId[i])
  }
  sortIndexLists(textRunIndex)
}
/**
 * Convenience wrapper: materialize the world from `raw`, then build the
 * world index over it, and return both together.
 */
export function materializeWorldWithIndex(raw: RawExtractionResult): {
  world: GeometryWorld
  index: WorldIndex
} {
  const world = materializeWorld(raw)
  return { world, index: buildWorldIndex(world) }
}
// ---------------------------------------------------------------------------
// World validation
// ---------------------------------------------------------------------------
/** One structural problem reported by validateWorld. */
export interface WorldValidationError {
// Machine-readable error kind; validateWorld emits 'INVALID_DOM_REF',
// 'INVALID_SUBJECT_REF', 'INVALID_FRAME_REF' and 'INVALID_PARENT_FRAME'.
code: string
// Human-readable description naming the offending row and the missing id.
message: string
// Name of the world table holding the offending row ('subjects', 'boxes' or 'frames').
table: string
// Zero-based row position of the offending entry within that table.
index: number
}
/** Collect an id column into a set so membership checks are O(1). */
function collectIdSet(ids: ArrayLike<number>): Set<number> {
  const set = new Set<number>()
  for (let i = 0; i < ids.length; i++) {
    set.add(ids[i])
  }
  return set
}

/**
 * Validate a geometry world for structural integrity.
 *
 * Checks, in this order:
 *  1. every subject with a non-zero `domNodeId` points at an existing DOM node;
 *  2. every box points at an existing subject and an existing frame;
 *  3. every frame with a non-zero `parentFrameId` points at an existing frame.
 *
 * @param world - World to check; it is not modified.
 * @returns The validation errors in the order found (empty if valid).
 */
export function validateWorld(world: GeometryWorld): WorldValidationError[] {
  const errors: WorldValidationError[] = []

  // Build each lookup set once instead of rescanning the referenced table for
  // every row, which made validation quadratic in table size.
  const domNodeIds = collectIdSet(world.dom.nodeId)
  const subjectIds = collectIdSet(world.subjects.ids)
  const frameIds = collectIdSet(world.frames.frameId)

  // Subjects -> DOM nodes. A domNodeId of 0 means "no DOM node" and is skipped.
  for (let i = 0; i < world.subjects.ids.length; i++) {
    const domNodeId = world.subjects.domNodeId[i]
    if (domNodeId === 0 || domNodeIds.has(domNodeId)) continue
    errors.push({
      code: 'INVALID_DOM_REF',
      message: `Subject ${world.subjects.ids[i]} references missing domNodeId ${domNodeId}`,
      table: 'subjects',
      index: i,
    })
  }

  // Boxes -> subjects and frames. Both references are checked for every box;
  // 0 is not treated as "none" here.
  for (let i = 0; i < world.boxes.boxId.length; i++) {
    const subjectId = world.boxes.subjectId[i]
    if (!subjectIds.has(subjectId)) {
      errors.push({
        code: 'INVALID_SUBJECT_REF',
        message: `Box ${world.boxes.boxId[i]} references missing subjectId ${subjectId}`,
        table: 'boxes',
        index: i,
      })
    }
    const frameId = world.boxes.frameId[i]
    if (!frameIds.has(frameId)) {
      errors.push({
        code: 'INVALID_FRAME_REF',
        message: `Box ${world.boxes.boxId[i]} references missing frameId ${frameId}`,
        table: 'boxes',
        index: i,
      })
    }
  }

  // Frames -> parent frames. A parentFrameId of 0 marks a root frame.
  for (let i = 0; i < world.frames.frameId.length; i++) {
    const parentId = world.frames.parentFrameId[i]
    if (parentId === 0 || frameIds.has(parentId)) continue
    errors.push({
      code: 'INVALID_PARENT_FRAME',
      message: `Frame ${world.frames.frameId[i]} references missing parentFrameId ${parentId}`,
      table: 'frames',
      index: i,
    })
  }

  return errors
}