v1.1.0: pooled runtime, 959 tests, production hardening (0 squash)
This commit is contained in:
@@ -0,0 +1,343 @@
|
||||
// World materialization from extraction results
|
||||
// Orchestrates normalization into an immutable geometry world
|
||||
|
||||
import {
|
||||
GeometryWorld,
|
||||
StringTable,
|
||||
Matrices,
|
||||
buildWorldIndex,
|
||||
WorldIndex,
|
||||
} from './world.js'
|
||||
import {
|
||||
RawExtractionResult,
|
||||
normalizeEnv,
|
||||
normalizeSource,
|
||||
normalizeSubjects,
|
||||
normalizeDom,
|
||||
normalizeFrames,
|
||||
normalizeRects,
|
||||
normalizeBoxes,
|
||||
normalizeFragments,
|
||||
normalizeTransforms,
|
||||
normalizeStyles,
|
||||
normalizeText,
|
||||
normalizeTopology,
|
||||
normalizeScroll,
|
||||
normalizeClipping,
|
||||
normalizePaint,
|
||||
normalizeVisibility,
|
||||
normalizeProvenance,
|
||||
normalizeConfidence,
|
||||
} from './normalize.js'
|
||||
import { IDENTITY_4X4 } from './transforms.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Materialization
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Materialize a geometry world from raw extraction results.
 *
 * Each raw table is passed through its normalizer in a fixed order. The
 * normalizers that take the string table all share one `StringTable`
 * instance, and the frame and transform normalizers thread one matrix pool
 * (frames first, then transforms). The four domain indexes start out empty
 * and are filled by `buildDomainIndexes` before the world is returned.
 *
 * The scene and snapshot ids are resolved against the string table only after
 * every normalizer has run. If any normalizer has already stored the same
 * string, or the snapshot id equals the scene id, the existing entry is
 * reused instead of a duplicate being appended.
 *
 * The world is intended to be treated as immutable; this function does not
 * freeze it.
 *
 * @param raw - Raw extraction result to normalize.
 * @returns The assembled world with its domain indexes populated.
 */
export function materializeWorld(raw: RawExtractionResult): GeometryWorld {
  const strings: StringTable = { values: [] }

  // NOTE(review): normalizers receiving `strings` presumably append to it, so
  // the call order below is kept stable to keep string ids stable — confirm.
  const env = normalizeEnv(raw.env, strings)
  const source = normalizeSource(raw.source, strings)

  const subjects = normalizeSubjects(raw.subjects)
  const dom = normalizeDom(raw.dom, strings)
  const rects = normalizeRects(raw.rects)
  const boxes = normalizeBoxes(raw.boxes)
  const fragments = normalizeFragments(raw.fragments)

  // Frames and transforms share one matrix pool: the pool returned by the
  // frame pass is handed to the transform pass, whose result is the final pool.
  const emptyMatrices: Matrices = { values: new Float64Array(0) }
  const framesResult = normalizeFrames(raw.frames, strings, emptyMatrices)
  const frames = framesResult.frames
  const transformsResult = normalizeTransforms(raw.transforms, framesResult.matrices)
  const transforms = transformsResult.transforms
  const matrices: Matrices = transformsResult.matrices

  const styles = normalizeStyles(raw.styles, strings)
  const text = normalizeText(raw.text, strings)
  const topology = normalizeTopology(raw.topology, raw.subjects.length)
  const scroll = normalizeScroll(raw.scroll)
  const clipping = normalizeClipping(raw.clipping, strings)
  const paint = normalizePaint(raw.paint)
  const visibility = normalizeVisibility(raw.visibility)
  const provenance = normalizeProvenance(raw.provenance, strings)
  const confidence = normalizeConfidence(raw.confidence, strings)

  // Resolve the ids last so the lookup sees the final string table; looking
  // them up earlier and appending later could store the same string twice.
  const existingSceneId = strings.values.indexOf(raw.sceneId)
  const sceneId =
    existingSceneId >= 0 ? existingSceneId : strings.values.push(raw.sceneId) - 1
  const existingSnapshotId = strings.values.indexOf(raw.snapshotId)
  const snapshotId =
    existingSnapshotId >= 0 ? existingSnapshotId : strings.values.push(raw.snapshotId) - 1

  const world: GeometryWorld = {
    sceneId,
    snapshotId,
    env,
    source,
    strings,
    subjects,
    dom,
    frames,
    matrices,
    rects,
    boxes,
    fragments,
    transforms,
    styles,
    text,
    topology,
    scroll,
    clipping,
    paint,
    visibility,
    provenance,
    confidence,
    selectorIndex: new Map<string, number[]>(),
    ancestorIndex: new Map<number, number[]>(),
    lineBoxIndex: new Map<number, number[]>(),
    textRunIndex: new Map<number, number[]>(),
  }

  buildDomainIndexes(world)

  return world
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Domain index construction (V1.1)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Populate the four domain indexes of `world` in place.
 *
 * - `selectorIndex`: lower-cased tag name, and `.`-prefixed lower-cased class
 *   name, mapped to the subject ids of element subjects (kind 1) carrying them.
 * - `ancestorIndex`: subject id mapped to the subject ids of every DOM
 *   descendant that has a subject. Traversal continues through DOM nodes that
 *   have no subject of their own, so their subject-bearing descendants are
 *   still reported.
 * - `lineBoxIndex`: subject id mapped to the ids of its line fragments
 *   (fragment kind 1).
 * - `textRunIndex`: subject id mapped to the ids of its text runs.
 *
 * Every id list is sorted ascending so enumeration is deterministic.
 *
 * @param world - World whose index maps are filled.
 */
function buildDomainIndexes(world: GeometryWorld): void {
  // Append `value` to the list stored under `key`, creating it on first use.
  const append = <K>(index: Map<K, number[]>, key: K, value: number): void => {
    const bucket = index.get(key)
    if (bucket === undefined) {
      index.set(key, [value])
    } else {
      bucket.push(value)
    }
  }
  const ascending = (a: number, b: number): number => a - b
  // Lists are sorted in place, so the map entries need no re-insertion.
  const sortBuckets = <K>(index: Map<K, number[]>): void => {
    for (const bucket of index.values()) {
      bucket.sort(ascending)
    }
  }

  // --- selectorIndex: tag names and class selectors for element subjects ---
  const selectorIndex = world.selectorIndex
  const domRowByNodeId = new Map<number, number>()
  for (let row = 0; row < world.dom.nodeId.length; row++) {
    domRowByNodeId.set(world.dom.nodeId[row], row)
  }

  for (let i = 0; i < world.subjects.ids.length; i++) {
    const subjectId = world.subjects.ids[i]
    const kind = world.subjects.subjectKind[i]
    const domNodeId = world.subjects.domNodeId[i]
    // domNodeId 0 is skipped as "no DOM node"; only element subjects are indexed.
    if (kind !== 1 /* SubjectKind.Element */ || domNodeId === 0) continue

    const domRow = domRowByNodeId.get(domNodeId)
    if (domRow === undefined) continue

    // Tag selector
    const tagName = world.strings.values[world.dom.tagNameStringId[domRow]]
    if (tagName) {
      append(selectorIndex, tagName.toLowerCase(), subjectId)
    }

    // Class selectors
    const classIds = world.dom.classNameStringIds[domRow]
    for (let c = 0; c < classIds.length; c++) {
      const className = world.strings.values[classIds[c]]
      if (className) {
        append(selectorIndex, '.' + className.toLowerCase(), subjectId)
      }
    }
  }
  sortBuckets(selectorIndex)

  // --- ancestorIndex: subject id -> sorted transitive descendant subject ids ---
  const ancestorIndex = world.ancestorIndex
  const childrenByParent = new Map<number, number[]>()
  for (let row = 0; row < world.dom.nodeId.length; row++) {
    const parentId = world.dom.parentNodeId[row]
    // parentNodeId 0 is treated as "no parent".
    if (parentId !== 0) {
      append(childrenByParent, parentId, world.dom.nodeId[row])
    }
  }

  // DOM node id -> subject id (the last subject wins if several share a node).
  const subjectByDomNodeId = new Map<number, number>()
  for (let i = 0; i < world.subjects.ids.length; i++) {
    const domNodeId = world.subjects.domNodeId[i]
    if (domNodeId !== 0) {
      subjectByDomNodeId.set(domNodeId, world.subjects.ids[i])
    }
  }

  for (let row = 0; row < world.dom.nodeId.length; row++) {
    const rootDomId = world.dom.nodeId[row]
    const rootSubjectId = subjectByDomNodeId.get(rootDomId)
    if (rootSubjectId === undefined) continue

    // Iterative walk with a visited set: no call-stack growth on deep trees,
    // and guaranteed termination even if parent links form a cycle.
    const descendants: number[] = []
    const visited = new Set<number>([rootDomId])
    const pending: number[] = [rootDomId]
    while (pending.length > 0) {
      const current = pending.pop()
      if (current === undefined) break
      const childDomIds = childrenByParent.get(current)
      if (childDomIds === undefined) continue
      for (const childDomId of childDomIds) {
        if (visited.has(childDomId)) continue
        visited.add(childDomId)
        const childSubjectId = subjectByDomNodeId.get(childDomId)
        if (childSubjectId !== undefined) {
          descendants.push(childSubjectId)
        }
        // Keep descending even when the child has no subject of its own.
        pending.push(childDomId)
      }
    }
    descendants.sort(ascending)
    ancestorIndex.set(rootSubjectId, descendants)
  }

  // --- lineBoxIndex: subject id -> line box fragment ids ---
  const lineBoxIndex = world.lineBoxIndex
  for (let i = 0; i < world.fragments.fragmentId.length; i++) {
    if (world.fragments.fragmentKind[i] === 1 /* FragmentKind.Line */) {
      append(lineBoxIndex, world.fragments.subjectId[i], world.fragments.fragmentId[i])
    }
  }
  sortBuckets(lineBoxIndex)

  // --- textRunIndex: subject id -> text run ids ---
  const textRunIndex = world.textRunIndex
  for (let i = 0; i < world.text.runId.length; i++) {
    append(textRunIndex, world.text.subjectId[i], world.text.runId[i])
  }
  sortBuckets(textRunIndex)
}
|
||||
|
||||
/**
 * Convenience wrapper: materializes the world from `raw`, then builds a
 * `WorldIndex` over it.
 *
 * @param raw - Raw extraction result handed to `materializeWorld`.
 * @returns The materialized world together with the index built from it.
 */
export function materializeWorldWithIndex(raw: RawExtractionResult): {
  world: GeometryWorld
  index: WorldIndex
} {
  const world = materializeWorld(raw)
  return { world, index: buildWorldIndex(world) }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// World validation
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** A single structural problem found in a geometry world. */
export interface WorldValidationError {
  /** Machine-readable error code, e.g. `'INVALID_DOM_REF'`. */
  code: string
  /** Human-readable description of the problem. */
  message: string
  /** Name of the table containing the offending row, e.g. `'subjects'`. */
  table: string
  /** Zero-based position of the offending row within `table`. */
  index: number
}
|
||||
|
||||
/**
 * Validate a geometry world for structural integrity.
 *
 * Checks, in this order:
 * 1. every subject with a non-zero `domNodeId` points at an existing DOM node
 *    (`INVALID_DOM_REF`);
 * 2. every box points at an existing subject (`INVALID_SUBJECT_REF`) and an
 *    existing frame (`INVALID_FRAME_REF`);
 * 3. every frame with a non-zero `parentFrameId` points at an existing frame
 *    (`INVALID_PARENT_FRAME`).
 *
 * Known ids are collected into sets once, so each reference check is a
 * constant-time lookup rather than a scan of the referenced table.
 *
 * @param world - World to check; it is not modified.
 * @returns Validation errors in the order found (empty if the world is valid).
 */
export function validateWorld(world: GeometryWorld): WorldValidationError[] {
  const errors: WorldValidationError[] = []

  // Works for plain arrays and typed arrays alike.
  const toIdSet = (ids: ArrayLike<number>): Set<number> => {
    const known = new Set<number>()
    for (let i = 0; i < ids.length; i++) {
      known.add(ids[i])
    }
    return known
  }

  const domNodeIds = toIdSet(world.dom.nodeId)
  const subjectIds = toIdSet(world.subjects.ids)
  const frameIds = toIdSet(world.frames.frameId)

  // Subjects -> DOM nodes (a domNodeId of 0 means "no DOM node" and is skipped)
  for (let i = 0; i < world.subjects.ids.length; i++) {
    const domNodeId = world.subjects.domNodeId[i]
    if (domNodeId === 0) continue
    if (!domNodeIds.has(domNodeId)) {
      errors.push({
        code: 'INVALID_DOM_REF',
        message: `Subject ${world.subjects.ids[i]} references missing domNodeId ${domNodeId}`,
        table: 'subjects',
        index: i,
      })
    }
  }

  // Boxes -> subjects and frames (both references are always checked)
  for (let i = 0; i < world.boxes.boxId.length; i++) {
    const subjectId = world.boxes.subjectId[i]
    if (!subjectIds.has(subjectId)) {
      errors.push({
        code: 'INVALID_SUBJECT_REF',
        message: `Box ${world.boxes.boxId[i]} references missing subjectId ${subjectId}`,
        table: 'boxes',
        index: i,
      })
    }

    const frameId = world.boxes.frameId[i]
    if (!frameIds.has(frameId)) {
      errors.push({
        code: 'INVALID_FRAME_REF',
        message: `Box ${world.boxes.boxId[i]} references missing frameId ${frameId}`,
        table: 'boxes',
        index: i,
      })
    }
  }

  // Frames -> parent frames (a parentFrameId of 0 means "no parent" and is skipped)
  for (let i = 0; i < world.frames.frameId.length; i++) {
    const parentId = world.frames.parentFrameId[i]
    if (parentId === 0) continue
    if (!frameIds.has(parentId)) {
      errors.push({
        code: 'INVALID_PARENT_FRAME',
        message: `Frame ${world.frames.frameId[i]} references missing parentFrameId ${parentId}`,
        table: 'frames',
        index: i,
      })
    }
  }

  return errors
}
|
||||
Reference in New Issue
Block a user