// Raw fact normalization for Imhotep geometry world
// Converts extraction results into flat, indexed, immutable geometry tables

import {
  StringTable,
  internString,
  Env,
  Source,
  Subjects,
  SubjectKind,
  Dom,
  Frames,
  FrameKind,
  Matrices,
  Rects,
  Boxes,
  Fragments,
  FragmentKind,
  Transforms,
  Styles,
  DisplayValue,
  PositionValue,
  OverflowValue,
  VisibilityValue,
  TextRuns,
  Topology,
  Scroll,
  Clipping,
  ClipKind,
  Paint,
  Visibility,
  Provenance,
  Confidence,
  GeometryWorld,
} from './world.js'
import { MATRIX_4X4_STRIDE } from './world.js'
import { appendMatrix, IDENTITY_4X4 } from './transforms.js'

// ---------------------------------------------------------------------------
// Raw extraction types (input to normalization)
// ---------------------------------------------------------------------------

/** Environment facts as delivered by the extractor; string fields are interned during normalization. */
export interface RawEnv {
  viewportWidth: number
  viewportHeight: number
  deviceScaleFactor: number
  colorScheme: string
  pointer: string
  hover: string
  reducedMotion: boolean
  locale: string
  writingMode: string
}

/** Description of where and when the extraction was taken. */
export interface RawSource {
  url: string
  browserName: string
  browserVersion: string
  engine: string
  extractedAt: number
}

/** One subject row; `kind` is a key of the subject-kind lookup table. */
export interface RawSubject {
  id: number
  domNodeId: number
  kind: string
  primaryBoxId: number
  firstFragmentId: number
  fragmentCount: number
  firstTextRunId: number
  textRunCount: number
}

/** One DOM node row; `shadowRootKind` is compared against 'open' and 'closed'. */
export interface RawDomNode {
  nodeId: number
  backendNodeId: number
  parentNodeId: number
  firstChildIndex: number
  childCount: number
  shadowRootKind: string
  tagName: string
  classNames: string[]
  role: string
  ariaName: string
}

/** One coordinate frame row; `kind` is a key of the frame-kind lookup table. */
export interface RawFrame {
  id: number
  kind: string
  ownerSubjectId: number
  parentFrameId: number
  originX: number
  originY: number
  matrix?: number[] // 16 floats, column-major 4x4
  clipRectId: number
  scrollContainerId: number
  writingMode: string
}

/** One rectangle row, stored as four edges. */
export interface RawRect {
  id: number
  left: number
  top: number
  right: number
  bottom: number
}

/** One box row carrying border, padding and content values per side. */
export interface RawBox {
  id: number
  subjectId: number
  frameId: number
  borderLeft: number
  borderTop: number
  borderRight: number
  borderBottom: number
  paddingLeft: number
  paddingTop: number
  paddingRight: number
  paddingBottom: number
  contentLeft: number
  contentTop: number
  contentRight: number
  contentBottom: number
}

/** One fragment row; `kind` is a key of the fragment-kind lookup table. */
export interface RawFragment {
  id: number
  subjectId: number
  kind: string
  boxLeft: number
  boxTop: number
  boxRight: number
  boxBottom: number
  lineIndex: number
  flowIndex: number
  parentFragmentId: number
}

/** One transform row holding a list of matrices and a transform origin. */
export interface RawTransform {
  id: number
  subjectId: number
  matrices: number[][] // array of 16-float arrays
  originX: number
  originY: number
}

/** One style row; keyword fields are mapped through the lookup tables below. */
export interface RawStyle {
  subjectId: number
  display: string
  position: string
  zIndex: string | number
  overflowX: string
  overflowY: string
  opacity: number
  visibility: string
  contain: string
  pointerEvents: string
  lineHeight: number
  fontFamily: string
  fontSize: number
  fontWeight: number
}

/** One text run row; `content` is interned during normalization. */
export interface RawTextRun {
  id: number
  subjectId: number
  content: string
  lineBoxId: number
  inkLeft: number
  inkTop: number
  inkRight: number
  inkBottom: number
  baselineY: number
  capHeight: number
  computedLineHeight: number
}

/** Topology relations as parallel arrays; an empty array means "not provided". */
export interface RawTopology {
  containingBlockOf: number[]
  nearestPositionedAncestorOf: number[]
  scrollContainerOf: number[]
  stackingContextOf: number[]
  formattingContextOf: number[]
  clippingRootOf: number[]
  paintOrderBucket: number[]
  paintOrderIndex: number[]
}

/** One scroll container row. */
export interface RawScroll {
  containerId: number
  scrollLeft: number
  scrollTop: number
  scrollWidth: number
  scrollHeight: number
  clientWidth: number
  clientHeight: number
}

/** One clip node row; `kind` is a key of the clip-kind lookup table. */
export interface RawClip {
  nodeId: number
  subjectId: number
  kind: string
  left: number
  top: number
  right: number
  bottom: number
  parentNodeId: number
}

/** One paint node row. */
export interface RawPaint {
  nodeId: number
  subjectId: number
  stackingContextId: number
  bucket: number
  localPaintIndex: number
}

/** One visibility row; the booleans are stored as 0/1 after normalization. */
export interface RawVisibility {
  subjectId: number
  isRendered: boolean
  isVisible: boolean
  visibleArea: number
  clippedArea: number
}

/** One provenance row; `sourceKind` is interned during normalization. */
export interface RawProvenance {
  factId: number
  extractionStepId: number
  sourceKind: string
  sourceRef: number
}

/** One confidence row; `reasonCode` is interned during normalization. */
export interface RawConfidence {
  factId: number
  confidence: number
  reasonCode: string
}

/** Complete extractor output: one entry per table consumed by the normalizers in this module. */
export interface RawExtractionResult {
  sceneId: string
  snapshotId: string
  env: RawEnv
  source: RawSource
  subjects: RawSubject[]
  dom: RawDomNode[]
  frames: RawFrame[]
  rects: RawRect[]
  boxes: RawBox[]
  fragments: RawFragment[]
  transforms: RawTransform[]
  styles: RawStyle[]
  text: RawTextRun[]
  topology: RawTopology
  scroll: RawScroll[]
  clipping: RawClip[]
  paint: RawPaint[]
  visibility: RawVisibility[]
  provenance: RawProvenance[]
  confidence: RawConfidence[]
}

// ---------------------------------------------------------------------------
// Enum mappings
// ---------------------------------------------------------------------------

// All tables map a raw keyword to the numeric code written into a typed array.

const SUBJECT_KIND_MAP: Readonly<Record<string, number>> = {
  element: SubjectKind.Element,
  pseudoElement: SubjectKind.PseudoElement,
  textRange: SubjectKind.TextRange,
  lineBox: SubjectKind.LineBox,
  fragmentBox: SubjectKind.FragmentBox,
  landmark: SubjectKind.Landmark,
  gridArea: SubjectKind.GridArea,
  scrollContainer: SubjectKind.ScrollContainer,
  syntheticFrame: SubjectKind.SyntheticFrame,
}

const FRAME_KIND_MAP: Readonly<Record<string, number>> = {
  viewport: FrameKind.Viewport,
  containingBlock: FrameKind.ContainingBlock,
  nearestPositionedAncestor: FrameKind.NearestPositionedAncestor,
  scrollContainer: FrameKind.ScrollContainer,
  synthetic: FrameKind.Synthetic,
}

const FRAGMENT_KIND_MAP: Readonly<Record<string, number>> = {
  line: FragmentKind.Line,
  column: FragmentKind.Column,
  region: FragmentKind.Region,
  page: FragmentKind.Page,
}

const DISPLAY_MAP: Readonly<Record<string, number>> = {
  block: DisplayValue.Block,
  inline: DisplayValue.Inline,
  'inline-block': DisplayValue.InlineBlock,
  flex: DisplayValue.Flex,
  grid: DisplayValue.Grid,
  none: DisplayValue.None,
  contents: DisplayValue.Contents,
}

const POSITION_MAP: Readonly<Record<string, number>> = {
  static: PositionValue.Static,
  relative: PositionValue.Relative,
  absolute: PositionValue.Absolute,
  fixed: PositionValue.Fixed,
  sticky: PositionValue.Sticky,
}

const OVERFLOW_MAP: Readonly<Record<string, number>> = {
  visible: OverflowValue.Visible,
  hidden: OverflowValue.Hidden,
  scroll: OverflowValue.Scroll,
  auto: OverflowValue.Auto,
  clip: OverflowValue.Clip,
}

const VISIBILITY_MAP: Readonly<Record<string, number>> = {
  visible: VisibilityValue.Visible,
  hidden: VisibilityValue.Hidden,
  collapse: VisibilityValue.Collapse,
}

// Bit flags; `strict` (15) and `content` (7) are unions of the single-bit entries.
const CONTAIN_MAP: Readonly<Record<string, number>> = {
  none: 0,
  layout: 1,
  paint: 2,
  size: 4,
  style: 8,
  strict: 15,
  content: 7,
  'inline-size': 16,
}

const CLIP_KIND_MAP: Readonly<Record<string, number>> = {
  overflow: ClipKind.Overflow,
  clipPath: ClipKind.ClipPath,
  mask: ClipKind.Mask,
  svgClip: ClipKind.SvgClip,
}

// Separator used to split the `contain` keyword list; hoisted out of the per-row loop.
const WHITESPACE_RUN = /\s+/

// Matches an optionally signed decimal integer, e.g. a z-index delivered as a string.
const INTEGER_TEXT = /^[+-]?\d+$/

/**
 * Looks up `key` in a keyword table and returns `fallback` when it is absent.
 * Only own properties count, so keys such as "constructor" or "toString"
 * resolve to the fallback instead of an inherited Object.prototype member.
 */
function lookupCode(table: Readonly<Record<string, number>>, key: string, fallback: number): number {
  if (!Object.prototype.hasOwnProperty.call(table, key)) return fallback
  return table[key] ?? fallback
}

/** Copies `values` into a Uint32Array, or returns a zero-filled array of `fallbackLength` when `values` is empty. */
function copyOrZeroU32(values: number[], fallbackLength: number): Uint32Array {
  return values.length > 0 ? new Uint32Array(values) : new Uint32Array(fallbackLength)
}

/** Copies `values` into a Uint16Array, or returns a zero-filled array of `fallbackLength` when `values` is empty. */
function copyOrZeroU16(values: number[], fallbackLength: number): Uint16Array {
  return values.length > 0 ? new Uint16Array(values) : new Uint16Array(fallbackLength)
}

// ---------------------------------------------------------------------------
// Normalization functions
// ---------------------------------------------------------------------------

/**
 * Builds the normalized environment record.
 * Numeric and boolean fields are copied; string fields are replaced by their
 * interned id in `strings`.
 */
export function normalizeEnv(raw: RawEnv, strings: StringTable): Env {
  return {
    viewportWidth: raw.viewportWidth,
    viewportHeight: raw.viewportHeight,
    deviceScaleFactor: raw.deviceScaleFactor,
    colorScheme: internString(strings, raw.colorScheme),
    pointer: internString(strings, raw.pointer),
    hover: internString(strings, raw.hover),
    reducedMotion: raw.reducedMotion,
    locale: internString(strings, raw.locale),
    writingMode: internString(strings, raw.writingMode),
  }
}

/**
 * Builds the normalized source record: every string field is interned in
 * `strings`, `extractedAt` is copied unchanged.
 */
export function normalizeSource(raw: RawSource, strings: StringTable): Source {
  return {
    url: internString(strings, raw.url),
    browserName: internString(strings, raw.browserName),
    browserVersion: internString(strings, raw.browserVersion),
    engine: internString(strings, raw.engine),
    extractedAt: raw.extractedAt,
  }
}

/**
 * Converts subject rows into parallel typed arrays (one slot per input row,
 * in input order). Unknown `kind` values are stored as SubjectKind.Element.
 */
export function normalizeSubjects(raw: RawSubject[]): Subjects {
  const n = raw.length
  const ids = new Uint32Array(n)
  const domNodeId = new Uint32Array(n)
  const subjectKind = new Uint16Array(n)
  const primaryBoxId = new Uint32Array(n)
  const firstFragmentId = new Uint32Array(n)
  const fragmentCount = new Uint16Array(n)
  const firstTextRunId = new Uint32Array(n)
  const textRunCount = new Uint16Array(n)

  for (const [i, s] of raw.entries()) {
    ids[i] = s.id
    domNodeId[i] = s.domNodeId
    subjectKind[i] = lookupCode(SUBJECT_KIND_MAP, s.kind, SubjectKind.Element)
    primaryBoxId[i] = s.primaryBoxId
    firstFragmentId[i] = s.firstFragmentId
    fragmentCount[i] = s.fragmentCount
    firstTextRunId[i] = s.firstTextRunId
    textRunCount[i] = s.textRunCount
  }

  return {
    ids,
    domNodeId,
    subjectKind,
    primaryBoxId,
    firstFragmentId,
    fragmentCount,
    firstTextRunId,
    textRunCount,
  }
}

/**
 * Converts DOM node rows into parallel typed arrays.
 * `shadowRootKind` is encoded as 1 for 'open', 2 for 'closed' and 0 for
 * anything else; tag name, class names, role and aria name are interned.
 */
export function normalizeDom(raw: RawDomNode[], strings: StringTable): Dom {
  const n = raw.length
  const nodeId = new Uint32Array(n)
  const backendNodeId = new Uint32Array(n)
  const parentNodeId = new Uint32Array(n)
  const firstChildIndex = new Uint32Array(n)
  const childCount = new Uint16Array(n)
  const shadowRootKind = new Uint8Array(n)
  const tagNameStringId = new Uint32Array(n)
  const classNameStringIds: Uint32Array[] = new Array(n)
  const roleStringId = new Uint32Array(n)
  const ariaNameStringId = new Uint32Array(n)

  for (const [i, d] of raw.entries()) {
    nodeId[i] = d.nodeId
    backendNodeId[i] = d.backendNodeId
    parentNodeId[i] = d.parentNodeId
    firstChildIndex[i] = d.firstChildIndex
    childCount[i] = d.childCount

    if (d.shadowRootKind === 'open') {
      shadowRootKind[i] = 1
    } else if (d.shadowRootKind === 'closed') {
      shadowRootKind[i] = 2
    } else {
      shadowRootKind[i] = 0
    }

    // Interning order (tag, classes, role, aria name) is kept stable because
    // it may determine the ids handed out by the string table.
    tagNameStringId[i] = internString(strings, d.tagName)
    const classIds = new Uint32Array(d.classNames.length)
    for (const [c, className] of d.classNames.entries()) {
      classIds[c] = internString(strings, className)
    }
    classNameStringIds[i] = classIds
    roleStringId[i] = internString(strings, d.role)
    ariaNameStringId[i] = internString(strings, d.ariaName)
  }

  return {
    nodeId,
    backendNodeId,
    parentNodeId,
    firstChildIndex,
    childCount,
    shadowRootKind,
    tagNameStringId,
    classNameStringIds,
    roleStringId,
    ariaNameStringId,
  }
}

/**
 * Converts frame rows into parallel typed arrays and appends each frame's
 * 16-element matrix to the shared matrix buffer.
 *
 * @param raw - Frame rows from the extractor.
 * @param strings - String table used to intern `writingMode`.
 * @param matrices - Existing matrix buffer; new matrices are appended after its current contents.
 * @returns The frame table plus the matrix buffer trimmed to the used length.
 *   `axisMatrixStart` is the start reported by `appendMatrix`, or -1 when the
 *   frame has no matrix or its matrix does not have exactly 16 entries.
 */
export function normalizeFrames(
  raw: RawFrame[],
  strings: StringTable,
  matrices: Matrices
): { frames: Frames; matrices: Matrices } {
  const n = raw.length
  const frameId = new Uint32Array(n)
  const frameKind = new Uint16Array(n)
  const ownerSubjectId = new Uint32Array(n)
  const parentFrameId = new Uint32Array(n)
  const originX = new Float64Array(n)
  const originY = new Float64Array(n)
  const axisMatrixStart = new Int32Array(n)
  const clipRectId = new Uint32Array(n)
  const scrollContainerId = new Uint32Array(n)
  const writingMode = new Uint16Array(n)

  let matBuf = matrices.values
  let matUsed = matrices.values.length

  for (const [i, f] of raw.entries()) {
    frameId[i] = f.id
    frameKind[i] = lookupCode(FRAME_KIND_MAP, f.kind, FrameKind.Synthetic)
    ownerSubjectId[i] = f.ownerSubjectId
    parentFrameId[i] = f.parentFrameId
    originX[i] = f.originX
    originY[i] = f.originY
    clipRectId[i] = f.clipRectId
    scrollContainerId[i] = f.scrollContainerId
    // NOTE(review): the interned id lands in a Uint16Array, so ids above 65535
    // would wrap — confirm the string table stays below that.
    writingMode[i] = internString(strings, f.writingMode)

    if (f.matrix && f.matrix.length === 16) {
      const appended = appendMatrix(matBuf, matUsed, f.matrix)
      matBuf = appended.buffer
      matUsed = appended.used
      axisMatrixStart[i] = appended.start
    } else {
      axisMatrixStart[i] = -1
    }
  }

  return {
    frames: {
      frameId,
      frameKind,
      ownerSubjectId,
      parentFrameId,
      originX,
      originY,
      axisMatrixStart,
      clipRectId,
      scrollContainerId,
      writingMode,
    },
    matrices: { values: matBuf.subarray(0, matUsed) },
  }
}

/** Converts rectangle rows into parallel typed arrays (id plus four edges). */
export function normalizeRects(raw: RawRect[]): Rects {
  const n = raw.length
  const rectId = new Uint32Array(n)
  const left = new Float64Array(n)
  const top = new Float64Array(n)
  const right = new Float64Array(n)
  const bottom = new Float64Array(n)

  for (const [i, r] of raw.entries()) {
    rectId[i] = r.id
    left[i] = r.left
    top[i] = r.top
    right[i] = r.right
    bottom[i] = r.bottom
  }

  return { rectId, left, top, right, bottom }
}

/** Converts box rows into parallel typed arrays (ids plus border, padding and content values per side). */
export function normalizeBoxes(raw: RawBox[]): Boxes {
  const n = raw.length
  const boxId = new Uint32Array(n)
  const subjectId = new Uint32Array(n)
  const frameId = new Uint32Array(n)
  const borderLeft = new Float64Array(n)
  const borderTop = new Float64Array(n)
  const borderRight = new Float64Array(n)
  const borderBottom = new Float64Array(n)
  const paddingLeft = new Float64Array(n)
  const paddingTop = new Float64Array(n)
  const paddingRight = new Float64Array(n)
  const paddingBottom = new Float64Array(n)
  const contentLeft = new Float64Array(n)
  const contentTop = new Float64Array(n)
  const contentRight = new Float64Array(n)
  const contentBottom = new Float64Array(n)

  for (const [i, b] of raw.entries()) {
    boxId[i] = b.id
    subjectId[i] = b.subjectId
    frameId[i] = b.frameId
    borderLeft[i] = b.borderLeft
    borderTop[i] = b.borderTop
    borderRight[i] = b.borderRight
    borderBottom[i] = b.borderBottom
    paddingLeft[i] = b.paddingLeft
    paddingTop[i] = b.paddingTop
    paddingRight[i] = b.paddingRight
    paddingBottom[i] = b.paddingBottom
    contentLeft[i] = b.contentLeft
    contentTop[i] = b.contentTop
    contentRight[i] = b.contentRight
    contentBottom[i] = b.contentBottom
  }

  return {
    boxId,
    subjectId,
    frameId,
    borderLeft,
    borderTop,
    borderRight,
    borderBottom,
    paddingLeft,
    paddingTop,
    paddingRight,
    paddingBottom,
    contentLeft,
    contentTop,
    contentRight,
    contentBottom,
  }
}

/**
 * Converts fragment rows into parallel typed arrays.
 * Unknown `kind` values are stored as FragmentKind.Line.
 */
export function normalizeFragments(raw: RawFragment[]): Fragments {
  const n = raw.length
  const fragmentId = new Uint32Array(n)
  const subjectId = new Uint32Array(n)
  const fragmentKind = new Uint16Array(n)
  const boxLeft = new Float64Array(n)
  const boxTop = new Float64Array(n)
  const boxRight = new Float64Array(n)
  const boxBottom = new Float64Array(n)
  const lineIndex = new Int32Array(n)
  const flowIndex = new Int32Array(n)
  const parentFragmentId = new Uint32Array(n)

  for (const [i, f] of raw.entries()) {
    fragmentId[i] = f.id
    subjectId[i] = f.subjectId
    fragmentKind[i] = lookupCode(FRAGMENT_KIND_MAP, f.kind, FragmentKind.Line)
    boxLeft[i] = f.boxLeft
    boxTop[i] = f.boxTop
    boxRight[i] = f.boxRight
    boxBottom[i] = f.boxBottom
    lineIndex[i] = f.lineIndex
    flowIndex[i] = f.flowIndex
    parentFragmentId[i] = f.parentFragmentId
  }

  return {
    fragmentId,
    subjectId,
    fragmentKind,
    boxLeft,
    boxTop,
    boxRight,
    boxBottom,
    lineIndex,
    flowIndex,
    parentFragmentId,
  }
}

/**
 * Converts transform rows into parallel typed arrays and appends every
 * matrix of every transform to the shared matrix buffer.
 *
 * @param raw - Transform rows from the extractor.
 * @param matrices - Existing matrix buffer; new matrices are appended after its current contents.
 * @returns The transform table plus the matrix buffer trimmed to the used length.
 *   `matrixStart` is the buffer's used length just before the transform's
 *   first matrix was appended (-1 when the transform has no matrices) and
 *   `matrixLength` is the number of matrices, not the number of floats.
 */
export function normalizeTransforms(
  raw: RawTransform[],
  matrices: Matrices
): { transforms: Transforms; matrices: Matrices } {
  const n = raw.length
  const transformId = new Uint32Array(n)
  const subjectId = new Uint32Array(n)
  const matrixStart = new Int32Array(n)
  const matrixLength = new Uint16Array(n)
  const originX = new Float64Array(n)
  const originY = new Float64Array(n)

  let matBuf = matrices.values
  let matUsed = matrices.values.length

  for (const [i, t] of raw.entries()) {
    transformId[i] = t.id
    subjectId[i] = t.subjectId
    originX[i] = t.originX
    originY[i] = t.originY

    if (t.matrices && t.matrices.length > 0) {
      // NOTE(review): this records the pre-append `used` value, whereas
      // normalizeFrames records `appendMatrix(...).start` — presumably the
      // same quantity; confirm against appendMatrix.
      const start = matUsed
      for (const m of t.matrices) {
        const appended = appendMatrix(matBuf, matUsed, m)
        matBuf = appended.buffer
        matUsed = appended.used
      }
      matrixStart[i] = start
      matrixLength[i] = t.matrices.length
    } else {
      matrixStart[i] = -1
      matrixLength[i] = 0
    }
  }

  return {
    transforms: { transformId, subjectId, matrixStart, matrixLength, originX, originY },
    matrices: { values: matBuf.subarray(0, matUsed) },
  }
}

/**
 * Converts style rows into parallel typed arrays.
 *
 * Keyword fields are mapped through the lookup tables, falling back to
 * Block / Static / Visible (overflow) / Visible (visibility) for unknown
 * values. `contain` is split on whitespace and OR-ed into a bit mask;
 * unknown parts contribute nothing.
 *
 * z-index encoding: `zIndexKind` is 1 with the integer in `zIndexValue` when
 * the raw value is a number or a string holding a signed decimal integer
 * (e.g. "5", "-1"); otherwise (including "auto") both are 0.
 */
export function normalizeStyles(raw: RawStyle[], strings: StringTable): Styles {
  const n = raw.length
  const subjectId = new Uint32Array(n)
  const display = new Uint16Array(n)
  const position = new Uint16Array(n)
  const zIndexKind = new Uint8Array(n)
  const zIndexValue = new Int32Array(n)
  const overflowX = new Uint16Array(n)
  const overflowY = new Uint16Array(n)
  const opacity = new Float32Array(n)
  const visibility = new Uint16Array(n)
  const containFlags = new Uint32Array(n)
  const pointerEvents = new Uint16Array(n)
  const lineHeight = new Float64Array(n)
  const fontFamilyStringId = new Uint32Array(n)
  const fontSize = new Float64Array(n)
  const fontWeight = new Uint16Array(n)

  for (const [i, s] of raw.entries()) {
    subjectId[i] = s.subjectId
    display[i] = lookupCode(DISPLAY_MAP, s.display, DisplayValue.Block)
    position[i] = lookupCode(POSITION_MAP, s.position, PositionValue.Static)

    const z = s.zIndex
    if (typeof z === 'number') {
      zIndexKind[i] = 1 // explicit integer
      zIndexValue[i] = z
    } else if (typeof z === 'string' && INTEGER_TEXT.test(z.trim())) {
      // Computed styles report z-index as text, so "5" must not be treated as "auto".
      zIndexKind[i] = 1 // explicit integer
      zIndexValue[i] = Number.parseInt(z.trim(), 10)
    } else {
      // 'auto' and any unrecognized value
      zIndexKind[i] = 0
      zIndexValue[i] = 0
    }

    overflowX[i] = lookupCode(OVERFLOW_MAP, s.overflowX, OverflowValue.Visible)
    overflowY[i] = lookupCode(OVERFLOW_MAP, s.overflowY, OverflowValue.Visible)
    opacity[i] = s.opacity
    visibility[i] = lookupCode(VISIBILITY_MAP, s.visibility, VisibilityValue.Visible)

    if (s.contain) {
      let flags = 0
      for (const part of s.contain.split(WHITESPACE_RUN)) {
        flags |= lookupCode(CONTAIN_MAP, part.toLowerCase(), 0)
      }
      containFlags[i] = flags
    }

    // NOTE(review): the interned id lands in a Uint16Array, so ids above 65535
    // would wrap — confirm the string table stays below that.
    pointerEvents[i] = internString(strings, s.pointerEvents)
    lineHeight[i] = s.lineHeight
    fontFamilyStringId[i] = internString(strings, s.fontFamily)
    fontSize[i] = s.fontSize
    fontWeight[i] = s.fontWeight
  }

  return {
    subjectId,
    display,
    position,
    zIndexKind,
    zIndexValue,
    overflowX,
    overflowY,
    opacity,
    visibility,
    containFlags,
    pointerEvents,
    lineHeight,
    fontFamilyStringId,
    fontSize,
    fontWeight,
  }
}

/** Converts text run rows into parallel typed arrays; `content` is interned in `strings`. */
export function normalizeText(raw: RawTextRun[], strings: StringTable): TextRuns {
  const n = raw.length
  const runId = new Uint32Array(n)
  const subjectId = new Uint32Array(n)
  const contentStringId = new Uint32Array(n)
  const lineBoxId = new Uint32Array(n)
  const inkLeft = new Float64Array(n)
  const inkTop = new Float64Array(n)
  const inkRight = new Float64Array(n)
  const inkBottom = new Float64Array(n)
  const baselineY = new Float64Array(n)
  const capHeight = new Float64Array(n)
  const computedLineHeight = new Float64Array(n)

  for (const [i, t] of raw.entries()) {
    runId[i] = t.id
    subjectId[i] = t.subjectId
    contentStringId[i] = internString(strings, t.content)
    lineBoxId[i] = t.lineBoxId
    inkLeft[i] = t.inkLeft
    inkTop[i] = t.inkTop
    inkRight[i] = t.inkRight
    inkBottom[i] = t.inkBottom
    baselineY[i] = t.baselineY
    capHeight[i] = t.capHeight
    computedLineHeight[i] = t.computedLineHeight
  }

  return {
    runId,
    subjectId,
    contentStringId,
    lineBoxId,
    inkLeft,
    inkTop,
    inkRight,
    inkBottom,
    baselineY,
    capHeight,
    computedLineHeight,
  }
}

/**
 * Converts the topology relation arrays into typed arrays.
 * A non-empty input array is copied as-is (its own length is kept); an empty
 * one becomes a zero-filled array with `subjectCount` entries.
 */
export function normalizeTopology(raw: RawTopology, subjectCount: number): Topology {
  // Avoid intermediate Array.fill allocations — Uint32Array/Uint16Array constructors
  // zero-initialize automatically when given a length.
  return {
    containingBlockOf: copyOrZeroU32(raw.containingBlockOf, subjectCount),
    nearestPositionedAncestorOf: copyOrZeroU32(raw.nearestPositionedAncestorOf, subjectCount),
    scrollContainerOf: copyOrZeroU32(raw.scrollContainerOf, subjectCount),
    stackingContextOf: copyOrZeroU32(raw.stackingContextOf, subjectCount),
    formattingContextOf: copyOrZeroU32(raw.formattingContextOf, subjectCount),
    clippingRootOf: copyOrZeroU32(raw.clippingRootOf, subjectCount),
    paintOrderBucket: copyOrZeroU16(raw.paintOrderBucket, subjectCount),
    paintOrderIndex: copyOrZeroU32(raw.paintOrderIndex, subjectCount),
  }
}

/** Converts scroll container rows into parallel typed arrays. */
export function normalizeScroll(raw: RawScroll[]): Scroll {
  const n = raw.length
  const containerId = new Uint32Array(n)
  const scrollLeft = new Float64Array(n)
  const scrollTop = new Float64Array(n)
  const scrollWidth = new Float64Array(n)
  const scrollHeight = new Float64Array(n)
  const clientWidth = new Float64Array(n)
  const clientHeight = new Float64Array(n)

  for (const [i, s] of raw.entries()) {
    containerId[i] = s.containerId
    scrollLeft[i] = s.scrollLeft
    scrollTop[i] = s.scrollTop
    scrollWidth[i] = s.scrollWidth
    scrollHeight[i] = s.scrollHeight
    clientWidth[i] = s.clientWidth
    clientHeight[i] = s.clientHeight
  }

  return {
    containerId,
    scrollLeft,
    scrollTop,
    scrollWidth,
    scrollHeight,
    clientWidth,
    clientHeight,
  }
}

/**
 * Converts clip node rows into parallel typed arrays.
 * Unknown `kind` values are stored as ClipKind.Overflow.
 * `strings` is accepted for signature parity with the other normalizers but
 * is not used here.
 */
export function normalizeClipping(raw: RawClip[], strings: StringTable): Clipping {
  const n = raw.length
  const clipNodeId = new Uint32Array(n)
  const subjectId = new Uint32Array(n)
  const clipKind = new Uint16Array(n)
  const clipLeft = new Float64Array(n)
  const clipTop = new Float64Array(n)
  const clipRight = new Float64Array(n)
  const clipBottom = new Float64Array(n)
  const parentClipNodeId = new Uint32Array(n)

  for (const [i, c] of raw.entries()) {
    clipNodeId[i] = c.nodeId
    subjectId[i] = c.subjectId
    clipKind[i] = lookupCode(CLIP_KIND_MAP, c.kind, ClipKind.Overflow)
    clipLeft[i] = c.left
    clipTop[i] = c.top
    clipRight[i] = c.right
    clipBottom[i] = c.bottom
    parentClipNodeId[i] = c.parentNodeId
  }

  return {
    clipNodeId,
    subjectId,
    clipKind,
    clipLeft,
    clipTop,
    clipRight,
    clipBottom,
    parentClipNodeId,
  }
}

/** Converts paint node rows into parallel typed arrays. */
export function normalizePaint(raw: RawPaint[]): Paint {
  const n = raw.length
  const paintNodeId = new Uint32Array(n)
  const subjectId = new Uint32Array(n)
  const stackingContextId = new Uint32Array(n)
  const bucket = new Uint16Array(n)
  const localPaintIndex = new Uint32Array(n)

  for (const [i, p] of raw.entries()) {
    paintNodeId[i] = p.nodeId
    subjectId[i] = p.subjectId
    stackingContextId[i] = p.stackingContextId
    bucket[i] = p.bucket
    localPaintIndex[i] = p.localPaintIndex
  }

  return { paintNodeId, subjectId, stackingContextId, bucket, localPaintIndex }
}

/** Converts visibility rows into parallel typed arrays; booleans are stored as 1 (truthy) or 0. */
export function normalizeVisibility(raw: RawVisibility[]): Visibility {
  const n = raw.length
  const subjectId = new Uint32Array(n)
  const isRendered = new Uint8Array(n)
  const isVisible = new Uint8Array(n)
  const visibleArea = new Float64Array(n)
  const clippedArea = new Float64Array(n)

  for (const [i, v] of raw.entries()) {
    subjectId[i] = v.subjectId
    isRendered[i] = v.isRendered ? 1 : 0
    isVisible[i] = v.isVisible ? 1 : 0
    visibleArea[i] = v.visibleArea
    clippedArea[i] = v.clippedArea
  }

  return { subjectId, isRendered, isVisible, visibleArea, clippedArea }
}

/** Converts provenance rows into parallel typed arrays; `sourceKind` is interned in `strings`. */
export function normalizeProvenance(raw: RawProvenance[], strings: StringTable): Provenance {
  const n = raw.length
  const factId = new Uint32Array(n)
  const extractionStepId = new Uint32Array(n)
  const sourceKind = new Uint16Array(n)
  const sourceRef = new Uint32Array(n)

  for (const [i, p] of raw.entries()) {
    factId[i] = p.factId
    extractionStepId[i] = p.extractionStepId
    // NOTE(review): the interned id lands in a Uint16Array, so ids above 65535
    // would wrap — confirm the string table stays below that.
    sourceKind[i] = internString(strings, p.sourceKind)
    sourceRef[i] = p.sourceRef
  }

  return { factId, extractionStepId, sourceKind, sourceRef }
}

/** Converts confidence rows into parallel typed arrays; `reasonCode` is interned in `strings`. */
export function normalizeConfidence(raw: RawConfidence[], strings: StringTable): Confidence {
  const n = raw.length
  const factId = new Uint32Array(n)
  const confidence = new Float32Array(n)
  const reasonCode = new Uint16Array(n)

  for (const [i, c] of raw.entries()) {
    factId[i] = c.factId
    confidence[i] = c.confidence
    // NOTE(review): the interned id lands in a Uint16Array, so ids above 65535
    // would wrap — confirm the string table stays below that.
    reasonCode[i] = internString(strings, c.reasonCode)
  }

  return { factId, confidence, reasonCode }
}