2025-08-15 10:00:00 -07:00
|
|
|
// Raw fact normalization for Imhotep geometry world
|
|
|
|
|
// Converts extraction results into flat, indexed, immutable geometry tables
|
|
|
|
|
|
|
|
|
|
import {
|
|
|
|
|
StringTable,
|
|
|
|
|
internString,
|
|
|
|
|
Env,
|
|
|
|
|
Source,
|
|
|
|
|
Subjects,
|
|
|
|
|
SubjectKind,
|
|
|
|
|
Dom,
|
|
|
|
|
Frames,
|
|
|
|
|
FrameKind,
|
|
|
|
|
Matrices,
|
|
|
|
|
Rects,
|
|
|
|
|
Boxes,
|
|
|
|
|
Fragments,
|
|
|
|
|
FragmentKind,
|
|
|
|
|
Transforms,
|
|
|
|
|
Styles,
|
|
|
|
|
DisplayValue,
|
|
|
|
|
PositionValue,
|
|
|
|
|
OverflowValue,
|
|
|
|
|
VisibilityValue,
|
|
|
|
|
TextRuns,
|
|
|
|
|
Topology,
|
|
|
|
|
Scroll,
|
|
|
|
|
Clipping,
|
|
|
|
|
ClipKind,
|
|
|
|
|
Paint,
|
|
|
|
|
Visibility,
|
|
|
|
|
Provenance,
|
|
|
|
|
Confidence,
|
|
|
|
|
GeometryWorld,
|
|
|
|
|
} from './world.js'
|
|
|
|
|
import { MATRIX_4X4_STRIDE } from './world.js'
|
|
|
|
|
import { appendMatrix, IDENTITY_4X4 } from './transforms.js'
|
|
|
|
|
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
|
|
|
// Raw extraction types (input to normalization)
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
/**
 * Environment facts as delivered by extraction, before normalization.
 *
 * `normalizeEnv` copies the numeric and boolean members through unchanged and
 * replaces each string member with the value `internString` returns for it.
 */
export interface RawEnv {
  viewportWidth: number
  viewportHeight: number
  deviceScaleFactor: number

  // String members below are interned during normalization.
  colorScheme: string
  pointer: string
  hover: string

  reducedMotion: boolean

  locale: string
  writingMode: string
}
|
|
|
|
|
|
|
|
|
|
/**
 * Description of where a snapshot came from, as delivered by extraction.
 *
 * `normalizeSource` interns every string member and copies `extractedAt`
 * through as a plain number.
 */
export interface RawSource {
  url: string
  browserName: string
  browserVersion: string
  engine: string

  // Copied through unchanged. NOTE(review): unit/epoch is not visible here — confirm with the extractor.
  extractedAt: number
}
|
|
|
|
|
|
|
|
|
|
/**
 * One raw subject row.
 *
 * `normalizeSubjects` writes each member into the column of the same name
 * (`id` goes to `ids`, `kind` goes to `subjectKind`).
 */
export interface RawSubject {
  id: number
  domNodeId: number

  // Looked up in SUBJECT_KIND_MAP; unlisted values become SubjectKind.Element.
  kind: string

  primaryBoxId: number

  // Fragment range owned by this subject (stored as Uint32 / Uint16 columns).
  firstFragmentId: number
  fragmentCount: number

  // Text-run range owned by this subject (stored as Uint32 / Uint16 columns).
  firstTextRunId: number
  textRunCount: number
}
|
|
|
|
|
|
|
|
|
|
/**
 * One raw DOM node row, consumed by `normalizeDom`.
 */
export interface RawDomNode {
  nodeId: number
  backendNodeId: number
  parentNodeId: number
  firstChildIndex: number
  childCount: number

  // normalizeDom encodes 'open' as 1 and 'closed' as 2; any other value becomes 0.
  shadowRootKind: string

  // The remaining members are interned into the string table by normalizeDom.
  tagName: string
  classNames: string[]
  role: string
  ariaName: string
}
|
|
|
|
|
|
|
|
|
|
/**
 * One raw coordinate-frame row, consumed by `normalizeFrames`.
 */
export interface RawFrame {
  id: number

  // Looked up in FRAME_KIND_MAP; unlisted values become FrameKind.Synthetic.
  kind: string

  ownerSubjectId: number
  parentFrameId: number
  originX: number
  originY: number

  // 16 floats, column-major 4x4. normalizeFrames stores it only when it has
  // exactly 16 entries; otherwise the frame's axisMatrixStart is -1.
  matrix?: number[]

  clipRectId: number
  scrollContainerId: number

  // Interned into the string table by normalizeFrames.
  writingMode: string
}
|
|
|
|
|
|
|
|
|
|
/**
 * One raw rectangle row, described by its four edges.
 *
 * `normalizeRects` stores `id` in the `rectId` column and each edge in a
 * Float64 column of the same name.
 */
export interface RawRect {
  id: number

  left: number
  top: number
  right: number
  bottom: number
}
|
|
|
|
|
|
|
|
|
|
/**
 * One raw box row: identifiers plus border, padding and content edges.
 *
 * `normalizeBoxes` stores `id` in the `boxId` column and every other member in
 * the column of the same name.
 */
export interface RawBox {
  id: number
  subjectId: number
  frameId: number

  // Border edges
  borderLeft: number
  borderTop: number
  borderRight: number
  borderBottom: number

  // Padding edges
  paddingLeft: number
  paddingTop: number
  paddingRight: number
  paddingBottom: number

  // Content edges
  contentLeft: number
  contentTop: number
  contentRight: number
  contentBottom: number
}
|
|
|
|
|
|
|
|
|
|
/**
 * One raw fragment row, consumed by `normalizeFragments`.
 */
export interface RawFragment {
  id: number
  subjectId: number

  // Looked up in FRAGMENT_KIND_MAP; unlisted values become FragmentKind.Line.
  kind: string

  // Fragment box edges
  boxLeft: number
  boxTop: number
  boxRight: number
  boxBottom: number

  // Stored in signed 32-bit columns by normalizeFragments.
  lineIndex: number
  flowIndex: number

  parentFragmentId: number
}
|
|
|
|
|
|
|
|
|
|
/**
 * One raw transform row, consumed by `normalizeTransforms`.
 */
export interface RawTransform {
  id: number
  subjectId: number

  // Array of 16-float arrays. normalizeTransforms appends each one to the
  // shared matrix buffer and records how many were appended.
  matrices: number[][]

  originX: number
  originY: number
}
|
|
|
|
|
|
|
|
|
|
/**
 * One raw computed-style row, consumed by `normalizeStyles`.
 */
export interface RawStyle {
  subjectId: number

  // Keyword members: mapped to enum values through the lookup tables in this file.
  display: string
  position: string

  // Either a number or a keyword string such as 'auto'.
  zIndex: string | number

  overflowX: string
  overflowY: string
  opacity: number
  visibility: string

  // Whitespace-separated keywords; folded into bit flags via CONTAIN_MAP.
  contain: string

  // Interned into the string table by normalizeStyles.
  pointerEvents: string

  lineHeight: number

  // Interned into the string table by normalizeStyles.
  fontFamily: string

  fontSize: number
  fontWeight: number
}
|
|
|
|
|
|
|
|
|
|
/**
 * One raw text-run row, consumed by `normalizeText`.
 */
export interface RawTextRun {
  id: number
  subjectId: number

  // Interned into the string table; stored in the `contentStringId` column.
  content: string

  lineBoxId: number

  // Ink bounds
  inkLeft: number
  inkTop: number
  inkRight: number
  inkBottom: number

  // Vertical text metrics
  baselineY: number
  capHeight: number
  computedLineHeight: number
}
|
|
|
|
|
|
|
|
|
|
/**
 * Raw topology relations, one number array per relation.
 *
 * `normalizeTopology` converts each array into a typed array and substitutes a
 * zero-filled table of `subjectCount` entries for any array that is empty.
 */
export interface RawTopology {
  // Stored as Uint32 tables.
  containingBlockOf: number[]
  nearestPositionedAncestorOf: number[]
  scrollContainerOf: number[]
  stackingContextOf: number[]
  formattingContextOf: number[]
  clippingRootOf: number[]

  // Stored as a Uint16 table.
  paintOrderBucket: number[]

  // Stored as a Uint32 table.
  paintOrderIndex: number[]
}
|
|
|
|
|
|
|
|
|
|
/**
 * One raw scroll-state row.
 *
 * `normalizeScroll` stores `containerId` in a Uint32 column and every other
 * member in a Float64 column of the same name.
 */
export interface RawScroll {
  containerId: number

  // Scroll offsets
  scrollLeft: number
  scrollTop: number

  // Scroll extents
  scrollWidth: number
  scrollHeight: number

  // Client extents
  clientWidth: number
  clientHeight: number
}
|
|
|
|
|
|
|
|
|
|
/**
 * One raw clip-node row, consumed by `normalizeClipping`.
 */
export interface RawClip {
  // Stored in the `clipNodeId` column.
  nodeId: number
  subjectId: number

  // Looked up in CLIP_KIND_MAP; unlisted values become ClipKind.Overflow.
  kind: string

  // Clip edges, stored as clipLeft / clipTop / clipRight / clipBottom.
  left: number
  top: number
  right: number
  bottom: number

  // Stored in the `parentClipNodeId` column.
  parentNodeId: number
}
|
|
|
|
|
|
|
|
|
|
/**
 * One raw paint-node row, consumed by `normalizePaint`.
 */
export interface RawPaint {
  // Stored in the `paintNodeId` column.
  nodeId: number
  subjectId: number
  stackingContextId: number

  // Stored in a Uint16 column.
  bucket: number

  localPaintIndex: number
}
|
|
|
|
|
|
|
|
|
|
/**
 * One raw visibility row, consumed by `normalizeVisibility`.
 */
export interface RawVisibility {
  subjectId: number

  // Booleans are stored as 1 / 0 in Uint8 columns.
  isRendered: boolean
  isVisible: boolean

  // Stored in Float64 columns.
  visibleArea: number
  clippedArea: number
}
|
|
|
|
|
|
|
|
|
|
/**
 * One raw provenance row, consumed by `normalizeProvenance`.
 */
export interface RawProvenance {
  factId: number
  extractionStepId: number

  // Interned into the string table by normalizeProvenance.
  sourceKind: string

  sourceRef: number
}
|
|
|
|
|
|
|
|
|
|
/**
 * One raw confidence row, consumed by `normalizeConfidence`.
 */
export interface RawConfidence {
  factId: number

  // Stored in a Float32 column.
  confidence: number

  // Interned into the string table by normalizeConfidence.
  reasonCode: string
}
|
|
|
|
|
|
|
|
|
|
/**
 * Complete raw output of one extraction: identifiers, environment and source
 * descriptions, and one collection per fact table.
 *
 * Each member has a matching `normalize*` function in this module that turns
 * it into its typed-array form.
 */
export interface RawExtractionResult {
  sceneId: string
  snapshotId: string

  env: RawEnv
  source: RawSource

  subjects: RawSubject[]
  dom: RawDomNode[]
  frames: RawFrame[]
  rects: RawRect[]
  boxes: RawBox[]
  fragments: RawFragment[]
  transforms: RawTransform[]
  styles: RawStyle[]
  text: RawTextRun[]

  topology: RawTopology

  scroll: RawScroll[]
  clipping: RawClip[]
  paint: RawPaint[]
  visibility: RawVisibility[]

  provenance: RawProvenance[]
  confidence: RawConfidence[]
}
|
|
|
|
|
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
|
|
|
// Enum mappings
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
/**
 * Raw subject `kind` string -> `SubjectKind`.
 * `normalizeSubjects` falls back to `SubjectKind.Element` for unlisted strings.
 */
const SUBJECT_KIND_MAP: Record<string, SubjectKind> = {
  'element': SubjectKind.Element,
  'pseudoElement': SubjectKind.PseudoElement,
  'textRange': SubjectKind.TextRange,
  'lineBox': SubjectKind.LineBox,
  'fragmentBox': SubjectKind.FragmentBox,
  'landmark': SubjectKind.Landmark,
  'gridArea': SubjectKind.GridArea,
  'scrollContainer': SubjectKind.ScrollContainer,
  'syntheticFrame': SubjectKind.SyntheticFrame,
}
|
|
|
|
|
|
|
|
|
|
/**
 * Raw frame `kind` string -> `FrameKind`.
 * `normalizeFrames` falls back to `FrameKind.Synthetic` for unlisted strings.
 */
const FRAME_KIND_MAP: Record<string, FrameKind> = {
  'viewport': FrameKind.Viewport,
  'containingBlock': FrameKind.ContainingBlock,
  'nearestPositionedAncestor': FrameKind.NearestPositionedAncestor,
  'scrollContainer': FrameKind.ScrollContainer,
  'synthetic': FrameKind.Synthetic,
}
|
|
|
|
|
|
|
|
|
|
/**
 * Raw fragment `kind` string -> `FragmentKind`.
 * `normalizeFragments` falls back to `FragmentKind.Line` for unlisted strings.
 */
const FRAGMENT_KIND_MAP: Record<string, FragmentKind> = {
  'line': FragmentKind.Line,
  'column': FragmentKind.Column,
  'region': FragmentKind.Region,
  'page': FragmentKind.Page,
}
|
|
|
|
|
|
|
|
|
|
/**
 * Raw `display` keyword -> `DisplayValue`.
 * `normalizeStyles` falls back to `DisplayValue.Block` for unlisted keywords.
 */
const DISPLAY_MAP: Record<string, DisplayValue> = {
  'block': DisplayValue.Block,
  'inline': DisplayValue.Inline,
  'inline-block': DisplayValue.InlineBlock,
  'flex': DisplayValue.Flex,
  'grid': DisplayValue.Grid,
  'none': DisplayValue.None,
  'contents': DisplayValue.Contents,
}
|
|
|
|
|
|
|
|
|
|
/**
 * Raw `position` keyword -> `PositionValue`.
 * `normalizeStyles` falls back to `PositionValue.Static` for unlisted keywords.
 */
const POSITION_MAP: Record<string, PositionValue> = {
  'static': PositionValue.Static,
  'relative': PositionValue.Relative,
  'absolute': PositionValue.Absolute,
  'fixed': PositionValue.Fixed,
  'sticky': PositionValue.Sticky,
}
|
|
|
|
|
|
|
|
|
|
/**
 * Raw `overflow-x` / `overflow-y` keyword -> `OverflowValue`.
 * `normalizeStyles` falls back to `OverflowValue.Visible` for unlisted keywords.
 */
const OVERFLOW_MAP: Record<string, OverflowValue> = {
  'visible': OverflowValue.Visible,
  'hidden': OverflowValue.Hidden,
  'scroll': OverflowValue.Scroll,
  'auto': OverflowValue.Auto,
  'clip': OverflowValue.Clip,
}
|
|
|
|
|
|
|
|
|
|
/**
 * Raw `visibility` keyword -> `VisibilityValue`.
 * `normalizeStyles` falls back to `VisibilityValue.Visible` for unlisted keywords.
 */
const VISIBILITY_MAP: Record<string, VisibilityValue> = {
  'visible': VisibilityValue.Visible,
  'hidden': VisibilityValue.Hidden,
  'collapse': VisibilityValue.Collapse,
}
|
|
|
|
|
|
2026-05-21 13:20:57 -07:00
|
|
|
/**
 * Bit flags for the keywords of the CSS `contain` property.
 *
 * `normalizeStyles` lowercases each whitespace-separated token of a style's
 * `contain` value, looks it up here and ORs the results together; tokens that
 * are not listed contribute nothing.
 *
 * Each single keyword owns one bit. The two shorthand keywords expand as
 * defined by CSS Containment:
 *   - `strict`  = size | layout | paint | style
 *   - `content` = layout | paint | style (no size containment)
 */
const CONTAIN_MAP: Record<string, number> = {
  none: 0,
  layout: 1,
  paint: 2,
  size: 4,
  style: 8,
  strict: 4 | 1 | 2 | 8, // 15: size | layout | paint | style
  // Was 7 (layout | paint | size), which wrongly implied size containment and
  // dropped style containment.
  content: 1 | 2 | 8, // 11: layout | paint | style
  'inline-size': 16,
}
|
2025-08-15 10:00:00 -07:00
|
|
|
/**
 * Raw clip `kind` string -> `ClipKind`.
 * `normalizeClipping` falls back to `ClipKind.Overflow` for unlisted strings.
 */
const CLIP_KIND_MAP: Record<string, ClipKind> = {
  'overflow': ClipKind.Overflow,
  'clipPath': ClipKind.ClipPath,
  'mask': ClipKind.Mask,
  'svgClip': ClipKind.SvgClip,
}
|
|
|
|
|
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
|
|
|
// Normalization functions
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
/**
 * Builds the normalized `Env` record from raw environment facts.
 *
 * Numeric and boolean members are copied through unchanged; each string member
 * is replaced by the value `internString` returns for it.
 *
 * @param raw - Environment facts produced by extraction.
 * @param strings - String table the string members are interned into.
 * @returns The normalized environment record.
 */
export function normalizeEnv(raw: RawEnv, strings: StringTable): Env {
  const intern = (text: string) => internString(strings, text)

  // Interning is performed in member order: colorScheme, pointer, hover,
  // locale, writingMode.
  const colorScheme = intern(raw.colorScheme)
  const pointer = intern(raw.pointer)
  const hover = intern(raw.hover)
  const locale = intern(raw.locale)
  const writingMode = intern(raw.writingMode)

  return {
    viewportWidth: raw.viewportWidth,
    viewportHeight: raw.viewportHeight,
    deviceScaleFactor: raw.deviceScaleFactor,
    colorScheme,
    pointer,
    hover,
    reducedMotion: raw.reducedMotion,
    locale,
    writingMode,
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Builds the normalized `Source` record from a raw source description.
 *
 * @param raw - Source description produced by extraction.
 * @param strings - String table the string members are interned into.
 * @returns A record whose string members hold `internString` results and whose
 *   `extractedAt` is copied through unchanged.
 */
export function normalizeSource(raw: RawSource, strings: StringTable): Source {
  const intern = (text: string) => internString(strings, text)

  // Interning is performed in member order: url, browserName, browserVersion, engine.
  const url = intern(raw.url)
  const browserName = intern(raw.browserName)
  const browserVersion = intern(raw.browserVersion)
  const engine = intern(raw.engine)

  return { url, browserName, browserVersion, engine, extractedAt: raw.extractedAt }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Converts raw subject rows into the column-oriented `Subjects` table.
 *
 * Row `i` of every column corresponds to `raw[i]`. `kind` strings are mapped
 * through `SUBJECT_KIND_MAP`; unlisted strings become `SubjectKind.Element`.
 *
 * @param raw - Subject rows produced by extraction.
 * @returns One typed array per subject attribute, each of length `raw.length`.
 */
export function normalizeSubjects(raw: RawSubject[]): Subjects {
  const rowCount = raw.length
  const table = {
    ids: new Uint32Array(rowCount),
    domNodeId: new Uint32Array(rowCount),
    subjectKind: new Uint16Array(rowCount),
    primaryBoxId: new Uint32Array(rowCount),
    firstFragmentId: new Uint32Array(rowCount),
    fragmentCount: new Uint16Array(rowCount),
    firstTextRunId: new Uint32Array(rowCount),
    textRunCount: new Uint16Array(rowCount),
  }

  for (const [row, subject] of raw.entries()) {
    table.ids[row] = subject.id
    table.domNodeId[row] = subject.domNodeId
    table.subjectKind[row] = SUBJECT_KIND_MAP[subject.kind] ?? SubjectKind.Element
    table.primaryBoxId[row] = subject.primaryBoxId
    table.firstFragmentId[row] = subject.firstFragmentId
    table.fragmentCount[row] = subject.fragmentCount
    table.firstTextRunId[row] = subject.firstTextRunId
    table.textRunCount[row] = subject.textRunCount
  }

  return table
}
|
|
|
|
|
|
|
|
|
|
/**
 * Converts raw DOM node rows into the column-oriented `Dom` table.
 *
 * Row `i` of every column corresponds to `raw[i]`. Tag name, class names, role
 * and ARIA name are interned (in that order for each node) and stored as the
 * values `internString` returns.
 *
 * @param raw - DOM node rows produced by extraction.
 * @param strings - String table the textual members are interned into.
 * @returns One column per attribute; `classNameStringIds` holds one
 *   `Uint32Array` per node.
 */
export function normalizeDom(raw: RawDomNode[], strings: StringTable): Dom {
  const count = raw.length
  const nodeId = new Uint32Array(count)
  const backendNodeId = new Uint32Array(count)
  const parentNodeId = new Uint32Array(count)
  const firstChildIndex = new Uint32Array(count)
  const childCount = new Uint16Array(count)
  const shadowRootKind = new Uint8Array(count)
  const tagNameStringId = new Uint32Array(count)
  const classNameStringIds: Uint32Array[] = new Array(count)
  const roleStringId = new Uint32Array(count)
  const ariaNameStringId = new Uint32Array(count)

  for (const [row, node] of raw.entries()) {
    nodeId[row] = node.nodeId
    backendNodeId[row] = node.backendNodeId
    parentNodeId[row] = node.parentNodeId
    firstChildIndex[row] = node.firstChildIndex
    childCount[row] = node.childCount

    // Shadow-root encoding: 'open' -> 1, 'closed' -> 2, anything else -> 0.
    let shadowCode = 0
    if (node.shadowRootKind === 'open') {
      shadowCode = 1
    } else if (node.shadowRootKind === 'closed') {
      shadowCode = 2
    }
    shadowRootKind[row] = shadowCode

    tagNameStringId[row] = internString(strings, node.tagName)
    classNameStringIds[row] = Uint32Array.from(node.classNames, (className) =>
      internString(strings, className)
    )
    roleStringId[row] = internString(strings, node.role)
    ariaNameStringId[row] = internString(strings, node.ariaName)
  }

  return {
    nodeId,
    backendNodeId,
    parentNodeId,
    firstChildIndex,
    childCount,
    shadowRootKind,
    tagNameStringId,
    classNameStringIds,
    roleStringId,
    ariaNameStringId,
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Converts raw frame rows into the column-oriented `Frames` table and appends
 * each frame's matrix to the shared matrix buffer.
 *
 * A frame's matrix is appended (via `appendMatrix`) only when it is present and
 * has exactly 16 entries; `axisMatrixStart` then holds the `start` value that
 * `appendMatrix` reported. Otherwise `axisMatrixStart` is -1.
 *
 * @param raw - Frame rows produced by extraction.
 * @param strings - String table `writingMode` is interned into.
 * @param matrices - Existing matrix buffer; appending begins at
 *   `matrices.values.length`.
 * @returns The frames table plus the matrix buffer trimmed to its used length.
 */
export function normalizeFrames(
  raw: RawFrame[],
  strings: StringTable,
  matrices: Matrices
): { frames: Frames; matrices: Matrices } {
  const count = raw.length
  const frames = {
    frameId: new Uint32Array(count),
    frameKind: new Uint16Array(count),
    ownerSubjectId: new Uint32Array(count),
    parentFrameId: new Uint32Array(count),
    originX: new Float64Array(count),
    originY: new Float64Array(count),
    axisMatrixStart: new Int32Array(count),
    clipRectId: new Uint32Array(count),
    scrollContainerId: new Uint32Array(count),
    writingMode: new Uint16Array(count),
  }

  // Running state of the matrix buffer; appendMatrix hands back a (possibly
  // different) buffer and the new used length after every append.
  let buffer = matrices.values
  let used = matrices.values.length

  for (const [row, frame] of raw.entries()) {
    frames.frameId[row] = frame.id
    frames.frameKind[row] = FRAME_KIND_MAP[frame.kind] ?? FrameKind.Synthetic
    frames.ownerSubjectId[row] = frame.ownerSubjectId
    frames.parentFrameId[row] = frame.parentFrameId
    frames.originX[row] = frame.originX
    frames.originY[row] = frame.originY
    frames.clipRectId[row] = frame.clipRectId
    frames.scrollContainerId[row] = frame.scrollContainerId
    // NOTE(review): the internString result lands in a Uint16 column, so values
    // above 65535 would wrap — confirm the string table stays below that.
    frames.writingMode[row] = internString(strings, frame.writingMode)

    const matrix = frame.matrix
    if (!matrix || matrix.length !== 16) {
      // Missing or malformed matrix: mark the frame as having none.
      frames.axisMatrixStart[row] = -1
      continue
    }

    const appended = appendMatrix(buffer, used, matrix)
    buffer = appended.buffer
    used = appended.used
    frames.axisMatrixStart[row] = appended.start
  }

  return {
    frames,
    matrices: { values: buffer.subarray(0, used) },
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Converts raw rectangle rows into the column-oriented `Rects` table.
 *
 * @param raw - Rectangle rows produced by extraction.
 * @returns `rectId` as a `Uint32Array` and the four edges as `Float64Array`s;
 *   row `i` of every column corresponds to `raw[i]`.
 */
export function normalizeRects(raw: RawRect[]): Rects {
  // Each column is produced by projecting one member out of every row.
  const rectId = Uint32Array.from(raw, (rect) => rect.id)
  const left = Float64Array.from(raw, (rect) => rect.left)
  const top = Float64Array.from(raw, (rect) => rect.top)
  const right = Float64Array.from(raw, (rect) => rect.right)
  const bottom = Float64Array.from(raw, (rect) => rect.bottom)

  return { rectId, left, top, right, bottom }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Converts raw box rows into the column-oriented `Boxes` table.
 *
 * @param raw - Box rows produced by extraction.
 * @returns Identifier columns as `Uint32Array`s and the border / padding /
 *   content edges as `Float64Array`s; row `i` corresponds to `raw[i]`.
 */
export function normalizeBoxes(raw: RawBox[]): Boxes {
  const rowCount = raw.length
  const table = {
    boxId: new Uint32Array(rowCount),
    subjectId: new Uint32Array(rowCount),
    frameId: new Uint32Array(rowCount),
    borderLeft: new Float64Array(rowCount),
    borderTop: new Float64Array(rowCount),
    borderRight: new Float64Array(rowCount),
    borderBottom: new Float64Array(rowCount),
    paddingLeft: new Float64Array(rowCount),
    paddingTop: new Float64Array(rowCount),
    paddingRight: new Float64Array(rowCount),
    paddingBottom: new Float64Array(rowCount),
    contentLeft: new Float64Array(rowCount),
    contentTop: new Float64Array(rowCount),
    contentRight: new Float64Array(rowCount),
    contentBottom: new Float64Array(rowCount),
  }

  for (const [row, box] of raw.entries()) {
    table.boxId[row] = box.id
    table.subjectId[row] = box.subjectId
    table.frameId[row] = box.frameId

    table.borderLeft[row] = box.borderLeft
    table.borderTop[row] = box.borderTop
    table.borderRight[row] = box.borderRight
    table.borderBottom[row] = box.borderBottom

    table.paddingLeft[row] = box.paddingLeft
    table.paddingTop[row] = box.paddingTop
    table.paddingRight[row] = box.paddingRight
    table.paddingBottom[row] = box.paddingBottom

    table.contentLeft[row] = box.contentLeft
    table.contentTop[row] = box.contentTop
    table.contentRight[row] = box.contentRight
    table.contentBottom[row] = box.contentBottom
  }

  return table
}
|
|
|
|
|
|
|
|
|
|
/**
 * Converts raw fragment rows into the column-oriented `Fragments` table.
 *
 * `kind` strings are mapped through `FRAGMENT_KIND_MAP`; unlisted strings
 * become `FragmentKind.Line`.
 *
 * @param raw - Fragment rows produced by extraction.
 * @returns One typed array per fragment attribute; row `i` corresponds to
 *   `raw[i]`.
 */
export function normalizeFragments(raw: RawFragment[]): Fragments {
  const rowCount = raw.length
  const table = {
    fragmentId: new Uint32Array(rowCount),
    subjectId: new Uint32Array(rowCount),
    fragmentKind: new Uint16Array(rowCount),
    boxLeft: new Float64Array(rowCount),
    boxTop: new Float64Array(rowCount),
    boxRight: new Float64Array(rowCount),
    boxBottom: new Float64Array(rowCount),
    lineIndex: new Int32Array(rowCount),
    flowIndex: new Int32Array(rowCount),
    parentFragmentId: new Uint32Array(rowCount),
  }

  for (const [row, fragment] of raw.entries()) {
    table.fragmentId[row] = fragment.id
    table.subjectId[row] = fragment.subjectId
    table.fragmentKind[row] = FRAGMENT_KIND_MAP[fragment.kind] ?? FragmentKind.Line
    table.boxLeft[row] = fragment.boxLeft
    table.boxTop[row] = fragment.boxTop
    table.boxRight[row] = fragment.boxRight
    table.boxBottom[row] = fragment.boxBottom
    table.lineIndex[row] = fragment.lineIndex
    table.flowIndex[row] = fragment.flowIndex
    table.parentFragmentId[row] = fragment.parentFragmentId
  }

  return table
}
|
|
|
|
|
|
|
|
|
|
/**
 * Converts raw transform rows into the column-oriented `Transforms` table and
 * appends every transform's matrices to the shared matrix buffer.
 *
 * For a transform with at least one matrix, `matrixStart` is the used length
 * of the buffer just before its first matrix was appended and `matrixLength`
 * is the number of matrices. A transform without matrices gets
 * `matrixStart = -1` and `matrixLength = 0`.
 *
 * NOTE(review): unlike normalizeFrames, matrices are passed to appendMatrix
 * without checking that they have 16 entries — confirm appendMatrix validates
 * or tolerates other lengths.
 *
 * @param raw - Transform rows produced by extraction.
 * @param matrices - Existing matrix buffer; appending begins at
 *   `matrices.values.length`.
 * @returns The transforms table plus the matrix buffer trimmed to its used
 *   length.
 */
export function normalizeTransforms(
  raw: RawTransform[],
  matrices: Matrices
): { transforms: Transforms; matrices: Matrices } {
  const count = raw.length
  const transforms = {
    transformId: new Uint32Array(count),
    subjectId: new Uint32Array(count),
    matrixStart: new Int32Array(count),
    matrixLength: new Uint16Array(count),
    originX: new Float64Array(count),
    originY: new Float64Array(count),
  }

  let buffer = matrices.values
  let used = matrices.values.length

  for (const [row, transform] of raw.entries()) {
    transforms.transformId[row] = transform.id
    transforms.subjectId[row] = transform.subjectId
    transforms.originX[row] = transform.originX
    transforms.originY[row] = transform.originY

    const stack = transform.matrices
    if (!stack || stack.length === 0) {
      transforms.matrixStart[row] = -1
      transforms.matrixLength[row] = 0
      continue
    }

    // Record where this transform's run begins before anything is appended.
    transforms.matrixStart[row] = used
    transforms.matrixLength[row] = stack.length
    for (const matrix of stack) {
      const appended = appendMatrix(buffer, used, matrix)
      buffer = appended.buffer
      used = appended.used
    }
  }

  return {
    transforms,
    matrices: { values: buffer.subarray(0, used) },
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Converts raw computed-style rows into the column-oriented `Styles` table.
 *
 * - `display`, `position`, `overflowX`, `overflowY` and `visibility` keywords
 *   are mapped through their lookup tables; unlisted keywords fall back to
 *   Block / Static / Visible / Visible / Visible respectively.
 * - `zIndex` is stored as a (kind, value) pair: kind 1 with the integer value
 *   when an explicit z-index is given, kind 0 with value 0 otherwise
 *   (`auto`, other keywords, or a non-finite number). An integer written as a
 *   string (e.g. `"10"`) counts as explicit.
 * - `contain` is split on whitespace; each lowercased token's `CONTAIN_MAP`
 *   bits are ORed into `containFlags`. Unlisted tokens contribute nothing.
 * - `pointerEvents` and `fontFamily` are interned into `strings`.
 *
 * @param raw - Style rows produced by extraction.
 * @param strings - String table the textual members are interned into.
 * @returns One typed array per style attribute; row `i` corresponds to `raw[i]`.
 */
export function normalizeStyles(raw: RawStyle[], strings: StringTable): Styles {
  const n = raw.length
  const subjectId = new Uint32Array(n)
  const display = new Uint16Array(n)
  const position = new Uint16Array(n)
  const zIndexKind = new Uint8Array(n)
  const zIndexValue = new Int32Array(n)
  const overflowX = new Uint16Array(n)
  const overflowY = new Uint16Array(n)
  const opacity = new Float32Array(n)
  const visibility = new Uint16Array(n)
  const containFlags = new Uint32Array(n)
  const pointerEvents = new Uint16Array(n)
  const lineHeight = new Float64Array(n)
  const fontFamilyStringId = new Uint32Array(n)
  const fontSize = new Float64Array(n)
  const fontWeight = new Uint16Array(n)

  // Hoisted so the patterns are not rebuilt for every row.
  const tokenSeparator = /\s+/
  const integerLiteral = /^\s*[+-]?\d+\s*$/

  for (let i = 0; i < n; i++) {
    const s = raw[i]
    subjectId[i] = s.subjectId
    display[i] = DISPLAY_MAP[s.display] ?? DisplayValue.Block
    position[i] = POSITION_MAP[s.position] ?? PositionValue.Static

    // Resolve an explicit z-index from either a number or an integer string.
    let explicitZ: number | undefined
    if (typeof s.zIndex === 'number') {
      explicitZ = s.zIndex
    } else if (typeof s.zIndex === 'string' && integerLiteral.test(s.zIndex)) {
      explicitZ = Number(s.zIndex)
    }

    if (explicitZ !== undefined && Number.isFinite(explicitZ)) {
      zIndexKind[i] = 1 // explicit integer
      zIndexValue[i] = explicitZ
    } else {
      // 'auto', any other keyword, or NaN/Infinity: no explicit z-index.
      zIndexKind[i] = 0
      zIndexValue[i] = 0
    }

    overflowX[i] = OVERFLOW_MAP[s.overflowX] ?? OverflowValue.Visible
    overflowY[i] = OVERFLOW_MAP[s.overflowY] ?? OverflowValue.Visible
    opacity[i] = s.opacity
    visibility[i] = VISIBILITY_MAP[s.visibility] ?? VisibilityValue.Visible

    if (s.contain) {
      for (const part of s.contain.split(tokenSeparator)) {
        containFlags[i] |= CONTAIN_MAP[part.toLowerCase()] ?? 0
      }
    }

    // NOTE(review): the internString result lands in a Uint16 column, so values
    // above 65535 would wrap — confirm the string table stays below that.
    pointerEvents[i] = internString(strings, s.pointerEvents)
    lineHeight[i] = s.lineHeight
    fontFamilyStringId[i] = internString(strings, s.fontFamily)
    fontSize[i] = s.fontSize
    fontWeight[i] = s.fontWeight
  }

  return {
    subjectId,
    display,
    position,
    zIndexKind,
    zIndexValue,
    overflowX,
    overflowY,
    opacity,
    visibility,
    containFlags,
    pointerEvents,
    lineHeight,
    fontFamilyStringId,
    fontSize,
    fontWeight,
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Converts raw text-run rows into the column-oriented `TextRuns` table.
 *
 * @param raw - Text-run rows produced by extraction.
 * @param strings - String table each run's `content` is interned into.
 * @returns Identifier columns as `Uint32Array`s, `contentStringId` holding the
 *   `internString` results, and ink bounds / metrics as `Float64Array`s; row
 *   `i` corresponds to `raw[i]`.
 */
export function normalizeText(raw: RawTextRun[], strings: StringTable): TextRuns {
  const rowCount = raw.length
  const table = {
    runId: new Uint32Array(rowCount),
    subjectId: new Uint32Array(rowCount),
    contentStringId: new Uint32Array(rowCount),
    lineBoxId: new Uint32Array(rowCount),
    inkLeft: new Float64Array(rowCount),
    inkTop: new Float64Array(rowCount),
    inkRight: new Float64Array(rowCount),
    inkBottom: new Float64Array(rowCount),
    baselineY: new Float64Array(rowCount),
    capHeight: new Float64Array(rowCount),
    computedLineHeight: new Float64Array(rowCount),
  }

  for (const [row, run] of raw.entries()) {
    table.runId[row] = run.id
    table.subjectId[row] = run.subjectId
    table.contentStringId[row] = internString(strings, run.content)
    table.lineBoxId[row] = run.lineBoxId

    table.inkLeft[row] = run.inkLeft
    table.inkTop[row] = run.inkTop
    table.inkRight[row] = run.inkRight
    table.inkBottom[row] = run.inkBottom

    table.baselineY[row] = run.baselineY
    table.capHeight[row] = run.capHeight
    table.computedLineHeight[row] = run.computedLineHeight
  }

  return table
}
|
|
|
|
|
|
|
|
|
|
/**
 * Converts the raw topology relations into typed-array tables.
 *
 * Every resulting table has at least `subjectCount` entries: the raw values
 * are copied to the front and any remaining entries stay 0. An empty raw array
 * therefore yields an all-zero table of `subjectCount` entries, and a raw
 * array that is longer than `subjectCount` is kept in full.
 *
 * @param raw - Raw relation arrays produced by extraction.
 * @param subjectCount - Minimum number of entries each table must have.
 * @returns `paintOrderBucket` as a `Uint16Array`, every other relation as a
 *   `Uint32Array`.
 */
export function normalizeTopology(raw: RawTopology, subjectCount: number): Topology {
  // Written so that a NaN subjectCount falls back to the raw length instead of
  // producing a table too small for `set`.
  const tableLength = (values: number[]): number =>
    subjectCount > values.length ? subjectCount : values.length

  // Typed-array constructors zero-initialize, so only the raw prefix is copied.
  const toUint32 = (values: number[]): Uint32Array => {
    const table = new Uint32Array(tableLength(values))
    table.set(values)
    return table
  }
  const toUint16 = (values: number[]): Uint16Array => {
    const table = new Uint16Array(tableLength(values))
    table.set(values)
    return table
  }

  return {
    containingBlockOf: toUint32(raw.containingBlockOf),
    nearestPositionedAncestorOf: toUint32(raw.nearestPositionedAncestorOf),
    scrollContainerOf: toUint32(raw.scrollContainerOf),
    stackingContextOf: toUint32(raw.stackingContextOf),
    formattingContextOf: toUint32(raw.formattingContextOf),
    clippingRootOf: toUint32(raw.clippingRootOf),
    paintOrderBucket: toUint16(raw.paintOrderBucket),
    paintOrderIndex: toUint32(raw.paintOrderIndex),
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Converts raw scroll-state rows into the column-oriented `Scroll` table.
 *
 * @param raw - Scroll rows produced by extraction.
 * @returns `containerId` as a `Uint32Array` and the offsets / extents as
 *   `Float64Array`s; row `i` of every column corresponds to `raw[i]`.
 */
export function normalizeScroll(raw: RawScroll[]): Scroll {
  // Each column is produced by projecting one member out of every row.
  const containerId = Uint32Array.from(raw, (entry) => entry.containerId)
  const scrollLeft = Float64Array.from(raw, (entry) => entry.scrollLeft)
  const scrollTop = Float64Array.from(raw, (entry) => entry.scrollTop)
  const scrollWidth = Float64Array.from(raw, (entry) => entry.scrollWidth)
  const scrollHeight = Float64Array.from(raw, (entry) => entry.scrollHeight)
  const clientWidth = Float64Array.from(raw, (entry) => entry.clientWidth)
  const clientHeight = Float64Array.from(raw, (entry) => entry.clientHeight)

  return {
    containerId,
    scrollLeft,
    scrollTop,
    scrollWidth,
    scrollHeight,
    clientWidth,
    clientHeight,
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Converts raw clip-node rows into the column-oriented `Clipping` table.
 *
 * `kind` strings are mapped through `CLIP_KIND_MAP`; unlisted strings become
 * `ClipKind.Overflow`.
 *
 * @param raw - Clip rows produced by extraction.
 * @param strings - Accepted for signature parity with the other normalizers;
 *   this function does not read it.
 * @returns One typed array per clip attribute; row `i` corresponds to `raw[i]`.
 */
export function normalizeClipping(raw: RawClip[], strings: StringTable): Clipping {
  const rowCount = raw.length
  const table = {
    clipNodeId: new Uint32Array(rowCount),
    subjectId: new Uint32Array(rowCount),
    clipKind: new Uint16Array(rowCount),
    clipLeft: new Float64Array(rowCount),
    clipTop: new Float64Array(rowCount),
    clipRight: new Float64Array(rowCount),
    clipBottom: new Float64Array(rowCount),
    parentClipNodeId: new Uint32Array(rowCount),
  }

  for (const [row, clip] of raw.entries()) {
    table.clipNodeId[row] = clip.nodeId
    table.subjectId[row] = clip.subjectId
    table.clipKind[row] = CLIP_KIND_MAP[clip.kind] ?? ClipKind.Overflow
    table.clipLeft[row] = clip.left
    table.clipTop[row] = clip.top
    table.clipRight[row] = clip.right
    table.clipBottom[row] = clip.bottom
    table.parentClipNodeId[row] = clip.parentNodeId
  }

  return table
}
|
|
|
|
|
|
|
|
|
|
/**
 * Converts raw paint-node rows into the column-oriented `Paint` table.
 *
 * @param raw - Paint rows produced by extraction.
 * @returns `bucket` as a `Uint16Array` and every other column as a
 *   `Uint32Array`; row `i` of every column corresponds to `raw[i]`.
 */
export function normalizePaint(raw: RawPaint[]): Paint {
  const rowCount = raw.length
  const table = {
    paintNodeId: new Uint32Array(rowCount),
    subjectId: new Uint32Array(rowCount),
    stackingContextId: new Uint32Array(rowCount),
    bucket: new Uint16Array(rowCount),
    localPaintIndex: new Uint32Array(rowCount),
  }

  for (const [row, node] of raw.entries()) {
    table.paintNodeId[row] = node.nodeId
    table.subjectId[row] = node.subjectId
    table.stackingContextId[row] = node.stackingContextId
    table.bucket[row] = node.bucket
    table.localPaintIndex[row] = node.localPaintIndex
  }

  return table
}
|
|
|
|
|
|
|
|
|
|
/**
 * Converts raw visibility rows into the column-oriented `Visibility` table.
 *
 * @param raw - Visibility rows produced by extraction.
 * @returns `subjectId` as a `Uint32Array`, the two booleans as `Uint8Array`s
 *   holding 1 (truthy) or 0 (falsy), and the areas as `Float64Array`s; row `i`
 *   of every column corresponds to `raw[i]`.
 */
export function normalizeVisibility(raw: RawVisibility[]): Visibility {
  // Each column is produced by projecting one member out of every row.
  const subjectId = Uint32Array.from(raw, (entry) => entry.subjectId)
  const isRendered = Uint8Array.from(raw, (entry) => (entry.isRendered ? 1 : 0))
  const isVisible = Uint8Array.from(raw, (entry) => (entry.isVisible ? 1 : 0))
  const visibleArea = Float64Array.from(raw, (entry) => entry.visibleArea)
  const clippedArea = Float64Array.from(raw, (entry) => entry.clippedArea)

  return { subjectId, isRendered, isVisible, visibleArea, clippedArea }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Converts raw provenance rows into the column-oriented `Provenance` table.
 *
 * @param raw - Provenance rows produced by extraction.
 * @param strings - String table each row's `sourceKind` is interned into.
 * @returns One typed array per provenance attribute; row `i` corresponds to
 *   `raw[i]`.
 */
export function normalizeProvenance(raw: RawProvenance[], strings: StringTable): Provenance {
  const total = raw.length
  const factId = new Uint32Array(total)
  const extractionStepId = new Uint32Array(total)
  const sourceKind = new Uint16Array(total)
  const sourceRef = new Uint32Array(total)

  let row = 0
  for (const entry of raw) {
    factId[row] = entry.factId
    extractionStepId[row] = entry.extractionStepId
    // NOTE(review): the internString result lands in a Uint16 column, so values
    // above 65535 would wrap — confirm the string table stays below that.
    sourceKind[row] = internString(strings, entry.sourceKind)
    sourceRef[row] = entry.sourceRef
    row += 1
  }

  return { factId, extractionStepId, sourceKind, sourceRef }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Converts raw confidence rows into the column-oriented `Confidence` table.
 *
 * @param raw - Confidence rows produced by extraction.
 * @param strings - String table each row's `reasonCode` is interned into.
 * @returns `factId` as a `Uint32Array`, `confidence` as a `Float32Array` and
 *   `reasonCode` as a `Uint16Array`; row `i` corresponds to `raw[i]`.
 */
export function normalizeConfidence(raw: RawConfidence[], strings: StringTable): Confidence {
  const total = raw.length
  const factId = new Uint32Array(total)
  const confidence = new Float32Array(total)
  const reasonCode = new Uint16Array(total)

  let row = 0
  for (const entry of raw) {
    factId[row] = entry.factId
    confidence[row] = entry.confidence
    // NOTE(review): the internString result lands in a Uint16 column, so values
    // above 65535 would wrap — confirm the string table stays below that.
    reasonCode[row] = internString(strings, entry.reasonCode)
    row += 1
  }

  return { factId, confidence, reasonCode }
}
|