v1.1.0: pooled runtime, 959 tests, production hardening (0 squash)
This commit is contained in:
@@ -0,0 +1,392 @@
|
||||
/**
 * Canonical Adapter
 *
 * Converts raw CDP extraction output into the canonical GeometryWorld
 * shape defined by imhotep-core. This adapter isolates CDP-specific
 * structures from the canonical representation.
 */
|
||||
|
||||
import type { GeometryWorldSnapshot } from './extractor.js'
|
||||
import type { DOMExtractionResult } from './dom.js'
|
||||
|
||||
// We define local canonical interfaces to avoid cross-package import
// failures when imhotep-core dist is stale. These mirror the core
// canonical contracts exactly.
|
||||
|
||||
/**
 * Environment block of a canonical world.
 *
 * `adaptSnapshotToCanonical` forwards `snapshot.env` into this shape
 * unchanged.
 *
 * NOTE(review): units of the viewport fields and the allowed values of the
 * string fields are not visible in this file — confirm against imhotep-core.
 */
export interface CanonicalWorldEnvironment {
  // Viewport
  viewportWidth: number
  viewportHeight: number
  deviceScaleFactor: number

  // User / device preferences
  colorScheme: string
  pointer: string
  hover: boolean
  reducedMotion: string

  // Localisation
  locale: string
  writingMode: string
}
|
||||
|
||||
/**
 * Describes where a canonical world was extracted from.
 *
 * `engine` is pinned to the literal `'chromium-cdp'`. `extractedAt` is a
 * plain number; the adapter in this file fills it with the epoch-millisecond
 * value derived from the snapshot's ISO timestamp.
 */
export interface CanonicalWorldSource {
  url: string
  browserName: string
  browserVersion: string
  engine: 'chromium-cdp'
  extractedAt: number
}
|
||||
|
||||
/**
 * String table of a canonical world: a plain list of strings under `values`.
 *
 * NOTE(review): the `*StringId` columns in other tables presumably index
 * into `values` — confirm against imhotep-core.
 */
export interface CanonicalStringTable {
  values: string[]
}
|
||||
|
||||
/**
 * Subjects table, stored as one `number[]` column per attribute.
 *
 * The adapter in this file passes `snapshot.subjects` through untouched and
 * uses `ids.length` as the subject count.
 *
 * NOTE(review): columns are presumably parallel (index i describes the same
 * subject in every column) — this type does not enforce it; confirm.
 */
export interface CanonicalSubjectsTable {
  ids: number[]
  domNodeId: number[]
  subjectKind: number[]
  primaryBoxId: number[]

  // Fragment range per subject
  firstFragmentId: number[]
  fragmentCount: number[]

  // Text-run range per subject
  firstTextRunId: number[]
  textRunCount: number[]
}
|
||||
|
||||
/**
 * DOM nodes stored as parallel `number[]` columns.
 *
 * `adaptDom` in this file appends exactly one entry to every column per
 * input node, so tables it produces have equally long columns.
 * `shadowRootKind` is written by `adaptDom` as 1 for 'open', 2 for 'closed'
 * and 0 otherwise.
 */
export interface CanonicalDomTable {
  // Identity
  nodeId: number[]
  backendNodeId: number[]

  // Tree structure
  parentNodeId: number[]
  firstChildIndex: number[]
  childCount: number[]
  shadowRootKind: number[]

  // String-table references
  tagNameStringId: number[]
  roleStringId: number[]
  ariaNameStringId: number[]
}
|
||||
|
||||
/**
 * Frames table, stored as one `number[]` column per attribute.
 *
 * The adapter in this file passes `snapshot.frames` through untouched.
 *
 * NOTE(review): `axisMatrixStart` presumably offsets into
 * `CanonicalMatricesTable.values` and `clipRectId` presumably refers to
 * `CanonicalRectsTable` — neither link is visible here; confirm.
 */
export interface CanonicalFramesTable {
  frameId: number[]
  frameKind: number[]
  ownerSubjectId: number[]
  parentFrameId: number[]

  // Frame origin
  originX: number[]
  originY: number[]

  // Cross-table references
  axisMatrixStart: number[]
  clipRectId: number[]
  scrollContainerId: number[]

  writingMode: number[]
}
|
||||
|
||||
/**
 * Flat numeric storage for matrices, held in a single `values` array.
 *
 * NOTE(review): element layout (stride, row- vs column-major) is not visible
 * in this file — confirm against imhotep-core.
 */
export interface CanonicalMatricesTable {
  values: number[]
}
|
||||
|
||||
/**
 * Rectangles stored as edge columns (`left`, `top`, `right`, `bottom`)
 * alongside a `rectId` column.
 *
 * The adapter in this file passes `snapshot.rects` through untouched.
 */
export interface CanonicalRectsTable {
  rectId: number[]

  // Edges
  left: number[]
  top: number[]
  right: number[]
  bottom: number[]
}
|
||||
|
||||
/**
 * Box geometry stored column-wise: border, padding and content edges for
 * each box, next to `boxId`, `subjectId` and `frameId` columns.
 *
 * `CanonicalGeometryWorld` uses this shape for both `boxes` and
 * `visualBoxes`.
 */
export interface CanonicalBoxesTable {
  boxId: number[]
  subjectId: number[]
  frameId: number[]

  // Border edges
  borderLeft: number[]
  borderTop: number[]
  borderRight: number[]
  borderBottom: number[]

  // Padding edges
  paddingLeft: number[]
  paddingTop: number[]
  paddingRight: number[]
  paddingBottom: number[]

  // Content edges
  contentLeft: number[]
  contentTop: number[]
  contentRight: number[]
  contentBottom: number[]
}
|
||||
|
||||
/**
 * Fragments table, stored as one `number[]` column per attribute.
 *
 * The adapter in this file passes `snapshot.fragments` through untouched.
 *
 * NOTE(review): the encoding of `fragmentKind` and the meaning of
 * `lineIndex` / `flowIndex` are not visible here — confirm.
 */
export interface CanonicalFragmentsTable {
  fragmentId: number[]
  subjectId: number[]
  fragmentKind: number[]

  // Fragment box edges
  boxLeft: number[]
  boxTop: number[]
  boxRight: number[]
  boxBottom: number[]

  // Ordering / nesting
  lineIndex: number[]
  flowIndex: number[]
  parentFragmentId: number[]
}
|
||||
|
||||
/**
 * Transforms table, stored as one `number[]` column per attribute.
 *
 * The adapter in this file passes `snapshot.transforms` through untouched.
 *
 * NOTE(review): `matrixStart` / `matrixLength` presumably address a slice of
 * `CanonicalMatricesTable.values` — confirm.
 */
export interface CanonicalTransformsTable {
  transformId: number[]
  subjectId: number[]

  // Matrix slice
  matrixStart: number[]
  matrixLength: number[]

  // Transform origin
  originX: number[]
  originY: number[]
}
|
||||
|
||||
/**
 * Computed-style columns, one `number[]` per property.
 *
 * The adapter in this file passes `snapshot.styles` through untouched.
 *
 * NOTE(review): keyword-like properties (`display`, `position`, `overflowX`,
 * `visibility`, ...) are numbers here, so they are presumably enum codes;
 * the encoding is not visible in this file — confirm.
 */
export interface CanonicalStylesTable {
  subjectId: number[]

  // Layout / stacking
  display: number[]
  position: number[]
  zIndexKind: number[]
  zIndexValue: number[]
  overflowX: number[]
  overflowY: number[]

  // Rendering
  opacity: number[]
  visibility: number[]
  containFlags: number[]
  pointerEvents: number[]

  // Typography
  lineHeight: number[]
  fontFamilyStringId: number[]
  fontSize: number[]
  fontWeight: number[]
}
|
||||
|
||||
/**
 * Text-run columns, one `number[]` per attribute.
 *
 * The adapter in this file always emits this table with every column empty
 * (see `emptyTextTable`).
 */
export interface CanonicalTextTable {
  runId: number[]
  subjectId: number[]
  contentStringId: number[]
  lineBoxId: number[]

  // Ink bounds
  inkLeft: number[]
  inkTop: number[]
  inkRight: number[]
  inkBottom: number[]

  // Metrics
  baselineY: number[]
  capHeight: number[]
  computedLineHeight: number[]
}
|
||||
|
||||
/**
 * Topology columns, one `number[]` per relation.
 *
 * The adapter in this file passes `snapshot.topology` through untouched.
 *
 * NOTE(review): the `*Of` columns are presumably indexed by subject and hold
 * the id of the related subject/context — confirm against imhotep-core.
 */
export interface CanonicalTopologyTable {
  // Ancestor-style relations
  containingBlockOf: number[]
  nearestPositionedAncestorOf: number[]
  scrollContainerOf: number[]
  stackingContextOf: number[]
  formattingContextOf: number[]
  clippingRootOf: number[]

  // Paint ordering
  paintOrderBucket: number[]
  paintOrderIndex: number[]
}
|
||||
|
||||
/**
 * Scroll-container metrics, one `number[]` column per attribute.
 *
 * The adapter in this file builds each column from the snapshot's
 * row-oriented `scroll` entries, one entry per row, so the columns it
 * produces are equally long.
 */
export interface CanonicalScrollTable {
  containerId: number[]

  // Scroll offset
  scrollLeft: number[]
  scrollTop: number[]

  // Scrollable extent
  scrollWidth: number[]
  scrollHeight: number[]

  // Client extent
  clientWidth: number[]
  clientHeight: number[]
}
|
||||
|
||||
/**
 * Clip nodes, one `number[]` column per attribute.
 *
 * The adapter in this file builds each column from the snapshot's
 * row-oriented `clipping` entries, one entry per row.
 *
 * NOTE(review): the encoding of `clipKind` is not visible here — confirm.
 */
export interface CanonicalClippingTable {
  clipNodeId: number[]
  subjectId: number[]
  clipKind: number[]

  // Clip rectangle edges
  clipLeft: number[]
  clipTop: number[]
  clipRight: number[]
  clipBottom: number[]

  parentClipNodeId: number[]
}
|
||||
|
||||
/**
 * Paint nodes, one `number[]` column per attribute.
 *
 * The adapter in this file always emits this table with every column empty
 * (see `emptyPaintTable`).
 */
export interface CanonicalPaintTable {
  paintNodeId: number[]
  subjectId: number[]
  stackingContextId: number[]
  bucket: number[]
  localPaintIndex: number[]
}
|
||||
|
||||
/**
 * Per-subject visibility columns.
 *
 * `isRendered` and `isVisible` are numeric; the adapter in this file writes
 * 1 into both and 0 into both area columns for every subject (see
 * `emptyVisibilityTable`).
 */
export interface CanonicalVisibilityTable {
  subjectId: number[]

  // Numeric flags
  isRendered: number[]
  isVisible: number[]

  // Areas
  visibleArea: number[]
  clippedArea: number[]
}
|
||||
|
||||
/**
 * Provenance columns, one `number[]` per attribute.
 *
 * The adapter in this file builds each column from the snapshot's
 * row-oriented `provenance` entries, one entry per row.
 *
 * NOTE(review): the encodings of `sourceKind` and `sourceRef` are not
 * visible here — confirm.
 */
export interface CanonicalProvenanceTable {
  factId: number[]
  extractionStepId: number[]
  sourceKind: number[]
  sourceRef: number[]
}
|
||||
|
||||
/**
 * Confidence columns, one `number[]` per attribute.
 *
 * The adapter in this file builds each column from the snapshot's
 * row-oriented `confidence` entries, one entry per row.
 *
 * NOTE(review): the value range of `confidence` and the encoding of
 * `reasonCode` are not visible here — confirm.
 */
export interface CanonicalConfidenceTable {
  factId: number[]
  confidence: number[]
  reasonCode: number[]
}
|
||||
|
||||
/**
 * Top-level canonical world: two identifiers, the environment and source
 * descriptors, and one member per table.
 *
 * `boxes` and `visualBoxes` share the `CanonicalBoxesTable` shape. This is
 * the return type of `adaptSnapshotToCanonical`.
 */
export interface CanonicalGeometryWorld {
  // Identity
  sceneId: string
  snapshotId: string

  // Descriptors
  env: CanonicalWorldEnvironment
  source: CanonicalWorldSource

  // Structure
  strings: CanonicalStringTable
  subjects: CanonicalSubjectsTable
  dom: CanonicalDomTable
  frames: CanonicalFramesTable

  // Geometry
  matrices: CanonicalMatricesTable
  rects: CanonicalRectsTable
  boxes: CanonicalBoxesTable
  visualBoxes: CanonicalBoxesTable
  fragments: CanonicalFragmentsTable
  transforms: CanonicalTransformsTable

  // Style and text
  styles: CanonicalStylesTable
  text: CanonicalTextTable

  // Relations and rendering
  topology: CanonicalTopologyTable
  scroll: CanonicalScrollTable
  clipping: CanonicalClippingTable
  paint: CanonicalPaintTable
  visibility: CanonicalVisibilityTable

  // Fact metadata
  provenance: CanonicalProvenanceTable
  confidence: CanonicalConfidenceTable
}
|
||||
|
||||
/**
 * Flatten the extracted DOM node list into the columnar CanonicalDomTable.
 *
 * Every node contributes exactly one entry to each column, in the iteration
 * order of `dom.nodes`. Numeric fields are copied as-is; `shadowRootKind` is
 * encoded as 1 for 'open', 2 for 'closed' and 0 for any other value.
 *
 * @param dom - DOM extraction result whose `nodes` are iterated.
 * @returns A table whose columns each hold one entry per node.
 */
function adaptDom(dom: DOMExtractionResult): CanonicalDomTable {
  const table: CanonicalDomTable = {
    nodeId: [],
    backendNodeId: [],
    parentNodeId: [],
    firstChildIndex: [],
    childCount: [],
    shadowRootKind: [],
    tagNameStringId: [],
    roleStringId: [],
    ariaNameStringId: [],
  }

  for (const entry of dom.nodes) {
    table.nodeId.push(entry.nodeId)
    table.backendNodeId.push(entry.backendNodeId)
    table.parentNodeId.push(entry.parentNodeId)
    table.firstChildIndex.push(entry.firstChildIndex)
    table.childCount.push(entry.childCount)

    // String kind -> numeric code; anything that is not open/closed maps to 0.
    let shadowCode = 0
    if (entry.shadowRootKind === 'open') {
      shadowCode = 1
    } else if (entry.shadowRootKind === 'closed') {
      shadowCode = 2
    }
    table.shadowRootKind.push(shadowCode)

    table.tagNameStringId.push(entry.tagNameStringId)
    table.roleStringId.push(entry.roleStringId)
    table.ariaNameStringId.push(entry.ariaNameStringId)
  }

  return table
}
|
||||
|
||||
/**
 * Wrap a list of strings in the canonical string-table shape.
 *
 * The input array is stored by reference, not copied, so the returned
 * table's `values` is the very array that was passed in.
 *
 * @param strings - The strings to expose as `values`.
 * @returns A table whose `values` is `strings`.
 */
function adaptStringTable(strings: string[]): CanonicalStringTable {
  const table: CanonicalStringTable = { values: strings }
  return table
}
|
||||
|
||||
/**
 * Convert a timestamp string into epoch milliseconds.
 *
 * Parsing is delegated to the `Date` constructor. A string it cannot parse
 * yields `NaN` rather than an exception.
 *
 * @param iso - Timestamp string handed to `new Date(...)`.
 * @returns Milliseconds since the Unix epoch, or `NaN` if unparseable.
 */
function adaptTimestamp(iso: string): number {
  const parsed = new Date(iso)
  return parsed.valueOf()
}
|
||||
|
||||
/**
 * Build a text table with every column present and empty.
 *
 * Each call allocates fresh arrays, so callers may mutate the result without
 * affecting other tables.
 *
 * @returns A CanonicalTextTable whose columns are all `[]`.
 */
function emptyTextTable(): CanonicalTextTable {
  const blank: CanonicalTextTable = {
    // Identity / references
    runId: [],
    subjectId: [],
    contentStringId: [],
    lineBoxId: [],
    // Ink bounds
    inkLeft: [],
    inkTop: [],
    inkRight: [],
    inkBottom: [],
    // Metrics
    baselineY: [],
    capHeight: [],
    computedLineHeight: [],
  }
  return blank
}
|
||||
|
||||
/**
 * Build a paint table with every column present and empty.
 *
 * Each call allocates fresh arrays.
 *
 * @returns A CanonicalPaintTable whose columns are all `[]`.
 */
function emptyPaintTable(): CanonicalPaintTable {
  const blank: CanonicalPaintTable = {
    paintNodeId: [],
    subjectId: [],
    stackingContextId: [],
    bucket: [],
    localPaintIndex: [],
  }
  return blank
}
|
||||
|
||||
/**
 * Build a default visibility table for `subjectCount` subjects.
 *
 * `subjectId` holds the indexes 0..subjectCount-1, `isRendered` and
 * `isVisible` are filled with 1, and both area columns are filled with 0.
 * Every column is a separately allocated array.
 *
 * @param subjectCount - Number of rows to generate.
 * @returns A CanonicalVisibilityTable with `subjectCount` entries per column.
 */
function emptyVisibilityTable(subjectCount: number): CanonicalVisibilityTable {
  // One fresh array per column so columns never share storage.
  const filledWith = (value: number): number[] => new Array<number>(subjectCount).fill(value)

  const subjectId = Array.from({ length: subjectCount }, (_unused, index) => index)
  const isRendered = filledWith(1)
  const isVisible = filledWith(1)
  const visibleArea = filledWith(0)
  const clippedArea = filledWith(0)

  return { subjectId, isRendered, isVisible, visibleArea, clippedArea }
}
|
||||
|
||||
/**
 * Convert a CDP GeometryWorldSnapshot into the canonical GeometryWorld shape.
 *
 * What the mapping does:
 * - `sceneId`, `snapshotId`, `env`, `subjects`, `frames`, `matrices`,
 *   `rects`, `boxes`, `visualBoxes`, `fragments`, `transforms`, `styles` and
 *   `topology` are passed through by reference (no copy is made).
 * - `source` is rebuilt field by field; `engine` is always `'chromium-cdp'`
 *   and `extractedAt` is converted from the snapshot's timestamp string to
 *   epoch milliseconds.
 * - `strings` is wrapped as `{ values }` and `dom` is flattened by `adaptDom`.
 * - `scroll`, `clipping`, `provenance` and `confidence` are turned from
 *   arrays of row objects into one array per column.
 * - `text` and `paint` are always emitted empty, and `visibility` is filled
 *   with defaults sized by `snapshot.subjects.ids.length`; nothing is read
 *   from the snapshot for these three tables.
 *
 * @param snapshot - The CDP extraction snapshot to adapt.
 * @returns The snapshot re-expressed as a CanonicalGeometryWorld.
 */
export function adaptSnapshotToCanonical(snapshot: GeometryWorldSnapshot): CanonicalGeometryWorld {
  // Row objects -> single column: collect one field from every row, in order.
  function column<Row, Key extends keyof Row>(rows: readonly Row[], key: Key): Array<Row[Key]> {
    return rows.map((row) => row[key])
  }

  return {
    sceneId: snapshot.sceneId,
    snapshotId: snapshot.snapshotId,
    env: snapshot.env,
    source: {
      url: snapshot.source.url,
      browserName: snapshot.source.browserName,
      browserVersion: snapshot.source.browserVersion,
      engine: 'chromium-cdp',
      extractedAt: adaptTimestamp(snapshot.source.extractedAt),
    },
    strings: adaptStringTable(snapshot.strings),
    subjects: snapshot.subjects,
    dom: adaptDom(snapshot.dom),
    frames: snapshot.frames,
    matrices: snapshot.matrices,
    rects: snapshot.rects,
    boxes: snapshot.boxes,
    visualBoxes: snapshot.visualBoxes,
    fragments: snapshot.fragments,
    transforms: snapshot.transforms,
    styles: snapshot.styles,
    // Always empty here, regardless of the snapshot's contents.
    text: emptyTextTable(),
    topology: snapshot.topology,
    scroll: {
      containerId: column(snapshot.scroll, 'containerId'),
      scrollLeft: column(snapshot.scroll, 'scrollLeft'),
      scrollTop: column(snapshot.scroll, 'scrollTop'),
      scrollWidth: column(snapshot.scroll, 'scrollWidth'),
      scrollHeight: column(snapshot.scroll, 'scrollHeight'),
      clientWidth: column(snapshot.scroll, 'clientWidth'),
      clientHeight: column(snapshot.scroll, 'clientHeight'),
    },
    clipping: {
      clipNodeId: column(snapshot.clipping, 'clipNodeId'),
      subjectId: column(snapshot.clipping, 'subjectId'),
      clipKind: column(snapshot.clipping, 'clipKind'),
      clipLeft: column(snapshot.clipping, 'clipLeft'),
      clipTop: column(snapshot.clipping, 'clipTop'),
      clipRight: column(snapshot.clipping, 'clipRight'),
      clipBottom: column(snapshot.clipping, 'clipBottom'),
      parentClipNodeId: column(snapshot.clipping, 'parentClipNodeId'),
    },
    // Always empty here, regardless of the snapshot's contents.
    paint: emptyPaintTable(),
    // Defaults only: one row per subject, sized by the subjects table.
    visibility: emptyVisibilityTable(snapshot.subjects.ids.length),
    provenance: {
      factId: column(snapshot.provenance, 'factId'),
      extractionStepId: column(snapshot.provenance, 'extractionStepId'),
      sourceKind: column(snapshot.provenance, 'sourceKind'),
      sourceRef: column(snapshot.provenance, 'sourceRef'),
    },
    confidence: {
      factId: column(snapshot.confidence, 'factId'),
      confidence: column(snapshot.confidence, 'confidence'),
      reasonCode: column(snapshot.confidence, 'reasonCode'),
    },
  }
}
|
||||
Reference in New Issue
Block a user