v1.1.0: pooled runtime, 959 tests, production hardening (0 squash)
This commit is contained in:
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"name": "imhotep-cdp",
|
||||
"version": "1.0.0",
|
||||
"type": "module",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/anomalyco/imhotep.git"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
},
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
"files": [
|
||||
"dist"
|
||||
],
|
||||
"scripts": {
|
||||
"build": "tsc",
|
||||
"test": "node --test dist/**/*.test.js"
|
||||
},
|
||||
"exports": {
|
||||
".": {
|
||||
"types": "./dist/index.d.ts",
|
||||
"default": "./dist/index.js"
|
||||
}
|
||||
},
|
||||
"dependencies": {
|
||||
"imhotep-core": "^1.0.0"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,392 @@
|
||||
/**
|
||||
* Canonical Adapter
|
||||
*
|
||||
* Converts raw CDP extraction output into the canonical GeometryWorld
|
||||
* shape defined by imhotep-core. This adapter isolates CDP-specific
|
||||
* structures from the canonical representation.
|
||||
*/
|
||||
|
||||
import type { GeometryWorldSnapshot } from './extractor.js'
|
||||
import type { DOMExtractionResult } from './dom.js'
|
||||
|
||||
// We define local canonical interfaces to avoid cross-package import
|
||||
// failures when imhotep-core dist is stale. These mirror the core
|
||||
// canonical contracts exactly.
|
||||
|
||||
/**
 * Rendering-environment descriptor carried by a canonical world.
 *
 * The adapter in this file copies it from the CDP snapshot unchanged.
 * NOTE(review): units and the allowed string values (`colorScheme`,
 * `pointer`, `reducedMotion`, `writingMode`) are defined by imhotep-core —
 * confirm against that package.
 */
export interface CanonicalWorldEnvironment {
  viewportWidth: number
  viewportHeight: number
  deviceScaleFactor: number
  colorScheme: string
  pointer: string
  hover: boolean
  reducedMotion: string
  locale: string
  writingMode: string
}
|
||||
|
||||
/**
 * Where and when a canonical world was extracted.
 *
 * `engine` is fixed to the literal `'chromium-cdp'` for this adapter, and
 * `extractedAt` is a numeric timestamp (the adapter converts the snapshot's
 * ISO string to epoch milliseconds).
 */
export interface CanonicalWorldSource {
  url: string
  browserName: string
  browserVersion: string
  engine: 'chromium-cdp'
  extractedAt: number
}
|
||||
|
||||
/**
 * Interned strings. Columns named `*StringId` elsewhere hold indices
 * into `values`.
 */
export interface CanonicalStringTable {
  values: string[]
}
|
||||
|
||||
/**
 * Columnar table of extraction subjects.
 *
 * The adapter uses `ids.length` as the subject count.
 * NOTE(review): the remaining columns are presumably parallel to `ids`
 * (one entry per subject) — confirm against imhotep-core.
 */
export interface CanonicalSubjectsTable {
  ids: number[]
  domNodeId: number[]
  subjectKind: number[]
  primaryBoxId: number[]
  firstFragmentId: number[]
  fragmentCount: number[]
  firstTextRunId: number[]
  textRunCount: number[]
}
|
||||
|
||||
/**
 * Columnar DOM table: index `i` of every column describes the same node.
 *
 * As produced by the adapter in this file, `shadowRootKind` is numeric:
 * 0 = none, 1 = open, 2 = closed.
 */
export interface CanonicalDomTable {
  nodeId: number[]
  backendNodeId: number[]
  parentNodeId: number[]
  firstChildIndex: number[]
  childCount: number[]
  shadowRootKind: number[]
  tagNameStringId: number[]
  roleStringId: number[]
  ariaNameStringId: number[]
}
|
||||
|
||||
/**
 * Columnar table of coordinate frames; passed through from the snapshot
 * unchanged by the adapter.
 * NOTE(review): `axisMatrixStart` presumably indexes into the matrices
 * table and `clipRectId` into the rects table — confirm against imhotep-core.
 */
export interface CanonicalFramesTable {
  frameId: number[]
  frameKind: number[]
  ownerSubjectId: number[]
  parentFrameId: number[]
  originX: number[]
  originY: number[]
  axisMatrixStart: number[]
  clipRectId: number[]
  scrollContainerId: number[]
  writingMode: number[]
}
|
||||
|
||||
/**
 * Flat pool of matrix coefficients.
 * NOTE(review): presumably addressed through start/length columns in other
 * tables (e.g. `matrixStart` / `matrixLength`) — confirm against imhotep-core.
 */
export interface CanonicalMatricesTable {
  values: number[]
}
|
||||
|
||||
/**
 * Columnar table of rectangles stored as edge coordinates
 * (left/top/right/bottom) rather than origin + size.
 */
export interface CanonicalRectsTable {
  rectId: number[]
  left: number[]
  top: number[]
  right: number[]
  bottom: number[]
}
|
||||
|
||||
/**
 * Columnar box-model table. Each row stores the border, padding and content
 * rectangles of one box as edge coordinates, plus the owning subject and frame.
 *
 * The canonical world uses this shape for both `boxes` and `visualBoxes`.
 */
export interface CanonicalBoxesTable {
  boxId: number[]
  subjectId: number[]
  frameId: number[]
  borderLeft: number[]
  borderTop: number[]
  borderRight: number[]
  borderBottom: number[]
  paddingLeft: number[]
  paddingTop: number[]
  paddingRight: number[]
  paddingBottom: number[]
  contentLeft: number[]
  contentTop: number[]
  contentRight: number[]
  contentBottom: number[]
}
|
||||
|
||||
/**
 * Columnar table of layout fragments; passed through from the snapshot
 * unchanged by the adapter.
 * NOTE(review): the meaning of `fragmentKind`, `lineIndex` and `flowIndex`
 * is defined by imhotep-core — confirm there.
 */
export interface CanonicalFragmentsTable {
  fragmentId: number[]
  subjectId: number[]
  fragmentKind: number[]
  boxLeft: number[]
  boxTop: number[]
  boxRight: number[]
  boxBottom: number[]
  lineIndex: number[]
  flowIndex: number[]
  parentFragmentId: number[]
}
|
||||
|
||||
/**
 * Columnar table of per-subject transforms.
 * NOTE(review): `matrixStart` / `matrixLength` presumably address a slice of
 * the matrices table — confirm against imhotep-core.
 */
export interface CanonicalTransformsTable {
  transformId: number[]
  subjectId: number[]
  matrixStart: number[]
  matrixLength: number[]
  originX: number[]
  originY: number[]
}
|
||||
|
||||
/**
 * Columnar table of computed-style facts, one row per subject.
 *
 * All values are numeric; keyword properties (`display`, `position`,
 * `overflowX`, ...) are stored as numeric codes.
 * NOTE(review): the code assignments are made by the style extractor, which
 * is not part of this file — confirm the mapping there.
 */
export interface CanonicalStylesTable {
  subjectId: number[]
  display: number[]
  position: number[]
  zIndexKind: number[]
  zIndexValue: number[]
  overflowX: number[]
  overflowY: number[]
  opacity: number[]
  visibility: number[]
  containFlags: number[]
  pointerEvents: number[]
  lineHeight: number[]
  fontFamilyStringId: number[]
  fontSize: number[]
  fontWeight: number[]
}
|
||||
|
||||
/**
 * Columnar table of text runs (content string, ink bounds, baseline metrics).
 *
 * The adapter in this file currently always emits this table empty.
 */
export interface CanonicalTextTable {
  runId: number[]
  subjectId: number[]
  contentStringId: number[]
  lineBoxId: number[]
  inkLeft: number[]
  inkTop: number[]
  inkRight: number[]
  inkBottom: number[]
  baselineY: number[]
  capHeight: number[]
  computedLineHeight: number[]
}
|
||||
|
||||
/**
 * Columnar table of layout-topology relationships; passed through from the
 * snapshot unchanged by the adapter.
 * NOTE(review): each `*Of` column presumably holds, per subject, the id of
 * the related subject/context — confirm indexing against imhotep-core.
 */
export interface CanonicalTopologyTable {
  containingBlockOf: number[]
  nearestPositionedAncestorOf: number[]
  scrollContainerOf: number[]
  stackingContextOf: number[]
  formattingContextOf: number[]
  clippingRootOf: number[]
  paintOrderBucket: number[]
  paintOrderIndex: number[]
}
|
||||
|
||||
/**
 * Columnar table of scroll containers. The adapter builds it by splitting the
 * snapshot's array of scroll records into one array per field, so all columns
 * have the same length and row order.
 */
export interface CanonicalScrollTable {
  containerId: number[]
  scrollLeft: number[]
  scrollTop: number[]
  scrollWidth: number[]
  scrollHeight: number[]
  clientWidth: number[]
  clientHeight: number[]
}
|
||||
|
||||
/**
 * Columnar table of clip nodes (clip rectangle as edge coordinates plus a
 * parent link). The adapter builds it by splitting the snapshot's array of
 * clipping records into one array per field.
 */
export interface CanonicalClippingTable {
  clipNodeId: number[]
  subjectId: number[]
  clipKind: number[]
  clipLeft: number[]
  clipTop: number[]
  clipRight: number[]
  clipBottom: number[]
  parentClipNodeId: number[]
}
|
||||
|
||||
/**
 * Columnar table of paint-order nodes.
 *
 * The adapter in this file currently always emits this table empty.
 */
export interface CanonicalPaintTable {
  paintNodeId: number[]
  subjectId: number[]
  stackingContextId: number[]
  bucket: number[]
  localPaintIndex: number[]
}
|
||||
|
||||
/**
 * Columnar per-subject visibility table. `isRendered` / `isVisible` are
 * numeric flags (the adapter writes 1 for "yes").
 *
 * The adapter in this file fills it with placeholder rows (flags 1, areas 0)
 * rather than measured values.
 */
export interface CanonicalVisibilityTable {
  subjectId: number[]
  isRendered: number[]
  isVisible: number[]
  visibleArea: number[]
  clippedArea: number[]
}
|
||||
|
||||
/**
 * Columnar provenance table: for each fact, which extraction step and source
 * it came from. The adapter builds it by splitting the snapshot's array of
 * provenance records into one array per field.
 */
export interface CanonicalProvenanceTable {
  factId: number[]
  extractionStepId: number[]
  sourceKind: number[]
  sourceRef: number[]
}
|
||||
|
||||
/**
 * Columnar confidence table: a confidence value and reason code per fact.
 * The adapter builds it by splitting the snapshot's array of confidence
 * records into one array per field.
 * NOTE(review): the numeric range of `confidence` is not visible here.
 */
export interface CanonicalConfidenceTable {
  factId: number[]
  confidence: number[]
  reasonCode: number[]
}
|
||||
|
||||
/**
 * The complete canonical world: identifiers, environment, source metadata,
 * and one columnar table per fact family.
 *
 * `boxes` and `visualBoxes` share the same table shape.
 */
export interface CanonicalGeometryWorld {
  // Identity and capture metadata
  sceneId: string
  snapshotId: string
  env: CanonicalWorldEnvironment
  source: CanonicalWorldSource

  // Shared pools
  strings: CanonicalStringTable
  matrices: CanonicalMatricesTable
  rects: CanonicalRectsTable

  // Structure
  subjects: CanonicalSubjectsTable
  dom: CanonicalDomTable
  frames: CanonicalFramesTable

  // Geometry
  boxes: CanonicalBoxesTable
  visualBoxes: CanonicalBoxesTable
  fragments: CanonicalFragmentsTable
  transforms: CanonicalTransformsTable

  // Style, text and relationships
  styles: CanonicalStylesTable
  text: CanonicalTextTable
  topology: CanonicalTopologyTable
  scroll: CanonicalScrollTable
  clipping: CanonicalClippingTable
  paint: CanonicalPaintTable
  visibility: CanonicalVisibilityTable

  // Fact metadata
  provenance: CanonicalProvenanceTable
  confidence: CanonicalConfidenceTable
}
|
||||
|
||||
/**
 * Turn the array of per-node DOM records into the canonical
 * one-array-per-field DOM table.
 *
 * Row order follows `dom.nodes`. `shadowRootKind` is encoded numerically:
 * 'open' becomes 1, 'closed' becomes 2, and any other value becomes 0.
 *
 * @param dom - Result of the DOM extraction step.
 * @returns Column arrays, all of length `dom.nodes.length`.
 */
function adaptDom(dom: DOMExtractionResult): CanonicalDomTable {
  const records = dom.nodes

  const encodeShadowRootKind = (kind: string): number => {
    switch (kind) {
      case 'open':
        return 1
      case 'closed':
        return 2
      default:
        return 0
    }
  }

  return {
    nodeId: records.map((record) => record.nodeId),
    backendNodeId: records.map((record) => record.backendNodeId),
    parentNodeId: records.map((record) => record.parentNodeId),
    firstChildIndex: records.map((record) => record.firstChildIndex),
    childCount: records.map((record) => record.childCount),
    shadowRootKind: records.map((record) => encodeShadowRootKind(record.shadowRootKind)),
    tagNameStringId: records.map((record) => record.tagNameStringId),
    roleStringId: records.map((record) => record.roleStringId),
    ariaNameStringId: records.map((record) => record.ariaNameStringId),
  }
}
|
||||
|
||||
/**
 * Wrap the snapshot's string array in the canonical string-table shape.
 *
 * The array is not copied: the returned table shares it with the caller.
 */
function adaptStringTable(strings: string[]): CanonicalStringTable {
  const table: CanonicalStringTable = { values: strings }
  return table
}
|
||||
|
||||
/**
 * Convert a date string to epoch milliseconds.
 *
 * @param iso - Date string understood by the `Date` constructor.
 * @returns Milliseconds since the Unix epoch, or `NaN` when the string
 *   cannot be parsed (no error is thrown).
 */
function adaptTimestamp(iso: string): number {
  const parsed = new Date(iso)
  return parsed.valueOf()
}
|
||||
|
||||
/**
 * Build a text table with zero rows.
 *
 * Every call returns fresh arrays, so callers may mutate the result freely.
 */
function emptyTextTable(): CanonicalTextTable {
  const table: CanonicalTextTable = {
    runId: [],
    subjectId: [],
    contentStringId: [],
    lineBoxId: [],
    inkLeft: [],
    inkTop: [],
    inkRight: [],
    inkBottom: [],
    baselineY: [],
    capHeight: [],
    computedLineHeight: [],
  }
  return table
}
|
||||
|
||||
/**
 * Build a paint table with zero rows.
 *
 * Every call returns fresh arrays, so callers may mutate the result freely.
 */
function emptyPaintTable(): CanonicalPaintTable {
  const table: CanonicalPaintTable = {
    paintNodeId: [],
    subjectId: [],
    stackingContextId: [],
    bucket: [],
    localPaintIndex: [],
  }
  return table
}
|
||||
|
||||
/**
 * Build a placeholder visibility table with one row per subject.
 *
 * Rows are numbered 0..subjectCount-1 in `subjectId`; every row is flagged
 * rendered and visible (1) with zero visible and clipped area. These are
 * defaults, not measurements.
 *
 * @param subjectCount - Number of rows to generate.
 */
function emptyVisibilityTable(subjectCount: number): CanonicalVisibilityTable {
  // One fresh array per column so columns never alias each other.
  const constantColumn = (value: number): number[] => new Array<number>(subjectCount).fill(value)

  const subjectId = Array.from({ length: subjectCount }, (_unused, rowIndex) => rowIndex)
  const isRendered = constantColumn(1)
  const isVisible = constantColumn(1)
  const visibleArea = constantColumn(0)
  const clippedArea = constantColumn(0)

  return { subjectId, isRendered, isVisible, visibleArea, clippedArea }
}
|
||||
|
||||
/**
 * Convert a CDP GeometryWorldSnapshot into the canonical GeometryWorld shape.
 *
 * What the mapping does:
 * - tables that are already columnar (subjects, frames, matrices, rects,
 *   boxes, visualBoxes, fragments, transforms, styles, topology) and `env`
 *   are passed through by reference, not copied;
 * - the DOM node records and the row-oriented scroll / clipping /
 *   provenance / confidence arrays are split into one array per field;
 * - `source.extractedAt` is converted from an ISO string to epoch millis and
 *   `source.engine` is set to `'chromium-cdp'`.
 *
 * What it does not do (so the mapping is NOT lossless with respect to the
 * canonical shape): `text` and `paint` are always emitted empty, and
 * `visibility` contains placeholder rows (rendered = visible = 1, areas = 0)
 * keyed by the snapshot's subject ids rather than measured values.
 *
 * @param snapshot - Snapshot produced by the CDP extractor.
 * @returns A new canonical world object.
 */
export function adaptSnapshotToCanonical(snapshot: GeometryWorldSnapshot): CanonicalGeometryWorld {
  // Project one field of every row object into its own column array.
  const column = <Row, Key extends keyof Row>(rows: readonly Row[], key: Key): Row[Key][] =>
    rows.map((row) => row[key])

  const { source, subjects, scroll, clipping, provenance, confidence } = snapshot

  return {
    sceneId: snapshot.sceneId,
    snapshotId: snapshot.snapshotId,
    env: snapshot.env,
    source: {
      url: source.url,
      browserName: source.browserName,
      browserVersion: source.browserVersion,
      engine: 'chromium-cdp',
      extractedAt: adaptTimestamp(source.extractedAt),
    },
    strings: adaptStringTable(snapshot.strings),
    subjects,
    dom: adaptDom(snapshot.dom),
    frames: snapshot.frames,
    matrices: snapshot.matrices,
    rects: snapshot.rects,
    boxes: snapshot.boxes,
    visualBoxes: snapshot.visualBoxes,
    fragments: snapshot.fragments,
    transforms: snapshot.transforms,
    styles: snapshot.styles,
    text: emptyTextTable(),
    topology: snapshot.topology,
    scroll: {
      containerId: column(scroll, 'containerId'),
      scrollLeft: column(scroll, 'scrollLeft'),
      scrollTop: column(scroll, 'scrollTop'),
      scrollWidth: column(scroll, 'scrollWidth'),
      scrollHeight: column(scroll, 'scrollHeight'),
      clientWidth: column(scroll, 'clientWidth'),
      clientHeight: column(scroll, 'clientHeight'),
    },
    clipping: {
      clipNodeId: column(clipping, 'clipNodeId'),
      subjectId: column(clipping, 'subjectId'),
      clipKind: column(clipping, 'clipKind'),
      clipLeft: column(clipping, 'clipLeft'),
      clipTop: column(clipping, 'clipTop'),
      clipRight: column(clipping, 'clipRight'),
      clipBottom: column(clipping, 'clipBottom'),
      parentClipNodeId: column(clipping, 'parentClipNodeId'),
    },
    paint: emptyPaintTable(),
    visibility: {
      ...emptyVisibilityTable(subjects.ids.length),
      // Key placeholder rows by the real subject ids. The generated 0..n-1
      // sequence only matches when ids happen to be dense and zero-based.
      subjectId: [...subjects.ids],
    },
    provenance: {
      factId: column(provenance, 'factId'),
      extractionStepId: column(provenance, 'extractionStepId'),
      sourceKind: column(provenance, 'sourceKind'),
      sourceRef: column(provenance, 'sourceRef'),
    },
    confidence: {
      factId: column(confidence, 'factId'),
      confidence: column(confidence, 'confidence'),
      reasonCode: column(confidence, 'reasonCode'),
    },
  }
}
|
||||
@@ -0,0 +1,220 @@
|
||||
/**
|
||||
* DOM Extraction
|
||||
*
|
||||
* Captures the DOM tree, node identifiers, selectors, and shadow DOM
|
||||
* boundaries using CDP DOM domain commands.
|
||||
*
|
||||
* Bulk-first: we request the full document tree in one call, then
|
||||
* traverse the returned tree locally rather than making per-node
|
||||
* round-trips.
|
||||
*/
|
||||
|
||||
import type { CDPSession } from './session.js'
|
||||
|
||||
/**
 * Raw DOM node as returned by CDP `DOM.getDocument` with depth -1.
 *
 * Only the protocol fields this package reads are declared.
 * `attributes` is a flat list of alternating names and values
 * (`[name1, value1, name2, value2, ...]`).
 */
export interface CDPDOMNode {
  nodeId: number
  backendNodeId: number
  nodeType: number
  nodeName: string
  localName?: string
  nodeValue?: string
  parentId?: number
  children?: CDPDOMNode[]
  shadowRoots?: CDPDOMNode[]
  pseudoElements?: CDPDOMNode[]
  attributes?: string[]
  /**
   * Shadow root mode as reported by the protocol. Present only on nodes that
   * are themselves shadow roots (the entries of a host's `shadowRoots`).
   */
  shadowRootType?: 'user-agent' | 'open' | 'closed'
}
|
||||
|
||||
/**
 * Flattened DOM record for a single node, as produced by `extractDOM`.
 */
export interface DOMNodeRecord {
  /** CDP frontend node id. */
  nodeId: number
  /** CDP backend node id. */
  backendNodeId: number
  /** `nodeId` of the parent; the root is recorded with parent 0. */
  parentNodeId: number
  /**
   * Index in the flat node array of the slot directly after this node, which
   * is its first child when `childCount > 0`. Not meaningful for leaves.
   */
  firstChildIndex: number
  /** Number of direct children, counting shadow roots and pseudo-elements. */
  childCount: number
  /** Set on the shadow HOST element; 'none' when the node hosts no shadow root. */
  shadowRootKind: 'open' | 'closed' | 'none'
  /** Index of the lower-cased tag name in the extraction string table. */
  tagNameStringId: number
  /** Index of the `role` attribute value (or of the empty string). */
  roleStringId: number
  /** Index of the ARIA naming attribute value (or of the empty string). */
  ariaNameStringId: number
}
|
||||
|
||||
/**
 * Everything `extractDOM` returns for one document.
 */
export interface DOMExtractionResult {
  /**
   * Flat array of DOM nodes in depth-first pre-order: each node is followed
   * by its regular children, then its shadow roots, then its pseudo-elements.
   */
  nodes: DOMNodeRecord[]
  /** Deduplicated strings referenced by the `*StringId` fields of `nodes`. */
  strings: string[]
  /** Lookup from CDP `nodeId` to the node's index in `nodes`. */
  nodeIdToIndex: Map<number, number>
  /** Index of the document root in `nodes`. */
  rootIndex: number
}
|
||||
|
||||
/**
 * Extract the full DOM tree from the current page via CDP.
 *
 * Uses `DOM.getDocument` with depth -1 (and `pierce: true`) to fetch the
 * entire tree in a single protocol round-trip, then flattens it locally in
 * depth-first pre-order. For each node the regular children come first, then
 * shadow roots, then pseudo-elements.
 *
 * Notes on the produced records:
 * - `firstChildIndex` is the array slot directly after the node. It is the
 *   first child only when `childCount > 0`. Because the layout is pre-order,
 *   later siblings are NOT contiguous; find them through `parentNodeId`.
 * - `shadowRootKind` is recorded on the shadow host. It is 'closed' when the
 *   protocol reports a closed shadow root, and 'open' for any other shadow
 *   root (including user-agent roots or a missing `shadowRootType`).
 * - The ARIA name is the raw `aria-label` value when present. Otherwise it is
 *   the raw `aria-labelledby` value (an id reference list, not resolved text).
 *
 * NOTE(review): only `children`, `shadowRoots` and `pseudoElements` are
 * traversed; other protocol containers (e.g. an iframe's content document)
 * are not visited even though `pierce: true` is requested — confirm whether
 * that is intended.
 *
 * @param session - Attached CDP session used to issue `DOM.getDocument`.
 * @returns Flat node records, the interned string table, a nodeId-to-index
 *   map and the root index (always 0).
 * @throws Whatever `session.send` rejects with.
 */
export async function extractDOM(session: CDPSession): Promise<DOMExtractionResult> {
  // Shadow-root entries carry their mode in `shadowRootType`; typed locally
  // so this function does not depend on the field being declared elsewhere.
  type ShadowRootNode = CDPDOMNode & { shadowRootType?: string }

  const response = await session.send<{ root: CDPDOMNode }>('DOM.getDocument', {
    depth: -1,
    pierce: true,
  })

  const strings: string[] = []
  const stringIndex = new Map<string, number>()

  // Intern `value`, returning its index in `strings`.
  function getStringId(value: string): number {
    const existing = stringIndex.get(value)
    if (existing !== undefined) return existing
    const id = strings.length
    strings.push(value)
    stringIndex.set(value, id)
    return id
  }

  const nodes: DOMNodeRecord[] = []
  const nodeIdToIndex = new Map<number, number>()

  function walk(node: CDPDOMNode, parentNodeId: number): number {
    const nodeIndex = nodes.length
    nodeIdToIndex.set(node.nodeId, nodeIndex)

    const shadowRoots: ShadowRootNode[] = node.shadowRoots ?? []
    const allChildren = [...(node.children ?? []), ...shadowRoots, ...(node.pseudoElements ?? [])]

    // The node itself occupies `nodeIndex`; its first child (if any) follows.
    const firstChildIndex = nodeIndex + 1
    const childCount = allChildren.length

    // Record the shadow mode on the host. Anything that is not explicitly
    // closed keeps the historical 'open' default.
    let shadowRootKind: DOMNodeRecord['shadowRootKind'] = 'none'
    if (shadowRoots.length > 0) {
      shadowRootKind = shadowRoots.some((root) => root.shadowRootType === 'closed') ? 'closed' : 'open'
    }

    // Prefer localName; fall back to the lower-cased nodeName (e.g. '#document').
    const tagName = node.localName || node.nodeName.toLowerCase()
    const tagNameStringId = getStringId(tagName)

    // The empty string is always interned so "no role" / "no name" have an id.
    const emptyStringId = getStringId('')
    let roleStringId = emptyStringId
    let ariaNameStringId = emptyStringId
    let sawAriaLabel = false

    const attributes = node.attributes ?? []
    for (let i = 0; i < attributes.length; i += 2) {
      const attrName = attributes[i]
      // A dangling name without a value is treated as an empty value so that
      // `undefined` can never enter the string table.
      const attrValue = attributes[i + 1] ?? ''
      if (attrName === 'role') {
        roleStringId = getStringId(attrValue)
      } else if (attrName === 'aria-label') {
        ariaNameStringId = getStringId(attrValue)
        sawAriaLabel = true
      } else if (attrName === 'aria-labelledby' && !sawAriaLabel) {
        // Fallback only: a literal aria-label wins regardless of attribute order.
        ariaNameStringId = getStringId(attrValue)
      }
    }

    nodes.push({
      nodeId: node.nodeId,
      backendNodeId: node.backendNodeId,
      parentNodeId,
      firstChildIndex,
      childCount,
      shadowRootKind,
      tagNameStringId,
      roleStringId,
      ariaNameStringId,
    })

    // Walk children after pushing the parent so indices are stable.
    for (const child of allChildren) {
      walk(child, node.nodeId)
    }

    return nodeIndex
  }

  const rootIndex = walk(response.root, 0)

  return {
    nodes,
    strings,
    nodeIdToIndex,
    rootIndex,
  }
}
|
||||
|
||||
/**
 * One node matched by a selector, identified both ways CDP can address it.
 */
export interface SelectorMatch {
  /** Frontend node id, as returned by `DOM.querySelectorAll`. */
  nodeId: number
  /** Stable backend node id, as returned by `DOM.describeNode`. */
  backendNodeId: number
}
|
||||
|
||||
/**
 * Resolve a CSS selector to node IDs via CDP.
 *
 * Steps: obtain the document root node id (from the session's
 * `getDocumentRootNodeId` when it provides one, otherwise via
 * `DOM.getDocument`), run `DOM.querySelectorAll` from that root, then look up
 * each match's backend node id with `DOM.describeNode` in parallel.
 *
 * Both ids are returned because backendNodeId is the canonical identity for
 * geometry/topology, while the frontend nodeId is required by
 * CSS.getComputedStyleForNode.
 *
 * Matches whose `DOM.describeNode` call fails are silently left out of the
 * result (best effort); the order of the remaining matches follows the
 * `querySelectorAll` result.
 *
 * @param session - CDP session, or any object with a compatible `send` and an
 *   optional cached-root accessor.
 * @param selector - CSS selector to evaluate against the document root.
 * @returns Matches that could be described; empty when nothing matched.
 */
export async function resolveSelector(
  session: CDPSession | { send: CDPSession['send']; getDocumentRootNodeId?: () => Promise<number> },
  selector: string
): Promise<SelectorMatch[]> {
  // Use the cached document root if available, otherwise fetch it.
  const rootNodeId =
    'getDocumentRootNodeId' in session && session.getDocumentRootNodeId
      ? await session.getDocumentRootNodeId()
      : (await session.send<{ root: { nodeId: number } }>('DOM.getDocument')).root.nodeId

  const queried = await session.send<{ nodeIds: number[] }>('DOM.querySelectorAll', {
    nodeId: rootNodeId,
    selector,
  })

  // Pair a frontend nodeId with its backendNodeId; null marks a failed lookup.
  const describe = async (nodeId: number): Promise<SelectorMatch | null> => {
    try {
      const described = await session.send<{ node: { backendNodeId: number } }>('DOM.describeNode', { nodeId })
      return { nodeId, backendNodeId: described.node.backendNodeId }
    } catch {
      return null
    }
  }

  const lookups = await Promise.all(queried.nodeIds.map((nodeId) => describe(nodeId)))
  return lookups.filter((entry): entry is SelectorMatch => entry !== null)
}
|
||||
|
||||
/**
 * Resolve a selector to the backend node id of its first match.
 *
 * @param session - Attached CDP session.
 * @param selector - CSS selector to evaluate.
 * @returns The first match's backendNodeId, or null when nothing matched.
 */
export async function resolveOneSelector(
  session: CDPSession,
  selector: string
): Promise<number | null> {
  const [firstMatch] = await resolveSelector(session, selector)
  return firstMatch === undefined ? null : firstMatch.backendNodeId
}
|
||||
@@ -0,0 +1,738 @@
|
||||
/**
|
||||
* Extractor Integration Tests
|
||||
*
|
||||
* Tests CDP session setup, DOM extraction, and geometry extraction
|
||||
* using a mock CDP session to avoid requiring a real browser.
|
||||
*
|
||||
* Run with: node --test dist/extractor.test.js
|
||||
*/
|
||||
|
||||
import { describe, it } from 'node:test'
|
||||
import assert from 'node:assert'
|
||||
import {
|
||||
CDPSessionManager,
|
||||
CDPExtractor,
|
||||
extractDOM,
|
||||
extractBoxModels,
|
||||
extractFragments,
|
||||
extractTransforms,
|
||||
extractStyles,
|
||||
extractTopology,
|
||||
type CDPSession,
|
||||
type ExtractorRequest,
|
||||
} from './index.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mock CDP Session Factory
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Build an in-memory CDPSession for tests.
 *
 * Lookup order for `send(method, params)`:
 *  1. an exact registration under `"<method>:<JSON of params>"` (params
 *     default to `{}`), so a parameter-specific response always wins;
 *  2. a method-wide registration under `"<method>"`;
 *  3. the optional `fallback` callback;
 *  4. otherwise the call rejects with an "Unexpected CDP call" error.
 *
 * A registered `Error` instance is thrown instead of returned, which lets a
 * test simulate a failing protocol call. A registered or fallback value of
 * `undefined` counts as "no response" and falls through to the next step.
 *
 * @param responses - Canned responses keyed as described above.
 * @param fallback - Optional dynamic responder for unregistered calls.
 * @returns A session whose `on`, `off` and `detach` are no-ops.
 */
function createMockSession(
  responses: Map<string, unknown>,
  fallback?: (method: string, params?: Record<string, unknown>) => unknown
): CDPSession {
  return {
    async send<T>(method: string, params?: Record<string, unknown>): Promise<T> {
      const exactKey = `${method}:${JSON.stringify(params ?? {})}`

      // Most specific registration first, then the method-wide one.
      let value: unknown = responses.get(exactKey)
      if (value === undefined) {
        value = responses.get(method)
      }

      if (value instanceof Error) {
        throw value
      }
      if (value !== undefined) {
        return value as T
      }

      const fallbackValue = fallback?.(method, params)
      if (fallbackValue !== undefined) {
        return fallbackValue as T
      }

      throw new Error(`Unexpected CDP call: ${method} with ${JSON.stringify(params)}`)
    },
    on() {},
    off() {},
    async detach() {},
  }
}
|
||||
|
||||
/**
 * Build a session factory for tests.
 *
 * The returned factory ignores its page reference and creates a new mock
 * session from the same `responses` / `fallback` on every call.
 */
function createMockSessionFactory(
  responses: Map<string, unknown>,
  fallback?: (method: string, params?: Record<string, unknown>) => unknown
): (pageRef: unknown) => Promise<CDPSession> {
  const factory = async (): Promise<CDPSession> => {
    const session = createMockSession(responses, fallback)
    return session
  }
  return factory
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// CDP Session Setup
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('CDP Session Management', () => {
  // Each case builds its own manager on top of a mock session factory that
  // has no canned responses.
  const newManager = () => new CDPSessionManager(createMockSessionFactory(new Map()), {})

  it('should attach a session via the factory', async () => {
    const attached = await newManager().attach()
    assert.ok(attached, 'session should be defined')
  })

  it('should return the same session on re-attach', async () => {
    const manager = newManager()
    const first = await manager.attach()
    const second = await manager.attach()
    assert.strictEqual(first, second, 'should reuse attached session')
  })

  it('should throw if getSession called before attach', () => {
    const manager = newManager()
    assert.throws(() => manager.getSession(), /not attached/)
  })

  it('should detach cleanly', async () => {
    const manager = newManager()
    await manager.attach()
    await manager.detach()
    // After detaching, the manager must behave as if never attached.
    assert.throws(() => manager.getSession(), /not attached/)
  })
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// DOM Extraction
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('DOM Extraction', () => {
  it('should extract a flat DOM tree from CDP getDocument', async () => {
    // Fixture tree: #document > html > body > div.container[role=main]
    const divNode = {
      nodeId: 4,
      backendNodeId: 104,
      nodeType: 1,
      nodeName: 'DIV',
      localName: 'div',
      attributes: ['class', 'container', 'role', 'main'],
    }
    const bodyNode = {
      nodeId: 3,
      backendNodeId: 103,
      nodeType: 1,
      nodeName: 'BODY',
      localName: 'body',
      children: [divNode],
    }
    const htmlNode = {
      nodeId: 2,
      backendNodeId: 102,
      nodeType: 1,
      nodeName: 'HTML',
      localName: 'html',
      children: [bodyNode],
    }
    const documentNode = {
      nodeId: 1,
      backendNodeId: 101,
      nodeType: 9,
      nodeName: '#document',
      children: [htmlNode],
    }

    const canned = new Map<string, unknown>([['DOM.getDocument', { root: documentNode }]])
    const extracted = await extractDOM(createMockSession(canned))

    assert.strictEqual(extracted.nodes.length, 4, 'should have 4 nodes')
    assert.strictEqual(extracted.rootIndex, 0, 'root should be index 0')
    assert.ok(extracted.nodeIdToIndex.has(4), 'should map nodeId 4')

    // The string table must contain the tag name and the role value.
    assert.ok(extracted.strings.includes('div'), 'should include div tag')
    assert.ok(extracted.strings.includes('main'), 'should include role main')

    // The record for nodeId 4 points at its parent and hosts no shadow root.
    const divRecord = extracted.nodes[extracted.nodeIdToIndex.get(4)!]
    assert.strictEqual(divRecord.nodeId, 4)
    assert.strictEqual(divRecord.parentNodeId, 3)
    assert.strictEqual(divRecord.shadowRootKind, 'none')
  })

  it('should handle shadow roots', async () => {
    // Fixture tree: #document > div (host) > #document-fragment (shadow root) > span
    const spanNode = {
      nodeId: 4,
      backendNodeId: 104,
      nodeType: 1,
      nodeName: 'SPAN',
      localName: 'span',
    }
    const shadowRootNode = {
      nodeId: 3,
      backendNodeId: 103,
      nodeType: 11,
      nodeName: '#document-fragment',
      children: [spanNode],
    }
    const hostNode = {
      nodeId: 2,
      backendNodeId: 102,
      nodeType: 1,
      nodeName: 'DIV',
      localName: 'div',
      shadowRoots: [shadowRootNode],
    }
    const documentNode = {
      nodeId: 1,
      backendNodeId: 101,
      nodeType: 9,
      nodeName: '#document',
      children: [hostNode],
    }

    const canned = new Map<string, unknown>([['DOM.getDocument', { root: documentNode }]])
    const extracted = await extractDOM(createMockSession(canned))

    assert.strictEqual(extracted.nodes.length, 4, 'should have 4 nodes including shadow root and its child')
    // shadowRootKind is recorded on the host element (node 2), not the fragment.
    const hostRecord = extracted.nodes[extracted.nodeIdToIndex.get(2)!]
    assert.strictEqual(hostRecord.shadowRootKind, 'open')
  })
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Geometry Extraction
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('Geometry Extraction', () => {
  // CDP quads are 8 numbers: four (x, y) corners, clockwise from top-left.
  // All fixtures here are axis-aligned squares from (min, min) to (max, max).
  const squareQuad = (min: number, max: number): number[] => [min, min, max, min, max, max, min, max]

  // Box model whose 100x100 content box starts at `origin`, with each outer
  // box (padding, border, margin) growing by 10 on every side.
  const boxModelAt = (origin: number) => ({
    model: {
      content: squareQuad(origin, origin + 100),
      padding: squareQuad(origin - 10, origin + 110),
      border: squareQuad(origin - 20, origin + 120),
      margin: squareQuad(origin - 30, origin + 130),
      width: 100,
      height: 100,
    },
  })

  it('should extract box models in bulk', async () => {
    // Responses are keyed by backendNodeId: DOM.getBoxModel accepts
    // backendNodeId directly and returns { model: CDPBoxModel }.
    const canned = new Map<string, unknown>([
      ['DOM.getBoxModel:{"backendNodeId":10}', boxModelAt(100)],
      ['DOM.getBoxModel:{"backendNodeId":20}', boxModelAt(300)],
    ])

    const { boxes, errors } = await extractBoxModels(createMockSession(canned), [10, 20], [0, 1])

    assert.strictEqual(boxes.length, 2, 'should extract 2 boxes')
    assert.strictEqual(errors.length, 0, 'should have no errors')

    const firstBox = boxes[0]
    assert.strictEqual(firstBox.borderLeft, 80)
    assert.strictEqual(firstBox.borderTop, 80)
    assert.strictEqual(firstBox.borderRight, 220)
    assert.strictEqual(firstBox.borderBottom, 220)
    assert.strictEqual(firstBox.contentLeft, 100)
    assert.strictEqual(firstBox.contentTop, 100)
    assert.strictEqual(firstBox.contentRight, 200)
    assert.strictEqual(firstBox.contentBottom, 200)
  })

  it('should handle box model extraction failures gracefully', async () => {
    // A registered Error makes the mock session reject the call.
    const canned = new Map<string, unknown>([
      ['DOM.getBoxModel:{"backendNodeId":10}', new Error('Node not found')],
    ])

    const { boxes, errors } = await extractBoxModels(createMockSession(canned), [10], [0])

    assert.strictEqual(boxes.length, 0)
    assert.strictEqual(errors.length, 1)
    assert.ok(errors[0].reason.includes('Node not found'))
  })
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Style Extraction
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('Style Extraction', () => {
  it('should extract computed styles and map to enums', async () => {
    // Computed style values as the in-page function would report them.
    const computedStyle = {
      display: 'flex',
      position: 'relative',
      zIndex: '10',
      overflowX: 'hidden',
      overflowY: 'auto',
      opacity: '0.5',
      visibility: 'visible',
      contain: 'layout paint',
      pointerEvents: 'none',
      lineHeight: '24px',
      fontFamily: 'Arial',
      fontSize: '16px',
      fontWeight: '700',
    }

    // extractStyles resolves backendNodeId -> objectId via DOM.resolveNode,
    // then reads computed styles through Runtime.callFunctionOn.
    const canned = new Map<string, unknown>([
      ['DOM.resolveNode', { object: { objectId: 'obj1' } }],
      ['Runtime.callFunctionOn', { result: { value: computedStyle } }],
    ])

    const { styles, errors } = await extractStyles(createMockSession(canned), [110], [0], { all: true }, [])

    assert.strictEqual(styles.length, 1)
    assert.strictEqual(errors.length, 0)

    const row = styles[0]
    assert.strictEqual(row.display, 5) // flex
    assert.strictEqual(row.position, 2) // relative
    assert.strictEqual(row.zIndexKind, 1) // integer
    assert.strictEqual(row.zIndexValue, 10)
    assert.strictEqual(row.overflowX, 2) // hidden
    assert.strictEqual(row.overflowY, 5) // auto
    assert.strictEqual(row.opacity, 0.5)
    assert.strictEqual(row.containFlags, 3) // layout(1) | paint(2)
    assert.strictEqual(row.pointerEvents, 2) // none
    assert.strictEqual(row.fontSize, 16)
    assert.strictEqual(row.fontWeight, 700)
  })
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Topology Extraction
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('Topology Extraction', () => {
  it('should extract topology via Runtime.evaluate', async () => {
    // One scroll container, one clip node and a single-subject topology table.
    const scrollEntry = {
      containerId: 1,
      scrollLeft: 0,
      scrollTop: 100,
      scrollWidth: 500,
      scrollHeight: 1000,
      clientWidth: 400,
      clientHeight: 300,
    }
    const clipEntry = {
      clipNodeId: 0,
      subjectId: 1,
      clipKind: 1,
      clipLeft: 0,
      clipTop: 0,
      clipRight: 400,
      clipBottom: 300,
      parentClipNodeId: 0,
    }
    const topologyTable = {
      containingBlockOf: [0],
      nearestPositionedAncestorOf: [0],
      scrollContainerOf: [0],
      stackingContextOf: [0],
      formattingContextOf: [0],
      clippingRootOf: [0],
      paintOrderBucket: [1],
      paintOrderIndex: [0],
    }
    const evaluatedValue = {
      scroll: [scrollEntry],
      clipping: [clipEntry],
      topology: topologyTable,
    }

    // The mock replies to Runtime.evaluate with the payload above.
    const cannedReplies = new Map<string, unknown>([
      ['Runtime.evaluate', { result: { value: evaluatedValue } }],
    ])

    const extraction = await extractTopology(createMockSession(cannedReplies), [10], [0])

    assert.strictEqual(extraction.errors.length, 0)
    assert.strictEqual(extraction.result.scroll.length, 1)
    assert.strictEqual(extraction.result.scroll[0].scrollTop, 100)
    assert.strictEqual(extraction.result.clipping.length, 1)
    assert.strictEqual(extraction.result.topology.paintOrderBucket[0], 1)
  })
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Full Orchestrator
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('CDPExtractor', () => {
  /** Canned replies, always starting with the domain enablement done during attach. */
  const mockResponses = (entries: Array<[string, unknown]>): Map<string, unknown> =>
    new Map<string, unknown>([
      ['DOM.enable', {}],
      ['CSS.enable', {}],
      ['Runtime.enable', {}],
      ...entries,
    ])

  /** DOM.getDocument reply whose #document root owns the given children. */
  const documentReply = (children: unknown[]) => ({
    root: {
      nodeId: 1,
      backendNodeId: 101,
      nodeType: 9,
      nodeName: '#document',
      children,
    },
  })

  /** Element node in the shape DOM.getDocument reports. */
  const elementNode = (
    nodeId: number,
    backendNodeId: number,
    tag: string,
    extra: Record<string, unknown>
  ) => ({
    nodeId,
    backendNodeId,
    nodeType: 1,
    nodeName: tag.toUpperCase(),
    localName: tag,
    ...extra,
  })

  /** Axis-aligned quad (8 numbers, clockwise from top-left) as used by DOM.getBoxModel. */
  const rectQuad = (left: number, top: number, right: number, bottom: number): number[] => [
    left, top, right, top, right, bottom, left, bottom,
  ]

  /** Request with the shared scene target and environment used by every test here. */
  const makeRequest = (
    requestId: string,
    subjects: ExtractorRequest['subjects'],
    requiredFacts: ExtractorRequest['requiredFacts']
  ): ExtractorRequest => ({
    requestId,
    sceneTarget: { pageRef: {}, url: 'https://example.test' },
    env: {
      viewportWidth: 1280,
      viewportHeight: 720,
      colorScheme: 'light',
      pointer: 'fine',
    },
    subjects,
    requiredFacts,
  })

  /** Extractor wired to a mock session that serves the given replies. */
  const extractorFor = (replies: Map<string, unknown>): CDPExtractor =>
    new CDPExtractor(new CDPSessionManager(createMockSessionFactory(replies), {}))

  it('should perform full extraction with partial handling', async () => {
    const replies = mockResponses([
      // DOM.getDocument
      [
        'DOM.getDocument',
        documentReply([
          elementNode(2, 102, 'body', {
            children: [elementNode(10, 110, 'div', { attributes: ['class', 'test'] })],
          }),
        ]),
      ],
      // Selector resolution
      ['DOM.querySelectorAll', { nodeIds: [10] }],
      ['DOM.describeNode', { node: { backendNodeId: 110 } }],
      // Box model (uses backendNodeId, returns { model: {...} })
      [
        'DOM.getBoxModel:{"backendNodeId":110}',
        {
          model: {
            content: rectQuad(10, 10, 110, 110),
            padding: rectQuad(0, 0, 120, 120),
            border: rectQuad(0, 0, 120, 120),
            margin: rectQuad(0, 0, 120, 120),
            width: 100,
            height: 100,
          },
        },
      ],
      // Styles (uses DOM.resolveNode + Runtime.callFunctionOn)
      ['DOM.resolveNode', { object: { objectId: 'obj1' } }],
      [
        'Runtime.callFunctionOn',
        {
          result: {
            value: {
              display: 'block',
              position: 'static',
              zIndex: 'auto',
              overflowX: 'visible',
              overflowY: 'visible',
              opacity: '1',
              visibility: 'visible',
              contain: 'none',
              pointerEvents: 'auto',
              lineHeight: 'normal',
              fontFamily: 'sans-serif',
              fontSize: '16px',
              fontWeight: '400',
            },
          },
        },
      ],
      // Topology via Runtime.evaluate
      [
        'Runtime.evaluate',
        {
          result: {
            value: {
              scroll: [],
              clipping: [],
              topology: {
                containingBlockOf: [0],
                nearestPositionedAncestorOf: [0],
                scrollContainerOf: [0],
                stackingContextOf: [0],
                formattingContextOf: [0],
                clippingRootOf: [0],
                paintOrderBucket: [1],
                paintOrderIndex: [0],
              },
            },
          },
        },
      ],
    ])

    const request = makeRequest('test-1', [{ id: 's1', selector: '.test' }], {
      geometry: true,
      fragments: false,
      styles: true,
      topology: true,
    })

    const response = await extractorFor(replies).extract(request)

    assert.strictEqual(response.requestId, 'test-1')
    assert.strictEqual(response.status, 'ok')
    assert.strictEqual(response.diagnostics.length, 0)
    assert.ok(response.snapshots, 'snapshots array should exist')
    assert.strictEqual(response.snapshots.length, 1)
    assert.ok(response.snapshot, 'deprecated snapshot field still present')

    const world = response.snapshots[0]
    assert.strictEqual(world.subjects.ids.length, 1)
    assert.strictEqual(world.boxes.boxId.length, 1)
    assert.strictEqual(world.styles.display[0], 2) // block
    assert.strictEqual(world.provenance.length, 5) // 5 extraction steps
  })

  it('should return partial status when selectors do not match', async () => {
    const replies = mockResponses([
      ['DOM.getDocument', documentReply([])],
      ['DOM.querySelectorAll', { nodeIds: [] }],
    ])

    const request = makeRequest('test-2', [{ id: 's1', selector: '.missing' }], { geometry: true })

    const response = await extractorFor(replies).extract(request)

    assert.strictEqual(response.status, 'partial')
    const sawNoMatch = response.diagnostics.some((d) => d.code === 'IMH_SELECTOR_NO_MATCH')
    assert.ok(sawNoMatch, 'should emit selector no match diagnostic')
  })

  it('should resolve multiple matches per selector and preserve identity', async () => {
    const replies = mockResponses([
      [
        'DOM.getDocument',
        documentReply([
          elementNode(2, 102, 'div', {
            children: [
              elementNode(10, 110, 'button', { attributes: ['class', 'btn'] }),
              elementNode(11, 111, 'button', { attributes: ['class', 'btn'] }),
            ],
          }),
        ]),
      ],
      // querySelectorAll returns frontend nodeIds for the selector.
      ['DOM.querySelectorAll', { nodeIds: [10, 11] }],
      // DOM.describeNode converts each frontend nodeId to its stable backendNodeId.
      // We mock the keyed response for each nodeId.
      ['DOM.describeNode:{"nodeId":10}', { node: { backendNodeId: 110 } }],
      ['DOM.describeNode:{"nodeId":11}', { node: { backendNodeId: 111 } }],
      // Box models for each backendNodeId.
      [
        'DOM.getBoxModel:{"backendNodeId":110}',
        {
          model: {
            content: rectQuad(0, 0, 100, 40),
            padding: rectQuad(0, 0, 100, 40),
            border: rectQuad(0, 0, 100, 40),
            margin: rectQuad(0, 0, 100, 40),
            width: 100,
            height: 40,
          },
        },
      ],
      [
        'DOM.getBoxModel:{"backendNodeId":111}',
        {
          model: {
            content: rectQuad(110, 0, 210, 40),
            padding: rectQuad(110, 0, 210, 40),
            border: rectQuad(110, 0, 210, 40),
            margin: rectQuad(110, 0, 210, 40),
            width: 100,
            height: 40,
          },
        },
      ],
      // Transforms use DOM.resolveNode + Runtime.callFunctionOn
      ['DOM.resolveNode', { object: { objectId: 'obj1' } }],
      [
        'Runtime.callFunctionOn',
        {
          result: {
            value: {
              matrix: [1, 0, 0, 1, 0, 0],
              originX: 0,
              originY: 0,
            },
          },
        },
      ],
    ])

    const request = makeRequest('test-multi', [{ id: 's1', selector: '.btn' }], {
      geometry: true,
      styles: false,
      topology: false,
    })

    const response = await extractorFor(replies).extract(request)

    assert.strictEqual(response.status, 'ok')
    assert.strictEqual(response.diagnostics.length, 0)

    const world = response.snapshots[0]
    assert.strictEqual(world.subjects.ids.length, 2)
    assert.strictEqual(world.boxes.boxId.length, 2)

    // Verify backendNodeIds are preserved in the subjects table.
    const backendIds = world.subjects.domNodeId
    assert.strictEqual(backendIds.length, 2)
    assert.ok(backendIds.includes(110), 'should include backendNodeId 110')
    assert.ok(backendIds.includes(111), 'should include backendNodeId 111')
  })

  it('should return error status when session attach fails', async () => {
    // Factory that always rejects, simulating an unreachable browser.
    const failingFactory = () => Promise.reject(new Error('Browser not reachable'))

    const extractor = new CDPExtractor(new CDPSessionManager(failingFactory, {}))
    const request = makeRequest('test-3', [], {})

    const response = await extractor.extract(request)

    assert.strictEqual(response.status, 'error')
    const sawAttachFailure = response.diagnostics.some((d) => d.code === 'IMH_CDP_SESSION_ATTACH_FAILED')
    assert.ok(sawAttachFailure, 'should emit attach failed diagnostic')
    assert.strictEqual(response.snapshots[0].subjects.ids.length, 0)
  })
})
|
||||
@@ -0,0 +1,973 @@
|
||||
/**
|
||||
* Main Extraction Orchestrator
|
||||
*
|
||||
* Consumes an extractor request plan, coordinates CDP session
|
||||
* management, DOM resolution, geometry capture, style extraction,
|
||||
* and topology building.
|
||||
*
|
||||
* Returns raw browser facts matching the geometry world schema,
|
||||
* with provenance metadata and graceful partial handling.
|
||||
*/
|
||||
|
||||
import type { DiagnosticCode } from 'imhotep-core'
|
||||
import type { CDPSession, CDPSessionManager } from './session.js'
|
||||
import { extractDOM, resolveSelector, type DOMExtractionResult, type SelectorMatch } from './dom.js'
|
||||
import {
|
||||
extractBoxModels,
|
||||
extractFragments,
|
||||
extractTransforms,
|
||||
extractVisualBoxes,
|
||||
type GeometryExtractionResult,
|
||||
type BoxRecord,
|
||||
type FragmentRecord,
|
||||
type TransformRecord,
|
||||
} from './geometry.js'
|
||||
import { extractStyles, type StyleRecord, type StyleExtractionConfig } from './styles.js'
|
||||
import { extractTopology, type TopologyExtractionResult } from './topology.js'
|
||||
|
||||
/**
 * Extractor request plan: what to extract, from where, and for which subjects.
 */
export interface ExtractorRequest {
  /** Caller-chosen identifier, echoed back on the response. */
  requestId: string
  /** Page the extraction targets. */
  sceneTarget: {
    pageRef: unknown
    url: string
  }
  /** Environment the scene is evaluated under. */
  env: {
    viewportWidth: number
    viewportHeight: number
    colorScheme: string
    pointer: string
  }
  /**
   * Subjects to extract. A selector may match several elements. When both
   * `nodeId` and `backendNodeId` are supplied the extractor uses them
   * directly and skips selector resolution for that subject.
   */
  subjects: Array<{
    id: string
    selector: string
    nodeId?: number
    backendNodeId?: number
  }>
  /** Which fact families to extract. */
  requiredFacts: {
    /** Skipped only when explicitly `false`. */
    geometry?: boolean
    /** Extracted only when truthy (and geometry is not disabled). */
    fragments?: boolean
    /** `false` disables; an array restricts extraction to the listed properties. */
    styles?: boolean | string[]
    /** Skipped only when explicitly `false`. */
    topology?: boolean | string[]
    text?: boolean
  }
}
|
||||
|
||||
/**
 * Provenance entry: records which extraction step produced a fact.
 */
export interface ProvenanceEntry {
  /** Identifier of the fact this entry describes. */
  factId: number
  /** `stepId` of the extraction trace step that produced the fact. */
  extractionStepId: number
  /** Numeric source category (the extractor writes 1 for the CDP protocol). */
  sourceKind: number
  /** Reference into the source; the extractor stores the step id here. */
  sourceRef: number
}
|
||||
|
||||
/**
 * Confidence entry: a confidence score attached to a fact.
 */
export interface ConfidenceEntry {
  /** Identifier of the fact being scored. */
  factId: number
  /** Confidence score. NOTE(review): scale/range is not defined here — confirm against core contracts. */
  confidence: number
  /** Numeric code explaining the score. NOTE(review): code table is defined elsewhere. */
  reasonCode: number
}
|
||||
|
||||
/**
 * Extraction trace entry: the outcome of one pipeline step.
 */
export interface ExtractionTraceEntry {
  /** Step number within the pipeline (1 = DOM tree … 5 = topology). */
  stepId: number
  /** Kind of fact the step produces, e.g. 'dom-tree', 'geometry', 'styles'. */
  factKind: string
  /** Outcome of the step. */
  status: 'ok' | 'partial' | 'error'
}
|
||||
|
||||
/**
 * Extractor response.
 *
 * The canonical shape is `snapshots` (an array). The singular `snapshot`
 * field is retained for backward compatibility only and is deprecated;
 * read `snapshots[0]` instead.
 */
export interface ExtractorResponse {
  /** Identifier copied from the originating request. */
  requestId: string
  /**
   * Overall outcome: 'error' if any diagnostic has error severity,
   * otherwise 'partial' if any warning was emitted, otherwise 'ok'.
   */
  status: 'ok' | 'partial' | 'error'
  /** @deprecated Use snapshots[0] instead. Kept for backward compatibility. */
  snapshot: GeometryWorldSnapshot
  /** Canonical shape: array of snapshots for multi-state extraction. */
  snapshots: GeometryWorldSnapshot[]
  /** Warnings and errors collected across all extraction steps. */
  diagnostics: ExtractorDiagnostic[]
  /** Per-step trace of the extraction run. */
  extractionTrace: {
    /** Outcome of each recorded step. */
    steps: ExtractionTraceEntry[]
    /** Wall-clock start/end (epoch milliseconds) of each recorded step. */
    timings: Array<{ stepId: number; startMs: number; endMs: number }>
    /** Protocol calls attributed to each step. */
    protocolCalls: Array<{
      stepId: number
      protocol: string
      method: string
      params: Record<string, unknown>
    }>
  }
}
|
||||
|
||||
/**
 * Diagnostic describing an extraction failure or degradation.
 */
export interface ExtractorDiagnostic {
  /** Machine-readable diagnostic code (e.g. 'IMH_SELECTOR_NO_MATCH'). */
  code: DiagnosticCode
  /** 'error' diagnostics make the response status 'error'; 'warning' makes it 'partial'. */
  severity: 'warning' | 'error'
  /** Human-readable description of what went wrong. */
  message: string
  /** Request subject id the diagnostic refers to, when it is subject-specific. */
  subjectId?: string
  /** Selector the diagnostic refers to, when it is selector-specific. */
  selector?: string
}
|
||||
|
||||
/**
 * Geometry world snapshot.
 *
 * Mirrors the geometry world schema from the core contracts. Most tables are
 * stored column-wise: each property is an array and row `i` of a table is
 * formed by element `i` of every column in that table.
 */
export interface GeometryWorldSnapshot {
  sceneId: string
  snapshotId: string

  /** Environment the snapshot was captured under. */
  env: {
    viewportWidth: number
    viewportHeight: number
    deviceScaleFactor: number
    colorScheme: string
    pointer: string
    hover: boolean
    reducedMotion: string
    locale: string
    writingMode: string
  }

  /** Where and when the snapshot was taken. */
  source: {
    url: string
    browserName: string
    browserVersion: string
    engine: string
    extractedAt: string
  }

  /** Shared string table. NOTE(review): presumably indexed by `*StringId` columns — confirm. */
  strings: string[]

  /** One row per resolved subject; `domNodeId` holds the CDP backendNodeId. */
  subjects: {
    ids: number[]
    domNodeId: number[]
    subjectKind: number[]
    primaryBoxId: number[]
    firstFragmentId: number[]
    fragmentCount: number[]
    firstTextRunId: number[]
    textRunCount: number[]
  }

  /** Raw DOM extraction result. */
  dom: DOMExtractionResult

  /** Coordinate frames. */
  frames: {
    frameId: number[]
    frameKind: number[]
    ownerSubjectId: number[]
    parentFrameId: number[]
    originX: number[]
    originY: number[]
    axisMatrixStart: number[]
    clipRectId: number[]
    scrollContainerId: number[]
    writingMode: number[]
  }

  /** Flat matrix storage referenced by start offsets from other tables. */
  matrices: {
    values: number[]
  }

  /** Rectangle table. */
  rects: {
    rectId: number[]
    left: number[]
    top: number[]
    right: number[]
    bottom: number[]
  }

  /** Box-model boxes (border, padding and content edges) per subject. */
  boxes: {
    boxId: number[]
    subjectId: number[]
    frameId: number[]
    borderLeft: number[]
    borderTop: number[]
    borderRight: number[]
    borderBottom: number[]
    paddingLeft: number[]
    paddingTop: number[]
    paddingRight: number[]
    paddingBottom: number[]
    contentLeft: number[]
    contentTop: number[]
    contentRight: number[]
    contentBottom: number[]
  }

  /** Visual (post-transform) boxes; same columns as `boxes`. */
  visualBoxes: GeometryWorldSnapshot['boxes']

  /** Layout fragments per subject. */
  fragments: {
    fragmentId: number[]
    subjectId: number[]
    fragmentKind: number[]
    boxLeft: number[]
    boxTop: number[]
    boxRight: number[]
    boxBottom: number[]
    lineIndex: number[]
    flowIndex: number[]
    parentFragmentId: number[]
  }

  /** Transforms per subject; matrix values live in `matrices.values`. */
  transforms: {
    transformId: number[]
    subjectId: number[]
    matrixStart: number[]
    matrixLength: number[]
    originX: number[]
    originY: number[]
  }

  /** Computed styles per subject, encoded as numeric enums/values. */
  styles: {
    subjectId: number[]
    display: number[]
    position: number[]
    zIndexKind: number[]
    zIndexValue: number[]
    overflowX: number[]
    overflowY: number[]
    opacity: number[]
    visibility: number[]
    containFlags: number[]
    pointerEvents: number[]
    lineHeight: number[]
    fontFamilyStringId: number[]
    fontSize: number[]
    fontWeight: number[]
  }

  topology: TopologyExtractionResult['topology']
  scroll: TopologyExtractionResult['scroll']
  clipping: TopologyExtractionResult['clipping']

  /** One entry per recorded extraction step. */
  provenance: ProvenanceEntry[]
  confidence: ConfidenceEntry[]
}
|
||||
|
||||
/**
 * Main extractor class.
 *
 * Orchestrates the full extraction pipeline:
 * 1. Attach CDP session and enable the DOM, CSS and Runtime domains
 * 2. Extract DOM tree
 * 3. Resolve selectors to node IDs
 * 4. Extract geometry (boxes, visual boxes, fragments, transforms)
 * 5. Extract styles
 * 6. Extract topology
 * 7. Build geometry world snapshot
 * 8. Return response with provenance and diagnostics
 */
export class CDPExtractor {
  private readonly sessionManager: CDPSessionManager

  constructor(sessionManager: CDPSessionManager) {
    this.sessionManager = sessionManager
  }

  /**
   * Execute extraction according to the request plan.
   *
   * Failures in the awaited extraction calls are converted into diagnostics
   * instead of being thrown. A failed attach or domain enablement yields an
   * empty snapshot with status 'error'; later failures leave the affected
   * tables empty and extraction continues with the remaining steps.
   *
   * @param request Plan naming the subjects and the fact families to extract.
   * @returns Response whose status is 'error' if any error diagnostic was
   *   emitted, 'partial' if only warnings were emitted, and 'ok' otherwise.
   */
  async extract(request: ExtractorRequest): Promise<ExtractorResponse> {
    const diagnostics: ExtractorDiagnostic[] = []
    const traceSteps: ExtractionTraceEntry[] = []
    const traceTimings: Array<{ stepId: number; startMs: number; endMs: number }> = []
    // Nothing below records protocol calls; the list is returned empty.
    const traceCalls: Array<{
      stepId: number
      protocol: string
      method: string
      params: Record<string, unknown>
    }> = []

    /** Normalise an unknown thrown value to a message string. */
    const errorMessage = (err: unknown): string => (err instanceof Error ? err.message : String(err))

    function recordStep(
      stepId: number,
      factKind: string,
      status: 'ok' | 'partial' | 'error',
      startMs: number,
      endMs: number
    ) {
      traceSteps.push({ stepId, factKind, status })
      traceTimings.push({ stepId, startMs, endMs })
    }

    /** 'partial' when any diagnostic code starts with one of the prefixes, else 'ok'. */
    const stepStatus = (...codePrefixes: string[]): 'ok' | 'partial' =>
      diagnostics.some((d) => codePrefixes.some((prefix) => d.code.startsWith(prefix))) ? 'partial' : 'ok'

    let session: CDPSession
    try {
      session = await this.sessionManager.attach()
      // Enable required CDP domains before extraction.
      // Use cached enablement to avoid redundant round-trips.
      await this.sessionManager.enableDomain('DOM')
      await this.sessionManager.enableDomain('CSS')
      await this.sessionManager.enableDomain('Runtime')
    } catch (err) {
      diagnostics.push({
        code: 'IMH_CDP_SESSION_ATTACH_FAILED',
        severity: 'error',
        message: `Failed to attach CDP session: ${errorMessage(err)}`,
      })
      const emptySnapshot = createEmptySnapshot(request)
      return {
        requestId: request.requestId,
        status: 'error',
        snapshot: emptySnapshot,
        snapshots: [emptySnapshot],
        diagnostics,
        extractionTrace: {
          steps: traceSteps,
          timings: traceTimings,
          protocolCalls: traceCalls,
        },
      }
    }

    // --- Step 1: Extract DOM ---
    // Extract DOM first so the full tree is pushed to the frontend.
    // This ensures backendNodeIds returned by querySelectorAll remain valid.
    const domStepStart = Date.now()
    let domResult: DOMExtractionResult
    try {
      domResult = await extractDOM(session)
      recordStep(1, 'dom-tree', 'ok', domStepStart, Date.now())
    } catch (err) {
      diagnostics.push({
        code: 'IMH_DOM_EXTRACTION_FAILED',
        severity: 'error',
        message: `DOM extraction failed: ${errorMessage(err)}`,
      })
      // Continue with an empty DOM so later steps can still run.
      domResult = { nodes: [], strings: [], nodeIdToIndex: new Map(), rootIndex: 0 }
      recordStep(1, 'dom-tree', 'error', domStepStart, Date.now())
    }

    // --- Step 2: Resolve selectors ---
    const selectorStepStart = Date.now()
    const backendNodeIds: number[] = []
    const subjectIds: number[] = []
    const selectorDiagnosticsStart = diagnostics.length
    const resolvedSubjects: Array<{ id: string; selector: string; backendNodeId: number; nodeId: number }> = []

    // Resolve all selectors sequentially to avoid CDP race conditions
    // when multiple querySelectorAll calls run concurrently.
    // Callers that already resolved selectors can pass nodeId/backendNodeId
    // to avoid a duplicate querySelectorAll + describeNode pass.
    for (const subject of request.subjects) {
      let matches: Awaited<ReturnType<typeof resolveSelector>>
      if (subject.nodeId !== undefined && subject.backendNodeId !== undefined) {
        matches = [{ nodeId: subject.nodeId, backendNodeId: subject.backendNodeId }]
      } else {
        try {
          matches = await resolveSelector(session, subject.selector)
        } catch (err) {
          // Report the failure from the catch itself so that an error with an
          // empty message is not mistaken for "no match".
          diagnostics.push({
            code: 'IMH_SELECTOR_RESOLUTION_FAILED',
            severity: 'error',
            message: `Failed to resolve selector "${subject.selector}": ${errorMessage(err)}`,
            subjectId: subject.id,
            selector: subject.selector,
          })
          continue
        }
      }

      if (matches.length === 0) {
        diagnostics.push({
          code: 'IMH_SELECTOR_NO_MATCH',
          severity: 'warning',
          message: `Selector "${subject.selector}" matched 0 elements.`,
          subjectId: subject.id,
          selector: subject.selector,
        })
        continue
      }

      // Every matched element becomes its own resolved subject; its numeric
      // subject id is its index in resolvedSubjects.
      for (const match of matches) {
        backendNodeIds.push(match.backendNodeId)
        subjectIds.push(resolvedSubjects.length)
        resolvedSubjects.push({ ...subject, backendNodeId: match.backendNodeId, nodeId: match.nodeId })
      }
    }
    const selectorDiagnosticsAdded = diagnostics.length > selectorDiagnosticsStart
    recordStep(2, 'selector-resolution', selectorDiagnosticsAdded ? 'partial' : 'ok', selectorStepStart, Date.now())

    // --- Step 3: Extract Geometry ---
    const geometryStepStart = Date.now()
    let boxRecords: BoxRecord[] = []
    let fragmentRecords: FragmentRecord[] = []
    let transformRecords: TransformRecord[] = []
    let matrixValues: number[] = []
    let visualBoxRecords: BoxRecord[] = []

    if (request.requiredFacts.geometry !== false && backendNodeIds.length > 0) {
      try {
        const { boxes, errors } = await extractBoxModels(session, backendNodeIds, subjectIds)
        boxRecords = boxes
        for (const e of errors) {
          diagnostics.push({
            code: 'IMH_BOX_MODEL_PARTIAL',
            severity: 'warning',
            message: `Box model extraction failed for node ${e.backendNodeId}: ${e.reason}`,
          })
        }
      } catch (err) {
        diagnostics.push({
          code: 'IMH_BOX_MODEL_FAILED',
          severity: 'error',
          message: `Box model extraction failed: ${errorMessage(err)}`,
        })
      }

      // Extract visual boxes (post-transform coordinates via getBoundingClientRect)
      try {
        const { boxes, errors } = await extractVisualBoxes(session, backendNodeIds, subjectIds)
        visualBoxRecords = boxes
        for (const e of errors) {
          diagnostics.push({
            code: 'IMH_VISUAL_BOX_PARTIAL',
            severity: 'warning',
            message: `Visual box extraction failed for node ${e.backendNodeId}: ${e.reason}`,
          })
        }
      } catch (err) {
        diagnostics.push({
          code: 'IMH_VISUAL_BOX_FAILED',
          severity: 'error',
          message: `Visual box extraction failed: ${errorMessage(err)}`,
        })
      }

      if (request.requiredFacts.fragments) {
        try {
          const { fragments, errors } = await extractFragments(session, backendNodeIds, subjectIds)
          fragmentRecords = fragments
          for (const e of errors) {
            diagnostics.push({
              code: 'IMH_FRAGMENT_PARTIAL',
              severity: 'warning',
              message: `Fragment extraction failed for node ${e.backendNodeId}: ${e.reason}`,
            })
          }
        } catch (err) {
          diagnostics.push({
            code: 'IMH_FRAGMENT_FAILED',
            severity: 'error',
            message: `Fragment extraction failed: ${errorMessage(err)}`,
          })
        }
      }

      try {
        const { transforms, matrices, errors } = await extractTransforms(session, backendNodeIds, subjectIds)
        transformRecords = transforms
        matrixValues = matrices
        for (const e of errors) {
          diagnostics.push({
            code: 'IMH_TRANSFORM_PARTIAL',
            severity: 'warning',
            message: `Transform extraction failed for node ${e.backendNodeId}: ${e.reason}`,
          })
        }
      } catch (err) {
        diagnostics.push({
          code: 'IMH_TRANSFORM_FAILED',
          severity: 'error',
          message: `Transform extraction failed: ${errorMessage(err)}`,
        })
      }
    }
    // Visual-box diagnostics belong to the geometry step too, so they must
    // also downgrade its trace status.
    recordStep(
      3,
      'geometry',
      stepStatus('IMH_BOX_MODEL', 'IMH_VISUAL_BOX', 'IMH_FRAGMENT', 'IMH_TRANSFORM'),
      geometryStepStart,
      Date.now()
    )

    // --- Step 4: Extract Styles ---
    const styleStepStart = Date.now()
    let styleRecords: StyleRecord[] = []

    if (request.requiredFacts.styles !== false && backendNodeIds.length > 0) {
      // An explicit property list narrows extraction; anything else means "all".
      const styleConfig: StyleExtractionConfig =
        Array.isArray(request.requiredFacts.styles)
          ? { all: false, properties: request.requiredFacts.styles }
          : { all: true }

      try {
        const { styles, errors, strings: updatedStrings } = await extractStyles(
          session,
          backendNodeIds,
          subjectIds,
          styleConfig,
          domResult.strings
        )
        styleRecords = styles
        // Style extraction returns the string table to use from here on.
        domResult.strings = updatedStrings
        for (const e of errors) {
          diagnostics.push({
            code: 'IMH_STYLE_PARTIAL',
            severity: 'warning',
            message: `Style extraction failed for node ${e.backendNodeId}: ${e.reason}`,
          })
        }
      } catch (err) {
        diagnostics.push({
          code: 'IMH_STYLE_FAILED',
          severity: 'error',
          message: `Style extraction failed: ${errorMessage(err)}`,
        })
      }
    }
    recordStep(4, 'styles', stepStatus('IMH_STYLE'), styleStepStart, Date.now())

    // --- Step 5: Extract Topology ---
    const topologyStepStart = Date.now()
    let topologyResult: TopologyExtractionResult = {
      scroll: [],
      clipping: [],
      topology: {
        containingBlockOf: [],
        nearestPositionedAncestorOf: [],
        scrollContainerOf: [],
        stackingContextOf: [],
        formattingContextOf: [],
        clippingRootOf: [],
        paintOrderBucket: [],
        paintOrderIndex: [],
      },
    }

    if (request.requiredFacts.topology !== false && backendNodeIds.length > 0) {
      try {
        const { result, errors } = await extractTopology(session, backendNodeIds, subjectIds)
        topologyResult = result
        for (const e of errors) {
          diagnostics.push({
            code: 'IMH_TOPOLOGY_PARTIAL',
            severity: 'warning',
            message: `Topology extraction failed for node ${e.backendNodeId}: ${e.reason}`,
          })
        }
      } catch (err) {
        diagnostics.push({
          code: 'IMH_TOPOLOGY_FAILED',
          severity: 'error',
          message: `Topology extraction failed: ${errorMessage(err)}`,
        })
      }
    }
    recordStep(5, 'topology', stepStatus('IMH_TOPOLOGY'), topologyStepStart, Date.now())

    // --- Build Geometry World Snapshot ---
    const snapshot = buildSnapshot(
      request,
      domResult,
      boxRecords,
      visualBoxRecords,
      fragmentRecords,
      transformRecords,
      matrixValues,
      styleRecords,
      topologyResult,
      resolvedSubjects
    )

    // Determine overall status
    const hasErrors = diagnostics.some((d) => d.severity === 'error')
    const hasWarnings = diagnostics.some((d) => d.severity === 'warning')
    const status: ExtractorResponse['status'] = hasErrors ? 'error' : hasWarnings ? 'partial' : 'ok'

    // Add provenance for extraction steps: one entry per recorded step,
    // with fact ids numbered sequentially from 0.
    snapshot.provenance = traceSteps.map((step, factId): ProvenanceEntry => ({
      factId,
      extractionStepId: step.stepId,
      sourceKind: 1, // CDP protocol
      sourceRef: step.stepId,
    }))

    return {
      requestId: request.requestId,
      status,
      snapshot,
      snapshots: [snapshot],
      diagnostics,
      extractionTrace: {
        steps: traceSteps,
        timings: traceTimings,
        protocolCalls: traceCalls,
      },
    }
  }
}
|
||||
|
||||
/**
 * Assemble a columnar geometry world snapshot from raw extraction output.
 *
 * Row records (boxes, visual boxes, fragments, transforms, styles) are pivoted
 * into one array per field. The subjects table gets one row per entry of
 * `resolvedSubjects`; a subject's id is its index in that array.
 *
 * Values that are fixed here rather than measured: `deviceScaleFactor` (1),
 * `hover` (false), `reducedMotion`, `locale`, `writingMode`, `browserVersion`
 * (empty string), and the `frames` / `rects` tables (always empty).
 * `provenance` and `confidence` are returned as empty arrays.
 *
 * @param request          Supplies the scene id, viewport/env values and source URL.
 * @param dom              DOM extraction result; embedded as-is, and its string table is reused.
 * @param boxes            Layout box records.
 * @param visualBoxes      Visual (bounding rect) box records.
 * @param fragments        Fragment records.
 * @param transforms       Transform records indexing into `matrices`.
 * @param matrices         Flat matrix value array.
 * @param styles           Style records.
 * @param topology         Topology, scroll and clipping extraction output.
 * @param resolvedSubjects Subjects in subject-id order.
 * @returns The assembled snapshot.
 */
function buildSnapshot(
  request: ExtractorRequest,
  dom: DOMExtractionResult,
  boxes: BoxRecord[],
  visualBoxes: BoxRecord[],
  fragments: FragmentRecord[],
  transforms: TransformRecord[],
  matrices: number[],
  styles: StyleRecord[],
  topology: TopologyExtractionResult,
  resolvedSubjects: Array<{ id: string; selector: string; backendNodeId: number; nodeId: number }>
): GeometryWorldSnapshot {
  /** Pivot row records into one array per listed field, preserving field order. */
  function toColumns<R, K extends keyof R>(
    rows: readonly R[],
    fields: readonly K[]
  ): { [P in K]: R[P][] } {
    const table = {} as Record<K, unknown[]>
    for (const field of fields) {
      table[field] = rows.map((row) => row[field])
    }
    return table as { [P in K]: R[P][] }
  }

  const boxFields = [
    'boxId',
    'subjectId',
    'frameId',
    'borderLeft',
    'borderTop',
    'borderRight',
    'borderBottom',
    'paddingLeft',
    'paddingTop',
    'paddingRight',
    'paddingBottom',
    'contentLeft',
    'contentTop',
    'contentRight',
    'contentBottom',
  ] as const
  const fragmentFields = [
    'fragmentId',
    'subjectId',
    'fragmentKind',
    'boxLeft',
    'boxTop',
    'boxRight',
    'boxBottom',
    'lineIndex',
    'flowIndex',
    'parentFragmentId',
  ] as const
  const transformFields = [
    'transformId',
    'subjectId',
    'matrixStart',
    'matrixLength',
    'originX',
    'originY',
  ] as const
  const styleFields = [
    'subjectId',
    'display',
    'position',
    'zIndexKind',
    'zIndexValue',
    'overflowX',
    'overflowY',
    'opacity',
    'visibility',
    'containFlags',
    'pointerEvents',
    'lineHeight',
    'fontFamilyStringId',
    'fontSize',
    'fontWeight',
  ] as const

  // subjectId -> boxId. When several boxes share a subject, the last one wins.
  const primaryBoxOf = new Map<number, number>()
  boxes.forEach((box) => primaryBoxOf.set(box.subjectId, box.boxId))

  // subjectId -> fragment ids, in extraction order.
  const fragmentIdsOf = new Map<number, number[]>()
  for (const fragment of fragments) {
    const known = fragmentIdsOf.get(fragment.subjectId)
    if (known) {
      known.push(fragment.fragmentId)
    } else {
      fragmentIdsOf.set(fragment.subjectId, [fragment.fragmentId])
    }
  }

  const subjects: {
    ids: number[]
    domNodeId: number[]
    subjectKind: number[]
    primaryBoxId: number[]
    firstFragmentId: number[]
    fragmentCount: number[]
    firstTextRunId: number[]
    textRunCount: number[]
  } = {
    ids: [],
    domNodeId: [],
    subjectKind: [],
    primaryBoxId: [],
    firstFragmentId: [],
    fragmentCount: [],
    firstTextRunId: [],
    textRunCount: [],
  }

  resolvedSubjects.forEach((subject, subjectIndex) => {
    const ownFragments = fragmentIdsOf.get(subjectIndex)
    const hasFragments = ownFragments !== undefined && ownFragments.length > 0

    subjects.ids.push(subjectIndex)
    subjects.domNodeId.push(subject.backendNodeId)
    subjects.subjectKind.push(1) // element
    // 0 doubles as the "no box / no fragment" default.
    subjects.primaryBoxId.push(primaryBoxOf.get(subjectIndex) ?? 0)
    subjects.firstFragmentId.push(hasFragments ? ownFragments[0] : 0)
    subjects.fragmentCount.push(hasFragments ? ownFragments.length : 0)
    // Text runs are not extracted here; both columns are zero-filled.
    subjects.firstTextRunId.push(0)
    subjects.textRunCount.push(0)
  })

  const { env, sceneTarget } = request

  return {
    sceneId: request.requestId,
    snapshotId: 'default',
    env: {
      viewportWidth: env.viewportWidth,
      viewportHeight: env.viewportHeight,
      deviceScaleFactor: 1,
      colorScheme: env.colorScheme,
      pointer: env.pointer,
      hover: false,
      reducedMotion: 'no-preference',
      locale: 'en',
      writingMode: 'horizontal-tb',
    },
    source: {
      url: sceneTarget.url,
      browserName: 'chromium',
      browserVersion: '',
      engine: 'chromium-cdp',
      extractedAt: new Date().toISOString(),
    },
    strings: dom.strings,
    subjects,
    dom,
    frames: {
      frameId: [],
      frameKind: [],
      ownerSubjectId: [],
      parentFrameId: [],
      originX: [],
      originY: [],
      axisMatrixStart: [],
      clipRectId: [],
      scrollContainerId: [],
      writingMode: [],
    },
    matrices: { values: matrices },
    rects: {
      rectId: [],
      left: [],
      top: [],
      right: [],
      bottom: [],
    },
    boxes: toColumns(boxes, boxFields),
    visualBoxes: toColumns(visualBoxes, boxFields),
    fragments: toColumns(fragments, fragmentFields),
    transforms: toColumns(transforms, transformFields),
    styles: toColumns(styles, styleFields),
    topology: topology.topology,
    scroll: topology.scroll,
    clipping: topology.clipping,
    provenance: [],
    confidence: [],
  }
}
|
||||
|
||||
/**
 * Produce a snapshot with the full table layout but no rows, for error cases.
 *
 * The `env` and `source` sections are filled from the request using the same
 * fixed values as a regular snapshot; every table column is an empty array.
 *
 * @param request Supplies the scene id, viewport/env values and source URL.
 * @returns A snapshot whose tables all have zero rows.
 */
function createEmptySnapshot(request: ExtractorRequest): GeometryWorldSnapshot {
  /** Build a table whose listed columns are fresh empty arrays, in the given order. */
  function emptyTable<K extends string>(...columns: K[]): Record<K, never[]> {
    const table = {} as Record<K, never[]>
    for (const column of columns) {
      table[column] = []
    }
    return table
  }

  // Layout boxes and visual boxes share one column layout but need separate arrays.
  const emptyBoxTable = () =>
    emptyTable(
      'boxId',
      'subjectId',
      'frameId',
      'borderLeft',
      'borderTop',
      'borderRight',
      'borderBottom',
      'paddingLeft',
      'paddingTop',
      'paddingRight',
      'paddingBottom',
      'contentLeft',
      'contentTop',
      'contentRight',
      'contentBottom'
    )

  const { env, sceneTarget } = request

  return {
    sceneId: request.requestId,
    snapshotId: 'default',
    env: {
      viewportWidth: env.viewportWidth,
      viewportHeight: env.viewportHeight,
      deviceScaleFactor: 1,
      colorScheme: env.colorScheme,
      pointer: env.pointer,
      hover: false,
      reducedMotion: 'no-preference',
      locale: 'en',
      writingMode: 'horizontal-tb',
    },
    source: {
      url: sceneTarget.url,
      browserName: 'chromium',
      browserVersion: '',
      engine: 'chromium-cdp',
      extractedAt: new Date().toISOString(),
    },
    strings: [],
    subjects: emptyTable(
      'ids',
      'domNodeId',
      'subjectKind',
      'primaryBoxId',
      'firstFragmentId',
      'fragmentCount',
      'firstTextRunId',
      'textRunCount'
    ),
    dom: { nodes: [], strings: [], nodeIdToIndex: new Map(), rootIndex: 0 },
    frames: emptyTable(
      'frameId',
      'frameKind',
      'ownerSubjectId',
      'parentFrameId',
      'originX',
      'originY',
      'axisMatrixStart',
      'clipRectId',
      'scrollContainerId',
      'writingMode'
    ),
    matrices: { values: [] },
    rects: emptyTable('rectId', 'left', 'top', 'right', 'bottom'),
    boxes: emptyBoxTable(),
    visualBoxes: emptyBoxTable(),
    fragments: emptyTable(
      'fragmentId',
      'subjectId',
      'fragmentKind',
      'boxLeft',
      'boxTop',
      'boxRight',
      'boxBottom',
      'lineIndex',
      'flowIndex',
      'parentFragmentId'
    ),
    transforms: emptyTable(
      'transformId',
      'subjectId',
      'matrixStart',
      'matrixLength',
      'originX',
      'originY'
    ),
    styles: emptyTable(
      'subjectId',
      'display',
      'position',
      'zIndexKind',
      'zIndexValue',
      'overflowX',
      'overflowY',
      'opacity',
      'visibility',
      'containFlags',
      'pointerEvents',
      'lineHeight',
      'fontFamilyStringId',
      'fontSize',
      'fontWeight'
    ),
    topology: emptyTable(
      'containingBlockOf',
      'nearestPositionedAncestorOf',
      'scrollContainerOf',
      'stackingContextOf',
      'formattingContextOf',
      'clippingRootOf',
      'paintOrderBucket',
      'paintOrderIndex'
    ),
    scroll: [],
    clipping: [],
    provenance: [],
    confidence: [],
  }
}
|
||||
@@ -0,0 +1,398 @@
|
||||
/**
|
||||
* Geometry Extraction
|
||||
*
|
||||
* Captures box models, fragment boxes, and transform data for DOM nodes
|
||||
* using CDP DOM.getBoxModel and Runtime.evaluate.
|
||||
*
|
||||
* We batch requests where possible: gather all target node IDs first,
|
||||
* then request box models in bulk via parallel sends.
|
||||
*/
|
||||
|
||||
import type { CDPSession } from './session.js'
|
||||
|
||||
/**
 * Box model payload returned by CDP `DOM.getBoxModel` for one node.
 *
 * Each of `content`, `padding`, `border` and `margin` is a quad: eight numbers
 * laid out as `[x1, y1, x2, y2, x3, y3, x4, y4]`.
 */
export interface CDPBoxModel {
  /** Content box quad. */
  content: number[]
  /** Padding box quad. */
  padding: number[]
  /** Border box quad. */
  border: number[]
  /** Margin box quad. */
  margin: number[]
  /** Node width as reported by CDP. */
  width: number
  /** Node height as reported by CDP. */
  height: number
  /** Shape-outside data, if any; not interpreted by this package. */
  shapeOutside?: unknown
}
|
||||
|
||||
/**
 * One row of box geometry for a subject in the geometry world.
 *
 * Holds the left/top/right/bottom edges of the border, padding and content
 * boxes as axis-aligned extents.
 */
export interface BoxRecord {
  /** Row id, assigned sequentially by the extractor that produced the record. */
  boxId: number
  /** Subject this box belongs to. */
  subjectId: number
  /** Owning frame; the extractors in this file always write 0. */
  frameId: number

  // Border box edges.
  borderLeft: number
  borderTop: number
  borderRight: number
  borderBottom: number

  // Padding box edges.
  paddingLeft: number
  paddingTop: number
  paddingRight: number
  paddingBottom: number

  // Content box edges.
  contentLeft: number
  contentTop: number
  contentRight: number
  contentBottom: number
}
|
||||
|
||||
/**
 * One fragment rectangle of a subject (multi-line or multi-column content
 * yields several fragments per subject).
 */
export interface FragmentRecord {
  /** Row id, assigned sequentially across all subjects. */
  fragmentId: number
  /** Subject this fragment belongs to. */
  subjectId: number
  /** Fragment kind code; `extractFragments` writes 1 (client rect fragment). */
  fragmentKind: number

  // Fragment rectangle edges.
  boxLeft: number
  boxTop: number
  boxRight: number
  boxBottom: number

  /** Position of the fragment within its subject's client rect list. */
  lineIndex: number
  /** Flow index; `extractFragments` always writes 0. */
  flowIndex: number
  /** Parent fragment id; `extractFragments` always writes 0. */
  parentFragmentId: number
}
|
||||
|
||||
/**
 * Transform of one subject. The matrix values themselves live in a shared
 * flat array; this record points at the subject's slice of it.
 */
export interface TransformRecord {
  /** Row id, assigned sequentially. */
  transformId: number
  /** Subject this transform belongs to. */
  subjectId: number
  /** Offset of the first matrix value in the flat matrix array. */
  matrixStart: number
  /** Number of matrix values stored for this subject. */
  matrixLength: number
  /** Transform origin, x component. */
  originX: number
  /** Transform origin, y component. */
  originY: number
}
|
||||
|
||||
/**
 * Combined output of the geometry extraction functions.
 */
export interface GeometryExtractionResult {
  /** Box geometry rows. */
  boxes: BoxRecord[]
  /** Fragment rows. */
  fragments: FragmentRecord[]
  /** Transform rows; each one addresses a slice of `matrices`. */
  transforms: TransformRecord[]
  /** Flat array of matrix values (6 elements per 2D matrix) */
  matrices: number[]
}
|
||||
|
||||
/**
 * Extract box models for a list of backend node IDs.
 *
 * Sends `DOM.getBoxModel` for every node in parallel to minimize round-trip
 * latency, then converts each returned quad into axis-aligned edges.
 *
 * A failure for one node (protocol error, or a missing/malformed model) is
 * recorded in `errors` and does not affect the other nodes.
 *
 * @param session        CDP session used to send the commands.
 * @param backendNodeIds Backend node ids to measure.
 * @param subjectIds     Subject id for each entry of `backendNodeIds` (same index).
 * @returns `boxes` for the nodes that succeeded (with sequential `boxId`s) and
 *          `errors` for the nodes that did not.
 */
export async function extractBoxModels(
  session: CDPSession,
  backendNodeIds: number[],
  subjectIds: number[]
): Promise<{ boxes: BoxRecord[]; errors: Array<{ backendNodeId: number; reason: string }> }> {
  /** True when `value` holds at least the eight finite coordinates of a quad. */
  const isQuad = (value: unknown): value is number[] =>
    Array.isArray(value) &&
    value.length >= 8 &&
    value.slice(0, 8).every((n) => typeof n === 'number' && Number.isFinite(n))

  /**
   * Axis-aligned bounds of a quad `[x1,y1,x2,y2,x3,y3,x4,y4]`.
   * All four corners are considered, so the result is also correct when the
   * quad is rotated or skewed and its corners are not in top-left-first order.
   */
  const boundsOf = (quad: number[]) => {
    const xs = [quad[0], quad[2], quad[4], quad[6]]
    const ys = [quad[1], quad[3], quad[5], quad[7]]
    return {
      left: Math.min(...xs),
      top: Math.min(...ys),
      right: Math.max(...xs),
      bottom: Math.max(...ys),
    }
  }

  // Batch: request all box models in parallel using backendNodeId.
  const outcomes = await Promise.all(
    backendNodeIds.map(async (backendNodeId, idx) => {
      try {
        const response = await session.send<{ model: CDPBoxModel }>('DOM.getBoxModel', {
          backendNodeId,
        })
        const model = response?.model
        // Validate here so a bad payload becomes a per-node error instead of
        // an exception that rejects the whole batch.
        if (!model || !isQuad(model.border) || !isQuad(model.padding) || !isQuad(model.content)) {
          throw new Error('DOM.getBoxModel returned a missing or malformed box model')
        }
        return { status: 'ok' as const, model, backendNodeId, idx }
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err)
        return { status: 'error' as const, error: reason, backendNodeId, idx }
      }
    })
  )

  const boxes: BoxRecord[] = []
  const errors: Array<{ backendNodeId: number; reason: string }> = []

  for (const outcome of outcomes) {
    if (outcome.status === 'error') {
      errors.push({ backendNodeId: outcome.backendNodeId, reason: outcome.error })
      continue
    }

    const border = boundsOf(outcome.model.border)
    const padding = boundsOf(outcome.model.padding)
    const content = boundsOf(outcome.model.content)

    boxes.push({
      boxId: boxes.length,
      subjectId: subjectIds[outcome.idx],
      frameId: 0, // Frame resolution happens later.
      borderLeft: border.left,
      borderTop: border.top,
      borderRight: border.right,
      borderBottom: border.bottom,
      paddingLeft: padding.left,
      paddingTop: padding.top,
      paddingRight: padding.right,
      paddingBottom: padding.bottom,
      contentLeft: content.left,
      contentTop: content.top,
      contentRight: content.right,
      contentBottom: content.bottom,
    })
  }

  return { boxes, errors }
}
|
||||
|
||||
/**
 * Extract client rect fragments for a list of backend node IDs.
 *
 * Resolves each backendNodeId to a RemoteObject via `DOM.resolveNode`, then
 * uses `Runtime.callFunctionOn` to invoke `getClientRects()` directly on the
 * element. This avoids DOM mutation and fragile attribute queries.
 *
 * A failure for one node is recorded in `errors` and does not affect the
 * others. That includes exceptions thrown by the in-page function, which CDP
 * reports through `exceptionDetails` rather than by failing the command.
 *
 * @param session        CDP session used to send the commands.
 * @param backendNodeIds Backend node ids to measure.
 * @param subjectIds     Subject id for each entry of `backendNodeIds` (same index).
 * @returns One fragment per client rect (with sequential `fragmentId`s) plus
 *          per-node `errors`.
 */
export async function extractFragments(
  session: CDPSession,
  backendNodeIds: number[],
  subjectIds: number[]
): Promise<{ fragments: FragmentRecord[]; errors: Array<{ backendNodeId: number; reason: string }> }> {
  type PageRect = { left: number; top: number; right: number; bottom: number }

  const outcomes = await Promise.all(
    backendNodeIds.map(async (backendNodeId, idx) => {
      try {
        const resolved = await session.send<{ object: { objectId: string } }>('DOM.resolveNode', {
          backendNodeId,
        })
        const response = await session.send<{
          result: { value?: PageRect[] }
          exceptionDetails?: { text?: string; exception?: { description?: string } }
        }>('Runtime.callFunctionOn', {
          objectId: resolved.object.objectId,
          functionDeclaration: `function() {
            const rects = this.getClientRects()
            const boxes = []
            for (let i = 0; i < rects.length; i++) {
              boxes.push({
                left: rects[i].left,
                top: rects[i].top,
                right: rects[i].right,
                bottom: rects[i].bottom,
              })
            }
            return boxes
          }`,
          returnByValue: true,
        })

        // The command succeeds even when the page function throws; surface
        // that as an error instead of reporting "no fragments".
        const thrown = response.exceptionDetails
        if (thrown) {
          throw new Error(
            thrown.exception?.description ?? thrown.text ?? 'getClientRects() threw in the page'
          )
        }

        const value = response.result.value
        return {
          status: 'ok' as const,
          rects: Array.isArray(value) ? value : [],
          backendNodeId,
          idx,
        }
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err)
        return { status: 'error' as const, error: reason, backendNodeId, idx }
      }
    })
  )

  const fragments: FragmentRecord[] = []
  const errors: Array<{ backendNodeId: number; reason: string }> = []

  for (const outcome of outcomes) {
    if (outcome.status === 'error') {
      errors.push({ backendNodeId: outcome.backendNodeId, reason: outcome.error })
      continue
    }

    const subjectId = subjectIds[outcome.idx]
    outcome.rects.forEach((rect, rectIndex) => {
      fragments.push({
        fragmentId: fragments.length,
        subjectId,
        fragmentKind: 1, // 1 = client rect fragment
        boxLeft: rect.left,
        boxTop: rect.top,
        boxRight: rect.right,
        boxBottom: rect.bottom,
        lineIndex: rectIndex,
        flowIndex: 0,
        parentFragmentId: 0,
      })
    })
  }

  return { fragments, errors }
}
|
||||
|
||||
/**
 * Extract visual bounding boxes for a list of backend node IDs.
 *
 * Uses `getBoundingClientRect()` via `Runtime.callFunctionOn` to obtain
 * post-transform viewport coordinates. These are stored in visualBoxes and
 * represent the effective rendered position after all CSS transforms. The
 * border, padding and content edges of each record are all set to the same
 * bounding rect.
 *
 * A failure for one node is recorded in `errors` and does not affect the
 * others. That includes the in-page function throwing or returning no value.
 *
 * @param session        CDP session used to send the commands.
 * @param backendNodeIds Backend node ids to measure.
 * @param subjectIds     Subject id for each entry of `backendNodeIds` (same index).
 * @returns `boxes` for the nodes that succeeded (with sequential `boxId`s) and
 *          `errors` for the nodes that did not.
 */
export async function extractVisualBoxes(
  session: CDPSession,
  backendNodeIds: number[],
  subjectIds: number[]
): Promise<{ boxes: BoxRecord[]; errors: Array<{ backendNodeId: number; reason: string }> }> {
  type PageRect = {
    left: number
    top: number
    right: number
    bottom: number
    width: number
    height: number
  }

  const outcomes = await Promise.all(
    backendNodeIds.map(async (backendNodeId, idx) => {
      try {
        const resolved = await session.send<{ object: { objectId: string } }>('DOM.resolveNode', {
          backendNodeId,
        })
        const response = await session.send<{
          result: { value?: PageRect }
          exceptionDetails?: { text?: string; exception?: { description?: string } }
        }>('Runtime.callFunctionOn', {
          objectId: resolved.object.objectId,
          functionDeclaration: `function() {
            const rect = this.getBoundingClientRect()
            return {
              left: rect.left,
              top: rect.top,
              right: rect.right,
              bottom: rect.bottom,
              width: rect.width,
              height: rect.height,
            }
          }`,
          returnByValue: true,
        })

        // The command succeeds even when the page function throws.
        const thrown = response.exceptionDetails
        if (thrown) {
          throw new Error(
            thrown.exception?.description ?? thrown.text ?? 'getBoundingClientRect() threw in the page'
          )
        }

        // Check here so a missing value becomes a per-node error instead of a
        // TypeError that rejects the whole batch.
        const rect = response.result.value
        if (rect === undefined || rect === null || typeof rect !== 'object') {
          throw new Error('getBoundingClientRect() returned no value')
        }
        return { status: 'ok' as const, rect, backendNodeId, idx }
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err)
        return { status: 'error' as const, error: reason, backendNodeId, idx }
      }
    })
  )

  const boxes: BoxRecord[] = []
  const errors: Array<{ backendNodeId: number; reason: string }> = []

  for (const outcome of outcomes) {
    if (outcome.status === 'error') {
      errors.push({ backendNodeId: outcome.backendNodeId, reason: outcome.error })
      continue
    }

    const { left, top, right, bottom } = outcome.rect
    boxes.push({
      boxId: boxes.length,
      subjectId: subjectIds[outcome.idx],
      frameId: 0,
      borderLeft: left,
      borderTop: top,
      borderRight: right,
      borderBottom: bottom,
      paddingLeft: left,
      paddingTop: top,
      paddingRight: right,
      paddingBottom: bottom,
      contentLeft: left,
      contentTop: top,
      contentRight: right,
      contentBottom: bottom,
    })
  }

  return { boxes, errors }
}
|
||||
|
||||
/**
 * Extract CSS transform matrices for a list of backend node IDs.
 *
 * For each node the computed `transform` is read in the page. A value of the
 * form `matrix(a, b, c, d, e, f)` is parsed into its numbers; `none` yields
 * the identity `[1, 0, 0, 1, 0, 0]`.
 *
 * NOTE(review): a computed `matrix3d(...)` value does not match the in-page
 * pattern and is therefore also reported as the identity — confirm whether
 * 3D transforms need to be represented.
 *
 * A failure for one node is recorded in `errors` and does not affect the
 * others. That includes the in-page function throwing or returning no value.
 *
 * @param session        CDP session used to send the commands.
 * @param backendNodeIds Backend node ids to inspect.
 * @param subjectIds     Subject id for each entry of `backendNodeIds` (same index).
 * @returns `transforms` (one per successful node), the flat `matrices` array
 *          they index into, and per-node `errors`.
 */
export async function extractTransforms(
  session: CDPSession,
  backendNodeIds: number[],
  subjectIds: number[]
): Promise<{
  transforms: TransformRecord[]
  matrices: number[]
  errors: Array<{ backendNodeId: number; reason: string }>
}> {
  type PageTransform = { matrix: number[]; originX: number; originY: number }

  const outcomes = await Promise.all(
    backendNodeIds.map(async (backendNodeId, idx) => {
      try {
        const resolved = await session.send<{ object: { objectId: string } }>('DOM.resolveNode', {
          backendNodeId,
        })
        const response = await session.send<{
          result: { value?: PageTransform }
          exceptionDetails?: { text?: string; exception?: { description?: string } }
        }>('Runtime.callFunctionOn', {
          objectId: resolved.object.objectId,
          functionDeclaration: `function() {
            const style = window.getComputedStyle(this)
            const transform = style.transform
            let matrix = [1, 0, 0, 1, 0, 0]
            if (transform && transform !== 'none') {
              const match = transform.match(/matrix\\(([^)]+)\\)/)
              if (match) {
                matrix = match[1].split(',').map(Number)
              }
            }
            const origin = style.transformOrigin.split(' ').map(parseFloat)
            return {
              matrix,
              originX: origin[0] || 0,
              originY: origin[1] || 0,
            }
          }`,
          returnByValue: true,
        })

        // The command succeeds even when the page function throws.
        const thrown = response.exceptionDetails
        if (thrown) {
          throw new Error(
            thrown.exception?.description ?? thrown.text ?? 'getComputedStyle() threw in the page'
          )
        }

        // Check here so a missing value becomes a per-node error instead of a
        // TypeError that rejects the whole batch.
        const data = response.result.value
        if (data === undefined || data === null || typeof data !== 'object') {
          throw new Error('Transform query returned no value')
        }
        return { status: 'ok' as const, data, backendNodeId, idx }
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err)
        return { status: 'error' as const, error: reason, backendNodeId, idx }
      }
    })
  )

  const transforms: TransformRecord[] = []
  const matrices: number[] = []
  const errors: Array<{ backendNodeId: number; reason: string }> = []

  for (const outcome of outcomes) {
    if (outcome.status === 'error') {
      errors.push({ backendNodeId: outcome.backendNodeId, reason: outcome.error })
      continue
    }

    const matrix = outcome.data.matrix ?? [1, 0, 0, 1, 0, 0]
    // Each subject's values are appended to the shared flat array; the
    // record remembers where its slice starts and how long it is.
    const matrixStart = matrices.length
    matrices.push(...matrix)

    transforms.push({
      transformId: transforms.length,
      subjectId: subjectIds[outcome.idx],
      matrixStart,
      matrixLength: matrix.length,
      originX: outcome.data.originX ?? 0,
      originY: outcome.data.originY ?? 0,
    })
  }

  return { transforms, matrices, errors }
}
|
||||
@@ -0,0 +1,76 @@
|
||||
/**
|
||||
* imhotep-cdp — Chrome DevTools Protocol extraction layer
|
||||
*
|
||||
* Captures browser truth for geometry, topology, and style facts.
|
||||
* Returns raw browser data matching the geometry world schema.
|
||||
*/
|
||||
|
||||
// Session management
|
||||
export {
|
||||
CDPSessionManager,
|
||||
createPlaywrightCDPSession,
|
||||
createSessionManager,
|
||||
type CDPSession,
|
||||
type CDPSessionFactory,
|
||||
} from './session.js'
|
||||
|
||||
// DOM extraction
|
||||
export {
|
||||
extractDOM,
|
||||
resolveSelector,
|
||||
resolveOneSelector,
|
||||
type CDPDOMNode,
|
||||
type DOMNodeRecord,
|
||||
type DOMExtractionResult,
|
||||
} from './dom.js'
|
||||
|
||||
// Geometry extraction
|
||||
export {
|
||||
extractBoxModels,
|
||||
extractFragments,
|
||||
extractTransforms,
|
||||
type BoxRecord,
|
||||
type FragmentRecord,
|
||||
type TransformRecord,
|
||||
type GeometryExtractionResult,
|
||||
type CDPBoxModel,
|
||||
} from './geometry.js'
|
||||
|
||||
// Style extraction
|
||||
export {
|
||||
extractStyles,
|
||||
DisplayEnum,
|
||||
PositionEnum,
|
||||
OverflowEnum,
|
||||
VisibilityEnum,
|
||||
PointerEventsEnum,
|
||||
type StyleRecord,
|
||||
type StyleExtractionConfig,
|
||||
} from './styles.js'
|
||||
|
||||
// Topology extraction
|
||||
export {
|
||||
extractTopology,
|
||||
type ScrollRecord,
|
||||
type ClippingRecord,
|
||||
type TopologyRecord,
|
||||
type TopologyExtractionResult,
|
||||
} from './topology.js'
|
||||
|
||||
// Canonical adapter
|
||||
export {
|
||||
adaptSnapshotToCanonical,
|
||||
type CanonicalGeometryWorld,
|
||||
} from './canonical-adapter.js'
|
||||
|
||||
// Main orchestrator
|
||||
export {
|
||||
CDPExtractor,
|
||||
type ExtractorRequest,
|
||||
type ExtractorResponse,
|
||||
type ExtractorDiagnostic,
|
||||
type GeometryWorldSnapshot,
|
||||
type ProvenanceEntry,
|
||||
type ConfidenceEntry,
|
||||
type ExtractionTraceEntry,
|
||||
} from './extractor.js'
|
||||
@@ -0,0 +1,149 @@
|
||||
/**
|
||||
* CDP Session Management
|
||||
*
|
||||
* Wraps Playwright's CDP session access to provide a typed, injectable
|
||||
* interface for sending Chrome DevTools Protocol commands.
|
||||
*
|
||||
* Design: dependency injection over optional imports. The extractor receives
|
||||
* a session factory rather than importing Playwright directly.
|
||||
*/
|
||||
|
||||
/**
 * The smallest session surface this package needs. Anything that can send
 * CDP commands and hand back JSON-like responses satisfies it.
 */
export interface CDPSession {
  /**
   * Send a CDP command.
   *
   * @param method Protocol method name, e.g. `DOM.getBoxModel`.
   * @param params Optional command parameters.
   * @returns The command's response payload, typed by the caller as `T`.
   */
  send<T = unknown>(method: string, params?: Record<string, unknown>): Promise<T>

  /** Subscribe `handler` to a protocol event. */
  on(event: string, handler: (payload: unknown) => void): void

  /** Unsubscribe a handler previously passed to `on`. */
  off(event: string, handler: (payload: unknown) => void): void

  /** Detach the session. */
  detach(): Promise<void>
}
|
||||
|
||||
/**
 * Asynchronous factory that produces a CDP session for a page reference.
 *
 * The page is typed `unknown` and the factory is injected, so this package
 * carries no hard dependency on Playwright.
 */
export type CDPSessionFactory = (pageRef: unknown) => Promise<CDPSession>
|
||||
|
||||
/**
 * Session factory backed by Playwright.
 *
 * Calls `page.context().newCDPSession(page)`. The page is accessed
 * structurally rather than through Playwright's types, so Playwright is never
 * imported here.
 *
 * Usage:
 *   const session = await createPlaywrightCDPSession(page)
 *
 * @param page A Playwright Page object.
 * @returns The CDP session created by the page's browser context.
 */
export async function createPlaywrightCDPSession(
  page: unknown
): Promise<CDPSession> {
  type SessionSource = {
    newCDPSession: (target: unknown) => Promise<CDPSession>
  }

  const browserContext = (page as { context: () => SessionSource }).context()
  return browserContext.newCDPSession(page)
}
|
||||
|
||||
/**
 * Session manager that owns the lifecycle of a CDP session.
 *
 * Handles attach, detach, and basic error recovery: a failed attach is not
 * cached, so the next call tries again. Caches the document root nodeId and
 * the set of enabled domains to minimize round-trips; both caches belong to
 * the current session and are dropped on attach and on detach.
 *
 * NOTE(review): calling `detach()` while an `attach()` is still in flight is
 * not coordinated — the pending attach will still install its session when it
 * resolves.
 */
export class CDPSessionManager {
  private session: CDPSession | null = null
  private attachPromise: Promise<CDPSession> | null = null
  private readonly factory: CDPSessionFactory
  private readonly pageRef: unknown
  private documentRootNodeId: number | null = null
  private enabledDomains = new Set<string>()

  /**
   * @param factory Creates the session on first use.
   * @param pageRef Opaque page reference handed to `factory`.
   */
  constructor(factory: CDPSessionFactory, pageRef: unknown) {
    this.factory = factory
    this.pageRef = pageRef
  }

  /**
   * Ensure a session is attached. Idempotent and concurrency-safe: concurrent
   * callers share one in-flight attach.
   *
   * @returns The attached session.
   * @throws Whatever the factory rejects with; a later call will retry.
   */
  async attach(): Promise<CDPSession> {
    if (this.session) return this.session
    if (this.attachPromise) return this.attachPromise

    const pending: Promise<CDPSession> = this.factory(this.pageRef).then(
      (created) => {
        this.session = created
        this.resetSessionCaches()
        return created
      },
      (err: unknown) => {
        // Forget the failed attempt so the next attach() starts fresh
        // instead of replaying this rejection forever.
        if (this.attachPromise === pending) {
          this.attachPromise = null
        }
        throw err
      }
    )
    this.attachPromise = pending
    return pending
  }

  /**
   * Get cached document root nodeId, or fetch and cache it.
   *
   * @returns The root node id reported by `DOM.getDocument`.
   */
  async getDocumentRootNodeId(): Promise<number> {
    if (this.documentRootNodeId !== null) {
      return this.documentRootNodeId
    }
    const s = await this.attach()
    const result = await s.send<{ root: { nodeId: number } }>('DOM.getDocument')
    this.documentRootNodeId = result.root.nodeId
    return this.documentRootNodeId
  }

  /**
   * Enable a CDP domain if not already enabled on the current session.
   *
   * @param domain Domain name without suffix, e.g. `DOM` sends `DOM.enable`.
   */
  async enableDomain(domain: string): Promise<void> {
    if (this.enabledDomains.has(domain)) {
      return
    }
    const s = await this.attach()
    await s.send(`${domain}.enable`)
    this.enabledDomains.add(domain)
  }

  /**
   * Get the current session, throwing if not attached.
   *
   * @throws Error when no session is attached.
   */
  getSession(): CDPSession {
    if (!this.session) {
      throw new Error('CDP session not attached. Call attach() first.')
    }
    return this.session
  }

  /**
   * Send a CDP command, automatically attaching if needed.
   *
   * @param method Protocol method name.
   * @param params Optional command parameters.
   * @returns The command's response payload.
   */
  async send<T = unknown>(
    method: string,
    params?: Record<string, unknown>
  ): Promise<T> {
    const s = await this.attach()
    return s.send<T>(method, params)
  }

  /**
   * Detach and clean up the session.
   *
   * Manager state (session, pending attach, caches) is cleared before the
   * underlying detach is awaited, so the manager is reusable even when that
   * detach rejects. The rejection is still propagated to the caller.
   */
  async detach(): Promise<void> {
    const current = this.session
    this.session = null
    this.attachPromise = null
    // Cached ids and enabled domains belong to the session being dropped.
    this.resetSessionCaches()
    if (current) {
      await current.detach()
    }
  }

  /** Drop everything cached on behalf of a particular session. */
  private resetSessionCaches(): void {
    this.documentRootNodeId = null
    this.enabledDomains.clear()
  }
}
|
||||
|
||||
/**
 * Shorthand for a session manager wired to the Playwright session factory.
 *
 * @param page A Playwright Page object; passed through untouched as the page reference.
 * @returns A manager that attaches lazily on first use.
 */
export function createSessionManager(page: unknown): CDPSessionManager {
  const manager = new CDPSessionManager(createPlaywrightCDPSession, page)
  return manager
}
|
||||
@@ -0,0 +1,247 @@
|
||||
/**
|
||||
* Computed Style Extraction
|
||||
*
|
||||
* Captures only the style properties required by the active extraction plan.
|
||||
* Uses Runtime.callFunctionOn with objectIds resolved from backendNodeIds
|
||||
* to avoid CSS domain nodeId validity issues.
|
||||
*/
|
||||
|
||||
import type { CDPSession } from './session.js'
|
||||
|
||||
/**
 * Compact, all-numeric style row for one subject.
 * Mirrors the geometry world `styles` table schema.
 *
 * NOTE(review): the keyword-valued fields (display, position, overflow,
 * visibility, pointer-events) presumably hold codes from the enum maps in this
 * file — confirm against the extraction code.
 */
export interface StyleRecord {
  /** Subject this row describes. */
  subjectId: number

  // Layout mode.
  display: number
  position: number

  // Stacking.
  zIndexKind: number
  zIndexValue: number

  // Overflow per axis.
  overflowX: number
  overflowY: number

  // Rendering and interaction.
  opacity: number
  visibility: number
  containFlags: number
  pointerEvents: number

  // Typography.
  lineHeight: number
  /** Font family as an index into a string table. */
  fontFamilyStringId: number
  fontSize: number
  fontWeight: number
}
|
||||
|
||||
/**
 * Enum mappings for compact storage.
 *
 * `DisplayEnum` maps a computed `display` keyword to its numeric code.
 * `other` is the catch-all code for keywords that have no entry here.
 */
export const DisplayEnum: Record<string, number> = {
  'none': 1,
  'block': 2,
  'inline': 3,
  'inline-block': 4,
  'flex': 5,
  'inline-flex': 6,
  'grid': 7,
  'inline-grid': 8,
  'table': 9,
  'table-cell': 10,
  'contents': 11,
  'other': 99,
}
|
||||
|
||||
/** Numeric codes for the computed `position` keyword. */
export const PositionEnum: Record<string, number> = {
  'static': 1,
  'relative': 2,
  'absolute': 3,
  'fixed': 4,
  'sticky': 5,
}
|
||||
|
||||
/** Numeric codes for the computed `overflow-x` / `overflow-y` keyword. */
export const OverflowEnum: Record<string, number> = {
  'visible': 1,
  'hidden': 2,
  'clip': 3,
  'scroll': 4,
  'auto': 5,
}
|
||||
|
||||
/** Numeric codes for the computed `visibility` keyword. */
export const VisibilityEnum: Record<string, number> = {
  'visible': 1,
  'hidden': 2,
  'collapse': 3,
}
|
||||
|
||||
/** Numeric codes for the computed `pointer-events` keyword. */
export const PointerEventsEnum: Record<string, number> = {
  'auto': 1,
  'none': 2,
}
|
||||
|
||||
/**
 * Options controlling which style properties `extractStyles` reads.
 */
export interface StyleExtractionConfig {
  /**
   * If true, extract all known style properties.
   * `extractStyles` treats an omitted value as `true`.
   */
  all?: boolean

  /**
   * Specific property names to extract.
   * Only consulted when `all` is explicitly `false`; properties not listed
   * then take their default value.
   */
  properties?: string[]
}
|
||||
|
||||
/**
 * Bit contributed to `containFlags` by each `contain` keyword.
 * `strict` is size + layout + paint + style; `content` is layout + paint +
 * style (everything except size). Keywords without an entry add no bits.
 */
const CONTAIN_KEYWORD_BITS = new Map<string, number>([
  ['layout', 1],
  ['paint', 2],
  ['size', 4],
  ['style', 8],
  ['strict', 15],
  ['content', 11],
])

/**
 * Parse the leading number of a CSS value such as `"12px"` or `"0.5"`.
 * Empty strings, `none`, `auto` and unparsable values yield `defaultValue`.
 */
function parseStyleNumber(value: string, defaultValue = 0): number {
  if (!value || value === 'none' || value === 'auto') return defaultValue
  const parsed = parseFloat(value)
  return Number.isNaN(parsed) ? defaultValue : parsed
}

/**
 * Extract computed styles for a list of backend node IDs.
 *
 * Each backendNodeId is resolved to a remote object, then
 * Runtime.callFunctionOn reads the values via getComputedStyle in the
 * browser. This avoids CSS domain nodeId validity issues. All nodes are
 * processed concurrently and the remote objects are released afterwards.
 *
 * @param session - CDP session used for every command.
 * @param backendNodeIds - Nodes to read, one per subject.
 * @param subjectIds - Subject id for the node at the same index.
 * @param config - Property selection; `all` defaults to true.
 * @param strings - Shared string table. New font-family strings are appended
 *   to this array in place and the same array is returned.
 * @returns `styles` in input order for nodes that succeeded, `errors` for the
 *   nodes that did not, and the (possibly extended) `strings` table.
 */
export async function extractStyles(
  session: CDPSession,
  backendNodeIds: number[],
  subjectIds: number[],
  config: StyleExtractionConfig,
  strings: string[]
): Promise<{
  styles: StyleRecord[]
  errors: Array<{ backendNodeId: number; reason: string }>
  strings: string[]
}> {
  type NodeOutcome =
    | { ok: true; backendNodeId: number; computed: Record<string, string> }
    | { ok: false; backendNodeId: number; reason: string }

  const styles: StyleRecord[] = []
  const errors: Array<{ backendNodeId: number; reason: string }> = []

  const stringIndex = new Map<string, number>()
  strings.forEach((s, i) => stringIndex.set(s, i))

  const internString = (value: string): number => {
    const known = stringIndex.get(value)
    if (known !== undefined) return known
    const id = strings.length
    strings.push(value)
    stringIndex.set(value, id)
    return id
  }

  // Determine which properties we need.
  const wantAll = config.all ?? true
  const wantProps = new Set(config.properties ?? [])

  const pick = (computed: Record<string, string>, name: string, fallback: string): string => {
    if (!wantAll && !wantProps.has(name)) return fallback
    return computed[name] ?? fallback
  }

  // A per-call object group lets every remote object created below be
  // released with one command without touching other callers' objects.
  const objectGroup = `imhotep-styles-${Date.now().toString(36)}-${Math.random().toString(36).slice(2)}`

  // Resolve one node and read its computed style. Never rejects.
  const readNode = async (backendNodeId: number): Promise<NodeOutcome> => {
    let objectId: string | undefined
    try {
      const resolved = await session.send<{ object?: { objectId?: string } }>('DOM.resolveNode', {
        backendNodeId,
        objectGroup,
      })
      objectId = resolved.object?.objectId
    } catch {
      objectId = undefined
    }
    if (!objectId) {
      return { ok: false, backendNodeId, reason: 'Failed to resolve backendNodeId to objectId' }
    }

    try {
      const response = await session.send<{
        result?: { value?: Record<string, string> }
        exceptionDetails?: { text?: string; exception?: { description?: string } }
      }>('Runtime.callFunctionOn', {
        objectId,
        functionDeclaration: `function() {
          const s = window.getComputedStyle(this);
          return {
            display: s.display,
            position: s.position,
            zIndex: s.zIndex,
            overflowX: s.overflowX,
            overflowY: s.overflowY,
            opacity: s.opacity,
            visibility: s.visibility,
            contain: s.contain,
            pointerEvents: s.pointerEvents,
            lineHeight: s.lineHeight,
            fontFamily: s.fontFamily,
            fontSize: s.fontSize,
            fontWeight: s.fontWeight,
          };
        }`,
        returnByValue: true,
      })

      // An exception thrown inside the page (e.g. `this` is not an Element)
      // does not reject the command; it comes back as exceptionDetails with
      // no usable value.
      const details = response.exceptionDetails
      if (details) {
        const reason = details.exception?.description ?? details.text ?? 'getComputedStyle threw in the page'
        return { ok: false, backendNodeId, reason }
      }
      const computed = response.result?.value
      if (!computed || typeof computed !== 'object') {
        return { ok: false, backendNodeId, reason: 'Runtime.callFunctionOn returned no computed style' }
      }
      return { ok: true, backendNodeId, computed }
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err)
      return { ok: false, backendNodeId, reason }
    }
  }

  let outcomes: NodeOutcome[]
  try {
    outcomes = await Promise.all(backendNodeIds.map(readNode))
  } finally {
    try {
      await session.send<unknown>('Runtime.releaseObjectGroup', { objectGroup })
    } catch {
      // Best effort: the objects die with the execution context anyway.
    }
  }

  for (const [idx, outcome] of outcomes.entries()) {
    if (!outcome.ok) {
      errors.push({ backendNodeId: outcome.backendNodeId, reason: outcome.reason })
      continue
    }

    const subjectId = subjectIds[idx]
    if (subjectId === undefined) {
      // subjectIds is shorter than backendNodeIds; do not emit a row without an owner.
      errors.push({ backendNodeId: outcome.backendNodeId, reason: 'No subjectId supplied for backendNodeId' })
      continue
    }

    const computed = outcome.computed
    const displayRaw = pick(computed, 'display', 'block')
    const positionRaw = pick(computed, 'position', 'static')
    const zIndexRaw = pick(computed, 'zIndex', 'auto')
    const overflowXRaw = pick(computed, 'overflowX', 'visible')
    const overflowYRaw = pick(computed, 'overflowY', 'visible')
    const opacityRaw = pick(computed, 'opacity', '1')
    const visibilityRaw = pick(computed, 'visibility', 'visible')
    const containRaw = pick(computed, 'contain', 'none')
    const pointerEventsRaw = pick(computed, 'pointerEvents', 'auto')
    const lineHeightRaw = pick(computed, 'lineHeight', 'normal')
    const fontFamilyRaw = pick(computed, 'fontFamily', '')
    const fontSizeRaw = pick(computed, 'fontSize', '16px')
    const fontWeightRaw = pick(computed, 'fontWeight', '400')

    // z-index kind: 0 = auto, 1 = integer. An unparsable value is stored as
    // auto rather than leaking NaN into the table.
    const parsedZIndex = parseInt(zIndexRaw, 10)
    const zIndexKind = zIndexRaw === 'auto' || Number.isNaN(parsedZIndex) ? 0 : 1
    const zIndexValue = zIndexKind === 1 ? parsedZIndex : 0

    // contain flags bitfield
    let containFlags = 0
    if (containRaw !== 'none') {
      for (const keyword of containRaw.split(/\s+/)) {
        containFlags |= CONTAIN_KEYWORD_BITS.get(keyword) ?? 0
      }
    }

    styles.push({
      subjectId,
      display: DisplayEnum[displayRaw] ?? DisplayEnum.other,
      position: PositionEnum[positionRaw] ?? PositionEnum.static,
      zIndexKind,
      zIndexValue,
      overflowX: OverflowEnum[overflowXRaw] ?? OverflowEnum.visible,
      overflowY: OverflowEnum[overflowYRaw] ?? OverflowEnum.visible,
      opacity: parseStyleNumber(opacityRaw, 1),
      visibility: VisibilityEnum[visibilityRaw] ?? VisibilityEnum.visible,
      containFlags,
      pointerEvents: PointerEventsEnum[pointerEventsRaw] ?? PointerEventsEnum.auto,
      // NOTE(review): parsable values keep their numeric part (e.g. "24px" -> 24)
      // while `normal` falls back to 1.2 - confirm consumers expect this mix.
      lineHeight: parseStyleNumber(lineHeightRaw, 1.2),
      fontFamilyStringId: internString(fontFamilyRaw),
      fontSize: parseStyleNumber(fontSizeRaw, 16),
      fontWeight: parseInt(fontWeightRaw, 10) || 400,
    })
  }

  return { styles, errors, strings }
}
|
||||
@@ -0,0 +1,326 @@
|
||||
/**
|
||||
* Topology Extraction
|
||||
*
|
||||
* Captures non-planar layout facts: scroll containers, clipping chains,
|
||||
* stacking contexts, containing blocks, and formatting contexts.
|
||||
*
|
||||
* We extract these in bulk via a single Runtime.evaluate script that
|
||||
* walks the DOM and computes topology relationships in the browser,
|
||||
* then returns compact tables.
|
||||
*/
|
||||
|
||||
import type { CDPSession } from './session.js'
|
||||
|
||||
/**
 * One row of the scroll table: the scroll state of a single scroll container.
 */
export interface ScrollRecord {
  /** Id of the container element, as reported by the topology script. */
  containerId: number

  /** Current scroll offset (the element's `scrollLeft` / `scrollTop`). */
  scrollLeft: number
  scrollTop: number

  /** Size of the scrollable content (`scrollWidth` / `scrollHeight`). */
  scrollWidth: number
  scrollHeight: number

  /** Size of the element's client box (`clientWidth` / `clientHeight`). */
  clientWidth: number
  clientHeight: number
}
|
||||
|
||||
/**
 * One node of the clipping chain: an element that clips, plus its clip rect.
 */
export interface ClippingRecord {
  /** Position of this row in the clipping table at the time it was added. */
  clipNodeId: number
  /** Id of the clipping element, as reported by the topology script. */
  subjectId: number
  /** Kind of clip; `extractTopology` currently always writes 1. */
  clipKind: number

  /** Clip rectangle edges, taken from the element's `getBoundingClientRect()`. */
  clipLeft: number
  clipTop: number
  clipRight: number
  clipBottom: number

  /** Parent clip node; `extractTopology` currently always writes 0. */
  parentClipNodeId: number
}
|
||||
|
||||
/**
 * Topology relations stored as parallel arrays: index `i` of every array
 * describes the same element. Relation arrays hold the id of the related
 * element, with 0 meaning no such element was found among the tracked ones.
 */
export interface TopologyRecord {
  // --- ancestor relations ---
  containingBlockOf: number[]
  nearestPositionedAncestorOf: number[]
  scrollContainerOf: number[]
  stackingContextOf: number[]
  formattingContextOf: number[]
  clippingRootOf: number[]

  // --- paint order ---
  /** 2 when the element creates a stacking context, 1 when it does not. */
  paintOrderBucket: number[]
  paintOrderIndex: number[]
}
|
||||
|
||||
/**
 * Everything `extractTopology` produces: the scroll table, the clipping
 * table and the parallel-array topology relations.
 */
export interface TopologyExtractionResult {
  /** One row per scroll container. */
  scroll: ScrollRecord[]
  /** One row per clipping element. */
  clipping: ClippingRecord[]
  /** Ancestor and paint-order relations. */
  topology: TopologyRecord
}
|
||||
|
||||
/** Create a result in which every table is empty. */
function emptyTopologyResult(): TopologyExtractionResult {
  return {
    scroll: [],
    clipping: [],
    topology: {
      containingBlockOf: [],
      nearestPositionedAncestorOf: [],
      scrollContainerOf: [],
      stackingContextOf: [],
      formattingContextOf: [],
      clippingRootOf: [],
      paintOrderBucket: [],
      paintOrderIndex: [],
    },
  }
}

/**
 * Build the script evaluated in the page. It locates the elements tagged with
 * `data-imhotep-backend-id` and emits one topology row per entry of `order`,
 * in that order, so the parallel arrays line up with the caller's input.
 */
function buildTopologyScript(order: number[]): string {
  return `
(function() {
  const ATTR = 'data-imhotep-backend-id'
  const order = ${JSON.stringify(order)}

  const idOf = new Map()
  const elementOf = new Map()
  const documentRank = new Map()
  Array.from(document.querySelectorAll('[' + ATTR + ']')).forEach(function(el, rank) {
    const id = parseInt(el.getAttribute(ATTR), 10)
    idOf.set(el, id)
    elementOf.set(id, el)
    documentRank.set(el, rank)
  })

  const results = {
    scroll: [],
    clipping: [],
    topology: {
      containingBlockOf: [],
      nearestPositionedAncestorOf: [],
      scrollContainerOf: [],
      stackingContextOf: [],
      formattingContextOf: [],
      clippingRootOf: [],
      paintOrderBucket: [],
      paintOrderIndex: [],
    },
    missing: [],
  }

  function scrollsOrHides(value) {
    return value === 'hidden' || value === 'scroll' || value === 'auto'
  }

  // Computed-style facts per element, read once and reused by ancestor walks.
  const traitCache = new Map()
  function traitsOf(el) {
    let traits = traitCache.get(el)
    if (traits) return traits
    const style = window.getComputedStyle(el)
    // overflow: clip clips painting but does not create a scroll container.
    const scrollContainer = scrollsOrHides(style.overflowX) || scrollsOrHides(style.overflowY)
    const positioned = style.position !== 'static'
    traits = {
      scrollContainer: scrollContainer,
      clipping: scrollContainer ||
        style.overflowX === 'clip' || style.overflowY === 'clip' ||
        style.clipPath !== 'none',
      positioned: positioned,
      stackingContext:
        (positioned && style.zIndex !== 'auto') ||
        parseFloat(style.opacity) < 1 ||
        style.transform !== 'none' ||
        style.filter !== 'none' ||
        style.clipPath !== 'none' ||
        style.isolation === 'isolate' ||
        style.willChange.includes('transform') || style.willChange.includes('opacity'),
    }
    traitCache.set(el, traits)
    return traits
  }

  function nearestAncestor(el, trait) {
    for (let current = el.parentElement; current; current = current.parentElement) {
      if (traitsOf(current)[trait]) return current
    }
    return null
  }

  function idFor(el) {
    return el ? (idOf.get(el) || 0) : 0
  }

  const topology = results.topology
  const tabled = new Set()

  for (const id of order) {
    const el = elementOf.get(id)

    if (!el) {
      // Keep every array aligned with the input even when the element
      // could not be located: relations 0, bucket 0, paint index -1.
      results.missing.push(id)
      topology.containingBlockOf.push(0)
      topology.nearestPositionedAncestorOf.push(0)
      topology.scrollContainerOf.push(0)
      topology.stackingContextOf.push(0)
      topology.paintOrderBucket.push(0)
      topology.paintOrderIndex.push(-1)
      topology.formattingContextOf.push(0)
      topology.clippingRootOf.push(0)
      continue
    }

    const traits = traitsOf(el)
    const subjectId = idFor(el)

    // Scroll / clipping rows are emitted once per element even if its id
    // appears more than once in the input.
    if (!tabled.has(el)) {
      tabled.add(el)

      if (traits.scrollContainer) {
        results.scroll.push({
          containerId: subjectId,
          scrollLeft: el.scrollLeft,
          scrollTop: el.scrollTop,
          scrollWidth: el.scrollWidth,
          scrollHeight: el.scrollHeight,
          clientWidth: el.clientWidth,
          clientHeight: el.clientHeight,
        })
      }

      if (traits.clipping) {
        const r = el.getBoundingClientRect()
        results.clipping.push({
          clipNodeId: results.clipping.length,
          subjectId: subjectId,
          clipKind: 1,
          clipLeft: r.left,
          clipTop: r.top,
          clipRight: r.right,
          clipBottom: r.bottom,
          parentClipNodeId: 0,
        })
      }
    }

    // The containing block is approximated by the nearest positioned ancestor.
    const positionedAncestor = nearestAncestor(el, 'positioned')
    topology.containingBlockOf.push(idFor(positionedAncestor))
    topology.nearestPositionedAncestorOf.push(idFor(positionedAncestor))
    topology.scrollContainerOf.push(idFor(nearestAncestor(el, 'scrollContainer')))
    topology.stackingContextOf.push(idFor(nearestAncestor(el, 'stackingContext')))
    topology.paintOrderBucket.push(traits.stackingContext ? 2 : 1)
    // Rank of the element among the tagged elements in document order.
    topology.paintOrderIndex.push(documentRank.get(el))
    // The formatting context is approximated by the parent element.
    topology.formattingContextOf.push(idFor(el.parentElement))
    topology.clippingRootOf.push(idFor(nearestAncestor(el, 'clipping')))
  }

  return results
})()
`
}

/**
 * Extract topology facts for a list of elements.
 *
 * Every element is tagged with a temporary `data-imhotep-backend-id`
 * attribute, a single injected script computes all relationships in the
 * browser, and the attributes are removed again. The topology arrays contain
 * exactly one entry per `backendNodeIds` entry, in input order; nodes that
 * could not be resolved or located get placeholder values (relations 0,
 * bucket 0, paint index -1) and an entry in `errors`.
 *
 * Ids in the returned tables are the backend node ids used for tagging.
 *
 * @param session - CDP session used for every command.
 * @param backendNodeIds - Elements to analyse.
 * @param subjectIds - Currently not consulted.
 *   NOTE(review): confirm whether ids should be translated to subject ids
 *   here or by the caller.
 * @returns The topology tables plus per-node errors. If the script itself
 *   fails, all tables are empty and every node is reported.
 */
export async function extractTopology(
  session: CDPSession,
  backendNodeIds: number[],
  subjectIds: number[]
): Promise<{
  result: TopologyExtractionResult
  errors: Array<{ backendNodeId: number; reason: string }>
}> {
  void subjectIds

  const errors: Array<{ backendNodeId: number; reason: string }> = []

  if (backendNodeIds.length === 0) {
    return { result: emptyTopologyResult(), errors }
  }

  // Each node is reported at most once, with the first reason encountered.
  const reported = new Set<number>()
  const report = (backendNodeId: number, reason: string): void => {
    if (reported.has(backendNodeId)) return
    reported.add(backendNodeId)
    errors.push({ backendNodeId, reason })
  }
  const describe = (err: unknown): string => (err instanceof Error ? err.message : String(err))

  // A per-call object group lets all remote objects be released at once.
  const objectGroup = `imhotep-topology-${Date.now().toString(36)}-${Math.random().toString(36).slice(2)}`

  const objectIds = await Promise.all(
    backendNodeIds.map(async (backendNodeId): Promise<string> => {
      try {
        const resolved = await session.send<{ object?: { objectId?: string } }>('DOM.resolveNode', {
          backendNodeId,
          objectGroup,
        })
        const objectId = resolved.object?.objectId ?? ''
        if (!objectId) report(backendNodeId, 'Failed to resolve backendNodeId to objectId')
        return objectId
      } catch {
        report(backendNodeId, 'Failed to resolve backendNodeId to objectId')
        return ''
      }
    })
  )

  let result = emptyTopologyResult()

  try {
    // Inject temporary data attributes so the topology script can correlate
    // elements with their backendNodeIds.
    await Promise.all(
      backendNodeIds.map(async (backendNodeId, i) => {
        const objectId = objectIds[i]
        if (!objectId) return
        try {
          await session.send<unknown>('Runtime.callFunctionOn', {
            objectId,
            functionDeclaration: `function(id) { this.setAttribute('data-imhotep-backend-id', id) }`,
            arguments: [{ value: String(backendNodeId) }],
          })
        } catch (err) {
          report(backendNodeId, describe(err))
        }
      })
    )

    try {
      const response = await session.send<{
        result?: { value?: TopologyExtractionResult & { missing?: number[] } }
        exceptionDetails?: { text?: string; exception?: { description?: string } }
      }>('Runtime.evaluate', {
        expression: buildTopologyScript(backendNodeIds),
        returnByValue: true,
      })

      // A script exception does not reject the command; it is reported via
      // exceptionDetails and leaves no usable value.
      const details = response.exceptionDetails
      const value = response.result?.value
      if (details || !value) {
        throw new Error(
          details?.exception?.description ?? details?.text ?? 'Topology script returned no value'
        )
      }

      result = { scroll: value.scroll, clipping: value.clipping, topology: value.topology }
      for (const backendNodeId of value.missing ?? []) {
        report(backendNodeId, 'Element was not found by the topology script')
      }
    } catch (err) {
      const reason = describe(err)
      for (const backendNodeId of backendNodeIds) {
        report(backendNodeId, reason)
      }
      result = emptyTopologyResult()
    }
  } finally {
    // Remove temporary attributes, then drop the remote object handles.
    await Promise.all(
      objectIds.map(async (objectId) => {
        if (!objectId) return
        try {
          await session.send<unknown>('Runtime.callFunctionOn', {
            objectId,
            functionDeclaration: `function() { this.removeAttribute('data-imhotep-backend-id') }`,
          })
        } catch {
          // Ignore cleanup errors
        }
      })
    )
    try {
      await session.send<unknown>('Runtime.releaseObjectGroup', { objectGroup })
    } catch {
      // Best effort: the objects die with the execution context anyway.
    }
  }

  return { result, errors }
}
|
||||
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"extends": "../../tsconfig.json",
|
||||
"compilerOptions": {
|
||||
"outDir": "./dist",
|
||||
"rootDir": "./src",
|
||||
"paths": {},
|
||||
"composite": false,
|
||||
"noEmitOnError": false
|
||||
},
|
||||
"include": [
|
||||
"src/**/*"
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user