v1.1.0: pooled runtime, 959 tests, production hardening (0 squash)

This commit is contained in:
John Dvorak
2025-08-15 10:00:00 -07:00
commit 92deb689cd
321 changed files with 79170 additions and 0 deletions
+31
View File
@@ -0,0 +1,31 @@
{
"name": "imhotep-cdp",
"version": "1.0.0",
"type": "module",
"license": "MIT",
"repository": {
"type": "git",
"url": "https://github.com/anomalyco/imhotep.git"
},
"engines": {
"node": ">=18.0.0"
},
"main": "dist/index.js",
"types": "dist/index.d.ts",
"files": [
"dist"
],
"scripts": {
"build": "tsc",
"test": "node --test dist/**/*.test.js"
},
"exports": {
".": {
"types": "./dist/index.d.ts",
"default": "./dist/index.js"
}
},
"dependencies": {
"imhotep-core": "^1.0.0"
}
}
@@ -0,0 +1,392 @@
/**
* Canonical Adapter
*
* Converts raw CDP extraction output into the canonical GeometryWorld
* shape defined by imhotep-core. This adapter isolates CDP-specific
* structures from the canonical representation.
*/
import type { GeometryWorldSnapshot } from './extractor.js'
import type { DOMExtractionResult } from './dom.js'
// We define local canonical interfaces to avoid cross-package import
// failures when imhotep-core dist is stale. These mirror the core
// canonical contracts exactly.
/**
 * Environment a snapshot was captured under: viewport dimensions, device
 * scale factor, and preference/locale settings.
 *
 * Local mirror of the imhotep-core contract. `adaptSnapshotToCanonical`
 * forwards `snapshot.env` into this shape without transformation.
 */
export interface CanonicalWorldEnvironment {
viewportWidth: number
viewportHeight: number
deviceScaleFactor: number
colorScheme: string
pointer: string
hover: boolean
reducedMotion: string
locale: string
writingMode: string
}
/**
 * Where and when a snapshot was taken.
 *
 * `engine` is fixed to the literal `'chromium-cdp'` for this adapter.
 * `extractedAt` is numeric: the adapter converts the snapshot's timestamp
 * string to epoch milliseconds via `adaptTimestamp`.
 */
export interface CanonicalWorldSource {
url: string
browserName: string
browserVersion: string
engine: 'chromium-cdp'
extractedAt: number
}
/**
 * Wrapper around the snapshot's string array.
 *
 * NOTE(review): presumably the `*StringId` columns in other tables are
 * indices into `values` — confirm against imhotep-core.
 */
export interface CanonicalStringTable {
values: string[]
}
/**
 * Columnar table of extraction subjects, forwarded unchanged from the
 * snapshot by `adaptSnapshotToCanonical`.
 *
 * `ids.length` is used by the adapter as the subject count when building
 * the visibility table. The extractor tests in this package expect
 * `domNodeId` to hold CDP backendNodeIds.
 */
export interface CanonicalSubjectsTable {
ids: number[]
domNodeId: number[]
subjectKind: number[]
primaryBoxId: number[]
firstFragmentId: number[]
fragmentCount: number[]
firstTextRunId: number[]
textRunCount: number[]
}
/**
 * Columnar form of the flattened DOM: index `i` of every array describes
 * the same node. Built by `adaptDom` from the row-oriented extraction
 * result.
 */
export interface CanonicalDomTable {
nodeId: number[]
backendNodeId: number[]
parentNodeId: number[]
firstChildIndex: number[]
childCount: number[]
// Numeric code written by adaptDom: 0 = none, 1 = open, 2 = closed.
shadowRootKind: number[]
tagNameStringId: number[]
roleStringId: number[]
ariaNameStringId: number[]
}
/**
 * Columnar table of coordinate frames. Local mirror of the imhotep-core
 * contract; the adapter forwards `snapshot.frames` unchanged.
 *
 * NOTE(review): column semantics (e.g. what `axisMatrixStart` indexes)
 * are not visible in this file — confirm against imhotep-core.
 */
export interface CanonicalFramesTable {
frameId: number[]
frameKind: number[]
ownerSubjectId: number[]
parentFrameId: number[]
originX: number[]
originY: number[]
axisMatrixStart: number[]
clipRectId: number[]
scrollContainerId: number[]
writingMode: number[]
}
/**
 * Flat numeric storage for matrices; forwarded unchanged from
 * `snapshot.matrices`.
 *
 * NOTE(review): presumably addressed by the `matrixStart`/`matrixLength`
 * and `axisMatrixStart` columns of other tables — confirm.
 */
export interface CanonicalMatricesTable {
values: number[]
}
/**
 * Columnar table of rectangles expressed as left/top/right/bottom edges.
 * Forwarded unchanged from `snapshot.rects`.
 */
export interface CanonicalRectsTable {
rectId: number[]
left: number[]
top: number[]
right: number[]
bottom: number[]
}
/**
 * Columnar table of box-model edges (border, padding and content boxes,
 * each as left/top/right/bottom) per subject.
 *
 * Used for both `boxes` and `visualBoxes` in CanonicalGeometryWorld; the
 * adapter forwards both from the snapshot unchanged.
 */
export interface CanonicalBoxesTable {
boxId: number[]
subjectId: number[]
frameId: number[]
borderLeft: number[]
borderTop: number[]
borderRight: number[]
borderBottom: number[]
paddingLeft: number[]
paddingTop: number[]
paddingRight: number[]
paddingBottom: number[]
contentLeft: number[]
contentTop: number[]
contentRight: number[]
contentBottom: number[]
}
/**
 * Columnar table of layout fragments with their box edges. Local mirror
 * of the imhotep-core contract; forwarded unchanged from
 * `snapshot.fragments`.
 *
 * NOTE(review): meaning of `fragmentKind`, `lineIndex` and `flowIndex`
 * codes is not visible here — confirm against imhotep-core.
 */
export interface CanonicalFragmentsTable {
fragmentId: number[]
subjectId: number[]
fragmentKind: number[]
boxLeft: number[]
boxTop: number[]
boxRight: number[]
boxBottom: number[]
lineIndex: number[]
flowIndex: number[]
parentFragmentId: number[]
}
/**
 * Columnar table of per-subject transforms; forwarded unchanged from
 * `snapshot.transforms`.
 *
 * NOTE(review): `matrixStart`/`matrixLength` presumably address a slice
 * of CanonicalMatricesTable.values — confirm.
 */
export interface CanonicalTransformsTable {
transformId: number[]
subjectId: number[]
matrixStart: number[]
matrixLength: number[]
originX: number[]
originY: number[]
}
/**
 * Columnar table of computed-style facts per subject; forwarded unchanged
 * from `snapshot.styles`.
 *
 * All columns are numeric. The extractor tests in this package expect
 * keyword properties to arrive as numeric codes (e.g. `display` 2 for
 * "block"); the code tables themselves are defined outside this file.
 */
export interface CanonicalStylesTable {
subjectId: number[]
display: number[]
position: number[]
zIndexKind: number[]
zIndexValue: number[]
overflowX: number[]
overflowY: number[]
opacity: number[]
visibility: number[]
containFlags: number[]
pointerEvents: number[]
lineHeight: number[]
fontFamilyStringId: number[]
fontSize: number[]
fontWeight: number[]
}
/**
 * Columnar table of text runs (ink bounds, baseline, line metrics).
 *
 * This adapter never populates it: `adaptSnapshotToCanonical` always
 * emits the result of `emptyTextTable()`, i.e. every column is `[]`.
 */
export interface CanonicalTextTable {
runId: number[]
subjectId: number[]
contentStringId: number[]
lineBoxId: number[]
inkLeft: number[]
inkTop: number[]
inkRight: number[]
inkBottom: number[]
baselineY: number[]
capHeight: number[]
computedLineHeight: number[]
}
/**
 * Columnar table of layout-topology relations and paint ordering;
 * forwarded unchanged from `snapshot.topology`.
 *
 * NOTE(review): presumably one entry per subject, with each `*Of` column
 * holding the id of the related subject — confirm against imhotep-core.
 */
export interface CanonicalTopologyTable {
containingBlockOf: number[]
nearestPositionedAncestorOf: number[]
scrollContainerOf: number[]
stackingContextOf: number[]
formattingContextOf: number[]
clippingRootOf: number[]
paintOrderBucket: number[]
paintOrderIndex: number[]
}
/**
 * Columnar table of scroll-container state. The adapter builds it by
 * pivoting the snapshot's array of scroll records: index `i` of every
 * column comes from `snapshot.scroll[i]`.
 */
export interface CanonicalScrollTable {
containerId: number[]
scrollLeft: number[]
scrollTop: number[]
scrollWidth: number[]
scrollHeight: number[]
clientWidth: number[]
clientHeight: number[]
}
/**
 * Columnar table of clip nodes. The adapter builds it by pivoting the
 * snapshot's array of clipping records: index `i` of every column comes
 * from `snapshot.clipping[i]`.
 */
export interface CanonicalClippingTable {
clipNodeId: number[]
subjectId: number[]
clipKind: number[]
clipLeft: number[]
clipTop: number[]
clipRight: number[]
clipBottom: number[]
parentClipNodeId: number[]
}
/**
 * Columnar table of paint nodes.
 *
 * This adapter never populates it: `adaptSnapshotToCanonical` always
 * emits the result of `emptyPaintTable()`, i.e. every column is `[]`.
 */
export interface CanonicalPaintTable {
paintNodeId: number[]
subjectId: number[]
stackingContextId: number[]
bucket: number[]
localPaintIndex: number[]
}
/**
 * Columnar table of per-subject visibility facts.
 *
 * This adapter fills it with placeholders from `emptyVisibilityTable`:
 * `subjectId` is 0..n-1, `isRendered`/`isVisible` are all 1, and both
 * area columns are all 0.
 */
export interface CanonicalVisibilityTable {
subjectId: number[]
isRendered: number[]
isVisible: number[]
visibleArea: number[]
clippedArea: number[]
}
/**
 * Columnar table of provenance records. The adapter builds it by pivoting
 * the snapshot's array of provenance records: index `i` of every column
 * comes from `snapshot.provenance[i]`.
 */
export interface CanonicalProvenanceTable {
factId: number[]
extractionStepId: number[]
sourceKind: number[]
sourceRef: number[]
}
/**
 * Columnar table of confidence records. The adapter builds it by pivoting
 * the snapshot's array of confidence records: index `i` of every column
 * comes from `snapshot.confidence[i]`.
 */
export interface CanonicalConfidenceTable {
factId: number[]
confidence: number[]
reasonCode: number[]
}
/**
 * Complete canonical world produced by `adaptSnapshotToCanonical`.
 *
 * As built by this adapter:
 * - `dom`, `scroll`, `clipping`, `provenance` and `confidence` are pivoted
 *   from row records into columns;
 * - `text` and `paint` are always empty tables;
 * - `visibility` holds placeholder values sized to the subject count;
 * - the remaining tables are forwarded from the snapshot as-is.
 */
export interface CanonicalGeometryWorld {
sceneId: string
snapshotId: string
env: CanonicalWorldEnvironment
source: CanonicalWorldSource
strings: CanonicalStringTable
subjects: CanonicalSubjectsTable
dom: CanonicalDomTable
frames: CanonicalFramesTable
matrices: CanonicalMatricesTable
rects: CanonicalRectsTable
boxes: CanonicalBoxesTable
visualBoxes: CanonicalBoxesTable
fragments: CanonicalFragmentsTable
transforms: CanonicalTransformsTable
styles: CanonicalStylesTable
text: CanonicalTextTable
topology: CanonicalTopologyTable
scroll: CanonicalScrollTable
clipping: CanonicalClippingTable
paint: CanonicalPaintTable
visibility: CanonicalVisibilityTable
provenance: CanonicalProvenanceTable
confidence: CanonicalConfidenceTable
}
/**
 * Pivot the row-oriented DOM extraction result into the columnar
 * CanonicalDomTable.
 *
 * Row `i` of `dom.nodes` becomes index `i` of every output column. The
 * string-valued `shadowRootKind` is encoded numerically:
 * 'open' -> 1, 'closed' -> 2, anything else -> 0.
 *
 * @param dom Flattened DOM produced by the extractor.
 * @returns A freshly allocated columnar table.
 */
function adaptDom(dom: DOMExtractionResult): CanonicalDomTable {
  const encodeShadowRootKind = (kind: string): number => {
    switch (kind) {
      case 'open':
        return 1
      case 'closed':
        return 2
      default:
        return 0
    }
  }

  const table: CanonicalDomTable = {
    nodeId: [],
    backendNodeId: [],
    parentNodeId: [],
    firstChildIndex: [],
    childCount: [],
    shadowRootKind: [],
    tagNameStringId: [],
    roleStringId: [],
    ariaNameStringId: [],
  }

  const rows = dom.nodes
  for (let row = 0; row < rows.length; row++) {
    const record = rows[row]
    table.nodeId.push(record.nodeId)
    table.backendNodeId.push(record.backendNodeId)
    table.parentNodeId.push(record.parentNodeId)
    table.firstChildIndex.push(record.firstChildIndex)
    table.childCount.push(record.childCount)
    table.shadowRootKind.push(encodeShadowRootKind(record.shadowRootKind))
    table.tagNameStringId.push(record.tagNameStringId)
    table.roleStringId.push(record.roleStringId)
    table.ariaNameStringId.push(record.ariaNameStringId)
  }

  return table
}
/**
 * Wrap the snapshot's string array in the canonical string-table shape.
 *
 * The array is referenced, not copied, so the returned table shares
 * storage with the input.
 */
function adaptStringTable(strings: string[]): CanonicalStringTable {
  const table: CanonicalStringTable = { values: strings }
  return table
}
/**
 * Convert a timestamp string to epoch milliseconds.
 *
 * @param iso Date string understood by the built-in Date parser.
 * @returns Milliseconds since the Unix epoch, or NaN when the string
 *          cannot be parsed.
 */
function adaptTimestamp(iso: string): number {
  const epochMillis = Date.parse(iso)
  return epochMillis
}
/**
 * Build a text table with no rows.
 *
 * Every column is a distinct, freshly allocated empty array, so callers
 * may push into the result without affecting other tables.
 */
function emptyTextTable(): CanonicalTextTable {
  const table: CanonicalTextTable = {
    runId: new Array<number>(),
    subjectId: new Array<number>(),
    contentStringId: new Array<number>(),
    lineBoxId: new Array<number>(),
    inkLeft: new Array<number>(),
    inkTop: new Array<number>(),
    inkRight: new Array<number>(),
    inkBottom: new Array<number>(),
    baselineY: new Array<number>(),
    capHeight: new Array<number>(),
    computedLineHeight: new Array<number>(),
  }
  return table
}
/**
 * Build a paint table with no rows.
 *
 * Every column is a distinct, freshly allocated empty array.
 */
function emptyPaintTable(): CanonicalPaintTable {
  const table: CanonicalPaintTable = {
    paintNodeId: new Array<number>(),
    subjectId: new Array<number>(),
    stackingContextId: new Array<number>(),
    bucket: new Array<number>(),
    localPaintIndex: new Array<number>(),
  }
  return table
}
/**
 * Build a placeholder visibility table with one row per subject.
 *
 * Despite the name the table is not zero-length: `subjectId` is the
 * sequence 0..subjectCount-1, `isRendered` and `isVisible` are all 1,
 * and `visibleArea` / `clippedArea` are all 0.
 *
 * @param subjectCount Number of rows to generate.
 */
function emptyVisibilityTable(subjectCount: number): CanonicalVisibilityTable {
  // Each call allocates a new array so no two columns share storage.
  const constantColumn = (value: number): number[] => new Array<number>(subjectCount).fill(value)
  const subjectId = Array.from({ length: subjectCount }, (_unused, position) => position)

  return {
    subjectId,
    isRendered: constantColumn(1),
    isVisible: constantColumn(1),
    visibleArea: constantColumn(0),
    clippedArea: constantColumn(0),
  }
}
/**
 * Convert a CDP GeometryWorldSnapshot into the canonical GeometryWorld shape.
 *
 * What the mapping does:
 * - `source.extractedAt` is converted from a timestamp string to epoch
 *   milliseconds and `engine` is pinned to 'chromium-cdp';
 * - the DOM node records and the scroll / clipping / provenance /
 *   confidence record arrays are pivoted into columnar arrays;
 * - `text` and `paint` are emitted as empty tables and `visibility` as a
 *   placeholder table sized to the subject count;
 * - every other table is forwarded by reference from the snapshot.
 *
 * @param snapshot Snapshot produced by the CDP extractor.
 * @returns The canonical world; forwarded tables share storage with `snapshot`.
 */
export function adaptSnapshotToCanonical(snapshot: GeometryWorldSnapshot): CanonicalGeometryWorld {
  // Pull one field out of every record, preserving record order.
  const column = <Row, Key extends keyof Row>(rows: readonly Row[], key: Key): Row[Key][] =>
    rows.map((row) => row[key])

  const cdpSource = snapshot.source
  const scrollRows = snapshot.scroll
  const clipRows = snapshot.clipping
  const provenanceRows = snapshot.provenance
  const confidenceRows = snapshot.confidence

  const world: CanonicalGeometryWorld = {
    sceneId: snapshot.sceneId,
    snapshotId: snapshot.snapshotId,
    env: snapshot.env,
    source: {
      url: cdpSource.url,
      browserName: cdpSource.browserName,
      browserVersion: cdpSource.browserVersion,
      engine: 'chromium-cdp',
      extractedAt: adaptTimestamp(cdpSource.extractedAt),
    },
    strings: adaptStringTable(snapshot.strings),
    subjects: snapshot.subjects,
    dom: adaptDom(snapshot.dom),
    frames: snapshot.frames,
    matrices: snapshot.matrices,
    rects: snapshot.rects,
    boxes: snapshot.boxes,
    visualBoxes: snapshot.visualBoxes,
    fragments: snapshot.fragments,
    transforms: snapshot.transforms,
    styles: snapshot.styles,
    text: emptyTextTable(),
    topology: snapshot.topology,
    scroll: {
      containerId: column(scrollRows, 'containerId'),
      scrollLeft: column(scrollRows, 'scrollLeft'),
      scrollTop: column(scrollRows, 'scrollTop'),
      scrollWidth: column(scrollRows, 'scrollWidth'),
      scrollHeight: column(scrollRows, 'scrollHeight'),
      clientWidth: column(scrollRows, 'clientWidth'),
      clientHeight: column(scrollRows, 'clientHeight'),
    },
    clipping: {
      clipNodeId: column(clipRows, 'clipNodeId'),
      subjectId: column(clipRows, 'subjectId'),
      clipKind: column(clipRows, 'clipKind'),
      clipLeft: column(clipRows, 'clipLeft'),
      clipTop: column(clipRows, 'clipTop'),
      clipRight: column(clipRows, 'clipRight'),
      clipBottom: column(clipRows, 'clipBottom'),
      parentClipNodeId: column(clipRows, 'parentClipNodeId'),
    },
    paint: emptyPaintTable(),
    visibility: emptyVisibilityTable(snapshot.subjects.ids.length),
    provenance: {
      factId: column(provenanceRows, 'factId'),
      extractionStepId: column(provenanceRows, 'extractionStepId'),
      sourceKind: column(provenanceRows, 'sourceKind'),
      sourceRef: column(provenanceRows, 'sourceRef'),
    },
    confidence: {
      factId: column(confidenceRows, 'factId'),
      confidence: column(confidenceRows, 'confidence'),
      reasonCode: column(confidenceRows, 'reasonCode'),
    },
  }
  return world
}
+220
View File
@@ -0,0 +1,220 @@
/**
* DOM Extraction
*
* Captures the DOM tree, node identifiers, selectors, and shadow DOM
* boundaries using CDP DOM domain commands.
*
* Bulk-first: we request the full document tree in one call, then
* traverse the returned tree locally rather than making per-node
* round-trips.
*/
import type { CDPSession } from './session.js'
/**
 * Raw DOM node as returned by CDP DOM.getDocument with depth -1.
 *
 * Only the fields this module reads or the tests supply are declared.
 * `extractDOM` treats `children`, `shadowRoots` and `pseudoElements`
 * alike: all three lists are walked as children of the node.
 */
export interface CDPDOMNode {
nodeId: number
backendNodeId: number
nodeType: number
nodeName: string
localName?: string
nodeValue?: string
parentId?: number
children?: CDPDOMNode[]
shadowRoots?: CDPDOMNode[]
pseudoElements?: CDPDOMNode[]
// Flat list read by extractDOM as alternating name, value entries.
attributes?: string[]
}
/**
 * Flattened DOM record for a single node, as emitted by `extractDOM`.
 */
export interface DOMNodeRecord {
nodeId: number
backendNodeId: number
// nodeId of the parent; extractDOM passes 0 for the document root.
parentNodeId: number
// Index of the row directly after this node in the flat array. It is set
// even when childCount is 0, so check childCount before using it.
firstChildIndex: number
// Combined count of regular children, shadow roots and pseudo elements.
childCount: number
// Recorded on the host element, not on the shadow-root fragment itself.
shadowRootKind: 'open' | 'closed' | 'none'
// The three *StringId fields are indices into DOMExtractionResult.strings.
tagNameStringId: number
// Id of '' when the node has no `role` attribute.
roleStringId: number
// Id of '' when the node has neither `aria-label` nor `aria-labelledby`.
ariaNameStringId: number
}
/**
 * Result of DOM extraction.
 *
 * `nodes` is filled by a depth-first walk that records each parent before
 * its children, so `rootIndex` is the index of the first recorded node.
 */
export interface DOMExtractionResult {
/** Flat array of DOM nodes in document order */
nodes: DOMNodeRecord[]
/** String table for deduplication; each distinct string appears once */
strings: string[]
/** Map from nodeId to index in nodes array */
nodeIdToIndex: Map<number, number>
/** Root node index */
rootIndex: number
}
/**
 * Extract the full DOM tree from the current page via CDP.
 *
 * Uses DOM.getDocument with depth -1 (and `pierce: true`) to fetch the
 * entire tree in a single protocol round-trip, then flattens it locally
 * with a depth-first walk that records each parent before its children.
 *
 * @param session CDP session used to issue DOM.getDocument.
 * @returns Flat node records, the deduplicated string table they index
 *          into, a nodeId -> row index map, and the root's row index.
 * @throws Whatever `session.send` rejects with.
 */
export async function extractDOM(session: CDPSession): Promise<DOMExtractionResult> {
  const response = await session.send<{ root: CDPDOMNode }>('DOM.getDocument', {
    depth: -1,
    pierce: true,
  })

  const strings: string[] = []
  const stringIndex = new Map<string, number>()

  /** Intern `value`, returning its index in `strings`. */
  function getStringId(value: string): number {
    const existing = stringIndex.get(value)
    if (existing !== undefined) return existing
    const id = strings.length
    strings.push(value)
    stringIndex.set(value, id)
    return id
  }

  const nodes: DOMNodeRecord[] = []
  const nodeIdToIndex = new Map<number, number>()

  function walk(node: CDPDOMNode, parentNodeId: number): number {
    const nodeIndex = nodes.length
    nodeIdToIndex.set(node.nodeId, nodeIndex)

    const children = node.children ?? []
    const shadowRoots = node.shadowRoots ?? []
    const pseudoElements = node.pseudoElements ?? []
    const allChildren = [...children, ...shadowRoots, ...pseudoElements]

    // The first child (if any) lands directly after this node. Later
    // siblings are NOT contiguous: each child's whole subtree is emitted
    // before the next child, so the children do not occupy the range
    // [firstChildIndex, firstChildIndex + childCount).
    const firstChildIndex = nodeIndex + 1
    const childCount = allChildren.length

    // CDP reports the shadow root type on the shadow-root node itself
    // (`shadowRootType`: 'user-agent' | 'open' | 'closed'). Only 'closed'
    // maps to 'closed'; a missing or any other value keeps the previous
    // default of 'open'.
    let shadowRootKind: DOMNodeRecord['shadowRootKind'] = 'none'
    if (shadowRoots.length > 0) {
      const hasClosedRoot = shadowRoots.some(
        (root) => (root as CDPDOMNode & { shadowRootType?: string }).shadowRootType === 'closed'
      )
      shadowRootKind = hasClosedRoot ? 'closed' : 'open'
    }

    // Extract tag name from localName or nodeName
    const tagName = node.localName || node.nodeName.toLowerCase()
    const tagNameStringId = getStringId(tagName)

    // Default both to the id of the empty string when the attribute is absent.
    let roleStringId = getStringId('')
    let ariaNameStringId = getStringId('')
    if (node.attributes) {
      // Attributes arrive as a flat [name, value, name, value, ...] list.
      for (let i = 0; i < node.attributes.length; i += 2) {
        const attrName = node.attributes[i]
        // Guard against a dangling name so `undefined` never enters the
        // string table.
        const attrValue = node.attributes[i + 1] ?? ''
        if (attrName === 'role') {
          roleStringId = getStringId(attrValue)
        }
        // NOTE(review): for aria-labelledby this stores the raw attribute
        // value (an id reference list), not the resolved label text, and
        // whichever of the two attributes appears last wins.
        if (attrName === 'aria-label' || attrName === 'aria-labelledby') {
          ariaNameStringId = getStringId(attrValue)
        }
      }
    }

    nodes.push({
      nodeId: node.nodeId,
      backendNodeId: node.backendNodeId,
      parentNodeId,
      firstChildIndex,
      childCount,
      shadowRootKind,
      tagNameStringId,
      roleStringId,
      ariaNameStringId,
    })

    // Walk children after pushing parent so indices are stable.
    for (const child of allChildren) {
      walk(child, node.nodeId)
    }
    return nodeIndex
  }

  // The root has no parent; 0 is used as the sentinel parent id.
  const rootIndex = walk(response.root, 0)

  return {
    nodes,
    strings,
    nodeIdToIndex,
    rootIndex,
  }
}
/**
 * Resolved selector match with both frontend nodeId and stable backendNodeId.
 *
 * `nodeId` is the id returned by DOM.querySelectorAll; `backendNodeId` is
 * read from the DOM.describeNode response for that node.
 */
export interface SelectorMatch {
nodeId: number
backendNodeId: number
}
/**
 * Resolve a CSS selector to node IDs via CDP.
 *
 * Each match carries the frontend nodeId (needed by
 * CSS.getComputedStyleForNode) and the stable backendNodeId (the
 * canonical identity for geometry/topology).
 *
 * The document root id comes from `session.getDocumentRootNodeId()` when
 * the session provides it; otherwise DOM.getDocument is called. Nodes
 * whose DOM.describeNode lookup fails are dropped from the result rather
 * than failing the whole call.
 *
 * @param session  CDP session, or any object exposing `send` and
 *                 optionally a cached-root accessor.
 * @param selector CSS selector evaluated from the document root.
 * @returns Matches in the order DOM.querySelectorAll reported them.
 */
export async function resolveSelector(
  session: CDPSession | { send: CDPSession['send']; getDocumentRootNodeId?: () => Promise<number> },
  selector: string
): Promise<SelectorMatch[]> {
  // Prefer the cached root id; fall back to a protocol round-trip.
  const rootNodeId =
    'getDocumentRootNodeId' in session && session.getDocumentRootNodeId
      ? await session.getDocumentRootNodeId()
      : (await session.send<{ root: { nodeId: number } }>('DOM.getDocument')).root.nodeId

  const { nodeIds } = await session.send<{ nodeIds: number[] }>('DOM.querySelectorAll', {
    nodeId: rootNodeId,
    selector,
  })

  // Best-effort lookup: a failure for one node yields null for that node only.
  const describeOne = async (nodeId: number): Promise<SelectorMatch | null> => {
    try {
      const { node } = await session.send<{ node: { backendNodeId: number } }>('DOM.describeNode', { nodeId })
      return { nodeId, backendNodeId: node.backendNodeId }
    } catch {
      return null
    }
  }

  // All lookups are started up front and awaited together.
  const lookups = await Promise.all(nodeIds.map((id) => describeOne(id)))
  return lookups.filter((entry): entry is SelectorMatch => entry !== null)
}
/**
 * Resolve a selector to the backend node ID of its first match.
 *
 * @returns The first match's backendNodeId, or null when nothing matched.
 */
export async function resolveOneSelector(
  session: CDPSession,
  selector: string
): Promise<number | null> {
  const matches = await resolveSelector(session, selector)
  if (matches.length === 0) {
    return null
  }
  const [firstMatch] = matches
  return firstMatch.backendNodeId
}
+738
View File
@@ -0,0 +1,738 @@
/**
* Extractor Integration Tests
*
* Tests CDP session setup, DOM extraction, and geometry extraction
* using a mock CDP session to avoid requiring a real browser.
*
* Run with: node --test dist/extractor.test.js
*/
import { describe, it } from 'node:test'
import assert from 'node:assert'
import {
CDPSessionManager,
CDPExtractor,
extractDOM,
extractBoxModels,
extractFragments,
extractTransforms,
extractStyles,
extractTopology,
type CDPSession,
type ExtractorRequest,
} from './index.js'
// ---------------------------------------------------------------------------
// Mock CDP Session Factory
// ---------------------------------------------------------------------------
/**
 * Build an in-memory CDPSession that answers `send` from canned data.
 *
 * Lookup order for a call:
 * 1. an entry keyed by the bare method name;
 * 2. otherwise an entry keyed by `method:JSON(params)` (params default to `{}`);
 * 3. otherwise the optional `fallback` callback.
 * A canned Error instance is thrown instead of returned. When nothing
 * yields a defined value the call fails with "Unexpected CDP call".
 *
 * `on`, `off` and `detach` are no-ops.
 */
function createMockSession(
  responses: Map<string, unknown>,
  fallback?: (method: string, params?: Record<string, unknown>) => unknown
): CDPSession {
  const send = async <T>(method: string, params?: Record<string, unknown>): Promise<T> => {
    const paramKey = `${method}:${JSON.stringify(params ?? {})}`
    // A method-wide entry takes precedence over a params-specific one.
    const canned: unknown = responses.has(method) ? responses.get(method) : responses.get(paramKey)

    if (canned instanceof Error) {
      throw canned
    }
    if (canned !== undefined) {
      return canned as T
    }

    const computed = fallback ? fallback(method, params) : undefined
    if (computed !== undefined) {
      return computed as T
    }

    throw new Error(`Unexpected CDP call: ${method} with ${JSON.stringify(params)}`)
  }

  return {
    send,
    on() {},
    off() {},
    async detach() {},
  }
}
/**
 * Wrap `createMockSession` in the async factory shape accepted by
 * CDPSessionManager.
 *
 * The page reference argument is ignored, and every invocation builds a
 * new mock session over the same `responses` map.
 */
function createMockSessionFactory(
  responses: Map<string, unknown>,
  fallback?: (method: string, params?: Record<string, unknown>) => unknown
): (pageRef: unknown) => Promise<CDPSession> {
  const attach = async (): Promise<CDPSession> => {
    const session = createMockSession(responses, fallback)
    return session
  }
  return attach
}
// ---------------------------------------------------------------------------
// CDP Session Setup
// ---------------------------------------------------------------------------
// Suite: attach / getSession / detach lifecycle of CDPSessionManager,
// driven by a mock session factory with no canned responses.
describe('CDP Session Management', () => {
// attach() resolves to a truthy session object.
it('should attach a session via the factory', async () => {
const factory = createMockSessionFactory(new Map())
const manager = new CDPSessionManager(factory, {})
const session = await manager.attach()
assert.ok(session, 'session should be defined')
})
// A second attach() must hand back the identical session instance.
it('should return the same session on re-attach', async () => {
const factory = createMockSessionFactory(new Map())
const manager = new CDPSessionManager(factory, {})
const s1 = await manager.attach()
const s2 = await manager.attach()
assert.strictEqual(s1, s2, 'should reuse attached session')
})
// getSession() throws synchronously with a "not attached" message.
it('should throw if getSession called before attach', () => {
const factory = createMockSessionFactory(new Map())
const manager = new CDPSessionManager(factory, {})
assert.throws(() => manager.getSession(), /not attached/)
})
// After detach() the manager behaves as if it was never attached.
it('should detach cleanly', async () => {
const factory = createMockSessionFactory(new Map())
const manager = new CDPSessionManager(factory, {})
await manager.attach()
await manager.detach()
assert.throws(() => manager.getSession(), /not attached/)
})
})
// ---------------------------------------------------------------------------
// DOM Extraction
// ---------------------------------------------------------------------------
// Suite: extractDOM flattening, exercised against canned DOM.getDocument
// responses (matched by bare method name, so request params are ignored).
describe('DOM Extraction', () => {
// Fixture: #document > html > body > div[role=main]; expects 4 flat rows.
it('should extract a flat DOM tree from CDP getDocument', async () => {
const responses = new Map<string, unknown>()
responses.set('DOM.getDocument', {
root: {
nodeId: 1,
backendNodeId: 101,
nodeType: 9,
nodeName: '#document',
children: [
{
nodeId: 2,
backendNodeId: 102,
nodeType: 1,
nodeName: 'HTML',
localName: 'html',
children: [
{
nodeId: 3,
backendNodeId: 103,
nodeType: 1,
nodeName: 'BODY',
localName: 'body',
children: [
{
nodeId: 4,
backendNodeId: 104,
nodeType: 1,
nodeName: 'DIV',
localName: 'div',
attributes: ['class', 'container', 'role', 'main'],
},
],
},
],
},
],
},
})
const session = createMockSession(responses)
const result = await extractDOM(session)
assert.strictEqual(result.nodes.length, 4, 'should have 4 nodes')
assert.strictEqual(result.rootIndex, 0, 'root should be index 0')
assert.ok(result.nodeIdToIndex.has(4), 'should map nodeId 4')
// Check that the tag name and role value were interned in the string table
assert.ok(result.strings.includes('div'), 'should include div tag')
assert.ok(result.strings.includes('main'), 'should include role main')
// Check node 4 record
const node4Idx = result.nodeIdToIndex.get(4)!
const node4 = result.nodes[node4Idx]
assert.strictEqual(node4.nodeId, 4)
assert.strictEqual(node4.parentNodeId, 3)
assert.strictEqual(node4.shadowRootKind, 'none')
})
// Fixture: a div hosting a shadow root that contains one span. The shadow
// root fragment and its child must both appear as flat rows.
it('should handle shadow roots', async () => {
const responses = new Map<string, unknown>()
responses.set('DOM.getDocument', {
root: {
nodeId: 1,
backendNodeId: 101,
nodeType: 9,
nodeName: '#document',
children: [
{
nodeId: 2,
backendNodeId: 102,
nodeType: 1,
nodeName: 'DIV',
localName: 'div',
shadowRoots: [
{
nodeId: 3,
backendNodeId: 103,
nodeType: 11,
nodeName: '#document-fragment',
children: [
{
nodeId: 4,
backendNodeId: 104,
nodeType: 1,
nodeName: 'SPAN',
localName: 'span',
},
],
},
],
},
],
},
})
const session = createMockSession(responses)
const result = await extractDOM(session)
assert.strictEqual(result.nodes.length, 4, 'should have 4 nodes including shadow root and its child')
// shadowRootKind is recorded on the host element (node 2), not the fragment
const hostIdx = result.nodeIdToIndex.get(2)!
assert.strictEqual(result.nodes[hostIdx].shadowRootKind, 'open')
})
})
// ---------------------------------------------------------------------------
// Geometry Extraction
// ---------------------------------------------------------------------------
// Suite: extractBoxModels against canned DOM.getBoxModel responses that are
// keyed by method plus JSON-encoded params, so each backendNodeId gets its
// own answer.
describe('Geometry Extraction', () => {
// Two nodes, two successful box models. Each quad is eight numbers; the
// assertions expect the border and content quads to come back as
// left/top/right/bottom edges.
it('should extract box models in bulk', async () => {
const responses = new Map<string, unknown>()
// Box model responses keyed by backendNodeId.
// DOM.getBoxModel now accepts backendNodeId directly and returns { model: CDPBoxModel }.
responses.set(
'DOM.getBoxModel:{"backendNodeId":10}',
{
model: {
content: [100, 100, 200, 100, 200, 200, 100, 200],
padding: [90, 90, 210, 90, 210, 210, 90, 210],
border: [80, 80, 220, 80, 220, 220, 80, 220],
margin: [70, 70, 230, 70, 230, 230, 70, 230],
width: 100,
height: 100,
},
}
)
responses.set(
'DOM.getBoxModel:{"backendNodeId":20}',
{
model: {
content: [300, 300, 400, 300, 400, 400, 300, 400],
padding: [290, 290, 410, 290, 410, 410, 290, 410],
border: [280, 280, 420, 280, 420, 420, 280, 420],
margin: [270, 270, 430, 270, 430, 430, 270, 430],
width: 100,
height: 100,
},
}
)
const session = createMockSession(responses)
const backendNodeIds = [10, 20]
const subjectIds = [0, 1]
const { boxes, errors } = await extractBoxModels(session, backendNodeIds, subjectIds)
assert.strictEqual(boxes.length, 2, 'should extract 2 boxes')
assert.strictEqual(errors.length, 0, 'should have no errors')
const box0 = boxes[0]
assert.strictEqual(box0.borderLeft, 80)
assert.strictEqual(box0.borderTop, 80)
assert.strictEqual(box0.borderRight, 220)
assert.strictEqual(box0.borderBottom, 220)
assert.strictEqual(box0.contentLeft, 100)
assert.strictEqual(box0.contentTop, 100)
assert.strictEqual(box0.contentRight, 200)
assert.strictEqual(box0.contentBottom, 200)
})
// A canned Error makes the mock's send() throw; the extractor must report
// it in `errors` (with the message in `reason`) instead of rejecting.
it('should handle box model extraction failures gracefully', async () => {
const responses = new Map<string, unknown>()
// DOM.getBoxModel now uses backendNodeId parameter.
responses.set('DOM.getBoxModel:{"backendNodeId":10}', new Error('Node not found'))
const session = createMockSession(responses)
const backendNodeIds = [10]
const subjectIds = [0]
const { boxes, errors } = await extractBoxModels(session, backendNodeIds, subjectIds)
assert.strictEqual(boxes.length, 0)
assert.strictEqual(errors.length, 1)
assert.ok(errors[0].reason.includes('Node not found'))
})
})
// ---------------------------------------------------------------------------
// Style Extraction
// ---------------------------------------------------------------------------
// Suite: extractStyles, checking that computed-style strings returned by
// the page are converted to the numeric values asserted below.
describe('Style Extraction', () => {
// Both CDP calls are mocked by bare method name, so any node resolves to
// the same object id and the same computed-style payload.
it('should extract computed styles and map to enums', async () => {
const responses = new Map<string, unknown>()
// extractStyles now resolves backendNodeId -> objectId via DOM.resolveNode
// then uses Runtime.callFunctionOn to read computed styles via JS.
responses.set('DOM.resolveNode', { object: { objectId: 'obj1' } })
responses.set('Runtime.callFunctionOn', {
result: {
value: {
display: 'flex',
position: 'relative',
zIndex: '10',
overflowX: 'hidden',
overflowY: 'auto',
opacity: '0.5',
visibility: 'visible',
contain: 'layout paint',
pointerEvents: 'none',
lineHeight: '24px',
fontFamily: 'Arial',
fontSize: '16px',
fontWeight: '700',
},
},
})
const session = createMockSession(responses)
const backendNodeIds = [110]
const subjectIds = [0]
const { styles, errors } = await extractStyles(session, backendNodeIds, subjectIds, { all: true }, [])
assert.strictEqual(styles.length, 1)
assert.strictEqual(errors.length, 0)
const s = styles[0]
// Expected numeric codes for the keyword values supplied above.
assert.strictEqual(s.display, 5) // flex
assert.strictEqual(s.position, 2) // relative
assert.strictEqual(s.zIndexKind, 1) // integer
assert.strictEqual(s.zIndexValue, 10)
assert.strictEqual(s.overflowX, 2) // hidden
assert.strictEqual(s.overflowY, 5) // auto
assert.strictEqual(s.opacity, 0.5)
assert.strictEqual(s.containFlags, 3) // layout(1) | paint(2)
assert.strictEqual(s.pointerEvents, 2) // none
assert.strictEqual(s.fontSize, 16)
assert.strictEqual(s.fontWeight, 700)
})
})
// ---------------------------------------------------------------------------
// Topology Extraction
// ---------------------------------------------------------------------------
// Suite: extractTopology, which is expected to surface the scroll,
// clipping and topology data carried in the Runtime.evaluate result.
describe('Topology Extraction', () => {
// One scroll container, one clip node and one topology row are returned by
// the mocked Runtime.evaluate; the assertions check they come through.
it('should extract topology via Runtime.evaluate', async () => {
const mockTopologyResult = {
scroll: [
{
containerId: 1,
scrollLeft: 0,
scrollTop: 100,
scrollWidth: 500,
scrollHeight: 1000,
clientWidth: 400,
clientHeight: 300,
},
],
clipping: [
{
clipNodeId: 0,
subjectId: 1,
clipKind: 1,
clipLeft: 0,
clipTop: 0,
clipRight: 400,
clipBottom: 300,
parentClipNodeId: 0,
},
],
topology: {
containingBlockOf: [0],
nearestPositionedAncestorOf: [0],
scrollContainerOf: [0],
stackingContextOf: [0],
formattingContextOf: [0],
clippingRootOf: [0],
paintOrderBucket: [1],
paintOrderIndex: [0],
},
}
const responses = new Map<string, unknown>()
// Matched by bare method name, so the evaluated expression is not checked.
responses.set('Runtime.evaluate', {
result: { value: mockTopologyResult },
})
const session = createMockSession(responses)
const { result, errors } = await extractTopology(session, [10], [0])
assert.strictEqual(errors.length, 0)
assert.strictEqual(result.scroll.length, 1)
assert.strictEqual(result.scroll[0].scrollTop, 100)
assert.strictEqual(result.clipping.length, 1)
assert.strictEqual(result.topology.paintOrderBucket[0], 1)
})
})
// ---------------------------------------------------------------------------
// Full Orchestrator
// ---------------------------------------------------------------------------
describe('CDPExtractor', () => {
it('should perform full extraction with partial handling', async () => {
const responses = new Map<string, unknown>()
// Domain enablement is now part of the attach flow.
responses.set('DOM.enable', {})
responses.set('CSS.enable', {})
responses.set('Runtime.enable', {})
// DOM.getDocument
responses.set('DOM.getDocument', {
root: {
nodeId: 1,
backendNodeId: 101,
nodeType: 9,
nodeName: '#document',
children: [
{
nodeId: 2,
backendNodeId: 102,
nodeType: 1,
nodeName: 'BODY',
localName: 'body',
children: [
{
nodeId: 10,
backendNodeId: 110,
nodeType: 1,
nodeName: 'DIV',
localName: 'div',
attributes: ['class', 'test'],
},
],
},
],
},
})
// Selector resolution
responses.set('DOM.querySelectorAll', { nodeIds: [10] })
responses.set('DOM.describeNode', { node: { backendNodeId: 110 } })
// Box model (uses backendNodeId, returns { model: {...} })
responses.set(
'DOM.getBoxModel:{"backendNodeId":110}',
{
model: {
content: [10, 10, 110, 10, 110, 110, 10, 110],
padding: [0, 0, 120, 0, 120, 120, 0, 120],
border: [0, 0, 120, 0, 120, 120, 0, 120],
margin: [0, 0, 120, 0, 120, 120, 0, 120],
width: 100,
height: 100,
},
}
)
// Styles (uses DOM.resolveNode + Runtime.callFunctionOn)
responses.set('DOM.resolveNode', { object: { objectId: 'obj1' } })
responses.set('Runtime.callFunctionOn', {
result: {
value: {
display: 'block',
position: 'static',
zIndex: 'auto',
overflowX: 'visible',
overflowY: 'visible',
opacity: '1',
visibility: 'visible',
contain: 'none',
pointerEvents: 'auto',
lineHeight: 'normal',
fontFamily: 'sans-serif',
fontSize: '16px',
fontWeight: '400',
},
},
})
// Topology via Runtime.evaluate
responses.set('Runtime.evaluate', {
result: {
value: {
scroll: [],
clipping: [],
topology: {
containingBlockOf: [0],
nearestPositionedAncestorOf: [0],
scrollContainerOf: [0],
stackingContextOf: [0],
formattingContextOf: [0],
clippingRootOf: [0],
paintOrderBucket: [1],
paintOrderIndex: [0],
},
},
},
})
const factory = createMockSessionFactory(responses)
const manager = new CDPSessionManager(factory, {})
const extractor = new CDPExtractor(manager)
const request: ExtractorRequest = {
requestId: 'test-1',
sceneTarget: { pageRef: {}, url: 'https://example.test' },
env: {
viewportWidth: 1280,
viewportHeight: 720,
colorScheme: 'light',
pointer: 'fine',
},
subjects: [{ id: 's1', selector: '.test' }],
requiredFacts: {
geometry: true,
fragments: false,
styles: true,
topology: true,
},
}
const response = await extractor.extract(request)
assert.strictEqual(response.requestId, 'test-1')
assert.strictEqual(response.status, 'ok')
assert.strictEqual(response.diagnostics.length, 0)
assert.ok(response.snapshots, 'snapshots array should exist')
assert.strictEqual(response.snapshots.length, 1)
assert.ok(response.snapshot, 'deprecated snapshot field still present')
assert.strictEqual(response.snapshots[0].subjects.ids.length, 1)
assert.strictEqual(response.snapshots[0].boxes.boxId.length, 1)
assert.strictEqual(response.snapshots[0].styles.display[0], 2) // block
assert.strictEqual(response.snapshots[0].provenance.length, 5) // 5 extraction steps
})
it('should return partial status when selectors do not match', async () => {
const responses = new Map<string, unknown>()
// Domain enablement is now part of the attach flow.
responses.set('DOM.enable', {})
responses.set('CSS.enable', {})
responses.set('Runtime.enable', {})
responses.set('DOM.getDocument', {
root: {
nodeId: 1,
backendNodeId: 101,
nodeType: 9,
nodeName: '#document',
children: [],
},
})
responses.set('DOM.querySelectorAll', { nodeIds: [] })
const factory = createMockSessionFactory(responses)
const manager = new CDPSessionManager(factory, {})
const extractor = new CDPExtractor(manager)
const request: ExtractorRequest = {
requestId: 'test-2',
sceneTarget: { pageRef: {}, url: 'https://example.test' },
env: {
viewportWidth: 1280,
viewportHeight: 720,
colorScheme: 'light',
pointer: 'fine',
},
subjects: [{ id: 's1', selector: '.missing' }],
requiredFacts: { geometry: true },
}
const response = await extractor.extract(request)
assert.strictEqual(response.status, 'partial')
assert.ok(
response.diagnostics.some((d) => d.code === 'IMH_SELECTOR_NO_MATCH'),
'should emit selector no match diagnostic'
)
})
it('should resolve multiple matches per selector and preserve identity', async () => {
  // Fixture helper: a <button class="btn"> node as DOM.getDocument reports it.
  const buttonNode = (nodeId: number, backendNodeId: number) => ({
    nodeId,
    backendNodeId,
    nodeType: 1,
    nodeName: 'BUTTON',
    localName: 'button',
    attributes: ['class', 'btn'],
  })
  // Fixture helper: a DOM.getBoxModel payload for a 100x40 axis-aligned box
  // whose content/padding/border/margin quads all coincide.
  const boxModelFor = (left: number, right: number) => {
    const quad = () => [left, 0, right, 0, right, 40, left, 40]
    return {
      model: {
        content: quad(),
        padding: quad(),
        border: quad(),
        margin: quad(),
        width: 100,
        height: 40,
      },
    }
  }

  const responses = new Map<string, unknown>([
    // Domain enablement is part of the attach flow.
    ['DOM.enable', {}],
    ['CSS.enable', {}],
    ['Runtime.enable', {}],
    [
      'DOM.getDocument',
      {
        root: {
          nodeId: 1,
          backendNodeId: 101,
          nodeType: 9,
          nodeName: '#document',
          children: [
            {
              nodeId: 2,
              backendNodeId: 102,
              nodeType: 1,
              nodeName: 'DIV',
              localName: 'div',
              children: [buttonNode(10, 110), buttonNode(11, 111)],
            },
          ],
        },
      },
    ],
    // The selector resolves to two frontend nodeIds.
    ['DOM.querySelectorAll', { nodeIds: [10, 11] }],
    // DOM.describeNode maps each frontend nodeId to its backendNodeId;
    // the mock is keyed on the serialized params.
    ['DOM.describeNode:{"nodeId":10}', { node: { backendNodeId: 110 } }],
    ['DOM.describeNode:{"nodeId":11}', { node: { backendNodeId: 111 } }],
    // One box model per backendNodeId, laid out side by side.
    ['DOM.getBoxModel:{"backendNodeId":110}', boxModelFor(0, 100)],
    ['DOM.getBoxModel:{"backendNodeId":111}', boxModelFor(110, 210)],
    // Transforms go through DOM.resolveNode + Runtime.callFunctionOn.
    ['DOM.resolveNode', { object: { objectId: 'obj1' } }],
    [
      'Runtime.callFunctionOn',
      {
        result: {
          value: {
            matrix: [1, 0, 0, 1, 0, 0],
            originX: 0,
            originY: 0,
          },
        },
      },
    ],
  ])

  const extractor = new CDPExtractor(new CDPSessionManager(createMockSessionFactory(responses), {}))
  const request: ExtractorRequest = {
    requestId: 'test-multi',
    sceneTarget: { pageRef: {}, url: 'https://example.test' },
    env: {
      viewportWidth: 1280,
      viewportHeight: 720,
      colorScheme: 'light',
      pointer: 'fine',
    },
    subjects: [{ id: 's1', selector: '.btn' }],
    requiredFacts: { geometry: true, styles: false, topology: false },
  }

  const response = await extractor.extract(request)
  const [world] = response.snapshots

  assert.strictEqual(response.status, 'ok')
  assert.strictEqual(response.diagnostics.length, 0)
  assert.strictEqual(world.subjects.ids.length, 2)
  assert.strictEqual(world.boxes.boxId.length, 2)

  // Both matches must surface with their own backendNodeId in the subjects table.
  const backendIds = world.subjects.domNodeId
  assert.strictEqual(backendIds.length, 2)
  assert.ok(backendIds.includes(110), 'should include backendNodeId 110')
  assert.ok(backendIds.includes(111), 'should include backendNodeId 111')
})
it('should return error status when session attach fails', async () => {
  // A session factory that can never produce a session.
  const unreachableFactory = async () => {
    throw new Error('Browser not reachable')
  }
  const extractor = new CDPExtractor(new CDPSessionManager(unreachableFactory, {}))

  const request: ExtractorRequest = {
    requestId: 'test-3',
    sceneTarget: { pageRef: {}, url: 'https://example.test' },
    env: {
      viewportWidth: 1280,
      viewportHeight: 720,
      colorScheme: 'light',
      pointer: 'fine',
    },
    subjects: [],
    requiredFacts: {},
  }

  const response = await extractor.extract(request)
  const attachFailures = response.diagnostics.filter((d) => d.code === 'IMH_CDP_SESSION_ATTACH_FAILED')

  assert.strictEqual(response.status, 'error')
  assert.ok(attachFailures.length > 0, 'should emit attach failed diagnostic')
  // The response still carries a (subject-less) snapshot on failure.
  assert.strictEqual(response.snapshots[0].subjects.ids.length, 0)
})
})
+973
View File
@@ -0,0 +1,973 @@
/**
* Main Extraction Orchestrator
*
* Consumes an extractor request plan, coordinates CDP session
* management, DOM resolution, geometry capture, style extraction,
* and topology building.
*
* Returns raw browser facts matching the geometry world schema,
* with provenance metadata and graceful partial handling.
*/
import type { DiagnosticCode } from 'imhotep-core'
import type { CDPSession, CDPSessionManager } from './session.js'
import { extractDOM, resolveSelector, type DOMExtractionResult, type SelectorMatch } from './dom.js'
import {
extractBoxModels,
extractFragments,
extractTransforms,
extractVisualBoxes,
type GeometryExtractionResult,
type BoxRecord,
type FragmentRecord,
type TransformRecord,
} from './geometry.js'
import { extractStyles, type StyleRecord, type StyleExtractionConfig } from './styles.js'
import { extractTopology, type TopologyExtractionResult } from './topology.js'
/**
* Extractor request plan.
*
* Describes a single extraction run: the page to inspect, the environment
* recorded on the snapshot, the subjects to resolve, and which fact
* families CDPExtractor.extract should collect.
*/
export interface ExtractorRequest {
// Echoed on the response and reused as the snapshot's sceneId.
requestId: string
sceneTarget: {
// Opaque page handle; not read by the extractor code in this file.
pageRef: unknown
// Copied verbatim into snapshot.source.url.
url: string
}
// Copied verbatim into snapshot.env; the extractor in this file does not
// apply these values to the browser itself.
env: {
viewportWidth: number
viewportHeight: number
colorScheme: string
pointer: string
}
// Subjects to extract. When BOTH nodeId and backendNodeId are supplied the
// selector is not re-resolved and the given pair is used as the only match.
subjects: Array<{ id: string; selector: string; nodeId?: number; backendNodeId?: number }>
requiredFacts: {
// Geometry runs unless this is explicitly false (undefined means "on").
geometry?: boolean
// Fragments are collected only when truthy AND geometry is not disabled.
fragments?: boolean
// Styles run unless explicitly false; an array restricts extraction to
// the listed properties, anything else requests all styles.
styles?: boolean | string[]
// Topology runs unless explicitly false; the array form is not treated
// differently from `true` by the extractor in this file.
topology?: boolean | string[]
// Not read by the extractor in this file.
text?: boolean
}
}
/**
* Provenance entry for a fact.
*
* CDPExtractor.extract emits one entry per recorded extraction step.
*/
export interface ProvenanceEntry {
// Sequential counter starting at 0, assigned in step order.
factId: number
// stepId of the extraction trace step this entry was derived from.
extractionStepId: number
// Numeric source code; the extractor always writes 1 (CDP protocol).
sourceKind: number
// The extractor writes the same stepId here as in extractionStepId.
sourceRef: number
}
/**
* Confidence entry for a fact.
*
* NOTE(review): nothing in this file produces these entries; snapshots are
* always built with an empty confidence list. The meaning of the numeric
* fields is defined elsewhere — confirm against the core contracts.
*/
export interface ConfidenceEntry {
factId: number
confidence: number
reasonCode: number
}
/**
* Extraction trace entry.
*
* One entry per pipeline step recorded by CDPExtractor.extract.
*/
export interface ExtractionTraceEntry {
// Step number as recorded by the extractor: 1 dom-tree, 2 selector-resolution,
// 3 geometry, 4 styles, 5 topology.
stepId: number
// Step label, e.g. 'dom-tree' or 'geometry'.
factKind: string
// Outcome of the step as judged from the diagnostics it produced.
status: 'ok' | 'partial' | 'error'
}
/**
* Extractor response.
*
* Canonical shape uses snapshots (array). The singular snapshot field is
* kept for backward compatibility but deprecated — use snapshots[0].
* CDPExtractor.extract places the same snapshot object in both fields.
*/
export interface ExtractorResponse {
// Copied from the request.
requestId: string
// 'error' if any diagnostic has severity 'error', otherwise 'partial' if any
// has severity 'warning', otherwise 'ok'. Always 'error' when attach fails.
status: 'ok' | 'partial' | 'error'
/** @deprecated Use snapshots[0] instead. Kept for backward compatibility. */
snapshot: GeometryWorldSnapshot
/** Canonical shape: array of snapshots for multi-state extraction. */
snapshots: GeometryWorldSnapshot[]
// Every warning/error collected during the run, in the order it occurred.
diagnostics: ExtractorDiagnostic[]
extractionTrace: {
// One entry per recorded step; empty when session attach fails.
steps: ExtractionTraceEntry[]
// Wall-clock bounds (Date.now() milliseconds) for each recorded step.
timings: Array<{ stepId: number; startMs: number; endMs: number }>
// The extractor in this file never appends to this list.
protocolCalls: Array<{ stepId: number; protocol: string; method: string; params: Record<string, unknown> }>
}
}
/**
* Diagnostic for extraction failures.
*
* Warnings downgrade the response status to 'partial'; errors to 'error'.
*/
export interface ExtractorDiagnostic {
// Machine-readable code, e.g. 'IMH_SELECTOR_NO_MATCH'.
code: DiagnosticCode
severity: 'warning' | 'error'
// Human-readable description including the underlying failure reason.
message: string
// Request subject id; the extractor sets it only on selector diagnostics.
subjectId?: string
// Selector text; the extractor sets it only on selector diagnostics.
selector?: string
}
/**
* Geometry world snapshot.
* Mirrors the geometry world schema from the core contracts.
*
* Most members are column-oriented tables: each property is one column and
* index i across the columns of a table describes row i. The builders in
* this file fill every column of a table from the same record list, so the
* columns of a table always have equal length.
*/
export interface GeometryWorldSnapshot {
// Set to the request's requestId by the builders in this file.
sceneId: string
// The builders in this file always write 'default'.
snapshotId: string
// Viewport size, colorScheme and pointer are copied from the request; the
// remaining fields are fixed defaults written by the builders in this file
// (deviceScaleFactor 1, hover false, reducedMotion 'no-preference',
// locale 'en', writingMode 'horizontal-tb').
env: {
viewportWidth: number
viewportHeight: number
deviceScaleFactor: number
colorScheme: string
pointer: string
hover: boolean
reducedMotion: string
locale: string
writingMode: string
}
// url comes from the request; the builders in this file write browserName
// 'chromium', an empty browserVersion and engine 'chromium-cdp'.
source: {
url: string
browserName: string
browserVersion: string
engine: string
// ISO-8601 timestamp string taken when the snapshot object is built.
extractedAt: string
}
// Shared string table; buildSnapshot stores the DOM result's string list here.
strings: string[]
// One row per resolved selector match.
subjects: {
// Row index of the subject (0..n-1).
ids: number[]
// CDP backendNodeId of the matched element.
domNodeId: number[]
// buildSnapshot always writes 1 (element).
subjectKind: number[]
// boxId of the subject's box, or 0 when no box was extracted for it
// (indistinguishable from a real boxId of 0).
primaryBoxId: number[]
// First fragmentId of the subject, or 0 when it has no fragments.
firstFragmentId: number[]
// Number of fragments recorded for the subject.
fragmentCount: number[]
// buildSnapshot always writes 0 for both text-run columns.
firstTextRunId: number[]
textRunCount: number[]
}
// Raw DOM extraction result as returned by extractDOM.
dom: DOMExtractionResult
// Left empty by the builders in this file.
frames: {
frameId: number[]
frameKind: number[]
ownerSubjectId: number[]
parentFrameId: number[]
originX: number[]
originY: number[]
axisMatrixStart: number[]
clipRectId: number[]
scrollContainerId: number[]
writingMode: number[]
}
// Flat matrix values as returned by extractTransforms.
matrices: {
values: number[]
}
// Left empty by the builders in this file.
rects: {
rectId: number[]
left: number[]
top: number[]
right: number[]
bottom: number[]
}
// Box edges derived from DOM.getBoxModel quads, one row per extracted box.
boxes: {
boxId: number[]
subjectId: number[]
frameId: number[]
borderLeft: number[]
borderTop: number[]
borderRight: number[]
borderBottom: number[]
paddingLeft: number[]
paddingTop: number[]
paddingRight: number[]
paddingBottom: number[]
contentLeft: number[]
contentTop: number[]
contentRight: number[]
contentBottom: number[]
}
// Same columns as `boxes`, filled from extractVisualBoxes
// (post-transform coordinates obtained via getBoundingClientRect).
visualBoxes: {
boxId: number[]
subjectId: number[]
frameId: number[]
borderLeft: number[]
borderTop: number[]
borderRight: number[]
borderBottom: number[]
paddingLeft: number[]
paddingTop: number[]
paddingRight: number[]
paddingBottom: number[]
contentLeft: number[]
contentTop: number[]
contentRight: number[]
contentBottom: number[]
}
// Client-rect fragments; populated only when the request asks for fragments.
fragments: {
fragmentId: number[]
subjectId: number[]
fragmentKind: number[]
boxLeft: number[]
boxTop: number[]
boxRight: number[]
boxBottom: number[]
lineIndex: number[]
flowIndex: number[]
parentFragmentId: number[]
}
// One row per transform record.
// NOTE(review): matrixStart/matrixLength presumably address matrices.values — confirm in extractTransforms.
transforms: {
transformId: number[]
subjectId: number[]
matrixStart: number[]
matrixLength: number[]
originX: number[]
originY: number[]
}
// Numeric style columns copied field-by-field from the style records;
// the numeric encodings are defined by the style extractor, not here.
styles: {
subjectId: number[]
display: number[]
position: number[]
zIndexKind: number[]
zIndexValue: number[]
overflowX: number[]
overflowY: number[]
opacity: number[]
visibility: number[]
containFlags: number[]
pointerEvents: number[]
lineHeight: number[]
fontFamilyStringId: number[]
fontSize: number[]
fontWeight: number[]
}
// Passed through unchanged from the topology extraction result.
topology: TopologyExtractionResult['topology']
scroll: TopologyExtractionResult['scroll']
clipping: TopologyExtractionResult['clipping']
// One entry per recorded extraction step; empty on attach failure.
provenance: ProvenanceEntry[]
// Never populated by the code in this file.
confidence: ConfidenceEntry[]
}
/**
 * Main extractor class.
 *
 * Orchestrates the full extraction pipeline:
 * 1. Attach CDP session and enable the DOM, CSS and Runtime domains
 * 2. Extract DOM tree (trace step 1)
 * 3. Resolve selectors to node IDs (trace step 2)
 * 4. Extract geometry: boxes, visual boxes, fragments, transforms (trace step 3)
 * 5. Extract styles (trace step 4)
 * 6. Extract topology (trace step 5)
 * 7. Build geometry world snapshot
 * 8. Return response with provenance and diagnostics
 */
export class CDPExtractor {
  private readonly sessionManager: CDPSessionManager

  constructor(sessionManager: CDPSessionManager) {
    this.sessionManager = sessionManager
  }

  /**
   * Execute extraction according to the request plan.
   *
   * Never rejects for extraction problems: every failure is converted into a
   * diagnostic and the pipeline continues with whatever data is available.
   *
   * @param request - The extraction plan (subjects, environment, required facts).
   * @returns A response whose status is 'error' if any error diagnostic was
   *   recorded, 'partial' if only warnings were recorded, and 'ok' otherwise.
   *   When the session cannot be attached the response carries an empty
   *   snapshot and an empty trace.
   */
  async extract(request: ExtractorRequest): Promise<ExtractorResponse> {
    const diagnostics: ExtractorDiagnostic[] = []
    const traceSteps: ExtractionTraceEntry[] = []
    const traceTimings: Array<{ stepId: number; startMs: number; endMs: number }> = []
    // Nothing appends to this list yet; it is returned so the response shape is stable.
    const traceCalls: Array<{
      stepId: number
      protocol: string
      method: string
      params: Record<string, unknown>
    }> = []

    const toMessage = (err: unknown): string => (err instanceof Error ? err.message : String(err))

    const recordStep = (
      stepId: number,
      factKind: string,
      status: 'ok' | 'partial' | 'error',
      startMs: number,
      endMs: number
    ): void => {
      traceSteps.push({ stepId, factKind, status })
      traceTimings.push({ stepId, startMs, endMs })
    }

    // A whole extraction phase threw: record a single error diagnostic.
    const reportFailure = (code: DiagnosticCode, label: string, err: unknown): void => {
      diagnostics.push({
        code,
        severity: 'error',
        message: `${label} extraction failed: ${toMessage(err)}`,
      })
    }

    // Individual nodes failed inside an otherwise successful phase: one warning per node.
    const reportPartials = (
      code: DiagnosticCode,
      label: string,
      errors: ReadonlyArray<{ backendNodeId: number; reason: string }>
    ): void => {
      for (const e of errors) {
        diagnostics.push({
          code,
          severity: 'warning',
          message: `${label} extraction failed for node ${e.backendNodeId}: ${e.reason}`,
        })
      }
    }

    const hasDiagnosticWithPrefix = (...prefixes: string[]): boolean =>
      diagnostics.some((d) => prefixes.some((prefix) => d.code.startsWith(prefix)))

    let session: CDPSession
    try {
      session = await this.sessionManager.attach()
      // Enable required CDP domains before extraction.
      // Use cached enablement to avoid redundant round-trips.
      await this.sessionManager.enableDomain('DOM')
      await this.sessionManager.enableDomain('CSS')
      await this.sessionManager.enableDomain('Runtime')
    } catch (err) {
      diagnostics.push({
        code: 'IMH_CDP_SESSION_ATTACH_FAILED',
        severity: 'error',
        message: `Failed to attach CDP session: ${toMessage(err)}`,
      })
      const emptySnapshot = createEmptySnapshot(request)
      return {
        requestId: request.requestId,
        status: 'error',
        snapshot: emptySnapshot,
        snapshots: [emptySnapshot],
        diagnostics,
        extractionTrace: {
          steps: traceSteps,
          timings: traceTimings,
          protocolCalls: traceCalls,
        },
      }
    }

    // --- Step 1: Extract DOM ---
    // Extract DOM first so the full tree is pushed to the frontend.
    // This ensures backendNodeIds returned by querySelectorAll remain valid.
    const domStepStart = Date.now()
    let domResult: DOMExtractionResult
    try {
      domResult = await extractDOM(session)
      recordStep(1, 'dom-tree', 'ok', domStepStart, Date.now())
    } catch (err) {
      reportFailure('IMH_DOM_EXTRACTION_FAILED', 'DOM', err)
      // Continue with an empty tree so later steps can still run.
      domResult = { nodes: [], strings: [], nodeIdToIndex: new Map(), rootIndex: 0 }
      recordStep(1, 'dom-tree', 'error', domStepStart, Date.now())
    }

    // --- Step 2: Resolve selectors ---
    const selectorStepStart = Date.now()
    const backendNodeIds: number[] = []
    const nodeIds: number[] = []
    const subjectIds: number[] = []
    const selectorDiagnosticsStart = diagnostics.length
    const resolvedSubjects: Array<{ id: string; selector: string; backendNodeId: number; nodeId: number }> = []

    // Resolve all selectors sequentially to avoid CDP race conditions
    // when multiple querySelectorAll calls run concurrently.
    for (const subject of request.subjects) {
      let matches: Awaited<ReturnType<typeof resolveSelector>>
      if (subject.nodeId !== undefined && subject.backendNodeId !== undefined) {
        // Callers that already resolved the selector pass both ids so we can
        // skip a duplicate querySelectorAll + describeNode pass.
        matches = [{ nodeId: subject.nodeId, backendNodeId: subject.backendNodeId }]
      } else {
        try {
          matches = await resolveSelector(session, subject.selector)
        } catch (err) {
          // Reported as a resolution failure regardless of the error text; an
          // empty error message must not be mistaken for "no match".
          diagnostics.push({
            code: 'IMH_SELECTOR_RESOLUTION_FAILED',
            severity: 'error',
            message: `Failed to resolve selector "${subject.selector}": ${toMessage(err)}`,
            subjectId: subject.id,
            selector: subject.selector,
          })
          continue
        }
      }

      if (matches.length === 0) {
        diagnostics.push({
          code: 'IMH_SELECTOR_NO_MATCH',
          severity: 'warning',
          message: `Selector "${subject.selector}" matched 0 elements.`,
          subjectId: subject.id,
          selector: subject.selector,
        })
        continue
      }

      // Every match becomes its own subject row; the row index is the subject id.
      for (const match of matches) {
        backendNodeIds.push(match.backendNodeId)
        nodeIds.push(match.nodeId)
        subjectIds.push(resolvedSubjects.length)
        resolvedSubjects.push({ ...subject, backendNodeId: match.backendNodeId, nodeId: match.nodeId })
      }
    }
    const selectorDiagnosticsAdded = diagnostics.length > selectorDiagnosticsStart
    recordStep(2, 'selector-resolution', selectorDiagnosticsAdded ? 'partial' : 'ok', selectorStepStart, Date.now())

    // --- Step 3: Extract Geometry ---
    const geometryStepStart = Date.now()
    let boxRecords: BoxRecord[] = []
    let fragmentRecords: FragmentRecord[] = []
    let transformRecords: TransformRecord[] = []
    let matrixValues: number[] = []
    let visualBoxRecords: BoxRecord[] = []
    if (request.requiredFacts.geometry !== false && backendNodeIds.length > 0) {
      try {
        const { boxes, errors } = await extractBoxModels(session, backendNodeIds, subjectIds)
        boxRecords = boxes
        reportPartials('IMH_BOX_MODEL_PARTIAL', 'Box model', errors)
      } catch (err) {
        reportFailure('IMH_BOX_MODEL_FAILED', 'Box model', err)
      }

      // Extract visual boxes (post-transform coordinates via getBoundingClientRect)
      try {
        const { boxes, errors } = await extractVisualBoxes(session, backendNodeIds, subjectIds)
        visualBoxRecords = boxes
        reportPartials('IMH_VISUAL_BOX_PARTIAL', 'Visual box', errors)
      } catch (err) {
        reportFailure('IMH_VISUAL_BOX_FAILED', 'Visual box', err)
      }

      if (request.requiredFacts.fragments) {
        try {
          const { fragments, errors } = await extractFragments(session, backendNodeIds, subjectIds)
          fragmentRecords = fragments
          reportPartials('IMH_FRAGMENT_PARTIAL', 'Fragment', errors)
        } catch (err) {
          reportFailure('IMH_FRAGMENT_FAILED', 'Fragment', err)
        }
      }

      try {
        const { transforms, matrices, errors } = await extractTransforms(session, backendNodeIds, subjectIds)
        transformRecords = transforms
        matrixValues = matrices
        reportPartials('IMH_TRANSFORM_PARTIAL', 'Transform', errors)
      } catch (err) {
        reportFailure('IMH_TRANSFORM_FAILED', 'Transform', err)
      }
    }
    // Visual-box diagnostics belong to the geometry step too, so they must
    // also downgrade its trace status.
    recordStep(
      3,
      'geometry',
      hasDiagnosticWithPrefix('IMH_BOX_MODEL', 'IMH_VISUAL_BOX', 'IMH_FRAGMENT', 'IMH_TRANSFORM') ? 'partial' : 'ok',
      geometryStepStart,
      Date.now()
    )

    // --- Step 4: Extract Styles ---
    const styleStepStart = Date.now()
    let styleRecords: StyleRecord[] = []
    if (request.requiredFacts.styles !== false && backendNodeIds.length > 0) {
      // An explicit property list narrows extraction; anything else means "all".
      const styleConfig: StyleExtractionConfig = Array.isArray(request.requiredFacts.styles)
        ? { all: false, properties: request.requiredFacts.styles }
        : { all: true }
      try {
        const { styles, errors, strings: updatedStrings } = await extractStyles(
          session,
          backendNodeIds,
          subjectIds,
          styleConfig,
          domResult.strings
        )
        styleRecords = styles
        // Style extraction returns the string table to use from here on.
        domResult.strings = updatedStrings
        reportPartials('IMH_STYLE_PARTIAL', 'Style', errors)
      } catch (err) {
        reportFailure('IMH_STYLE_FAILED', 'Style', err)
      }
    }
    recordStep(4, 'styles', hasDiagnosticWithPrefix('IMH_STYLE') ? 'partial' : 'ok', styleStepStart, Date.now())

    // --- Step 5: Extract Topology ---
    const topologyStepStart = Date.now()
    let topologyResult: TopologyExtractionResult = {
      scroll: [],
      clipping: [],
      topology: {
        containingBlockOf: [],
        nearestPositionedAncestorOf: [],
        scrollContainerOf: [],
        stackingContextOf: [],
        formattingContextOf: [],
        clippingRootOf: [],
        paintOrderBucket: [],
        paintOrderIndex: [],
      },
    }
    if (request.requiredFacts.topology !== false && backendNodeIds.length > 0) {
      try {
        const { result, errors } = await extractTopology(session, backendNodeIds, subjectIds)
        topologyResult = result
        reportPartials('IMH_TOPOLOGY_PARTIAL', 'Topology', errors)
      } catch (err) {
        reportFailure('IMH_TOPOLOGY_FAILED', 'Topology', err)
      }
    }
    recordStep(5, 'topology', hasDiagnosticWithPrefix('IMH_TOPOLOGY') ? 'partial' : 'ok', topologyStepStart, Date.now())

    // --- Build Geometry World Snapshot ---
    const snapshot = buildSnapshot(
      request,
      domResult,
      boxRecords,
      visualBoxRecords,
      fragmentRecords,
      transformRecords,
      matrixValues,
      styleRecords,
      topologyResult,
      resolvedSubjects
    )

    // Determine overall status: any error wins over warnings.
    const hasErrors = diagnostics.some((d) => d.severity === 'error')
    const hasWarnings = diagnostics.some((d) => d.severity === 'warning')
    const status: ExtractorResponse['status'] = hasErrors ? 'error' : hasWarnings ? 'partial' : 'ok'

    // Add provenance for extraction steps: one entry per recorded step,
    // numbered in step order.
    snapshot.provenance = traceSteps.map(
      (step, factId): ProvenanceEntry => ({
        factId,
        extractionStepId: step.stepId,
        sourceKind: 1, // CDP protocol
        sourceRef: step.stepId,
      })
    )

    return {
      requestId: request.requestId,
      status,
      snapshot,
      snapshots: [snapshot],
      diagnostics,
      extractionTrace: {
        steps: traceSteps,
        timings: traceTimings,
        protocolCalls: traceCalls,
      },
    }
  }
}
/**
 * Assemble a geometry world snapshot from the raw extraction records.
 *
 * Record lists are pivoted into column-oriented tables (one array per
 * field), and a subjects table is derived with one row per resolved subject.
 * Subjects without a box or without fragments get 0 in the corresponding
 * id columns. The frames and rects tables, provenance and confidence are
 * left empty.
 */
function buildSnapshot(
  request: ExtractorRequest,
  dom: DOMExtractionResult,
  boxes: BoxRecord[],
  visualBoxes: BoxRecord[],
  fragments: FragmentRecord[],
  transforms: TransformRecord[],
  matrices: number[],
  styles: StyleRecord[],
  topology: TopologyExtractionResult,
  resolvedSubjects: Array<{ id: string; selector: string; backendNodeId: number; nodeId: number }>
): GeometryWorldSnapshot {
  // Pull one field out of every record, producing a table column.
  const column = <R, K extends keyof R>(rows: R[], key: K): Array<R[K]> => rows.map((row) => row[key])

  // Pivot box records into the column layout shared by boxes and visualBoxes.
  const toBoxTable = (records: BoxRecord[]) => ({
    boxId: column(records, 'boxId'),
    subjectId: column(records, 'subjectId'),
    frameId: column(records, 'frameId'),
    borderLeft: column(records, 'borderLeft'),
    borderTop: column(records, 'borderTop'),
    borderRight: column(records, 'borderRight'),
    borderBottom: column(records, 'borderBottom'),
    paddingLeft: column(records, 'paddingLeft'),
    paddingTop: column(records, 'paddingTop'),
    paddingRight: column(records, 'paddingRight'),
    paddingBottom: column(records, 'paddingBottom'),
    contentLeft: column(records, 'contentLeft'),
    contentTop: column(records, 'contentTop'),
    contentRight: column(records, 'contentRight'),
    contentBottom: column(records, 'contentBottom'),
  })

  // Lookup: subject -> its box (the last box wins if a subject has several).
  const boxIdOfSubject = new Map<number, number>()
  boxes.forEach((box) => boxIdOfSubject.set(box.subjectId, box.boxId))

  // Lookup: subject -> its fragment ids, in extraction order.
  const fragmentIdsOfSubject = new Map<number, number[]>()
  for (const fragment of fragments) {
    const bucket = fragmentIdsOfSubject.get(fragment.subjectId)
    if (bucket === undefined) {
      fragmentIdsOfSubject.set(fragment.subjectId, [fragment.fragmentId])
    } else {
      bucket.push(fragment.fragmentId)
    }
  }

  // Subjects table: the row index doubles as the subject id.
  const subjects: GeometryWorldSnapshot['subjects'] = {
    ids: [],
    domNodeId: [],
    subjectKind: [],
    primaryBoxId: [],
    firstFragmentId: [],
    fragmentCount: [],
    firstTextRunId: [],
    textRunCount: [],
  }
  resolvedSubjects.forEach((subject, subjectIndex) => {
    const ownFragments = fragmentIdsOfSubject.get(subjectIndex)
    const hasFragments = ownFragments !== undefined && ownFragments.length > 0

    subjects.ids.push(subjectIndex)
    subjects.domNodeId.push(subject.backendNodeId)
    subjects.subjectKind.push(1) // element
    subjects.primaryBoxId.push(boxIdOfSubject.get(subjectIndex) ?? 0)
    subjects.firstFragmentId.push(hasFragments ? ownFragments[0] : 0)
    subjects.fragmentCount.push(hasFragments ? ownFragments.length : 0)
    // Text runs are not extracted; both columns stay at 0.
    subjects.firstTextRunId.push(0)
    subjects.textRunCount.push(0)
  })

  const { viewportWidth, viewportHeight, colorScheme, pointer } = request.env

  return {
    sceneId: request.requestId,
    snapshotId: 'default',
    env: {
      viewportWidth,
      viewportHeight,
      deviceScaleFactor: 1,
      colorScheme,
      pointer,
      hover: false,
      reducedMotion: 'no-preference',
      locale: 'en',
      writingMode: 'horizontal-tb',
    },
    source: {
      url: request.sceneTarget.url,
      browserName: 'chromium',
      browserVersion: '',
      engine: 'chromium-cdp',
      extractedAt: new Date().toISOString(),
    },
    strings: dom.strings,
    subjects,
    dom,
    frames: {
      frameId: [],
      frameKind: [],
      ownerSubjectId: [],
      parentFrameId: [],
      originX: [],
      originY: [],
      axisMatrixStart: [],
      clipRectId: [],
      scrollContainerId: [],
      writingMode: [],
    },
    matrices: { values: matrices },
    rects: {
      rectId: [],
      left: [],
      top: [],
      right: [],
      bottom: [],
    },
    boxes: toBoxTable(boxes),
    visualBoxes: toBoxTable(visualBoxes),
    fragments: {
      fragmentId: column(fragments, 'fragmentId'),
      subjectId: column(fragments, 'subjectId'),
      fragmentKind: column(fragments, 'fragmentKind'),
      boxLeft: column(fragments, 'boxLeft'),
      boxTop: column(fragments, 'boxTop'),
      boxRight: column(fragments, 'boxRight'),
      boxBottom: column(fragments, 'boxBottom'),
      lineIndex: column(fragments, 'lineIndex'),
      flowIndex: column(fragments, 'flowIndex'),
      parentFragmentId: column(fragments, 'parentFragmentId'),
    },
    transforms: {
      transformId: column(transforms, 'transformId'),
      subjectId: column(transforms, 'subjectId'),
      matrixStart: column(transforms, 'matrixStart'),
      matrixLength: column(transforms, 'matrixLength'),
      originX: column(transforms, 'originX'),
      originY: column(transforms, 'originY'),
    },
    styles: {
      subjectId: column(styles, 'subjectId'),
      display: column(styles, 'display'),
      position: column(styles, 'position'),
      zIndexKind: column(styles, 'zIndexKind'),
      zIndexValue: column(styles, 'zIndexValue'),
      overflowX: column(styles, 'overflowX'),
      overflowY: column(styles, 'overflowY'),
      opacity: column(styles, 'opacity'),
      visibility: column(styles, 'visibility'),
      containFlags: column(styles, 'containFlags'),
      pointerEvents: column(styles, 'pointerEvents'),
      lineHeight: column(styles, 'lineHeight'),
      fontFamilyStringId: column(styles, 'fontFamilyStringId'),
      fontSize: column(styles, 'fontSize'),
      fontWeight: column(styles, 'fontWeight'),
    },
    topology: topology.topology,
    scroll: topology.scroll,
    clipping: topology.clipping,
    provenance: [],
    confidence: [],
  }
}
/**
 * Produce a snapshot with every table present but empty.
 *
 * Used when extraction cannot start (session attach failure). Only the
 * request-derived header fields (sceneId, env, source.url) carry data.
 */
function createEmptySnapshot(request: ExtractorRequest): GeometryWorldSnapshot {
  // Factory rather than a shared constant so boxes and visualBoxes never alias arrays.
  const emptyBoxTable = (): GeometryWorldSnapshot['boxes'] => ({
    boxId: [],
    subjectId: [],
    frameId: [],
    borderLeft: [],
    borderTop: [],
    borderRight: [],
    borderBottom: [],
    paddingLeft: [],
    paddingTop: [],
    paddingRight: [],
    paddingBottom: [],
    contentLeft: [],
    contentTop: [],
    contentRight: [],
    contentBottom: [],
  })

  const { viewportWidth, viewportHeight, colorScheme, pointer } = request.env

  const env: GeometryWorldSnapshot['env'] = {
    viewportWidth,
    viewportHeight,
    deviceScaleFactor: 1,
    colorScheme,
    pointer,
    hover: false,
    reducedMotion: 'no-preference',
    locale: 'en',
    writingMode: 'horizontal-tb',
  }

  const source: GeometryWorldSnapshot['source'] = {
    url: request.sceneTarget.url,
    browserName: 'chromium',
    browserVersion: '',
    engine: 'chromium-cdp',
    extractedAt: new Date().toISOString(),
  }

  const subjects: GeometryWorldSnapshot['subjects'] = {
    ids: [],
    domNodeId: [],
    subjectKind: [],
    primaryBoxId: [],
    firstFragmentId: [],
    fragmentCount: [],
    firstTextRunId: [],
    textRunCount: [],
  }

  const frames: GeometryWorldSnapshot['frames'] = {
    frameId: [],
    frameKind: [],
    ownerSubjectId: [],
    parentFrameId: [],
    originX: [],
    originY: [],
    axisMatrixStart: [],
    clipRectId: [],
    scrollContainerId: [],
    writingMode: [],
  }

  const fragments: GeometryWorldSnapshot['fragments'] = {
    fragmentId: [],
    subjectId: [],
    fragmentKind: [],
    boxLeft: [],
    boxTop: [],
    boxRight: [],
    boxBottom: [],
    lineIndex: [],
    flowIndex: [],
    parentFragmentId: [],
  }

  const transforms: GeometryWorldSnapshot['transforms'] = {
    transformId: [],
    subjectId: [],
    matrixStart: [],
    matrixLength: [],
    originX: [],
    originY: [],
  }

  const styles: GeometryWorldSnapshot['styles'] = {
    subjectId: [],
    display: [],
    position: [],
    zIndexKind: [],
    zIndexValue: [],
    overflowX: [],
    overflowY: [],
    opacity: [],
    visibility: [],
    containFlags: [],
    pointerEvents: [],
    lineHeight: [],
    fontFamilyStringId: [],
    fontSize: [],
    fontWeight: [],
  }

  return {
    sceneId: request.requestId,
    snapshotId: 'default',
    env,
    source,
    strings: [],
    subjects,
    dom: { nodes: [], strings: [], nodeIdToIndex: new Map(), rootIndex: 0 },
    frames,
    matrices: { values: [] },
    rects: {
      rectId: [],
      left: [],
      top: [],
      right: [],
      bottom: [],
    },
    boxes: emptyBoxTable(),
    visualBoxes: emptyBoxTable(),
    fragments,
    transforms,
    styles,
    topology: {
      containingBlockOf: [],
      nearestPositionedAncestorOf: [],
      scrollContainerOf: [],
      stackingContextOf: [],
      formattingContextOf: [],
      clippingRootOf: [],
      paintOrderBucket: [],
      paintOrderIndex: [],
    },
    scroll: [],
    clipping: [],
    provenance: [],
    confidence: [],
  }
}
+398
View File
@@ -0,0 +1,398 @@
/**
* Geometry Extraction
*
* Captures box models, fragment boxes, and transform data for DOM nodes
* using CDP DOM.getBoxModel, DOM.resolveNode, and Runtime.callFunctionOn.
*
* We batch requests where possible: gather all target node IDs first,
* then request box models in bulk via parallel sends.
*/
import type { CDPSession } from './session.js'
/**
* Raw CDP box model for a single node, as returned in the `model` field of
* a DOM.getBoxModel response.
*/
export interface CDPBoxModel {
// Each of the four layers is a quad: eight numbers [x1,y1,x2,y2,x3,y3,x4,y4].
content: number[]
padding: number[]
border: number[]
// Present in the response but not read by extractBoxModels.
margin: number[]
// Not read by extractBoxModels.
width: number
height: number
// Not read by the extraction code in this file.
shapeOutside?: unknown
}
/**
* Canonical box geometry record for the geometry world.
*
* Holds the left/top/right/bottom edges of a node's border, padding and
* content boxes.
*/
export interface BoxRecord {
// Position of this record in the list it was produced in.
boxId: number
// Subject the box belongs to (taken from the caller-supplied subjectIds).
subjectId: number
// extractBoxModels writes 0; frame resolution happens later.
frameId: number
borderLeft: number
borderTop: number
borderRight: number
borderBottom: number
paddingLeft: number
paddingTop: number
paddingRight: number
paddingBottom: number
contentLeft: number
contentTop: number
contentRight: number
contentBottom: number
}
/**
* Fragment record for multi-line or multi-column content.
*
* extractFragments emits one record per rectangle returned by the
* element's getClientRects().
*/
export interface FragmentRecord {
// Position of this record in the fragments list.
fragmentId: number
// Subject the fragment belongs to.
subjectId: number
// extractFragments writes 1 (client rect fragment).
fragmentKind: number
boxLeft: number
boxTop: number
boxRight: number
boxBottom: number
// Index of the rectangle within its element's getClientRects() list.
lineIndex: number
// extractFragments always writes 0 for both of these.
flowIndex: number
parentFragmentId: number
}
/**
* Transform record for a subject.
*
* NOTE(review): the producer (extractTransforms) is not visible here;
* matrixStart/matrixLength presumably locate this subject's matrix inside
* the flat matrices array — confirm against the implementation.
*/
export interface TransformRecord {
transformId: number
subjectId: number
matrixStart: number
matrixLength: number
originX: number
originY: number
}
/**
* Geometry extraction result.
*
* Aggregate of the record lists produced by the individual geometry
* extractors in this module.
*/
export interface GeometryExtractionResult {
boxes: BoxRecord[]
fragments: FragmentRecord[]
transforms: TransformRecord[]
/** Flat array of matrix values (6 elements per 2D matrix) */
matrices: number[]
}
/**
 * Extract box models for a list of backend node IDs.
 *
 * Sends DOM.getBoxModel for each node in parallel to minimize round-trip
 * latency. Every returned quad is reduced to its axis-aligned bounding box
 * using all four vertices, so rotated, skewed or mirrored elements still
 * yield edges with left <= right and top <= bottom.
 *
 * @param session - CDP session used to send DOM.getBoxModel.
 * @param backendNodeIds - Nodes to measure.
 * @param subjectIds - Subject id for each node; parallel to backendNodeIds.
 * @returns `boxes` in input order (nodes whose request failed are omitted and
 *   boxId is the position within this list), plus one `errors` entry per
 *   failed node carrying the failure message.
 */
export async function extractBoxModels(
  session: CDPSession,
  backendNodeIds: number[],
  subjectIds: number[]
): Promise<{ boxes: BoxRecord[]; errors: Array<{ backendNodeId: number; reason: string }> }> {
  const boxes: BoxRecord[] = []
  const errors: Array<{ backendNodeId: number; reason: string }> = []

  // CDP returns quads as [x1,y1,x2,y2,x3,y3,x4,y4]. Vertex order depends on
  // the element's transform, so the bounds must consider all four vertices.
  const quadBounds = (quad: number[]): { left: number; top: number; right: number; bottom: number } => ({
    left: Math.min(quad[0], quad[2], quad[4], quad[6]),
    top: Math.min(quad[1], quad[3], quad[5], quad[7]),
    right: Math.max(quad[0], quad[2], quad[4], quad[6]),
    bottom: Math.max(quad[1], quad[3], quad[5], quad[7]),
  })

  // Batch: request all box models in parallel using backendNodeId.
  // Failures are captured per node so one bad node cannot reject the batch.
  const promises = backendNodeIds.map(async (backendNodeId, idx) => {
    try {
      const response = await session.send<{ model: CDPBoxModel }>('DOM.getBoxModel', {
        backendNodeId,
      })
      return { status: 'ok' as const, model: response.model, backendNodeId, idx }
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err)
      return { status: 'error' as const, error: reason, backendNodeId, idx }
    }
  })
  const results = await Promise.all(promises)

  for (const result of results) {
    if (result.status === 'error') {
      errors.push({ backendNodeId: result.backendNodeId, reason: result.error })
      continue
    }
    const border = quadBounds(result.model.border)
    const padding = quadBounds(result.model.padding)
    const content = quadBounds(result.model.content)
    boxes.push({
      boxId: boxes.length,
      subjectId: subjectIds[result.idx],
      frameId: 0, // Frame resolution happens later.
      borderLeft: border.left,
      borderTop: border.top,
      borderRight: border.right,
      borderBottom: border.bottom,
      paddingLeft: padding.left,
      paddingTop: padding.top,
      paddingRight: padding.right,
      paddingBottom: padding.bottom,
      contentLeft: content.left,
      contentTop: content.top,
      contentRight: content.right,
      contentBottom: content.bottom,
    })
  }
  return { boxes, errors }
}
/**
 * Collect client-rect fragments for a list of backend node IDs.
 *
 * For every node, DOM.resolveNode turns the backendNodeId into a remote
 * object and Runtime.callFunctionOn runs getClientRects() on it, returning
 * the rectangles by value. Nodes are processed in parallel; a node whose
 * calls fail is reported in `errors` and contributes no fragments.
 *
 * @param session - CDP session used for both protocol calls.
 * @param backendNodeIds - Nodes to inspect.
 * @param subjectIds - Subject id for each node; parallel to backendNodeIds.
 * @returns Fragments grouped by node in input order (fragmentId is the
 *   position in the list, lineIndex the position within the node's rect
 *   list), plus per-node failures.
 */
export async function extractFragments(
  session: CDPSession,
  backendNodeIds: number[],
  subjectIds: number[]
): Promise<{ fragments: FragmentRecord[]; errors: Array<{ backendNodeId: number; reason: string }> }> {
  type Rect = { left: number; top: number; right: number; bottom: number }

  // Function evaluated in the page with `this` bound to the element.
  const clientRectsFunction = [
    'function() {',
    'const rects = this.getClientRects()',
    'const boxes = []',
    'for (let i = 0; i < rects.length; i++) {',
    'boxes.push({',
    'left: rects[i].left,',
    'top: rects[i].top,',
    'right: rects[i].right,',
    'bottom: rects[i].bottom,',
    '})',
    '}',
    'return boxes',
    '}',
  ].join('\n')

  // Query one node; failures are folded into the outcome instead of thrown
  // so a single bad node cannot reject the whole batch.
  const queryNode = async (backendNodeId: number, position: number) => {
    try {
      const resolved = await session.send<{ object: { objectId: string } }>('DOM.resolveNode', {
        backendNodeId,
      })
      const evaluated = await session.send<{ result: { value: Rect[] } }>('Runtime.callFunctionOn', {
        objectId: resolved.object.objectId,
        functionDeclaration: clientRectsFunction,
        returnByValue: true,
      })
      return { ok: true as const, rects: evaluated.result.value, backendNodeId, position }
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err)
      return { ok: false as const, reason, backendNodeId, position }
    }
  }

  const outcomes = await Promise.all(backendNodeIds.map(queryNode))

  const fragments: FragmentRecord[] = []
  const errors: Array<{ backendNodeId: number; reason: string }> = []
  for (const outcome of outcomes) {
    if (!outcome.ok) {
      errors.push({ backendNodeId: outcome.backendNodeId, reason: outcome.reason })
      continue
    }
    const owner = subjectIds[outcome.position]
    // A missing value (nothing returned by value) simply yields no fragments.
    const rectCount = outcome.rects?.length ?? 0
    let rectIndex = 0
    while (rectIndex < rectCount) {
      const rect = outcome.rects![rectIndex]
      fragments.push({
        fragmentId: fragments.length,
        subjectId: owner,
        fragmentKind: 1, // 1 = client rect fragment
        boxLeft: rect.left,
        boxTop: rect.top,
        boxRight: rect.right,
        boxBottom: rect.bottom,
        lineIndex: rectIndex,
        flowIndex: 0,
        parentFragmentId: 0,
      })
      rectIndex += 1
    }
  }
  return { fragments, errors }
}
/**
 * Extract visual bounding boxes for a list of backend node IDs.
 *
 * Uses getBoundingClientRect() via Runtime.callFunctionOn to obtain
 * post-transform viewport coordinates. The single rect is copied into the
 * border, padding and content edges of the emitted BoxRecord, so all three
 * boxes are identical for a visual box.
 *
 * A node is reported in `errors` (and produces no box) when a CDP call
 * rejects, when the in-page function throws (signalled by
 * `exceptionDetails` on the response), or when no value is returned.
 * One bad node therefore never fails the whole batch.
 *
 * @param session - CDP session used to send commands.
 * @param backendNodeIds - Nodes to measure.
 * @param subjectIds - Subject id for each node; parallel to `backendNodeIds`.
 * @returns The collected boxes plus one error entry per failed node.
 */
export async function extractVisualBoxes(
  session: CDPSession,
  backendNodeIds: number[],
  subjectIds: number[]
): Promise<{ boxes: BoxRecord[]; errors: Array<{ backendNodeId: number; reason: string }> }> {
  type VisualRect = {
    left: number
    top: number
    right: number
    bottom: number
    width: number
    height: number
  }
  const boxes: BoxRecord[] = []
  const errors: Array<{ backendNodeId: number; reason: string }> = []

  const results = await Promise.all(
    backendNodeIds.map(async (backendNodeId, idx) => {
      try {
        const resolved = await session.send<{ object: { objectId: string } }>('DOM.resolveNode', {
          backendNodeId,
        })
        const response = await session.send<{
          result: { value?: VisualRect }
          exceptionDetails?: { text?: string; exception?: { description?: string } }
        }>('Runtime.callFunctionOn', {
          objectId: resolved.object.objectId,
          functionDeclaration: `function() {
const rect = this.getBoundingClientRect()
return {
left: rect.left,
top: rect.top,
right: rect.right,
bottom: rect.bottom,
width: rect.width,
height: rect.height,
}
}`,
          returnByValue: true,
        })
        // A throw inside the page resolves normally with exceptionDetails set.
        const thrown = response.exceptionDetails
        if (thrown) {
          const reason = thrown.exception?.description ?? thrown.text ?? 'getBoundingClientRect() threw in page'
          return { status: 'error' as const, error: reason, backendNodeId, idx }
        }
        const rect = response.result.value
        if (!rect) {
          return {
            status: 'error' as const,
            error: 'getBoundingClientRect() returned no value',
            backendNodeId,
            idx,
          }
        }
        return { status: 'ok' as const, rect, backendNodeId, idx }
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err)
        return { status: 'error' as const, error: reason, backendNodeId, idx }
      }
    })
  )

  for (const result of results) {
    if (result.status === 'error') {
      errors.push({ backendNodeId: result.backendNodeId, reason: result.error })
      continue
    }
    const { left, top, right, bottom } = result.rect
    boxes.push({
      boxId: boxes.length,
      subjectId: subjectIds[result.idx],
      frameId: 0,
      borderLeft: left,
      borderTop: top,
      borderRight: right,
      borderBottom: bottom,
      paddingLeft: left,
      paddingTop: top,
      paddingRight: right,
      paddingBottom: bottom,
      contentLeft: left,
      contentTop: top,
      contentRight: right,
      contentBottom: bottom,
    })
  }
  return { boxes, errors }
}
/**
 * Extract CSS transform matrices for a list of backend node IDs.
 *
 * For each node the computed `transform` and `transform-origin` are read in
 * the page. The matrix values are appended to the flat `matrices` array and
 * a TransformRecord points at them through `matrixStart` / `matrixLength`.
 * Nodes whose transform is `none` get the identity matrix [1, 0, 0, 1, 0, 0].
 *
 * NOTE(review): only the 2D `matrix(...)` form is parsed. A computed value of
 * the form `matrix3d(...)` does not match the regex and therefore also yields
 * the identity matrix — confirm whether 3D transforms need to be captured.
 *
 * A node is reported in `errors` (and produces no record) when a CDP call
 * rejects, when the in-page function throws (signalled by
 * `exceptionDetails` on the response), or when no value is returned.
 *
 * @param session - CDP session used to send commands.
 * @param backendNodeIds - Nodes to inspect.
 * @param subjectIds - Subject id for each node; parallel to `backendNodeIds`.
 * @returns Transform records, the flat matrix storage, and per-node errors.
 */
export async function extractTransforms(
  session: CDPSession,
  backendNodeIds: number[],
  subjectIds: number[]
): Promise<{
  transforms: TransformRecord[]
  matrices: number[]
  errors: Array<{ backendNodeId: number; reason: string }>
}> {
  type TransformValue = { matrix: number[]; originX: number; originY: number }
  const transforms: TransformRecord[] = []
  const matrices: number[] = []
  const errors: Array<{ backendNodeId: number; reason: string }> = []

  const results = await Promise.all(
    backendNodeIds.map(async (backendNodeId, idx) => {
      try {
        const resolved = await session.send<{ object: { objectId: string } }>('DOM.resolveNode', {
          backendNodeId,
        })
        const response = await session.send<{
          result: { value?: TransformValue }
          exceptionDetails?: { text?: string; exception?: { description?: string } }
        }>('Runtime.callFunctionOn', {
          objectId: resolved.object.objectId,
          functionDeclaration: `function() {
const style = window.getComputedStyle(this)
const transform = style.transform
let matrix = [1, 0, 0, 1, 0, 0]
if (transform && transform !== 'none') {
const match = transform.match(/matrix\\(([^)]+)\\)/)
if (match) {
matrix = match[1].split(',').map(Number)
}
}
const origin = style.transformOrigin.split(' ').map(parseFloat)
return {
matrix,
originX: origin[0] || 0,
originY: origin[1] || 0,
}
}`,
          returnByValue: true,
        })
        // A throw inside the page resolves normally with exceptionDetails set.
        const thrown = response.exceptionDetails
        if (thrown) {
          const reason = thrown.exception?.description ?? thrown.text ?? 'getComputedStyle() threw in page'
          return { status: 'error' as const, error: reason, backendNodeId, idx }
        }
        const data = response.result.value
        if (!data) {
          return {
            status: 'error' as const,
            error: 'Transform query returned no value',
            backendNodeId,
            idx,
          }
        }
        return { status: 'ok' as const, data, backendNodeId, idx }
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err)
        return { status: 'error' as const, error: reason, backendNodeId, idx }
      }
    })
  )

  for (const result of results) {
    if (result.status === 'error') {
      errors.push({ backendNodeId: result.backendNodeId, reason: result.error })
      continue
    }
    const matrix = result.data.matrix ?? [1, 0, 0, 1, 0, 0]
    // Record where this node's values begin before appending them.
    const matrixStart = matrices.length
    matrices.push(...matrix)
    transforms.push({
      transformId: transforms.length,
      subjectId: subjectIds[result.idx],
      matrixStart,
      matrixLength: matrix.length,
      originX: result.data.originX ?? 0,
      originY: result.data.originY ?? 0,
    })
  }
  return { transforms, matrices, errors }
}
+76
View File
@@ -0,0 +1,76 @@
/**
* imhotep-cdp — Chrome DevTools Protocol extraction layer
*
* Captures browser truth for geometry, topology, and style facts.
* Returns raw browser data matching the geometry world schema.
*/
// Session management
export {
CDPSessionManager,
createPlaywrightCDPSession,
createSessionManager,
type CDPSession,
type CDPSessionFactory,
} from './session.js'
// DOM extraction
export {
extractDOM,
resolveSelector,
resolveOneSelector,
type CDPDOMNode,
type DOMNodeRecord,
type DOMExtractionResult,
} from './dom.js'
// Geometry extraction
export {
extractBoxModels,
extractFragments,
extractTransforms,
type BoxRecord,
type FragmentRecord,
type TransformRecord,
type GeometryExtractionResult,
type CDPBoxModel,
} from './geometry.js'
// Style extraction
export {
extractStyles,
DisplayEnum,
PositionEnum,
OverflowEnum,
VisibilityEnum,
PointerEventsEnum,
type StyleRecord,
type StyleExtractionConfig,
} from './styles.js'
// Topology extraction
export {
extractTopology,
type ScrollRecord,
type ClippingRecord,
type TopologyRecord,
type TopologyExtractionResult,
} from './topology.js'
// Canonical adapter
export {
adaptSnapshotToCanonical,
type CanonicalGeometryWorld,
} from './canonical-adapter.js'
// Main orchestrator
export {
CDPExtractor,
type ExtractorRequest,
type ExtractorResponse,
type ExtractorDiagnostic,
type GeometryWorldSnapshot,
type ProvenanceEntry,
type ConfidenceEntry,
type ExtractionTraceEntry,
} from './extractor.js'
+149
View File
@@ -0,0 +1,149 @@
/**
* CDP Session Management
*
* Wraps Playwright's CDP session access to provide a typed, injectable
* interface for sending Chrome DevTools Protocol commands.
*
* Design: dependency injection over optional imports. The extractor receives
* a session factory rather than importing Playwright directly.
*/
/**
 * Minimal CDP session interface. Any object that can send CDP commands
 * and return JSON-like responses satisfies this contract.
 *
 * The shape matches what `createPlaywrightCDPSession` returns, but nothing
 * here depends on Playwright types, so a hand-written stub can be injected.
 */
export interface CDPSession {
/**
 * Send the CDP command `method` with optional `params`.
 * The response type `T` is chosen by the caller; this interface does not
 * constrain it.
 */
send<T = unknown>(method: string, params?: Record<string, unknown>): Promise<T>
/** Register `handler` to be called with the payload of `event`. */
on(event: string, handler: (payload: unknown) => void): void
/** Remove a handler for `event`; presumably one previously passed to `on`. */
off(event: string, handler: (payload: unknown) => void): void
/** Detach the session; resolves once detaching has completed. */
detach(): Promise<void>
}
/**
 * Factory that creates a CDP session for a given Playwright page.
 * This is injected so the package does not hard-depend on Playwright.
 *
 * `pageRef` is deliberately typed `unknown`: the factory implementation is
 * the only place that needs to know the concrete page type.
 */
export type CDPSessionFactory = (pageRef: unknown) => Promise<CDPSession>
/**
 * Session factory backed by Playwright.
 *
 * Example:
 *   const session = await createPlaywrightCDPSession(page)
 *
 * `page` must be a Playwright Page object. It is accepted as `unknown` and
 * accessed structurally (`page.context().newCDPSession(page)`) so that this
 * module carries no import of Playwright itself.
 */
export async function createPlaywrightCDPSession(
  page: unknown
): Promise<CDPSession> {
  // Structural view of the two Playwright members used below.
  type BrowserContextLike = { newCDPSession: (p: unknown) => Promise<CDPSession> }
  type PageLike = { context: () => BrowserContextLike }

  const browserContext = (page as PageLike).context()
  return await browserContext.newCDPSession(page)
}
/**
 * Session manager that owns the lifecycle of a CDP session.
 *
 * Handles attach, detach, and basic error recovery: a failed attach is not
 * cached, so the next call retries. Caches the document root nodeId and the
 * set of enabled domains to minimize round-trips; both caches are scoped to
 * one session and are dropped on attach and on detach.
 */
export class CDPSessionManager {
  private session: CDPSession | null = null
  /** In-flight or completed attach, shared by concurrent callers. */
  private attachPromise: Promise<CDPSession> | null = null
  private readonly factory: CDPSessionFactory
  private readonly pageRef: unknown
  private documentRootNodeId: number | null = null
  private enabledDomains = new Set<string>()

  /**
   * @param factory - Creates the session on first use.
   * @param pageRef - Opaque page handle passed through to `factory`.
   */
  constructor(factory: CDPSessionFactory, pageRef: unknown) {
    this.factory = factory
    this.pageRef = pageRef
  }

  /**
   * Ensure a session is attached. Idempotent and concurrency-safe:
   * concurrent callers share one factory invocation.
   *
   * If the factory rejects, the rejection is propagated to every waiting
   * caller and the cached promise is cleared so a later call can retry.
   */
  async attach(): Promise<CDPSession> {
    if (this.session) return this.session
    if (this.attachPromise) return this.attachPromise

    const pending: Promise<CDPSession> = this.factory(this.pageRef).then(
      (s) => {
        this.session = s
        // Cached facts belong to the previous session, if any.
        this.documentRootNodeId = null
        this.enabledDomains.clear()
        return s
      },
      (err: unknown) => {
        // Do not keep a rejected promise around, or attach could never recover.
        if (this.attachPromise === pending) this.attachPromise = null
        throw err
      }
    )
    this.attachPromise = pending
    return pending
  }

  /**
   * Get cached document root nodeId, or fetch it via DOM.getDocument and
   * cache it.
   */
  async getDocumentRootNodeId(): Promise<number> {
    if (this.documentRootNodeId !== null) {
      return this.documentRootNodeId
    }
    const s = await this.attach()
    const result = await s.send<{ root: { nodeId: number } }>('DOM.getDocument')
    this.documentRootNodeId = result.root.nodeId
    return this.documentRootNodeId
  }

  /**
   * Enable a CDP domain (sends `<domain>.enable`) if not already enabled
   * on the current session.
   */
  async enableDomain(domain: string): Promise<void> {
    if (this.enabledDomains.has(domain)) {
      return
    }
    const s = await this.attach()
    await s.send(`${domain}.enable`)
    this.enabledDomains.add(domain)
  }

  /**
   * Get the current session.
   *
   * @throws Error if no session is attached.
   */
  getSession(): CDPSession {
    if (!this.session) {
      throw new Error('CDP session not attached. Call attach() first.')
    }
    return this.session
  }

  /**
   * Send a CDP command, automatically attaching if needed.
   */
  async send<T = unknown>(
    method: string,
    params?: Record<string, unknown>
  ): Promise<T> {
    const s = await this.attach()
    return s.send<T>(method, params)
  }

  /**
   * Detach and clean up the session.
   *
   * All manager state (session, pending attach, cached root nodeId, enabled
   * domains) is reset before the underlying detach is awaited, so the
   * manager can re-attach cleanly even if that call rejects. The rejection
   * is still propagated to the caller.
   */
  async detach(): Promise<void> {
    const current = this.session
    this.session = null
    this.attachPromise = null
    this.documentRootNodeId = null
    this.enabledDomains.clear()
    if (current) {
      await current.detach()
    }
  }
}
/**
 * Build a CDPSessionManager for `page` that creates its session through
 * the Playwright-backed factory.
 */
export function createSessionManager(page: unknown): CDPSessionManager {
  const manager = new CDPSessionManager(createPlaywrightCDPSession, page)
  return manager
}
+247
View File
@@ -0,0 +1,247 @@
/**
* Computed Style Extraction
*
* Captures only the style properties required by the active extraction plan.
* Uses Runtime.callFunctionOn with objectIds resolved from backendNodeIds
* to avoid CSS domain nodeId validity issues.
*/
import type { CDPSession } from './session.js'
/**
 * Compact style record for a subject.
 * Mirrors the geometry world `styles` table schema.
 *
 * Keyword-valued properties are stored as the numeric codes of the enum
 * tables in this module; numeric properties are stored as parsed numbers.
 */
export interface StyleRecord {
/** Subject this row describes. */
subjectId: number
/** DisplayEnum code. */
display: number
/** PositionEnum code. */
position: number
/** 0 = z-index is `auto`, 1 = z-index is an integer. */
zIndexKind: number
/** Integer z-index when zIndexKind is 1, otherwise 0. */
zIndexValue: number
/** OverflowEnum code for the x axis. */
overflowX: number
/** OverflowEnum code for the y axis. */
overflowY: number
/** Computed opacity as a number. */
opacity: number
/** VisibilityEnum code. */
visibility: number
/** Bitfield built from `contain`: layout = 1, paint = 2, size = 4, style = 8. */
containFlags: number
/** PointerEventsEnum code. */
pointerEvents: number
/** Leading number parsed from the computed line-height string. */
lineHeight: number
/** Index of the font-family string in the shared string table. */
fontFamilyStringId: number
/** Leading number parsed from the computed font-size string. */
fontSize: number
/** Computed font-weight parsed as an integer. */
fontWeight: number
}
/**
 * Enum mappings for compact storage.
 *
 * Each table maps a computed CSS keyword to the small integer stored in a
 * StyleRecord. Keywords missing from a table are resolved by the extractor's
 * fallback (see the note on each table).
 */
// Display keywords. `other` (99) is the catch-all used for any computed
// display value that has no entry of its own.
export const DisplayEnum: Record<string, number> = {
none: 1,
block: 2,
inline: 3,
'inline-block': 4,
flex: 5,
'inline-flex': 6,
grid: 7,
'inline-grid': 8,
table: 9,
'table-cell': 10,
contents: 11,
other: 99,
}
// Position keywords. The extractor falls back to `static` for unknown values.
export const PositionEnum: Record<string, number> = {
static: 1,
relative: 2,
absolute: 3,
fixed: 4,
sticky: 5,
}
// Overflow keywords, used for both axes. The extractor falls back to
// `visible` for unknown values.
export const OverflowEnum: Record<string, number> = {
visible: 1,
hidden: 2,
clip: 3,
scroll: 4,
auto: 5,
}
// Visibility keywords. The extractor falls back to `visible` for unknown values.
export const VisibilityEnum: Record<string, number> = {
visible: 1,
hidden: 2,
collapse: 3,
}
// Pointer-events keywords. Only `auto` and `none` are distinguished; the
// extractor maps every other value to `auto`.
export const PointerEventsEnum: Record<string, number> = {
auto: 1,
none: 2,
}
/**
 * Style extraction configuration.
 *
 * Properties that are filtered out are not omitted from the StyleRecord;
 * they are filled with the extractor's built-in default for that property.
 */
export interface StyleExtractionConfig {
/**
 * If true, extract all known style properties.
 * Treated as true when omitted, in which case `properties` is ignored.
 */
all?: boolean
/**
 * Specific property names to extract; consulted only when `all` is false.
 * Names are the camelCase keys the extractor reads, e.g. 'zIndex' or
 * 'overflowX'.
 */
properties?: string[]
}
/**
 * Extract computed styles for a list of backend node IDs.
 *
 * Resolves each backendNodeId to an objectId, then uses
 * Runtime.callFunctionOn to read computed styles via getComputedStyle
 * in the browser. This avoids CSS domain nodeId validity issues.
 *
 * Keyword values are encoded with the enum tables of this module; font
 * families are interned into `strings`, which is mutated in place and also
 * returned. Properties excluded by `config` are filled with defaults.
 *
 * A node is reported in `errors` (and produces no record) when it cannot be
 * resolved, when a CDP call rejects, when the in-page function throws
 * (signalled by `exceptionDetails`), or when no value is returned.
 *
 * @param session - CDP session used to send commands.
 * @param backendNodeIds - Nodes to inspect.
 * @param subjectIds - Subject id for each node; parallel to `backendNodeIds`.
 * @param config - Which properties to read; `all` defaults to true.
 * @param strings - Shared string table; new font families are appended.
 * @returns Style records, per-node errors, and the (same) string table.
 */
export async function extractStyles(
  session: CDPSession,
  backendNodeIds: number[],
  subjectIds: number[],
  config: StyleExtractionConfig,
  strings: string[]
): Promise<{
  styles: StyleRecord[]
  errors: Array<{ backendNodeId: number; reason: string }>
  strings: string[]
}> {
  const styles: StyleRecord[] = []
  const errors: Array<{ backendNodeId: number; reason: string }> = []

  // Reverse index over the string table so interning is O(1).
  const stringIndex = new Map<string, number>()
  strings.forEach((s, i) => stringIndex.set(s, i))
  function getStringId(value: string): number {
    const known = stringIndex.get(value)
    if (known !== undefined) return known
    const id = strings.length
    strings.push(value)
    stringIndex.set(value, id)
    return id
  }

  /** Parse a leading number out of a CSS value, with a fallback. */
  function parseNumber(value: string, defaultValue = 0): number {
    if (!value || value === 'none' || value === 'auto') return defaultValue
    const parsed = parseFloat(value)
    return Number.isNaN(parsed) ? defaultValue : parsed
  }

  // contain bitfield: layout = 1, paint = 2, size = 4, style = 8.
  // `strict` is size + layout + paint + style; `content` is layout + paint +
  // style (everything except size).
  const containBits = new Map<string, number>([
    ['layout', 1],
    ['paint', 2],
    ['size', 4],
    ['style', 8],
    ['strict', 1 | 2 | 4 | 8],
    ['content', 1 | 2 | 8],
  ])

  // Determine which properties we need.
  const wantAll = config.all ?? true
  const wantProps = new Set(config.properties ?? [])

  // Resolve backendNodeIds to objectIds ('' marks a failed resolution).
  const objectIds: string[] = []
  for (const backendNodeId of backendNodeIds) {
    try {
      const resolved = await session.send<{ object: { objectId: string } }>('DOM.resolveNode', {
        backendNodeId,
      })
      objectIds.push(resolved.object.objectId)
    } catch {
      objectIds.push('')
    }
  }

  const results = await Promise.all(
    backendNodeIds.map(async (backendNodeId, idx) => {
      const objectId = objectIds[idx]
      if (!objectId) {
        return {
          status: 'error' as const,
          error: 'Failed to resolve backendNodeId to objectId',
          backendNodeId,
          idx,
        }
      }
      try {
        const response = await session.send<{
          result: { value?: Record<string, string> }
          exceptionDetails?: { text?: string; exception?: { description?: string } }
        }>('Runtime.callFunctionOn', {
          objectId,
          functionDeclaration: `function() {
const s = window.getComputedStyle(this);
return {
display: s.display,
position: s.position,
zIndex: s.zIndex,
overflowX: s.overflowX,
overflowY: s.overflowY,
opacity: s.opacity,
visibility: s.visibility,
contain: s.contain,
pointerEvents: s.pointerEvents,
lineHeight: s.lineHeight,
fontFamily: s.fontFamily,
fontSize: s.fontSize,
fontWeight: s.fontWeight,
};
}`,
          returnByValue: true,
        })
        // A throw inside the page resolves normally with exceptionDetails set.
        const thrown = response.exceptionDetails
        if (thrown) {
          const reason = thrown.exception?.description ?? thrown.text ?? 'getComputedStyle() threw in page'
          return { status: 'error' as const, error: reason, backendNodeId, idx }
        }
        const computedStyle = response.result.value
        if (!computedStyle) {
          return {
            status: 'error' as const,
            error: 'getComputedStyle() returned no value',
            backendNodeId,
            idx,
          }
        }
        return { status: 'ok' as const, computedStyle, backendNodeId, idx }
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err)
        return { status: 'error' as const, error: reason, backendNodeId, idx }
      }
    })
  )

  for (const result of results) {
    if (result.status === 'error') {
      errors.push({ backendNodeId: result.backendNodeId, reason: result.error })
      continue
    }
    const computed = result.computedStyle
    // Read one property, honouring the config filter.
    const read = (name: string, defaultValue = ''): string => {
      if (!wantAll && !wantProps.has(name)) return defaultValue
      return computed[name] ?? defaultValue
    }

    const displayRaw = read('display', 'block')
    const positionRaw = read('position', 'static')
    const zIndexRaw = read('zIndex', 'auto')
    const overflowXRaw = read('overflowX', 'visible')
    const overflowYRaw = read('overflowY', 'visible')
    const opacityRaw = read('opacity', '1')
    const visibilityRaw = read('visibility', 'visible')
    const containRaw = read('contain', 'none')
    const pointerEventsRaw = read('pointerEvents', 'auto')
    const lineHeightRaw = read('lineHeight', 'normal')
    const fontFamilyRaw = read('fontFamily', '')
    const fontSizeRaw = read('fontSize', '16px')
    const fontWeightRaw = read('fontWeight', '400')

    // z-index kind: 0 = auto, 1 = integer. Anything that does not parse as
    // an integer is treated as auto so NaN never reaches the table.
    const zIndexParsed = zIndexRaw === 'auto' ? Number.NaN : parseInt(zIndexRaw, 10)
    const zIndexKind = Number.isNaN(zIndexParsed) ? 0 : 1
    const zIndexValue = zIndexKind === 1 ? zIndexParsed : 0

    // contain flags bitfield
    let containFlags = 0
    if (containRaw !== 'none') {
      for (const part of containRaw.split(' ')) {
        containFlags |= containBits.get(part) ?? 0
      }
    }

    styles.push({
      subjectId: subjectIds[result.idx],
      display: DisplayEnum[displayRaw] ?? DisplayEnum.other,
      position: PositionEnum[positionRaw] ?? PositionEnum.static,
      zIndexKind,
      zIndexValue,
      overflowX: OverflowEnum[overflowXRaw] ?? OverflowEnum.visible,
      overflowY: OverflowEnum[overflowYRaw] ?? OverflowEnum.visible,
      opacity: parseNumber(opacityRaw, 1),
      visibility: VisibilityEnum[visibilityRaw] ?? VisibilityEnum.visible,
      containFlags,
      pointerEvents: PointerEventsEnum[pointerEventsRaw] ?? PointerEventsEnum.auto,
      lineHeight: parseNumber(lineHeightRaw, 1.2),
      fontFamilyStringId: getStringId(fontFamilyRaw),
      fontSize: parseNumber(fontSizeRaw, 16),
      fontWeight: parseInt(fontWeightRaw, 10) || 400,
    })
  }
  return { styles, errors, strings }
}
+326
View File
@@ -0,0 +1,326 @@
/**
* Topology Extraction
*
* Captures non-planar layout facts: scroll containers, clipping chains,
* stacking contexts, containing blocks, and formatting contexts.
*
* We extract these in bulk via a single Runtime.evaluate script that
* walks the DOM and computes topology relationships in the browser,
* then returns compact tables.
*/
import type { CDPSession } from './session.js'
/**
 * Scroll container record.
 *
 * One record is produced per extracted element that the topology script
 * classifies as a scroll container. The numeric fields are copied from the
 * element's DOM properties of the same name.
 */
export interface ScrollRecord {
/** Id the topology script associates with the container element. */
containerId: number
scrollLeft: number
scrollTop: number
scrollWidth: number
scrollHeight: number
clientWidth: number
clientHeight: number
}
/**
 * Clipping chain node.
 *
 * One record is produced per extracted element that the topology script
 * classifies as clipping. The clip rectangle is the element's
 * getBoundingClientRect() at extraction time.
 */
export interface ClippingRecord {
/** Position of this record in the clipping list. */
clipNodeId: number
/** Id the topology script associates with the clipping element. */
subjectId: number
/** Kind of clip; the topology script currently always writes 1. */
clipKind: number
clipLeft: number
clipTop: number
clipRight: number
clipBottom: number
/** Parent clip node; the topology script currently always writes 0. */
parentClipNodeId: number
}
/**
 * Topology relation record.
 *
 * A set of parallel arrays: index i in every array refers to the same
 * extracted element. The `*Of` arrays hold the id of the related ancestor,
 * or 0 when there is none or when that ancestor is not itself one of the
 * extracted elements.
 */
export interface TopologyRecord {
/** Nearest ancestor with a non-static position. */
containingBlockOf: number[]
/** Nearest positioned ancestor. */
nearestPositionedAncestorOf: number[]
/** Nearest ancestor classified as a scroll container. */
scrollContainerOf: number[]
/** Nearest ancestor that creates a stacking context. */
stackingContextOf: number[]
/** The element's parent element. */
formattingContextOf: number[]
/** Nearest ancestor classified as clipping. */
clippingRootOf: number[]
/** 2 when the element itself creates a stacking context, otherwise 1. */
paintOrderBucket: number[]
/** Position of the element in the script's iteration order. */
paintOrderIndex: number[]
}
/**
 * Full topology extraction result.
 *
 * This is the exact shape returned by value from the in-page topology
 * script, so it must stay JSON-serializable.
 */
export interface TopologyExtractionResult {
/** Scroll containers found among the extracted elements. */
scroll: ScrollRecord[]
/** Clipping elements found among the extracted elements. */
clipping: ClippingRecord[]
/** Per-element relation arrays. */
topology: TopologyRecord
}
/** Build an empty, well-formed topology result. */
function emptyTopologyResult(): TopologyExtractionResult {
  return {
    scroll: [],
    clipping: [],
    topology: {
      containingBlockOf: [],
      nearestPositionedAncestorOf: [],
      scrollContainerOf: [],
      stackingContextOf: [],
      formattingContextOf: [],
      clippingRootOf: [],
      paintOrderBucket: [],
      paintOrderIndex: [],
    },
  }
}

/**
 * Extract topology facts for a list of elements.
 *
 * Steps:
 *  1. Resolve every backendNodeId to a RemoteObject (one round-trip each).
 *  2. Tag each resolved element with a temporary
 *     `data-imhotep-backend-id` attribute (one round-trip each).
 *  3. Run a single Runtime.evaluate script that finds the tagged elements
 *     and computes all topology relationships in the page.
 *  4. Remove the temporary attributes again (one round-trip each). This
 *     runs whether or not step 3 succeeded.
 *
 * Every id in the returned tables is the backendNodeId read back from the
 * temporary attribute, and rows follow the order in which
 * `querySelectorAll` returns the tagged elements.
 *
 * NOTE(review): `subjectIds` is accepted for signature parity with the
 * other extractors but is not used here — confirm that callers expect
 * backendNodeIds in the result.
 *
 * A node is reported in `errors` when it cannot be resolved or tagged. If
 * the topology script itself fails (CDP rejection, in-page exception, or
 * no value returned), every tagged node is reported and an empty result is
 * returned.
 *
 * @param session - CDP session used to send commands.
 * @param backendNodeIds - Elements to analyse.
 * @param subjectIds - Currently unused; see the note above.
 * @returns The topology tables plus per-node errors.
 */
export async function extractTopology(
  session: CDPSession,
  backendNodeIds: number[],
  subjectIds: number[]
): Promise<{
  result: TopologyExtractionResult
  errors: Array<{ backendNodeId: number; reason: string }>
}> {
  type ExceptionDetails = { text?: string; exception?: { description?: string } }
  const describeException = (details: ExceptionDetails, fallback: string): string =>
    details.exception?.description ?? details.text ?? fallback

  const errors: Array<{ backendNodeId: number; reason: string }> = []
  if (backendNodeIds.length === 0) {
    return { result: emptyTopologyResult(), errors }
  }

  // Step 1: resolve nodes. '' marks a node that could not be resolved.
  const objectIds: string[] = []
  for (const backendNodeId of backendNodeIds) {
    let objectId = ''
    try {
      const resolved = await session.send<{ object: { objectId: string } }>('DOM.resolveNode', {
        backendNodeId,
      })
      objectId = resolved.object.objectId || ''
    } catch {
      objectId = ''
    }
    if (!objectId) {
      errors.push({ backendNodeId, reason: 'Failed to resolve backendNodeId to objectId' })
    }
    objectIds.push(objectId)
  }

  // Step 2: inject temporary data attributes so the topology script can
  // correlate elements with their backendNodeIds.
  const tagged: number[] = []
  for (let i = 0; i < backendNodeIds.length; i++) {
    const objectId = objectIds[i]
    if (!objectId) continue
    const backendNodeId = backendNodeIds[i]
    try {
      const response = await session.send<{ exceptionDetails?: ExceptionDetails } | undefined>(
        'Runtime.callFunctionOn',
        {
          objectId,
          functionDeclaration: `function(id) { this.setAttribute('data-imhotep-backend-id', id) }`,
          arguments: [{ value: String(backendNodeId) }],
        }
      )
      if (response?.exceptionDetails) {
        errors.push({
          backendNodeId,
          reason: describeException(response.exceptionDetails, 'Failed to tag element'),
        })
        continue
      }
      tagged.push(backendNodeId)
    } catch (err) {
      // The node is simply absent from the result; record why.
      errors.push({ backendNodeId, reason: err instanceof Error ? err.message : String(err) })
    }
  }

  const script = `
(function() {
  const subjects = Array.from(document.querySelectorAll('[data-imhotep-backend-id]'))
  const idMap = new Map()
  for (const el of subjects) {
    idMap.set(el, parseInt(el.dataset.imhotepBackendId, 10))
  }
  const results = {
    scroll: [],
    clipping: [],
    topology: {
      containingBlockOf: [],
      nearestPositionedAncestorOf: [],
      scrollContainerOf: [],
      stackingContextOf: [],
      formattingContextOf: [],
      clippingRootOf: [],
      paintOrderBucket: [],
      paintOrderIndex: [],
    }
  }
  function createsScrollBox(value) {
    return value === 'hidden' || value === 'scroll' || value === 'auto'
  }
  function isScrollContainer(el) {
    // overflow: clip clips content but does not make a scroll container.
    const style = window.getComputedStyle(el)
    return createsScrollBox(style.overflowX) || createsScrollBox(style.overflowY)
  }
  function isClippingElement(el) {
    // Any non-visible overflow (hidden, scroll, auto, clip) clips content.
    const style = window.getComputedStyle(el)
    return style.overflowX !== 'visible' || style.overflowY !== 'visible' ||
      style.clipPath !== 'none'
  }
  function createsStackingContext(el) {
    const style = window.getComputedStyle(el)
    if (style.position !== 'static' && style.zIndex !== 'auto') return true
    if (parseFloat(style.opacity) < 1) return true
    if (style.transform !== 'none') return true
    if (style.filter !== 'none') return true
    if (style.clipPath !== 'none') return true
    if (style.isolation === 'isolate') return true
    if (style.willChange.includes('transform') || style.willChange.includes('opacity')) return true
    return false
  }
  function isPositioned(el) {
    const style = window.getComputedStyle(el)
    return style.position !== 'static'
  }
  function getContainingBlock(el) {
    let current = el.parentElement
    while (current) {
      const style = window.getComputedStyle(current)
      if (style.position !== 'static') return current
      current = current.parentElement
    }
    return null
  }
  function getNearestPositionedAncestor(el) {
    let current = el.parentElement
    while (current) {
      if (isPositioned(current)) return current
      current = current.parentElement
    }
    return null
  }
  function getScrollContainer(el) {
    let current = el.parentElement
    while (current) {
      if (isScrollContainer(current)) return current
      current = current.parentElement
    }
    return null
  }
  function getClippingRoot(el) {
    let current = el.parentElement
    while (current) {
      if (isClippingElement(current)) return current
      current = current.parentElement
    }
    return null
  }
  function getStackingContextRoot(el) {
    let current = el.parentElement
    while (current) {
      if (createsStackingContext(current)) return current
      current = current.parentElement
    }
    return null
  }
  function rectFor(el) {
    const r = el.getBoundingClientRect()
    return { left: r.left, top: r.top, right: r.right, bottom: r.bottom }
  }
  function getId(el) {
    return el ? (idMap.get(el) || 0) : 0
  }
  for (let i = 0; i < subjects.length; i++) {
    const el = subjects[i]
    const subjectId = idMap.get(el) || 0
    if (isScrollContainer(el)) {
      results.scroll.push({
        containerId: subjectId,
        scrollLeft: el.scrollLeft,
        scrollTop: el.scrollTop,
        scrollWidth: el.scrollWidth,
        scrollHeight: el.scrollHeight,
        clientWidth: el.clientWidth,
        clientHeight: el.clientHeight,
      })
    }
    if (isClippingElement(el)) {
      const r = rectFor(el)
      results.clipping.push({
        clipNodeId: results.clipping.length,
        subjectId,
        clipKind: 1,
        clipLeft: r.left,
        clipTop: r.top,
        clipRight: r.right,
        clipBottom: r.bottom,
        parentClipNodeId: 0,
      })
    }
    results.topology.containingBlockOf.push(getId(getContainingBlock(el)))
    results.topology.nearestPositionedAncestorOf.push(getId(getNearestPositionedAncestor(el)))
    results.topology.scrollContainerOf.push(getId(getScrollContainer(el)))
    results.topology.stackingContextOf.push(getId(getStackingContextRoot(el)))
    results.topology.paintOrderBucket.push(createsStackingContext(el) ? 2 : 1)
    results.topology.paintOrderIndex.push(i)
    results.topology.formattingContextOf.push(getId(el.parentElement))
    results.topology.clippingRootOf.push(getId(getClippingRoot(el)))
  }
  return results
})()
`

  // Step 3: run the topology script once for all tagged elements.
  let result = emptyTopologyResult()
  try {
    const response = await session.send<{
      result: { value?: TopologyExtractionResult }
      exceptionDetails?: ExceptionDetails
    }>('Runtime.evaluate', {
      expression: script,
      returnByValue: true,
    })
    // A throw inside the page resolves normally with exceptionDetails set.
    if (response.exceptionDetails) {
      throw new Error(describeException(response.exceptionDetails, 'Topology script threw in page'))
    }
    const value = response.result.value
    if (!value) {
      throw new Error('Topology script returned no value')
    }
    result = value
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err)
    // Untagged nodes already carry their own error entry.
    for (const backendNodeId of tagged) {
      errors.push({ backendNodeId, reason })
    }
    result = emptyTopologyResult()
  }

  // Step 4: remove temporary attributes from every resolved element.
  for (let i = 0; i < backendNodeIds.length; i++) {
    const objectId = objectIds[i]
    if (!objectId) continue
    try {
      await session.send<unknown>('Runtime.callFunctionOn', {
        objectId,
        functionDeclaration: `function() { this.removeAttribute('data-imhotep-backend-id') }`,
      })
    } catch {
      // Cleanup is best-effort; a failure here must not discard the result.
    }
  }
  return { result, errors }
}
+13
View File
@@ -0,0 +1,13 @@
{
"extends": "../../tsconfig.json",
"compilerOptions": {
"outDir": "./dist",
"rootDir": "./src",
"paths": {},
"composite": false,
"noEmitOnError": false
},
"include": [
"src/**/*"
]
}