v1.1.0: pooled runtime, 959 tests, production hardening (0 squash)
This commit is contained in:
@@ -0,0 +1,220 @@
|
||||
/**
 * DOM Extraction
 *
 * Captures the DOM tree, node identifiers, selectors, and shadow DOM
 * boundaries using CDP DOM domain commands.
 *
 * Bulk-first: we request the full document tree in one call, then
 * traverse the returned tree locally rather than making per-node
 * round-trips.
 */
|
||||
|
||||
import type { CDPSession } from './session.js'
|
||||
|
||||
/**
 * Raw DOM node as returned by CDP `DOM.getDocument` with depth -1.
 *
 * This models only the subset of the protocol's `DOM.Node` that this module
 * reads. All relationship fields are optional because the protocol omits
 * them when they do not apply to a node.
 */
export interface CDPDOMNode {
  /** Frontend node id; only valid for the current document binding. */
  nodeId: number
  /** Backend node id. */
  backendNodeId: number
  /** DOM `Node.nodeType` numeric code. */
  nodeType: number
  nodeName: string
  localName?: string
  nodeValue?: string
  parentId?: number
  /** Regular child nodes. */
  children?: CDPDOMNode[]
  /** Shadow roots hosted by this element, if any. */
  shadowRoots?: CDPDOMNode[]
  /** Pseudo-elements attached to this element, if any. */
  pseudoElements?: CDPDOMNode[]
  /** Flat attribute list: `[name1, value1, name2, value2, ...]`. */
  attributes?: string[]
  /** Mode of a shadow root node, as reported by the protocol. */
  shadowRootType?: 'user-agent' | 'open' | 'closed'
  /** Content document of a frame owner element. */
  contentDocument?: CDPDOMNode
  /** Content of a `<template>` element. */
  templateContent?: CDPDOMNode
}
|
||||
|
||||
/**
 * Flattened DOM record for a single node.
 *
 * Records live in a flat array; string-valued properties are stored as
 * indices into the accompanying string table.
 */
export interface DOMNodeRecord {
  /** CDP frontend node id of this node. */
  nodeId: number
  /** CDP backend node id of this node. */
  backendNodeId: number
  /** CDP `nodeId` of the parent; the extractor passes `0` for the root. */
  parentNodeId: number
  /**
   * Index in the flat array of the slot directly after this node. That slot
   * holds the first child only when `childCount > 0`.
   */
  firstChildIndex: number
  /** Number of regular children, shadow roots and pseudo-elements combined. */
  childCount: number
  /** Shadow root mode for a node hosting a shadow root; `'none'` otherwise. */
  shadowRootKind: 'open' | 'closed' | 'none'
  /** String-table index of the tag name (`localName`, else lower-cased `nodeName`). */
  tagNameStringId: number
  /** String-table index of the `role` attribute value, or of `''` when absent. */
  roleStringId: number
  /** String-table index of the `aria-label` / `aria-labelledby` value, or of `''` when absent. */
  ariaNameStringId: number
}
|
||||
|
||||
/**
 * Result of DOM extraction.
 */
export interface DOMExtractionResult {
  /** Flat array of DOM nodes in document order */
  nodes: DOMNodeRecord[]
  /** String table for deduplication; records refer to entries by index */
  strings: string[]
  /** Map from CDP nodeId to index in the `nodes` array */
  nodeIdToIndex: Map<number, number>
  /** Index of the root node in the `nodes` array */
  rootIndex: number
}
|
||||
|
||||
/**
 * Extract the full DOM tree from the current page via CDP.
 *
 * Sends a single `DOM.getDocument` with `depth: -1` and `pierce: true`, then
 * flattens the returned tree locally in pre-order: every node is emitted
 * before its descendants, and siblings keep the order
 * children → shadow roots → pseudo-elements.
 *
 * Layout notes for consumers:
 * - `parentNodeId` is the parent's CDP `nodeId`; the root receives `0`.
 * - `firstChildIndex` is always the node's own index + 1. Because the layout
 *   is pre-order, later siblings are not contiguous with the first child,
 *   and for a node with `childCount === 0` that slot is not a child at all.
 * - `roleStringId` / `ariaNameStringId` point at the empty string when the
 *   corresponding attribute is absent.
 * - `shadowRootKind` is set on the element that hosts a shadow root and is
 *   taken from the first shadow root's `shadowRootType`; anything other than
 *   `'closed'` (including a missing value) is reported as `'open'`.
 *
 * @param session - CDP session used to send `DOM.getDocument`.
 * @returns The flat node records, the string table, a nodeId → index map and
 *   the index of the root record.
 */
export async function extractDOM(session: CDPSession): Promise<DOMExtractionResult> {
  const response = await session.send<{ root: CDPDOMNode }>('DOM.getDocument', {
    depth: -1,
    pierce: true,
  })

  const strings: string[] = []
  const stringIndex = new Map<string, number>()

  // Intern `value` and return its index in the string table.
  function getStringId(value: string): number {
    const known = stringIndex.get(value)
    if (known !== undefined) return known
    const id = strings.length
    strings.push(value)
    stringIndex.set(value, id)
    return id
  }

  const nodes: DOMNodeRecord[] = []
  const nodeIdToIndex = new Map<number, number>()

  // The root is the first record emitted.
  const rootIndex = nodes.length

  // Explicit stack instead of recursion so that a very deep tree cannot
  // overflow the call stack. Children are pushed in reverse so they are
  // popped, and therefore emitted, in their original order.
  const pending: Array<{ node: CDPDOMNode; parentNodeId: number }> = [
    { node: response.root, parentNodeId: 0 },
  ]

  for (let entry = pending.pop(); entry !== undefined; entry = pending.pop()) {
    const { node, parentNodeId } = entry

    const nodeIndex = nodes.length
    nodeIdToIndex.set(node.nodeId, nodeIndex)

    const children = node.children ?? []
    const shadowRoots = node.shadowRoots ?? []
    const pseudoElements = node.pseudoElements ?? []
    const allChildren = [...children, ...shadowRoots, ...pseudoElements]

    const firstChildIndex = nodeIndex + 1
    const childCount = allChildren.length

    // The protocol reports the mode on the shadow root node itself. Read it
    // through a local structural type so this function does not depend on
    // the field being declared on CDPDOMNode.
    let shadowRootKind: 'open' | 'closed' | 'none' = 'none'
    const firstShadowRoot: (CDPDOMNode & { shadowRootType?: string }) | undefined = shadowRoots[0]
    if (firstShadowRoot !== undefined) {
      shadowRootKind = firstShadowRoot.shadowRootType === 'closed' ? 'closed' : 'open'
    }

    // `||` is intentional: an empty localName must fall back to nodeName.
    const tagName = node.localName || node.nodeName.toLowerCase()
    const tagNameStringId = getStringId(tagName)

    // Default both ids to the empty string, then override from attributes.
    let roleStringId = getStringId('')
    let ariaNameStringId = getStringId('')
    if (node.attributes) {
      // Attributes arrive as a flat [name, value, name, value, ...] list.
      for (let i = 0; i < node.attributes.length; i += 2) {
        const attrName = node.attributes[i]
        // Guard against a malformed odd-length list.
        const attrValue = node.attributes[i + 1] ?? ''
        if (attrName === 'role') {
          roleStringId = getStringId(attrValue)
        }
        // NOTE(review): when both aria-label and aria-labelledby are present,
        // whichever appears later in the attribute list wins.
        if (attrName === 'aria-label' || attrName === 'aria-labelledby') {
          ariaNameStringId = getStringId(attrValue)
        }
      }
    }

    nodes.push({
      nodeId: node.nodeId,
      backendNodeId: node.backendNodeId,
      parentNodeId,
      firstChildIndex,
      childCount,
      shadowRootKind,
      tagNameStringId,
      roleStringId,
      ariaNameStringId,
    })

    // `allChildren` is a fresh array, so reversing it in place is safe.
    for (const child of allChildren.reverse()) {
      pending.push({ node: child, parentNodeId: node.nodeId })
    }
  }

  return {
    nodes,
    strings,
    nodeIdToIndex,
    rootIndex,
  }
}
|
||||
|
||||
/**
 * Resolved selector match with both frontend nodeId and stable backendNodeId.
 */
export interface SelectorMatch {
  /** Frontend node id, as returned by `DOM.querySelectorAll`. */
  nodeId: number
  /** Backend node id, as returned by `DOM.describeNode` for that node. */
  backendNodeId: number
}
|
||||
|
||||
/**
 * Resolve a CSS selector to node IDs via CDP.
 *
 * Returns an array of matches with both frontend nodeId and stable
 * backendNodeId. We keep both because backendNodeId is the canonical
 * identity for geometry/topology, while frontend nodeId is required
 * by CSS.getComputedStyleForNode.
 *
 * The document root id comes from `session.getDocumentRootNodeId()` when the
 * session provides it; otherwise a `DOM.getDocument` call is issued.
 *
 * A node whose `DOM.describeNode` call fails is dropped from the result
 * rather than failing the whole lookup (deliberate best-effort). Failures of
 * the root lookup or of `DOM.querySelectorAll` are not caught here.
 *
 * @param session - CDP session, or any object exposing a compatible `send`
 *   and optionally `getDocumentRootNodeId`.
 * @param selector - CSS selector evaluated against the document root.
 * @returns Matches in the order `DOM.querySelectorAll` returned them, minus
 *   any node that could not be described.
 */
export async function resolveSelector(
  session: CDPSession | { send: CDPSession['send']; getDocumentRootNodeId?: () => Promise<number> },
  selector: string
): Promise<SelectorMatch[]> {
  // Use cached document root if available, otherwise fetch it.
  let rootNodeId: number
  if ('getDocumentRootNodeId' in session && session.getDocumentRootNodeId) {
    rootNodeId = await session.getDocumentRootNodeId()
  } else {
    const doc = await session.send<{ root: { nodeId: number } }>('DOM.getDocument')
    rootNodeId = doc.root.nodeId
  }

  const { nodeIds } = await session.send<{ nodeIds: number[] }>('DOM.querySelectorAll', {
    nodeId: rootNodeId,
    selector,
  })

  // Convert one frontend nodeId to a match, or null if the node cannot be
  // described.
  const describe = async (nodeId: number): Promise<SelectorMatch | null> => {
    try {
      const { node } = await session.send<{ node: { backendNodeId: number } }>('DOM.describeNode', { nodeId })
      return { nodeId, backendNodeId: node.backendNodeId }
    } catch {
      return null
    }
  }

  // All describe calls run in parallel; Promise.all preserves input order.
  const described = await Promise.all(nodeIds.map(describe))
  return described.filter((match): match is SelectorMatch => match !== null)
}
|
||||
|
||||
/**
 * Resolve a single selector to one backend node ID, or null if none match.
 *
 * Delegates to `resolveSelector` and returns the `backendNodeId` of the
 * first match it yields.
 *
 * @param session - CDP session used for the lookup.
 * @param selector - CSS selector to resolve.
 * @returns The first match's backend node id, or `null` when there is no match.
 */
export async function resolveOneSelector(
  session: CDPSession,
  selector: string
): Promise<number | null> {
  const [first] = await resolveSelector(session, selector)
  return first !== undefined ? first.backendNodeId : null
}
|
||||
Reference in New Issue
Block a user