v1.1.0: pooled runtime, 959 tests, production hardening (0 squash)

This commit is contained in:
John Dvorak
2025-08-15 10:00:00 -07:00
commit 92deb689cd
321 changed files with 79170 additions and 0 deletions
+220
View File
@@ -0,0 +1,220 @@
/**
* DOM Extraction
*
* Captures the DOM tree, node identifiers, selectors, and shadow DOM
* boundaries using CDP DOM domain commands.
*
* Bulk-first: we request the full document tree in one call, then
* traverse the returned tree locally rather than making per-node
* round-trips.
*/
import type { CDPSession } from './session.js'
/**
 * Raw DOM node as returned by CDP DOM.getDocument with depth -1.
 *
 * This is a subset of the protocol's DOM.Node type; fields not declared
 * here are still present on the wire but are not typed.
 */
export interface CDPDOMNode {
  /** Frontend node identifier. */
  nodeId: number
  /** Backend node identifier. */
  backendNodeId: number
  /** Numeric DOM node type. */
  nodeType: number
  /** Node name as reported by the protocol. */
  nodeName: string
  /** Local name; may be absent or empty. */
  localName?: string
  /** Node value, when the protocol reports one. */
  nodeValue?: string
  /** Frontend id of the parent node, when reported. */
  parentId?: number
  /** Regular child nodes. */
  children?: CDPDOMNode[]
  /** Shadow roots hosted by this node. */
  shadowRoots?: CDPDOMNode[]
  /** Pseudo elements attached to this node. */
  pseudoElements?: CDPDOMNode[]
  /** Flat list alternating attribute name and value: [name1, value1, name2, value2, ...]. */
  attributes?: string[]
  /** Shadow root type; the protocol sets it on shadow root nodes only. */
  shadowRootType?: 'user-agent' | 'open' | 'closed'
}
/**
* Flattened DOM record for a single node.
*
* extractDOM produces one record per visited CDP node. String-valued
* properties are stored as indices into the extraction result's
* `strings` table rather than as inline strings.
*/
export interface DOMNodeRecord {
/** CDP frontend node id, copied from the source node. */
nodeId: number
/** CDP backend node id, copied from the source node. */
backendNodeId: number
/** Frontend nodeId of the parent; extractDOM passes 0 for the root. */
parentNodeId: number
/**
* Index of the record that immediately follows this one in the flat
* array (own index + 1). extractDOM assigns it even when childCount is 0,
* so check childCount before treating it as a child. Because the array is
* built depth-first, later siblings are NOT contiguous with the first child.
*/
firstChildIndex: number
/** Number of direct children: regular children + shadow roots + pseudo elements. */
childCount: number
/** 'none' when the node hosts no shadow root. */
shadowRootKind: 'open' | 'closed' | 'none'
/** String-table index of the tag name (localName, else lowercased nodeName). */
tagNameStringId: number
/** String-table index of the `role` attribute value, or of '' when absent. */
roleStringId: number
/**
* String-table index of the `aria-label` / `aria-labelledby` attribute
* value (the last one encountered wins), or of '' when neither is present.
*/
ariaNameStringId: number
}
/**
* Result of DOM extraction.
*
* Returned by extractDOM; all four members describe the same snapshot.
*/
export interface DOMExtractionResult {
/**
* Flat array of DOM nodes in document order (depth-first, parent before
* its children; for each node regular children come first, then shadow
* roots, then pseudo elements).
*/
nodes: DOMNodeRecord[]
/** String table for deduplication; the *StringId fields of each record index into it. */
strings: string[]
/** Map from frontend nodeId to index in nodes array */
nodeIdToIndex: Map<number, number>
/** Index in `nodes` of the document root record */
rootIndex: number
}
/**
 * Extract the full DOM tree from the current page via CDP.
 *
 * Uses DOM.getDocument with depth -1 to fetch the entire tree
 * in a single protocol round-trip, then flattens it locally into
 * depth-first (parent before children) order.
 *
 * @param session - CDP session used to issue the DOM.getDocument command.
 * @returns The flattened node records, the deduplicated string table,
 *   a nodeId-to-index lookup, and the index of the root record.
 */
export async function extractDOM(session: CDPSession): Promise<DOMExtractionResult> {
  // Shadow root nodes carry `shadowRootType` in the protocol's DOM.Node type.
  // Declared locally so this function does not depend on CDPDOMNode listing it.
  type ShadowAwareNode = CDPDOMNode & { shadowRootType?: string }

  const response = await session.send<{ root: CDPDOMNode }>('DOM.getDocument', {
    depth: -1,
    pierce: true,
  })

  const strings: string[] = []
  const stringIndex = new Map<string, number>()

  /** Intern `value` in the string table and return its index. */
  function getStringId(value: string): number {
    const existing = stringIndex.get(value)
    if (existing !== undefined) return existing
    const id = strings.length
    strings.push(value)
    stringIndex.set(value, id)
    return id
  }

  const nodes: DOMNodeRecord[] = []
  const nodeIdToIndex = new Map<number, number>()

  /** Append a record for `node`, then recurse into its children. Returns the record's index. */
  function walk(node: CDPDOMNode, parentNodeId: number): number {
    const nodeIndex = nodes.length
    nodeIdToIndex.set(node.nodeId, nodeIndex)

    const shadowRoots: ShadowAwareNode[] = node.shadowRoots ?? []
    // Traversal order per node: regular children, then shadow roots, then pseudo elements.
    const allChildren: CDPDOMNode[] = [
      ...(node.children ?? []),
      ...shadowRoots,
      ...(node.pseudoElements ?? []),
    ]

    // The first child (if any) is pushed right after this record.
    const firstChildIndex = nodeIndex + 1
    const childCount = allChildren.length

    // Classify the hosted shadow root from the type CDP reports on the
    // shadow root node itself. Anything other than 'closed' (including
    // 'user-agent' or a missing type) is reported as 'open', since the
    // record type has no further variants.
    let shadowRootKind: DOMNodeRecord['shadowRootKind'] = 'none'
    const hostedRoot = shadowRoots[0]
    if (hostedRoot !== undefined) {
      shadowRootKind = hostedRoot.shadowRootType === 'closed' ? 'closed' : 'open'
    }

    // `||` (not `??`) on purpose: an empty localName must fall back to nodeName.
    const tagName = node.localName || node.nodeName.toLowerCase()
    const tagNameStringId = getStringId(tagName)

    // Both default to the id of the empty string when the attribute is absent.
    let roleStringId = getStringId('')
    let ariaNameStringId = roleStringId

    // Attributes arrive as a flat [name, value, name, value, ...] list.
    const attributes = node.attributes ?? []
    for (let i = 0; i < attributes.length; i += 2) {
      const attrName = attributes[i]
      // A dangling name without a value is treated as an empty value
      // instead of interning `undefined` into the string table.
      const attrValue = attributes[i + 1] ?? ''
      if (attrName === 'role') {
        roleStringId = getStringId(attrValue)
      }
      // NOTE(review): when both attributes are present the later one wins,
      // and aria-labelledby holds element ids rather than a name — confirm
      // this is the intended source for the ARIA name.
      if (attrName === 'aria-label' || attrName === 'aria-labelledby') {
        ariaNameStringId = getStringId(attrValue)
      }
    }

    nodes.push({
      nodeId: node.nodeId,
      backendNodeId: node.backendNodeId,
      parentNodeId,
      firstChildIndex,
      childCount,
      shadowRootKind,
      tagNameStringId,
      roleStringId,
      ariaNameStringId,
    })

    // Walk children after pushing parent so indices are stable.
    for (const child of allChildren) {
      walk(child, node.nodeId)
    }
    return nodeIndex
  }

  // The root has no parent; 0 is used as the parent id sentinel.
  const rootIndex = walk(response.root, 0)

  return {
    nodes,
    strings,
    nodeIdToIndex,
    rootIndex,
  }
}
/**
* Resolved selector match with both frontend nodeId and stable backendNodeId.
*
* Produced by resolveSelector, one entry per node that matched the selector
* and could be described.
*/
export interface SelectorMatch {
/** Frontend node id as returned by DOM.querySelectorAll. */
nodeId: number
/** Backend node id as returned by DOM.describeNode for that nodeId. */
backendNodeId: number
}
/**
 * Look up every node matching a CSS selector through CDP.
 *
 * Each match carries the frontend nodeId together with the stable
 * backendNodeId: the backend id is the canonical identity used for
 * geometry/topology, whereas the frontend id is what
 * CSS.getComputedStyleForNode expects.
 *
 * @param session - CDP session, or any object exposing `send` and,
 *   optionally, a cached `getDocumentRootNodeId` accessor.
 * @param selector - CSS selector evaluated against the document root.
 * @returns Matches in the order reported by DOM.querySelectorAll; nodes
 *   whose DOM.describeNode call fails are left out.
 */
export async function resolveSelector(
  session: CDPSession | { send: CDPSession['send']; getDocumentRootNodeId?: () => Promise<number> },
  selector: string
): Promise<SelectorMatch[]> {
  // A cached root id saves a DOM.getDocument round-trip when the session offers one.
  const rootNodeId: number =
    'getDocumentRootNodeId' in session && session.getDocumentRootNodeId
      ? await session.getDocumentRootNodeId()
      : (await session.send<{ root: { nodeId: number } }>('DOM.getDocument')).root.nodeId

  const queryResult = await session.send<{ nodeIds: number[] }>('DOM.querySelectorAll', {
    nodeId: rootNodeId,
    selector,
  })

  // Translate one frontend id into a match; a failed lookup yields null.
  const describe = async (nodeId: number): Promise<SelectorMatch | null> => {
    try {
      const { node } = await session.send<{ node: { backendNodeId: number } }>('DOM.describeNode', { nodeId })
      return { nodeId, backendNodeId: node.backendNodeId }
    } catch {
      return null
    }
  }

  // All describe calls are issued in parallel; failed lookups are dropped afterwards.
  const described = await Promise.all(queryResult.nodeIds.map((id) => describe(id)))
  return described.filter((entry): entry is SelectorMatch => entry !== null)
}
/**
 * Resolve a selector to the backend node ID of its first match.
 *
 * @param session - CDP session used for the lookup.
 * @param selector - CSS selector to resolve.
 * @returns The backendNodeId of the first match, or null when nothing matched.
 */
export async function resolveOneSelector(
  session: CDPSession,
  selector: string
): Promise<number | null> {
  // Only the first match matters; the rest of the result is ignored.
  const [firstMatch] = await resolveSelector(session, selector)
  if (firstMatch === undefined) {
    return null
  }
  return firstMatch.backendNodeId
}