v1.1.0: pooled runtime, 959 tests, production hardening (0 squash)
This commit is contained in:
@@ -0,0 +1,973 @@
|
||||
/**
|
||||
* Main Extraction Orchestrator
|
||||
*
|
||||
* Consumes an extractor request plan, coordinates CDP session
|
||||
* management, DOM resolution, geometry capture, style extraction,
|
||||
* and topology building.
|
||||
*
|
||||
* Returns raw browser facts matching the geometry world schema,
|
||||
* with provenance metadata and graceful partial handling.
|
||||
*/
|
||||
|
||||
import type { DiagnosticCode } from 'imhotep-core'
|
||||
import type { CDPSession, CDPSessionManager } from './session.js'
|
||||
import { extractDOM, resolveSelector, type DOMExtractionResult, type SelectorMatch } from './dom.js'
|
||||
import {
|
||||
extractBoxModels,
|
||||
extractFragments,
|
||||
extractTransforms,
|
||||
extractVisualBoxes,
|
||||
type GeometryExtractionResult,
|
||||
type BoxRecord,
|
||||
type FragmentRecord,
|
||||
type TransformRecord,
|
||||
} from './geometry.js'
|
||||
import { extractStyles, type StyleRecord, type StyleExtractionConfig } from './styles.js'
|
||||
import { extractTopology, type TopologyExtractionResult } from './topology.js'
|
||||
|
||||
/**
 * Extractor request plan.
 *
 * Names the page to read, the environment to record on the snapshot, the
 * subjects to resolve, and which fact families `CDPExtractor.extract` should
 * collect.
 */
export interface ExtractorRequest {
  /** Echoed on the response and used as the snapshot's `sceneId`. */
  requestId: string
  sceneTarget: {
    /** Opaque page handle; this module never reads it. */
    pageRef: unknown
    /** Copied into the snapshot's `source.url`. */
    url: string
  }
  /** Environment values copied verbatim into the snapshot's `env`. */
  env: {
    viewportWidth: number
    viewportHeight: number
    colorScheme: string
    pointer: string
  }
  /**
   * Subjects to extract. When both `nodeId` and `backendNodeId` are supplied
   * the selector is not resolved again; otherwise `selector` is resolved and
   * every match becomes its own subject row.
   */
  subjects: Array<{ id: string; selector: string; nodeId?: number; backendNodeId?: number }>
  requiredFacts: {
    /** Geometry is extracted unless this is explicitly `false`. */
    geometry?: boolean
    /** Fragments are extracted only when this is truthy and geometry is not disabled. */
    fragments?: boolean
    /**
     * `false` skips styles. A string array is forwarded as the property list
     * with `all: false`; any other value requests all properties.
     */
    styles?: boolean | string[]
    /** Topology is extracted unless this is explicitly `false`; this module treats an array like `true`. */
    topology?: boolean | string[]
    /** Not read by this module. */
    text?: boolean
  }
}
|
||||
|
||||
/**
 * Provenance entry for a fact.
 *
 * `CDPExtractor.extract` emits one entry per recorded trace step.
 */
export interface ProvenanceEntry {
  /** Sequential id; `extract` numbers entries from 0 in trace-step order. */
  factId: number
  /** `stepId` of the trace step the entry was derived from. */
  extractionStepId: number
  /** Numeric source code; `extract` writes `1` for the CDP protocol. */
  sourceKind: number
  /** Reference into the source; `extract` writes the step id here as well. */
  sourceRef: number
}
|
||||
|
||||
/**
 * Confidence entry for a fact.
 *
 * NOTE(review): nothing in this module produces these entries; snapshots are
 * returned with an empty `confidence` list. The value range and the meaning
 * of `reasonCode` are not defined here — confirm against the core schema.
 */
export interface ConfidenceEntry {
  /** Id of the fact this confidence applies to. */
  factId: number
  /** Confidence value for the fact. */
  confidence: number
  /** Numeric code explaining the confidence value. */
  reasonCode: number
}
|
||||
|
||||
/**
 * Extraction trace entry: the outcome of one pipeline step.
 */
export interface ExtractionTraceEntry {
  /** Step number; `extract` uses 1 (DOM) through 5 (topology). */
  stepId: number
  /** Label of the fact family, e.g. `'dom-tree'` or `'geometry'`. */
  factKind: string
  /** Outcome of the step. */
  status: 'ok' | 'partial' | 'error'
}
|
||||
|
||||
/**
 * Extractor response.
 *
 * Canonical shape uses snapshots (array). The singular snapshot field is
 * kept for backward compatibility but deprecated — use snapshots[0].
 * `CDPExtractor.extract` places the same object in both.
 */
export interface ExtractorResponse {
  /** Copied from the request. */
  requestId: string
  /** `'error'` if any diagnostic is an error, else `'partial'` if any is a warning, else `'ok'`. */
  status: 'ok' | 'partial' | 'error'
  /** @deprecated Use snapshots[0] instead. Kept for backward compatibility. */
  snapshot: GeometryWorldSnapshot
  /** Canonical shape: array of snapshots for multi-state extraction. */
  snapshots: GeometryWorldSnapshot[]
  /** Every warning and error collected during extraction, in emission order. */
  diagnostics: ExtractorDiagnostic[]
  extractionTrace: {
    /** One entry per pipeline step that ran. */
    steps: ExtractionTraceEntry[]
    /** Wall-clock start/end (`Date.now()` milliseconds) per step. */
    timings: Array<{ stepId: number; startMs: number; endMs: number }>
    /** Protocol call log. NOTE(review): this module never appends to it, so it is returned empty. */
    protocolCalls: Array<{ stepId: number; protocol: string; method: string; params: Record<string, unknown> }>
  }
}
|
||||
|
||||
/**
 * Diagnostic for extraction failures.
 */
export interface ExtractorDiagnostic {
  /** Machine-readable code, e.g. `'IMH_SELECTOR_NO_MATCH'`. */
  code: DiagnosticCode
  /** Any `'error'` makes the overall response status `'error'`; warnings alone make it `'partial'`. */
  severity: 'warning' | 'error'
  /** Human-readable description of the failure. */
  message: string
  /** Request subject id; this module sets it only on selector-resolution diagnostics. */
  subjectId?: string
  /** Selector of the affected subject; this module sets it only on selector-resolution diagnostics. */
  selector?: string
}
|
||||
|
||||
/**
 * Geometry world snapshot.
 * Mirrors the geometry world schema from the core contracts.
 *
 * Tables are stored column-wise: every property of a table is an array, and
 * index `i` across the arrays of one table describes one row.
 */
export interface GeometryWorldSnapshot {
  /** Set from the request id by this module. */
  sceneId: string
  /** This module always writes `'default'`. */
  snapshotId: string
  /**
   * Viewport, colour scheme and pointer come from the request; the remaining
   * fields are currently written as fixed values by this module.
   */
  env: {
    viewportWidth: number
    viewportHeight: number
    deviceScaleFactor: number
    colorScheme: string
    pointer: string
    hover: boolean
    reducedMotion: string
    locale: string
    writingMode: string
  }
  source: {
    url: string
    browserName: string
    browserVersion: string
    engine: string
    /** ISO-8601 timestamp taken when the snapshot object is built. */
    extractedAt: string
  }
  /** String table; this module shares it with `dom.strings`. */
  strings: string[]
  /** One row per resolved selector match. */
  subjects: {
    ids: number[]
    /** Holds the backend node id of each subject. */
    domNodeId: number[]
    /** This module writes `1` (element) for every subject. */
    subjectKind: number[]
    /** Box id for the subject, or `0` when no box was extracted for it. */
    primaryBoxId: number[]
    /** First fragment id for the subject, or `0` when it has none. */
    firstFragmentId: number[]
    fragmentCount: number[]
    /** This module always writes `0`. */
    firstTextRunId: number[]
    /** This module always writes `0`. */
    textRunCount: number[]
  }
  dom: DOMExtractionResult
  /** Left empty by this module. */
  frames: {
    frameId: number[]
    frameKind: number[]
    ownerSubjectId: number[]
    parentFrameId: number[]
    originX: number[]
    originY: number[]
    axisMatrixStart: number[]
    clipRectId: number[]
    scrollContainerId: number[]
    writingMode: number[]
  }
  /** Flat matrix values; `transforms.matrixStart` / `matrixLength` presumably index into it — confirm in the geometry module. */
  matrices: {
    values: number[]
  }
  /** Left empty by this module. */
  rects: {
    rectId: number[]
    left: number[]
    top: number[]
    right: number[]
    bottom: number[]
  }
  /** Box-model records, one row per extracted box. */
  boxes: {
    boxId: number[]
    subjectId: number[]
    frameId: number[]
    borderLeft: number[]
    borderTop: number[]
    borderRight: number[]
    borderBottom: number[]
    paddingLeft: number[]
    paddingTop: number[]
    paddingRight: number[]
    paddingBottom: number[]
    contentLeft: number[]
    contentTop: number[]
    contentRight: number[]
    contentBottom: number[]
  }
  /** Visual (post-transform) box records; same columns as `boxes`. */
  visualBoxes: {
    boxId: number[]
    subjectId: number[]
    frameId: number[]
    borderLeft: number[]
    borderTop: number[]
    borderRight: number[]
    borderBottom: number[]
    paddingLeft: number[]
    paddingTop: number[]
    paddingRight: number[]
    paddingBottom: number[]
    contentLeft: number[]
    contentTop: number[]
    contentRight: number[]
    contentBottom: number[]
  }
  fragments: {
    fragmentId: number[]
    subjectId: number[]
    fragmentKind: number[]
    boxLeft: number[]
    boxTop: number[]
    boxRight: number[]
    boxBottom: number[]
    lineIndex: number[]
    flowIndex: number[]
    parentFragmentId: number[]
  }
  transforms: {
    transformId: number[]
    subjectId: number[]
    matrixStart: number[]
    matrixLength: number[]
    originX: number[]
    originY: number[]
  }
  /** Numeric style columns, one row per style record. */
  styles: {
    subjectId: number[]
    display: number[]
    position: number[]
    zIndexKind: number[]
    zIndexValue: number[]
    overflowX: number[]
    overflowY: number[]
    opacity: number[]
    visibility: number[]
    containFlags: number[]
    pointerEvents: number[]
    lineHeight: number[]
    fontFamilyStringId: number[]
    fontSize: number[]
    fontWeight: number[]
  }
  topology: TopologyExtractionResult['topology']
  scroll: TopologyExtractionResult['scroll']
  clipping: TopologyExtractionResult['clipping']
  /** Filled by `CDPExtractor.extract` with one entry per trace step. */
  provenance: ProvenanceEntry[]
  /** Left empty by this module. */
  confidence: ConfidenceEntry[]
}
|
||||
|
||||
/**
 * Minimal shape of the per-node failure records returned by the extraction
 * helpers. Only the two fields interpolated into diagnostics are required.
 */
interface NodeExtractionFailure {
  backendNodeId: unknown
  reason: unknown
}

/**
 * Run one extraction call and translate its outcome into diagnostics.
 *
 * Each per-node failure reported by the call becomes a warning with
 * `partialCode`; an exception becomes a single error with `failedCode`.
 *
 * @param diagnostics - Diagnostic list to append to.
 * @param label - Human-readable stage name used as the message prefix.
 * @param partialCode - Code for per-node failures.
 * @param failedCode - Code used when the call itself throws.
 * @param run - Performs the extraction call.
 * @returns The call's result, or `undefined` if the call threw before producing one.
 */
async function runExtractionStage<T extends { errors: Iterable<NodeExtractionFailure> }>(
  diagnostics: ExtractorDiagnostic[],
  label: string,
  partialCode: DiagnosticCode,
  failedCode: DiagnosticCode,
  run: () => Promise<T>
): Promise<T | undefined> {
  let result: T | undefined
  try {
    result = await run()
    for (const failure of result.errors) {
      diagnostics.push({
        code: partialCode,
        severity: 'warning',
        message: `${label} extraction failed for node ${failure.backendNodeId}: ${failure.reason}`,
      })
    }
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err)
    diagnostics.push({
      code: failedCode,
      severity: 'error',
      message: `${label} extraction failed: ${message}`,
    })
  }
  // A result obtained before a later failure is still returned so the caller
  // keeps whatever records were produced.
  return result
}

/**
 * Main extractor class.
 *
 * Orchestrates the full extraction pipeline:
 * 1. Attach CDP session and enable the DOM, CSS and Runtime domains
 * 2. Extract DOM tree (trace step 1)
 * 3. Resolve selectors to node IDs (trace step 2)
 * 4. Extract geometry: boxes, visual boxes, fragments, transforms (trace step 3)
 * 5. Extract styles (trace step 4)
 * 6. Extract topology (trace step 5)
 * 7. Build geometry world snapshot
 * 8. Return response with provenance and diagnostics
 */
export class CDPExtractor {
  constructor(private readonly sessionManager: CDPSessionManager) {}

  /**
   * Execute extraction according to the request plan.
   *
   * Failures never reject the returned promise: they are reported through
   * `diagnostics`, and the response carries whatever was extracted. If the
   * session cannot be attached (or a domain cannot be enabled) an `'error'`
   * response with an empty snapshot is returned immediately.
   *
   * NOTE(review): the session is not released here; presumably the session
   * manager owns its lifetime — confirm.
   *
   * @param request - The extraction plan.
   * @returns The response; `snapshot` and `snapshots[0]` are the same object.
   */
  async extract(request: ExtractorRequest): Promise<ExtractorResponse> {
    const diagnostics: ExtractorDiagnostic[] = []
    const traceSteps: ExtractionTraceEntry[] = []
    const traceTimings: Array<{ stepId: number; startMs: number; endMs: number }> = []
    // Never appended to in this method; returned empty.
    const traceCalls: Array<{
      stepId: number
      protocol: string
      method: string
      params: Record<string, unknown>
    }> = []
    const extractionTrace = { steps: traceSteps, timings: traceTimings, protocolCalls: traceCalls }

    // Record a finished step; the end time is taken when this is called.
    const recordStep = (
      stepId: number,
      factKind: string,
      status: ExtractionTraceEntry['status'],
      startMs: number
    ): void => {
      traceSteps.push({ stepId, factKind, status })
      traceTimings.push({ stepId, startMs, endMs: Date.now() })
    }

    // A stage is 'partial' as soon as it added any diagnostic of its own.
    const statusSince = (diagnosticsBefore: number): 'ok' | 'partial' =>
      diagnostics.length > diagnosticsBefore ? 'partial' : 'ok'

    let session: CDPSession
    try {
      session = await this.sessionManager.attach()
      // Enable required CDP domains before extraction.
      // Use cached enablement to avoid redundant round-trips.
      await this.sessionManager.enableDomain('DOM')
      await this.sessionManager.enableDomain('CSS')
      await this.sessionManager.enableDomain('Runtime')
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err)
      diagnostics.push({
        code: 'IMH_CDP_SESSION_ATTACH_FAILED',
        severity: 'error',
        message: `Failed to attach CDP session: ${message}`,
      })
      const emptySnapshot = createEmptySnapshot(request)
      return {
        requestId: request.requestId,
        status: 'error',
        snapshot: emptySnapshot,
        snapshots: [emptySnapshot],
        diagnostics,
        extractionTrace,
      }
    }

    // --- Step 1: Extract DOM ---
    // Extract DOM first so the full tree is pushed to the frontend.
    // This ensures backendNodeIds returned by querySelectorAll remain valid.
    const domStepStart = Date.now()
    let domResult: DOMExtractionResult
    try {
      domResult = await extractDOM(session)
      recordStep(1, 'dom-tree', 'ok', domStepStart)
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err)
      diagnostics.push({
        code: 'IMH_DOM_EXTRACTION_FAILED',
        severity: 'error',
        message: `DOM extraction failed: ${message}`,
      })
      domResult = { nodes: [], strings: [], nodeIdToIndex: new Map(), rootIndex: 0 }
      recordStep(1, 'dom-tree', 'error', domStepStart)
    }

    // --- Step 2: Resolve selectors ---
    const selectorStepStart = Date.now()
    const selectorDiagnosticsStart = diagnostics.length
    const backendNodeIds: number[] = []
    const nodeIds: number[] = []
    const subjectIds: number[] = []
    const resolvedSubjects: Array<{ id: string; selector: string; backendNodeId: number; nodeId: number }> = []

    // Resolve all selectors sequentially to avoid CDP race conditions
    // when multiple querySelectorAll calls run concurrently.
    for (const subject of request.subjects) {
      let matches: Awaited<ReturnType<typeof resolveSelector>>
      if (subject.nodeId !== undefined && subject.backendNodeId !== undefined) {
        // Callers that already resolved selectors can pass nodeId/backendNodeId
        // to avoid a duplicate querySelectorAll + describeNode pass.
        matches = [{ nodeId: subject.nodeId, backendNodeId: subject.backendNodeId }]
      } else {
        try {
          matches = await resolveSelector(session, subject.selector)
        } catch (err) {
          // Reported as a failure even when the error message is empty.
          const message = err instanceof Error ? err.message : String(err)
          diagnostics.push({
            code: 'IMH_SELECTOR_RESOLUTION_FAILED',
            severity: 'error',
            message: `Failed to resolve selector "${subject.selector}": ${message}`,
            subjectId: subject.id,
            selector: subject.selector,
          })
          continue
        }
      }

      if (matches.length === 0) {
        diagnostics.push({
          code: 'IMH_SELECTOR_NO_MATCH',
          severity: 'warning',
          message: `Selector "${subject.selector}" matched 0 elements.`,
          subjectId: subject.id,
          selector: subject.selector,
        })
        continue
      }

      // Every match becomes its own subject row; its index is the subject id.
      for (const match of matches) {
        backendNodeIds.push(match.backendNodeId)
        nodeIds.push(match.nodeId)
        subjectIds.push(resolvedSubjects.length)
        resolvedSubjects.push({ ...subject, backendNodeId: match.backendNodeId, nodeId: match.nodeId })
      }
    }
    recordStep(2, 'selector-resolution', statusSince(selectorDiagnosticsStart), selectorStepStart)

    // --- Step 3: Extract Geometry ---
    const geometryStepStart = Date.now()
    const geometryDiagnosticsStart = diagnostics.length
    let boxRecords: BoxRecord[] = []
    let visualBoxRecords: BoxRecord[] = []
    let fragmentRecords: FragmentRecord[] = []
    let transformRecords: TransformRecord[] = []
    let matrixValues: number[] = []

    if (request.requiredFacts.geometry !== false && backendNodeIds.length > 0) {
      const boxStage = await runExtractionStage(
        diagnostics,
        'Box model',
        'IMH_BOX_MODEL_PARTIAL',
        'IMH_BOX_MODEL_FAILED',
        () => extractBoxModels(session, backendNodeIds, subjectIds)
      )
      if (boxStage) {
        boxRecords = boxStage.boxes
      }

      // Extract visual boxes (post-transform coordinates via getBoundingClientRect)
      const visualBoxStage = await runExtractionStage(
        diagnostics,
        'Visual box',
        'IMH_VISUAL_BOX_PARTIAL',
        'IMH_VISUAL_BOX_FAILED',
        () => extractVisualBoxes(session, backendNodeIds, subjectIds)
      )
      if (visualBoxStage) {
        visualBoxRecords = visualBoxStage.boxes
      }

      if (request.requiredFacts.fragments) {
        const fragmentStage = await runExtractionStage(
          diagnostics,
          'Fragment',
          'IMH_FRAGMENT_PARTIAL',
          'IMH_FRAGMENT_FAILED',
          () => extractFragments(session, backendNodeIds, subjectIds)
        )
        if (fragmentStage) {
          fragmentRecords = fragmentStage.fragments
        }
      }

      const transformStage = await runExtractionStage(
        diagnostics,
        'Transform',
        'IMH_TRANSFORM_PARTIAL',
        'IMH_TRANSFORM_FAILED',
        () => extractTransforms(session, backendNodeIds, subjectIds)
      )
      if (transformStage) {
        transformRecords = transformStage.transforms
        matrixValues = transformStage.matrices
      }
    }
    // Counts every geometry diagnostic, including visual-box ones.
    recordStep(3, 'geometry', statusSince(geometryDiagnosticsStart), geometryStepStart)

    // --- Step 4: Extract Styles ---
    const styleStepStart = Date.now()
    const styleDiagnosticsStart = diagnostics.length
    let styleRecords: StyleRecord[] = []

    if (request.requiredFacts.styles !== false && backendNodeIds.length > 0) {
      const styleConfig: StyleExtractionConfig = Array.isArray(request.requiredFacts.styles)
        ? { all: false, properties: request.requiredFacts.styles }
        : { all: true }

      const styleStage = await runExtractionStage(
        diagnostics,
        'Style',
        'IMH_STYLE_PARTIAL',
        'IMH_STYLE_FAILED',
        () => extractStyles(session, backendNodeIds, subjectIds, styleConfig, domResult.strings)
      )
      if (styleStage) {
        styleRecords = styleStage.styles
        // Style extraction returns the (possibly extended) string table.
        domResult.strings = styleStage.strings
      }
    }
    recordStep(4, 'styles', statusSince(styleDiagnosticsStart), styleStepStart)

    // --- Step 5: Extract Topology ---
    const topologyStepStart = Date.now()
    const topologyDiagnosticsStart = diagnostics.length
    let topologyResult: TopologyExtractionResult = {
      scroll: [],
      clipping: [],
      topology: {
        containingBlockOf: [],
        nearestPositionedAncestorOf: [],
        scrollContainerOf: [],
        stackingContextOf: [],
        formattingContextOf: [],
        clippingRootOf: [],
        paintOrderBucket: [],
        paintOrderIndex: [],
      },
    }

    if (request.requiredFacts.topology !== false && backendNodeIds.length > 0) {
      const topologyStage = await runExtractionStage(
        diagnostics,
        'Topology',
        'IMH_TOPOLOGY_PARTIAL',
        'IMH_TOPOLOGY_FAILED',
        () => extractTopology(session, backendNodeIds, subjectIds)
      )
      if (topologyStage) {
        topologyResult = topologyStage.result
      }
    }
    recordStep(5, 'topology', statusSince(topologyDiagnosticsStart), topologyStepStart)

    // --- Build Geometry World Snapshot ---
    const snapshot = buildSnapshot(
      request,
      domResult,
      boxRecords,
      visualBoxRecords,
      fragmentRecords,
      transformRecords,
      matrixValues,
      styleRecords,
      topologyResult,
      resolvedSubjects
    )

    // Add provenance for extraction steps: one entry per trace step.
    snapshot.provenance = traceSteps.map(
      (step, factId): ProvenanceEntry => ({
        factId,
        extractionStepId: step.stepId,
        sourceKind: 1, // CDP protocol
        sourceRef: step.stepId,
      })
    )

    // Determine overall status
    const hasErrors = diagnostics.some((d) => d.severity === 'error')
    const hasWarnings = diagnostics.some((d) => d.severity === 'warning')
    const status: ExtractorResponse['status'] = hasErrors ? 'error' : hasWarnings ? 'partial' : 'ok'

    return {
      requestId: request.requestId,
      status,
      snapshot,
      snapshots: [snapshot],
      diagnostics,
      extractionTrace,
    }
  }
}
|
||||
|
||||
/**
 * Build a geometry world snapshot from extracted raw data.
 *
 * Record arrays are transposed into the snapshot's column-wise tables. The
 * subjects table gets one row per resolved subject, using the subject's index
 * in `resolvedSubjects` as its id. `frames`, `rects`, `provenance` and
 * `confidence` are returned empty; the caller fills `provenance` afterwards.
 *
 * @param request - Supplies the scene id, URL and environment values.
 * @param dom - DOM extraction result; its `strings` array is shared with the snapshot.
 * @param boxes - Box-model records.
 * @param visualBoxes - Visual box records (same record shape as `boxes`).
 * @param fragments - Fragment records.
 * @param transforms - Transform records.
 * @param matrices - Flat matrix values stored as `matrices.values`.
 * @param styles - Style records.
 * @param topology - Topology, scroll and clipping results.
 * @param resolvedSubjects - Resolved subjects in subject-id order.
 * @returns The assembled snapshot.
 */
function buildSnapshot(
  request: ExtractorRequest,
  dom: DOMExtractionResult,
  boxes: BoxRecord[],
  visualBoxes: BoxRecord[],
  fragments: FragmentRecord[],
  transforms: TransformRecord[],
  matrices: number[],
  styles: StyleRecord[],
  topology: TopologyExtractionResult,
  resolvedSubjects: Array<{ id: string; selector: string; backendNodeId: number; nodeId: number }>
): GeometryWorldSnapshot {
  // Pull one field out of every record, preserving record order.
  const column = <R, K extends keyof R>(rows: readonly R[], key: K): R[K][] => rows.map((row) => row[key])

  // `boxes` and `visualBoxes` share one record shape and one column layout.
  const boxColumns = (rows: readonly BoxRecord[]): GeometryWorldSnapshot['boxes'] => ({
    boxId: column(rows, 'boxId'),
    subjectId: column(rows, 'subjectId'),
    frameId: column(rows, 'frameId'),
    borderLeft: column(rows, 'borderLeft'),
    borderTop: column(rows, 'borderTop'),
    borderRight: column(rows, 'borderRight'),
    borderBottom: column(rows, 'borderBottom'),
    paddingLeft: column(rows, 'paddingLeft'),
    paddingTop: column(rows, 'paddingTop'),
    paddingRight: column(rows, 'paddingRight'),
    paddingBottom: column(rows, 'paddingBottom'),
    contentLeft: column(rows, 'contentLeft'),
    contentTop: column(rows, 'contentTop'),
    contentRight: column(rows, 'contentRight'),
    contentBottom: column(rows, 'contentBottom'),
  })

  // If several boxes name the same subject, the last one wins.
  const boxIdBySubject = new Map<number, number>()
  for (const box of boxes) {
    boxIdBySubject.set(box.subjectId, box.boxId)
  }

  // Fragment ids per subject, in extraction order.
  const fragmentIdsBySubject = new Map<number, number[]>()
  for (const fragment of fragments) {
    const known = fragmentIdsBySubject.get(fragment.subjectId)
    if (known) {
      known.push(fragment.fragmentId)
    } else {
      fragmentIdsBySubject.set(fragment.subjectId, [fragment.fragmentId])
    }
  }

  // Build subjects table
  const subjects: GeometryWorldSnapshot['subjects'] = {
    ids: [],
    domNodeId: [],
    subjectKind: [],
    primaryBoxId: [],
    firstFragmentId: [],
    fragmentCount: [],
    firstTextRunId: [],
    textRunCount: [],
  }
  resolvedSubjects.forEach((subject, subjectId) => {
    const fragmentIds = fragmentIdsBySubject.get(subjectId)
    const hasFragments = fragmentIds !== undefined && fragmentIds.length > 0

    subjects.ids.push(subjectId)
    subjects.domNodeId.push(subject.backendNodeId)
    subjects.subjectKind.push(1) // element
    // NOTE(review): 0 doubles as "no box" / "no fragment"; if ids can legitimately
    // be 0 the two cases are indistinguishable — confirm against the schema.
    subjects.primaryBoxId.push(boxIdBySubject.get(subjectId) ?? 0)
    subjects.firstFragmentId.push(hasFragments ? fragmentIds[0] : 0)
    subjects.fragmentCount.push(hasFragments ? fragmentIds.length : 0)
    // Text runs are not extracted here.
    subjects.firstTextRunId.push(0)
    subjects.textRunCount.push(0)
  })

  return {
    sceneId: request.requestId,
    snapshotId: 'default',
    env: {
      viewportWidth: request.env.viewportWidth,
      viewportHeight: request.env.viewportHeight,
      deviceScaleFactor: 1,
      colorScheme: request.env.colorScheme,
      pointer: request.env.pointer,
      hover: false,
      reducedMotion: 'no-preference',
      locale: 'en',
      writingMode: 'horizontal-tb',
    },
    source: {
      url: request.sceneTarget.url,
      browserName: 'chromium',
      browserVersion: '',
      engine: 'chromium-cdp',
      extractedAt: new Date().toISOString(),
    },
    strings: dom.strings,
    subjects,
    dom,
    frames: {
      frameId: [],
      frameKind: [],
      ownerSubjectId: [],
      parentFrameId: [],
      originX: [],
      originY: [],
      axisMatrixStart: [],
      clipRectId: [],
      scrollContainerId: [],
      writingMode: [],
    },
    matrices: { values: matrices },
    rects: {
      rectId: [],
      left: [],
      top: [],
      right: [],
      bottom: [],
    },
    boxes: boxColumns(boxes),
    visualBoxes: boxColumns(visualBoxes),
    fragments: {
      fragmentId: column(fragments, 'fragmentId'),
      subjectId: column(fragments, 'subjectId'),
      fragmentKind: column(fragments, 'fragmentKind'),
      boxLeft: column(fragments, 'boxLeft'),
      boxTop: column(fragments, 'boxTop'),
      boxRight: column(fragments, 'boxRight'),
      boxBottom: column(fragments, 'boxBottom'),
      lineIndex: column(fragments, 'lineIndex'),
      flowIndex: column(fragments, 'flowIndex'),
      parentFragmentId: column(fragments, 'parentFragmentId'),
    },
    transforms: {
      transformId: column(transforms, 'transformId'),
      subjectId: column(transforms, 'subjectId'),
      matrixStart: column(transforms, 'matrixStart'),
      matrixLength: column(transforms, 'matrixLength'),
      originX: column(transforms, 'originX'),
      originY: column(transforms, 'originY'),
    },
    styles: {
      subjectId: column(styles, 'subjectId'),
      display: column(styles, 'display'),
      position: column(styles, 'position'),
      zIndexKind: column(styles, 'zIndexKind'),
      zIndexValue: column(styles, 'zIndexValue'),
      overflowX: column(styles, 'overflowX'),
      overflowY: column(styles, 'overflowY'),
      opacity: column(styles, 'opacity'),
      visibility: column(styles, 'visibility'),
      containFlags: column(styles, 'containFlags'),
      pointerEvents: column(styles, 'pointerEvents'),
      lineHeight: column(styles, 'lineHeight'),
      fontFamilyStringId: column(styles, 'fontFamilyStringId'),
      fontSize: column(styles, 'fontSize'),
      fontWeight: column(styles, 'fontWeight'),
    },
    topology: topology.topology,
    scroll: topology.scroll,
    clipping: topology.clipping,
    provenance: [],
    confidence: [],
  }
}
|
||||
|
||||
/**
 * Create an empty snapshot for error cases.
 *
 * Scene id, URL and the request-supplied environment values are filled in;
 * every table is present with its full set of columns, all empty. Every
 * array in the result is a distinct instance.
 *
 * @param request - Supplies the scene id, URL and environment values.
 * @returns A snapshot containing no subjects or facts.
 */
function createEmptySnapshot(request: ExtractorRequest): GeometryWorldSnapshot {
  // Build a table whose named columns are fresh empty arrays, in the given order.
  const emptyColumns = <K extends string>(...names: K[]): Record<K, number[]> => {
    const table = {} as Record<K, number[]>
    for (const name of names) {
      table[name] = []
    }
    return table
  }

  // `boxes` and `visualBoxes` share one column layout.
  const emptyBoxTable = () =>
    emptyColumns(
      'boxId',
      'subjectId',
      'frameId',
      'borderLeft',
      'borderTop',
      'borderRight',
      'borderBottom',
      'paddingLeft',
      'paddingTop',
      'paddingRight',
      'paddingBottom',
      'contentLeft',
      'contentTop',
      'contentRight',
      'contentBottom'
    )

  const { env, sceneTarget } = request

  return {
    sceneId: request.requestId,
    snapshotId: 'default',
    env: {
      viewportWidth: env.viewportWidth,
      viewportHeight: env.viewportHeight,
      deviceScaleFactor: 1,
      colorScheme: env.colorScheme,
      pointer: env.pointer,
      hover: false,
      reducedMotion: 'no-preference',
      locale: 'en',
      writingMode: 'horizontal-tb',
    },
    source: {
      url: sceneTarget.url,
      browserName: 'chromium',
      browserVersion: '',
      engine: 'chromium-cdp',
      extractedAt: new Date().toISOString(),
    },
    strings: [],
    subjects: emptyColumns(
      'ids',
      'domNodeId',
      'subjectKind',
      'primaryBoxId',
      'firstFragmentId',
      'fragmentCount',
      'firstTextRunId',
      'textRunCount'
    ),
    dom: { nodes: [], strings: [], nodeIdToIndex: new Map(), rootIndex: 0 },
    frames: emptyColumns(
      'frameId',
      'frameKind',
      'ownerSubjectId',
      'parentFrameId',
      'originX',
      'originY',
      'axisMatrixStart',
      'clipRectId',
      'scrollContainerId',
      'writingMode'
    ),
    matrices: { values: [] },
    rects: emptyColumns('rectId', 'left', 'top', 'right', 'bottom'),
    boxes: emptyBoxTable(),
    visualBoxes: emptyBoxTable(),
    fragments: emptyColumns(
      'fragmentId',
      'subjectId',
      'fragmentKind',
      'boxLeft',
      'boxTop',
      'boxRight',
      'boxBottom',
      'lineIndex',
      'flowIndex',
      'parentFragmentId'
    ),
    transforms: emptyColumns('transformId', 'subjectId', 'matrixStart', 'matrixLength', 'originX', 'originY'),
    styles: emptyColumns(
      'subjectId',
      'display',
      'position',
      'zIndexKind',
      'zIndexValue',
      'overflowX',
      'overflowY',
      'opacity',
      'visibility',
      'containFlags',
      'pointerEvents',
      'lineHeight',
      'fontFamilyStringId',
      'fontSize',
      'fontWeight'
    ),
    topology: {
      containingBlockOf: [],
      nearestPositionedAncestorOf: [],
      scrollContainerOf: [],
      stackingContextOf: [],
      formattingContextOf: [],
      clippingRootOf: [],
      paintOrderBucket: [],
      paintOrderIndex: [],
    },
    scroll: [],
    clipping: [],
    provenance: [],
    confidence: [],
  }
}
|
||||
Reference in New Issue
Block a user