/**
 * Main Extraction Orchestrator
 *
 * Consumes an extractor request plan, coordinates CDP session
 * management, DOM resolution, geometry capture, style extraction,
 * and topology building.
 *
 * Returns raw browser facts matching the geometry world schema,
 * with provenance metadata and graceful partial handling.
 */

import type { DiagnosticCode } from 'imhotep-core'
import type { CDPSession, CDPSessionManager } from './session.js'
import { extractDOM, resolveSelector, type DOMExtractionResult, type SelectorMatch } from './dom.js'
import {
  extractBoxModels,
  extractFragments,
  extractTransforms,
  extractVisualBoxes,
  type GeometryExtractionResult,
  type BoxRecord,
  type FragmentRecord,
  type TransformRecord,
} from './geometry.js'
import { extractStyles, type StyleRecord, type StyleExtractionConfig } from './styles.js'
import { extractTopology, type TopologyExtractionResult } from './topology.js'

/**
 * Extractor request plan.
 *
 * Describes one extraction run: the page being inspected, the environment
 * it is rendered under, the subjects to resolve, and which fact families
 * should be collected.
 */
export interface ExtractorRequest {
  /** Caller-supplied identifier; echoed back on the response and used as the snapshot's `sceneId`. */
  requestId: string
  sceneTarget: {
    /** Opaque page handle. Nothing in this file reads it. */
    pageRef: unknown
    /** Copied into the snapshot's `source.url`. */
    url: string
  }
  /** Rendering environment; copied verbatim into the snapshot's `env`. */
  env: {
    viewportWidth: number
    viewportHeight: number
    colorScheme: string
    pointer: string
  }
  /**
   * Elements to extract. When both `nodeId` and `backendNodeId` are supplied
   * the extractor uses them directly and skips selector resolution for that
   * subject. A selector may match several elements; each match becomes its
   * own resolved subject.
   */
  subjects: Array<{ id: string; selector: string; nodeId?: number; backendNodeId?: number }>
  requiredFacts: {
    /** Box models, visual boxes and transforms. Skipped only when explicitly `false`. */
    geometry?: boolean
    /** Fragments. Collected only when truthy and geometry is not disabled. */
    fragments?: boolean
    /** `false` skips styles; a string array restricts extraction to those properties. */
    styles?: boolean | string[]
    /** Skipped only when explicitly `false`. */
    topology?: boolean | string[]
    /** Not read by the extractor in this file. */
    text?: boolean
  }
}

/**
 * Provenance entry for a fact.
 *
 * Links a fact to the extraction step that produced it.
 */
export interface ProvenanceEntry {
  /** Identifier of the fact this entry describes. */
  factId: number
  /** `stepId` of the extraction trace step that produced the fact. */
  extractionStepId: number
  /** Numeric source category. The extractor in this file writes `1` for the CDP protocol. */
  sourceKind: number
  /** Reference into the source; the extractor in this file stores the step id here. */
  sourceRef: number
}

/**
 * Confidence entry for a fact.
 *
 * NOTE(review): nothing in this file produces these entries; the snapshot's
 * `confidence` list is always emitted empty here. Value ranges and the
 * meaning of `reasonCode` are defined elsewhere — confirm against the core
 * contracts.
 */
export interface ConfidenceEntry {
  /** Identifier of the fact this entry describes. */
  factId: number
  confidence: number
  reasonCode: number
}

/**
 * Extraction trace entry.
 *
 * One entry is recorded per pipeline step (DOM tree, selector resolution,
 * geometry, styles, topology).
 */
export interface ExtractionTraceEntry {
  /** Position of the step in the pipeline (1-based in the extractor). */
  stepId: number
  /** Step label, e.g. `'dom-tree'`, `'selector-resolution'`, `'geometry'`. */
  factKind: string
  /** Outcome of the step. */
  status: 'ok' | 'partial' | 'error'
}

/**
 * Extractor response.
 *
 * Canonical shape uses snapshots (array). The singular snapshot field is
 * kept for backward compatibility but deprecated — use snapshots[0].
 */
export interface ExtractorResponse {
  /** Echo of the request's `requestId`. */
  requestId: string
  /**
   * Overall outcome: `'error'` when any diagnostic has error severity,
   * otherwise `'partial'` when any diagnostic is a warning, otherwise `'ok'`.
   */
  status: 'ok' | 'partial' | 'error'
  /** @deprecated Use snapshots[0] instead. Kept for backward compatibility. */
  snapshot: GeometryWorldSnapshot
  /** Canonical shape: array of snapshots for multi-state extraction. */
  snapshots: GeometryWorldSnapshot[]
  /** Every warning and error raised during extraction, in the order they occurred. */
  diagnostics: ExtractorDiagnostic[]
  extractionTrace: {
    /** One entry per pipeline step that ran. */
    steps: ExtractionTraceEntry[]
    /** Wall-clock start/end (epoch milliseconds) for each recorded step. */
    timings: Array<{ stepId: number; startMs: number; endMs: number }>
    /** Protocol calls per step. The extractor in this file never appends to this list. */
    protocolCalls: Array<{ stepId: number; protocol: string; method: string; params: Record<string, unknown> }>
  }
}

/**
 * Diagnostic for extraction failures.
 */
export interface ExtractorDiagnostic {
  /** Machine-readable code, e.g. `IMH_SELECTOR_NO_MATCH`. */
  code: DiagnosticCode
  /** `'error'` diagnostics make the overall response status `'error'`; warnings make it `'partial'`. */
  severity: 'warning' | 'error'
  /** Human-readable description. */
  message: string
  /** Request-level subject id; set only on selector diagnostics in this file. */
  subjectId?: string
  /** Selector that failed to resolve or matched nothing. */
  selector?: string
}

/**
 * Geometry world snapshot.
 * Mirrors the geometry world schema from the core contracts.
 *
 * Most members are column-oriented tables: each property is an array and
 * index `i` across the arrays of one table describes row `i`.
 */
export interface GeometryWorldSnapshot {
  /** Set to the request's `requestId` by this file's builders. */
  sceneId: string
  /** This file's builders always write `'default'`. */
  snapshotId: string
  env: {
    viewportWidth: number
    viewportHeight: number
    deviceScaleFactor: number
    colorScheme: string
    pointer: string
    hover: boolean
    reducedMotion: string
    locale: string
    writingMode: string
  }
  source: {
    url: string
    browserName: string
    browserVersion: string
    engine: string
    /** ISO-8601 timestamp taken when the snapshot object is built. */
    extractedAt: string
  }
  /** String table, seeded from DOM extraction and extended by style extraction. */
  strings: string[]
  /** One row per resolved subject (one per selector match). */
  subjects: {
    /** Dense subject index, `0..n-1`. */
    ids: number[]
    /** The subject's CDP backend node id. */
    domNodeId: number[]
    /** The builder in this file always writes `1` (element). */
    subjectKind: number[]
    /** Box id for the subject, or `0` when no box was extracted. */
    primaryBoxId: number[]
    /** Id of the subject's first fragment, or `0` when it has none. */
    firstFragmentId: number[]
    fragmentCount: number[]
    /** Always `0` in this file; text runs are not extracted here. */
    firstTextRunId: number[]
    /** Always `0` in this file. */
    textRunCount: number[]
  }
  dom: DOMExtractionResult
  /** Not populated by this file's builders (always empty here). */
  frames: {
    frameId: number[]
    frameKind: number[]
    ownerSubjectId: number[]
    parentFrameId: number[]
    originX: number[]
    originY: number[]
    axisMatrixStart: number[]
    clipRectId: number[]
    scrollContainerId: number[]
    writingMode: number[]
  }
  /** Flat matrix storage; `transforms.matrixStart` / `matrixLength` refer to it. */
  matrices: {
    values: number[]
  }
  /** Not populated by this file's builders (always empty here). */
  rects: {
    rectId: number[]
    left: number[]
    top: number[]
    right: number[]
    bottom: number[]
  }
  /** Box-model records, one row per extracted box. */
  boxes: {
    boxId: number[]
    subjectId: number[]
    frameId: number[]
    borderLeft: number[]
    borderTop: number[]
    borderRight: number[]
    borderBottom: number[]
    paddingLeft: number[]
    paddingTop: number[]
    paddingRight: number[]
    paddingBottom: number[]
    contentLeft: number[]
    contentTop: number[]
    contentRight: number[]
    contentBottom: number[]
  }
  /** Same columns as `boxes`, filled from the visual-box extraction pass. */
  visualBoxes: {
    boxId: number[]
    subjectId: number[]
    frameId: number[]
    borderLeft: number[]
    borderTop: number[]
    borderRight: number[]
    borderBottom: number[]
    paddingLeft: number[]
    paddingTop: number[]
    paddingRight: number[]
    paddingBottom: number[]
    contentLeft: number[]
    contentTop: number[]
    contentRight: number[]
    contentBottom: number[]
  }
  fragments: {
    fragmentId: number[]
    subjectId: number[]
    fragmentKind: number[]
    boxLeft: number[]
    boxTop: number[]
    boxRight: number[]
    boxBottom: number[]
    lineIndex: number[]
    flowIndex: number[]
    parentFragmentId: number[]
  }
  transforms: {
    transformId: number[]
    subjectId: number[]
    matrixStart: number[]
    matrixLength: number[]
    originX: number[]
    originY: number[]
  }
  styles: {
    subjectId: number[]
    display: number[]
    position: number[]
    zIndexKind: number[]
    zIndexValue: number[]
    overflowX: number[]
    overflowY: number[]
    opacity: number[]
    visibility: number[]
    containFlags: number[]
    pointerEvents: number[]
    lineHeight: number[]
    fontFamilyStringId: number[]
    fontSize: number[]
    fontWeight: number[]
  }
  topology: TopologyExtractionResult['topology']
  scroll: TopologyExtractionResult['scroll']
  clipping: TopologyExtractionResult['clipping']
  /** One entry per recorded extraction step. */
  provenance: ProvenanceEntry[]
  /** Not populated by this file's builders (always empty here). */
  confidence: ConfidenceEntry[]
}

/**
 * Main extractor class.
 *
 * Orchestrates the full extraction pipeline:
 *   1. Attach CDP session and enable the DOM, CSS and Runtime domains
 *   2. Extract DOM tree
 *   3. Resolve selectors to node IDs
 *   4. Extract geometry (boxes, visual boxes, fragments, transforms)
 *   5. Extract styles
 *   6. Extract topology
 *   7. Build geometry world snapshot
 *   8. Return response with provenance and diagnostics
 */
export class CDPExtractor {
  private readonly sessionManager: CDPSessionManager

  constructor(sessionManager: CDPSessionManager) {
    this.sessionManager = sessionManager
  }

  /**
   * Execute extraction according to the request plan.
   *
   * Failures inside the pipeline are caught and converted into diagnostics;
   * each failing step leaves its tables empty and the remaining steps still
   * run. If the session cannot be attached (or a domain cannot be enabled)
   * the method returns early with status `'error'` and an empty snapshot.
   *
   * @param request The extraction plan.
   * @returns The response carrying the snapshot, diagnostics and step trace.
   *   `extractionTrace.protocolCalls` is always empty: nothing in this
   *   method records individual protocol calls.
   */
  async extract(request: ExtractorRequest): Promise<ExtractorResponse> {
    const diagnostics: ExtractorDiagnostic[] = []
    const traceSteps: ExtractionTraceEntry[] = []
    const traceTimings: Array<{ stepId: number; startMs: number; endMs: number }> = []
    const traceCalls: Array<{
      stepId: number
      protocol: string
      method: string
      params: Record<string, unknown>
    }> = []

    /** Append one step to the trace together with its timing. */
    const recordStep = (
      stepId: number,
      factKind: string,
      status: ExtractionTraceEntry['status'],
      startMs: number,
      endMs: number
    ): void => {
      traceSteps.push({ stepId, factKind, status })
      traceTimings.push({ stepId, startMs, endMs })
    }

    /** Turn any thrown value into a printable message. */
    const describeError = (err: unknown): string => (err instanceof Error ? err.message : String(err))

    /** Emit one warning per node that an extraction pass could not handle. */
    const reportNodeErrors = (
      code: DiagnosticCode,
      label: string,
      errors: ReadonlyArray<{ backendNodeId: unknown; reason: unknown }>
    ): void => {
      for (const e of errors) {
        diagnostics.push({
          code,
          severity: 'warning',
          message: `${label} extraction failed for node ${e.backendNodeId}: ${e.reason}`,
        })
      }
    }

    /** Emit an error diagnostic for an extraction pass that threw as a whole. */
    const reportFailure = (code: DiagnosticCode, label: string, err: unknown): void => {
      diagnostics.push({
        code,
        severity: 'error',
        message: `${label} extraction failed: ${describeError(err)}`,
      })
    }

    /** `'partial'` when any diagnostic so far carries one of the given code prefixes. */
    const stepStatus = (...codePrefixes: string[]): 'ok' | 'partial' =>
      diagnostics.some((d) => codePrefixes.some((prefix) => d.code.startsWith(prefix))) ? 'partial' : 'ok'

    let session: CDPSession
    try {
      session = await this.sessionManager.attach()
      // Enable required CDP domains before extraction.
      // Use cached enablement to avoid redundant round-trips.
      await this.sessionManager.enableDomain('DOM')
      await this.sessionManager.enableDomain('CSS')
      await this.sessionManager.enableDomain('Runtime')
    } catch (err) {
      diagnostics.push({
        code: 'IMH_CDP_SESSION_ATTACH_FAILED',
        severity: 'error',
        message: `Failed to attach CDP session: ${describeError(err)}`,
      })
      const emptySnapshot = createEmptySnapshot(request)
      return {
        requestId: request.requestId,
        status: 'error',
        snapshot: emptySnapshot,
        snapshots: [emptySnapshot],
        diagnostics,
        extractionTrace: {
          steps: traceSteps,
          timings: traceTimings,
          protocolCalls: traceCalls,
        },
      }
    }

    // --- Step 1: Extract DOM ---
    // Extract DOM first so the full tree is pushed to the frontend.
    // This ensures backendNodeIds returned by querySelectorAll remain valid.
    const domStepStart = Date.now()
    let domResult: DOMExtractionResult
    try {
      domResult = await extractDOM(session)
      recordStep(1, 'dom-tree', 'ok', domStepStart, Date.now())
    } catch (err) {
      reportFailure('IMH_DOM_EXTRACTION_FAILED', 'DOM', err)
      domResult = { nodes: [], strings: [], nodeIdToIndex: new Map(), rootIndex: 0 }
      recordStep(1, 'dom-tree', 'error', domStepStart, Date.now())
    }

    // --- Step 2: Resolve selectors ---
    const selectorStepStart = Date.now()
    const selectorDiagnosticsStart = diagnostics.length
    // Parallel arrays: entry i of each describes resolved subject i.
    const backendNodeIds: number[] = []
    const nodeIds: number[] = []
    const subjectIds: number[] = []
    const resolvedSubjects: Array<{ id: string; selector: string; backendNodeId: number; nodeId: number }> = []

    // Resolve all selectors sequentially to avoid CDP race conditions
    // when multiple querySelectorAll calls run concurrently.
    for (const subject of request.subjects) {
      let matches: Awaited<ReturnType<typeof resolveSelector>>
      if (subject.nodeId !== undefined && subject.backendNodeId !== undefined) {
        // Callers that already resolved selectors can pass nodeId/backendNodeId
        // to avoid a duplicate querySelectorAll + describeNode pass.
        matches = [{ nodeId: subject.nodeId, backendNodeId: subject.backendNodeId }]
      } else {
        try {
          matches = await resolveSelector(session, subject.selector)
        } catch (err) {
          // Reported as a resolution failure even when the thrown error has
          // an empty message, so it is never mistaken for "no match".
          diagnostics.push({
            code: 'IMH_SELECTOR_RESOLUTION_FAILED',
            severity: 'error',
            message: `Failed to resolve selector "${subject.selector}": ${describeError(err)}`,
            subjectId: subject.id,
            selector: subject.selector,
          })
          continue
        }
      }

      if (matches.length === 0) {
        diagnostics.push({
          code: 'IMH_SELECTOR_NO_MATCH',
          severity: 'warning',
          message: `Selector "${subject.selector}" matched 0 elements.`,
          subjectId: subject.id,
          selector: subject.selector,
        })
        continue
      }

      // Every match becomes its own subject; its index is the numeric subject id.
      for (const match of matches) {
        backendNodeIds.push(match.backendNodeId)
        nodeIds.push(match.nodeId)
        subjectIds.push(resolvedSubjects.length)
        resolvedSubjects.push({ ...subject, backendNodeId: match.backendNodeId, nodeId: match.nodeId })
      }
    }
    recordStep(
      2,
      'selector-resolution',
      diagnostics.length > selectorDiagnosticsStart ? 'partial' : 'ok',
      selectorStepStart,
      Date.now()
    )

    // --- Step 3: Extract Geometry ---
    const geometryStepStart = Date.now()
    let boxRecords: BoxRecord[] = []
    let visualBoxRecords: BoxRecord[] = []
    let fragmentRecords: FragmentRecord[] = []
    let transformRecords: TransformRecord[] = []
    let matrixValues: number[] = []

    if (request.requiredFacts.geometry !== false && backendNodeIds.length > 0) {
      try {
        const { boxes, errors } = await extractBoxModels(session, backendNodeIds, subjectIds)
        boxRecords = boxes
        reportNodeErrors('IMH_BOX_MODEL_PARTIAL', 'Box model', errors)
      } catch (err) {
        reportFailure('IMH_BOX_MODEL_FAILED', 'Box model', err)
      }

      // Extract visual boxes (post-transform coordinates via getBoundingClientRect)
      try {
        const { boxes, errors } = await extractVisualBoxes(session, backendNodeIds, subjectIds)
        visualBoxRecords = boxes
        reportNodeErrors('IMH_VISUAL_BOX_PARTIAL', 'Visual box', errors)
      } catch (err) {
        reportFailure('IMH_VISUAL_BOX_FAILED', 'Visual box', err)
      }

      if (request.requiredFacts.fragments) {
        try {
          const { fragments, errors } = await extractFragments(session, backendNodeIds, subjectIds)
          fragmentRecords = fragments
          reportNodeErrors('IMH_FRAGMENT_PARTIAL', 'Fragment', errors)
        } catch (err) {
          reportFailure('IMH_FRAGMENT_FAILED', 'Fragment', err)
        }
      }

      try {
        const { transforms, matrices, errors } = await extractTransforms(session, backendNodeIds, subjectIds)
        transformRecords = transforms
        matrixValues = matrices
        reportNodeErrors('IMH_TRANSFORM_PARTIAL', 'Transform', errors)
      } catch (err) {
        reportFailure('IMH_TRANSFORM_FAILED', 'Transform', err)
      }
    }
    // Visual-box diagnostics count towards the geometry step as well.
    recordStep(
      3,
      'geometry',
      stepStatus('IMH_BOX_MODEL', 'IMH_VISUAL_BOX', 'IMH_FRAGMENT', 'IMH_TRANSFORM'),
      geometryStepStart,
      Date.now()
    )

    // --- Step 4: Extract Styles ---
    const styleStepStart = Date.now()
    let styleRecords: StyleRecord[] = []

    if (request.requiredFacts.styles !== false && backendNodeIds.length > 0) {
      const styleConfig: StyleExtractionConfig = Array.isArray(request.requiredFacts.styles)
        ? { all: false, properties: request.requiredFacts.styles }
        : { all: true }

      try {
        const { styles, errors, strings: updatedStrings } = await extractStyles(
          session,
          backendNodeIds,
          subjectIds,
          styleConfig,
          domResult.strings
        )
        styleRecords = styles
        // Style extraction may extend the shared string table; keep its version.
        domResult.strings = updatedStrings
        reportNodeErrors('IMH_STYLE_PARTIAL', 'Style', errors)
      } catch (err) {
        reportFailure('IMH_STYLE_FAILED', 'Style', err)
      }
    }
    recordStep(4, 'styles', stepStatus('IMH_STYLE'), styleStepStart, Date.now())

    // --- Step 5: Extract Topology ---
    const topologyStepStart = Date.now()
    let topologyResult: TopologyExtractionResult = {
      scroll: [],
      clipping: [],
      topology: {
        containingBlockOf: [],
        nearestPositionedAncestorOf: [],
        scrollContainerOf: [],
        stackingContextOf: [],
        formattingContextOf: [],
        clippingRootOf: [],
        paintOrderBucket: [],
        paintOrderIndex: [],
      },
    }

    if (request.requiredFacts.topology !== false && backendNodeIds.length > 0) {
      try {
        const { result, errors } = await extractTopology(session, backendNodeIds, subjectIds)
        topologyResult = result
        reportNodeErrors('IMH_TOPOLOGY_PARTIAL', 'Topology', errors)
      } catch (err) {
        reportFailure('IMH_TOPOLOGY_FAILED', 'Topology', err)
      }
    }
    recordStep(5, 'topology', stepStatus('IMH_TOPOLOGY'), topologyStepStart, Date.now())

    // --- Build Geometry World Snapshot ---
    const snapshot = buildSnapshot(
      request,
      domResult,
      boxRecords,
      visualBoxRecords,
      fragmentRecords,
      transformRecords,
      matrixValues,
      styleRecords,
      topologyResult,
      resolvedSubjects
    )

    // Determine overall status
    const hasErrors = diagnostics.some((d) => d.severity === 'error')
    const hasWarnings = diagnostics.some((d) => d.severity === 'warning')
    const status: ExtractorResponse['status'] = hasErrors ? 'error' : hasWarnings ? 'partial' : 'ok'

    // Add provenance for extraction steps: one entry per recorded step,
    // numbered sequentially from 0.
    snapshot.provenance = traceSteps.map(
      (step, factId): ProvenanceEntry => ({
        factId,
        extractionStepId: step.stepId,
        sourceKind: 1, // CDP protocol
        sourceRef: step.stepId,
      })
    )

    return {
      requestId: request.requestId,
      status,
      snapshot,
      snapshots: [snapshot],
      diagnostics,
      extractionTrace: {
        steps: traceSteps,
        timings: traceTimings,
        protocolCalls: traceCalls,
      },
    }
  }
}

/**
 * Build a geometry world snapshot from extracted raw data.
 *
 * Record arrays are transposed into column-oriented tables. The subjects
 * table gets one row per entry of `resolvedSubjects`; a subject's numeric id
 * is its index in that array, which is how `subjectId` on boxes and
 * fragments is matched back to it.
 *
 * `frames`, `rects`, `provenance` and `confidence` are returned empty. The
 * `env` fields that the request does not carry (`deviceScaleFactor`,
 * `hover`, `reducedMotion`, `locale`, `writingMode`) and the browser
 * identity in `source` are fixed placeholder values, not measurements.
 *
 * @param request Source of the scene id, environment and URL.
 * @param dom DOM extraction result; its `strings` become the snapshot string table.
 * @param boxes Box-model records.
 * @param visualBoxes Visual box records (same record shape as `boxes`).
 * @param fragments Fragment records.
 * @param transforms Transform records.
 * @param matrices Flat matrix values referenced by the transform records.
 * @param styles Style records.
 * @param topology Topology, scroll and clipping results.
 * @param resolvedSubjects Subjects in resolution order.
 * @returns The assembled snapshot.
 */
function buildSnapshot(
  request: ExtractorRequest,
  dom: DOMExtractionResult,
  boxes: BoxRecord[],
  visualBoxes: BoxRecord[],
  fragments: FragmentRecord[],
  transforms: TransformRecord[],
  matrices: number[],
  styles: StyleRecord[],
  topology: TopologyExtractionResult,
  resolvedSubjects: Array<{ id: string; selector: string; backendNodeId: number; nodeId: number }>
): GeometryWorldSnapshot {
  // When a subject has several box records, the last one wins.
  const boxIdBySubject = new Map<number, number>()
  for (const box of boxes) {
    boxIdBySubject.set(box.subjectId, box.boxId)
  }

  // First fragment id (in record order) and fragment count per subject.
  const fragmentSpanBySubject = new Map<number, { firstId: number; count: number }>()
  for (const fragment of fragments) {
    const span = fragmentSpanBySubject.get(fragment.subjectId)
    if (span === undefined) {
      fragmentSpanBySubject.set(fragment.subjectId, { firstId: fragment.fragmentId, count: 1 })
    } else {
      span.count += 1
    }
  }

  // Build subjects table
  const subjectsTable: GeometryWorldSnapshot['subjects'] = {
    ids: [],
    domNodeId: [],
    subjectKind: [],
    primaryBoxId: [],
    firstFragmentId: [],
    fragmentCount: [],
    firstTextRunId: [],
    textRunCount: [],
  }
  resolvedSubjects.forEach((subject, subjectIndex) => {
    const span = fragmentSpanBySubject.get(subjectIndex)
    subjectsTable.ids.push(subjectIndex)
    subjectsTable.domNodeId.push(subject.backendNodeId)
    subjectsTable.subjectKind.push(1) // element
    // 0 stands in for "no box" / "no fragments".
    subjectsTable.primaryBoxId.push(boxIdBySubject.get(subjectIndex) ?? 0)
    subjectsTable.firstFragmentId.push(span?.firstId ?? 0)
    subjectsTable.fragmentCount.push(span?.count ?? 0)
    // Text runs are not extracted here.
    subjectsTable.firstTextRunId.push(0)
    subjectsTable.textRunCount.push(0)
  })

  const fragmentsTable: GeometryWorldSnapshot['fragments'] = {
    fragmentId: fragments.map((f) => f.fragmentId),
    subjectId: fragments.map((f) => f.subjectId),
    fragmentKind: fragments.map((f) => f.fragmentKind),
    boxLeft: fragments.map((f) => f.boxLeft),
    boxTop: fragments.map((f) => f.boxTop),
    boxRight: fragments.map((f) => f.boxRight),
    boxBottom: fragments.map((f) => f.boxBottom),
    lineIndex: fragments.map((f) => f.lineIndex),
    flowIndex: fragments.map((f) => f.flowIndex),
    parentFragmentId: fragments.map((f) => f.parentFragmentId),
  }

  const transformsTable: GeometryWorldSnapshot['transforms'] = {
    transformId: transforms.map((t) => t.transformId),
    subjectId: transforms.map((t) => t.subjectId),
    matrixStart: transforms.map((t) => t.matrixStart),
    matrixLength: transforms.map((t) => t.matrixLength),
    originX: transforms.map((t) => t.originX),
    originY: transforms.map((t) => t.originY),
  }

  const stylesTable: GeometryWorldSnapshot['styles'] = {
    subjectId: styles.map((s) => s.subjectId),
    display: styles.map((s) => s.display),
    position: styles.map((s) => s.position),
    zIndexKind: styles.map((s) => s.zIndexKind),
    zIndexValue: styles.map((s) => s.zIndexValue),
    overflowX: styles.map((s) => s.overflowX),
    overflowY: styles.map((s) => s.overflowY),
    opacity: styles.map((s) => s.opacity),
    visibility: styles.map((s) => s.visibility),
    containFlags: styles.map((s) => s.containFlags),
    pointerEvents: styles.map((s) => s.pointerEvents),
    lineHeight: styles.map((s) => s.lineHeight),
    fontFamilyStringId: styles.map((s) => s.fontFamilyStringId),
    fontSize: styles.map((s) => s.fontSize),
    fontWeight: styles.map((s) => s.fontWeight),
  }

  return {
    sceneId: request.requestId,
    snapshotId: 'default',
    env: {
      viewportWidth: request.env.viewportWidth,
      viewportHeight: request.env.viewportHeight,
      deviceScaleFactor: 1,
      colorScheme: request.env.colorScheme,
      pointer: request.env.pointer,
      hover: false,
      reducedMotion: 'no-preference',
      locale: 'en',
      writingMode: 'horizontal-tb',
    },
    source: {
      url: request.sceneTarget.url,
      browserName: 'chromium',
      browserVersion: '',
      engine: 'chromium-cdp',
      extractedAt: new Date().toISOString(),
    },
    strings: dom.strings,
    subjects: subjectsTable,
    dom,
    frames: {
      frameId: [],
      frameKind: [],
      ownerSubjectId: [],
      parentFrameId: [],
      originX: [],
      originY: [],
      axisMatrixStart: [],
      clipRectId: [],
      scrollContainerId: [],
      writingMode: [],
    },
    matrices: { values: matrices },
    rects: {
      rectId: [],
      left: [],
      top: [],
      right: [],
      bottom: [],
    },
    boxes: boxRecordsToColumns(boxes),
    visualBoxes: boxRecordsToColumns(visualBoxes),
    fragments: fragmentsTable,
    transforms: transformsTable,
    styles: stylesTable,
    topology: topology.topology,
    scroll: topology.scroll,
    clipping: topology.clipping,
    provenance: [],
    confidence: [],
  }
}

/** Transpose box records into the column layout shared by `boxes` and `visualBoxes`. */
function boxRecordsToColumns(records: BoxRecord[]): GeometryWorldSnapshot['boxes'] {
  return {
    boxId: records.map((b) => b.boxId),
    subjectId: records.map((b) => b.subjectId),
    frameId: records.map((b) => b.frameId),
    borderLeft: records.map((b) => b.borderLeft),
    borderTop: records.map((b) => b.borderTop),
    borderRight: records.map((b) => b.borderRight),
    borderBottom: records.map((b) => b.borderBottom),
    paddingLeft: records.map((b) => b.paddingLeft),
    paddingTop: records.map((b) => b.paddingTop),
    paddingRight: records.map((b) => b.paddingRight),
    paddingBottom: records.map((b) => b.paddingBottom),
    contentLeft: records.map((b) => b.contentLeft),
    contentTop: records.map((b) => b.contentTop),
    contentRight: records.map((b) => b.contentRight),
    contentBottom: records.map((b) => b.contentBottom),
  }
}

/**
 * Create an empty snapshot for error cases.
 *
 * Carries the request's scene id, environment and URL but leaves every
 * table empty. Each call returns freshly allocated arrays, so the result
 * can be mutated without affecting other snapshots.
 *
 * @param request Source of the scene id, environment and URL.
 * @returns A snapshot with no subjects, geometry, styles or topology.
 */
function createEmptySnapshot(request: ExtractorRequest): GeometryWorldSnapshot {
  return {
    sceneId: request.requestId,
    snapshotId: 'default',
    env: {
      viewportWidth: request.env.viewportWidth,
      viewportHeight: request.env.viewportHeight,
      deviceScaleFactor: 1,
      colorScheme: request.env.colorScheme,
      pointer: request.env.pointer,
      hover: false,
      reducedMotion: 'no-preference',
      locale: 'en',
      writingMode: 'horizontal-tb',
    },
    source: {
      url: request.sceneTarget.url,
      browserName: 'chromium',
      browserVersion: '',
      engine: 'chromium-cdp',
      extractedAt: new Date().toISOString(),
    },
    strings: [],
    subjects: {
      ids: [],
      domNodeId: [],
      subjectKind: [],
      primaryBoxId: [],
      firstFragmentId: [],
      fragmentCount: [],
      firstTextRunId: [],
      textRunCount: [],
    },
    dom: { nodes: [], strings: [], nodeIdToIndex: new Map(), rootIndex: 0 },
    frames: {
      frameId: [],
      frameKind: [],
      ownerSubjectId: [],
      parentFrameId: [],
      originX: [],
      originY: [],
      axisMatrixStart: [],
      clipRectId: [],
      scrollContainerId: [],
      writingMode: [],
    },
    matrices: { values: [] },
    rects: {
      rectId: [],
      left: [],
      top: [],
      right: [],
      bottom: [],
    },
    boxes: emptyBoxColumns(),
    visualBoxes: emptyBoxColumns(),
    fragments: {
      fragmentId: [],
      subjectId: [],
      fragmentKind: [],
      boxLeft: [],
      boxTop: [],
      boxRight: [],
      boxBottom: [],
      lineIndex: [],
      flowIndex: [],
      parentFragmentId: [],
    },
    transforms: {
      transformId: [],
      subjectId: [],
      matrixStart: [],
      matrixLength: [],
      originX: [],
      originY: [],
    },
    styles: {
      subjectId: [],
      display: [],
      position: [],
      zIndexKind: [],
      zIndexValue: [],
      overflowX: [],
      overflowY: [],
      opacity: [],
      visibility: [],
      containFlags: [],
      pointerEvents: [],
      lineHeight: [],
      fontFamilyStringId: [],
      fontSize: [],
      fontWeight: [],
    },
    topology: {
      containingBlockOf: [],
      nearestPositionedAncestorOf: [],
      scrollContainerOf: [],
      stackingContextOf: [],
      formattingContextOf: [],
      clippingRootOf: [],
      paintOrderBucket: [],
      paintOrderIndex: [],
    },
    scroll: [],
    clipping: [],
    provenance: [],
    confidence: [],
  }
}

/** Fresh, empty column set for the `boxes` / `visualBoxes` tables. */
function emptyBoxColumns(): GeometryWorldSnapshot['boxes'] {
  return {
    boxId: [],
    subjectId: [],
    frameId: [],
    borderLeft: [],
    borderTop: [],
    borderRight: [],
    borderBottom: [],
    paddingLeft: [],
    paddingTop: [],
    paddingRight: [],
    paddingBottom: [],
    contentLeft: [],
    contentTop: [],
    contentRight: [],
    contentBottom: [],
  }
}