145 lines
4.6 KiB
TypeScript
145 lines
4.6 KiB
TypeScript
|
|
/**
 * Plan deduplication and batching.
 *
 * Multiple environment cases, state snapshots, or timeline modes may produce
 * identical extraction needs. This module collapses duplicate plans and groups
 * compatible requests into batches so the runtime can issue bulk calls.
 */
|
||
|
|
|
||
|
|
import type { Environment, StateSnapshot, TimelineSnapshot } from 'imhotep-core'
|
||
|
|
import type { RequiredFacts, Diagnostic } from './requirements.js'
|
||
|
|
import type { SelectorPlan } from './selector.js'
|
||
|
|
|
||
|
|
// ---------------------------------------------------------------------------
// Extraction Request Shape
// ---------------------------------------------------------------------------
|
||
|
|
|
||
|
|
/**
 * Describes one unit of work handed to the extractor runtime.
 *
 * Each request pairs a single env case with one state plan and one timeline
 * plan. Requests whose env and subject sets are compatible may still be merged
 * by the runtime into a shared browser session.
 */
export interface ExtractionRequest {
  /** Identifier that is unique to this request. */
  requestId: string

  /** Identifier of the environment case this request belongs to. */
  envCaseId: string

  /** Environment parameters (viewport, colorScheme, etc). */
  env: Partial<Environment>

  /** State snapshots that must be captured under this environment. */
  stateSnapshots: { id: string; kind: string; name?: string }[]

  /** How the timeline is sampled for this request. */
  timeline: { mode: string; samples?: number[] }

  /** Subjects whose selectors have to be resolved. */
  subjects: SelectorPlan[]

  /** Combined set of facts needed by every active clause in this context. */
  requiredFacts: RequiredFacts

  /** Diagnostics collected while this request was being built. */
  diagnostics: Diagnostic[]
}
|
||
|
|
|
||
|
|
// ---------------------------------------------------------------------------
// Deduplication
// ---------------------------------------------------------------------------
|
||
|
|
|
||
|
|
/**
 * Remove duplicate extraction requests.
 *
 * Two requests count as duplicates when `requestKey` yields the same string
 * for both, i.e. when their env, state snapshot ids, timeline mode, subject
 * selectors, and required facts match. The first occurrence is kept (so its
 * requestId survives) and the relative order of the kept requests is preserved.
 *
 * @param requests - Requests to filter; the input array is not modified.
 * @returns A new array holding the first request seen for each distinct key.
 */
export function deduplicateRequests(requests: readonly ExtractionRequest[]): ExtractionRequest[] {
  // Only key membership is ever queried, so a Set is sufficient.
  const seenKeys = new Set<string>()
  const unique: ExtractionRequest[] = []

  for (const request of requests) {
    const key = requestKey(request)
    if (seenKeys.has(key)) continue
    seenKeys.add(key)
    unique.push(request)
  }

  return unique
}
|
||
|
|
|
||
|
|
/**
 * Build a deterministic string key for an extraction request.
 *
 * The key covers the env, the sorted state snapshot ids, the timeline mode,
 * the sorted subject selectors, and the serialized required facts. Plain
 * objects are re-keyed in sorted order before stringification, so two requests
 * whose env carries the same values in a different property order produce the
 * same key.
 *
 * NOTE(review): `timeline.samples` is not part of the key, so requests that
 * share a mode but differ in samples are treated as duplicates — confirm this
 * is intended.
 *
 * @param req - Request to fingerprint; it is not modified.
 * @returns A JSON string usable as a map/set key.
 */
function requestKey(req: ExtractionRequest): string {
  return JSON.stringify(
    sortKeysDeep({
      env: req.env,
      stateSnapshotIds: req.stateSnapshots.map((s) => s.id).sort(),
      timelineMode: req.timeline.mode,
      subjectSelectors: req.subjects.map((s) => s.selector).sort(),
      requiredFacts: serializeRequiredFacts(req.requiredFacts),
    }),
  )
}

/**
 * Return a copy of `value` in which every plain object has its keys in sorted
 * order, recursing through arrays and nested plain objects.
 *
 * Anything that is not an array or a plain object (primitives, class
 * instances, objects with a custom prototype) is returned unchanged so that
 * `JSON.stringify` treats it exactly as it would have without this step.
 */
function sortKeysDeep(value: unknown): unknown {
  if (Array.isArray(value)) {
    return value.map(sortKeysDeep)
  }
  if (value === null || typeof value !== 'object') {
    return value
  }
  const proto: unknown = Object.getPrototypeOf(value)
  if (proto !== Object.prototype && proto !== null) {
    return value
  }
  const source = value as Record<string, unknown>
  const sorted: Record<string, unknown> = {}
  for (const key of Object.keys(source).sort()) {
    sorted[key] = sortKeysDeep(source[key])
  }
  return sorted
}

/**
 * Serialize RequiredFacts into a plain, sortable object.
 *
 * `styles` and `topology` are copied into arrays and sorted so that their
 * iteration order does not influence the result; every other field is passed
 * through as-is.
 *
 * @param facts - Required facts to serialize; it is not modified.
 * @returns A plain object with one entry per fact category.
 */
function serializeRequiredFacts(facts: RequiredFacts): Record<string, unknown> {
  return {
    geometry: facts.geometry,
    fragments: facts.fragments,
    styles: Array.from(facts.styles).sort(),
    topology: Array.from(facts.topology).sort(),
    text: facts.text,
    scroll: facts.scroll,
    clipping: facts.clipping,
    paint: facts.paint,
    visibility: facts.visibility,
    transforms: facts.transforms,
  }
}
|
||
|
|
|
||
|
|
// ---------------------------------------------------------------------------
// Batching
// ---------------------------------------------------------------------------
|
||
|
|
|
||
|
|
/**
 * A set of extraction requests whose environments are compatible, allowing
 * them to share one browser session.
 *
 * The V1 batching rule is deliberately simple: only requests with exactly the
 * same env (viewport, colorScheme, pointer, etc) are batched together.
 */
export interface ExtractionBatch {
  /** Identifier of this batch. */
  batchId: string

  /** Environment common to every request in the batch. */
  env: Partial<Environment>

  /** The requests that were grouped into this batch. */
  requests: ExtractionRequest[]
}
|
||
|
|
|
||
|
|
/**
 * Group deduplicated requests into batches by environment compatibility.
 *
 * Requests with identical env values end up in the same batch. Envs are
 * compared through a canonical JSON key whose object keys are sorted, so the
 * order in which env properties were assigned does not split a batch.
 * Batches are returned in order of first appearance and are numbered
 * `batch_1`, `batch_2`, ...; requests keep their input order within a batch.
 *
 * @param requests - Requests to group; the input array is not modified.
 * @returns One batch per distinct env. Each batch's `env` is a detached copy
 *   rebuilt from the canonical key, not a reference to any request's env.
 */
export function batchRequests(requests: ExtractionRequest[]): ExtractionBatch[] {
  const batchesByEnv = new Map<string, ExtractionBatch>()

  for (const request of requests) {
    const envKey = canonicalEnvKey(request.env)
    let batch = batchesByEnv.get(envKey)
    if (batch === undefined) {
      batch = {
        // Map size grows by one per new env, giving 1-based sequential ids.
        batchId: `batch_${batchesByEnv.size + 1}`,
        env: JSON.parse(envKey) as Partial<Environment>,
        requests: [],
      }
      batchesByEnv.set(envKey, batch)
    }
    batch.requests.push(request)
  }

  // Map iteration follows insertion order, i.e. first appearance of each env.
  return Array.from(batchesByEnv.values())
}

/**
 * Serialize an env to JSON with the keys of every plain object sorted, so that
 * property insertion order does not affect the resulting string.
 *
 * Values that are neither arrays nor plain objects are handed to
 * `JSON.stringify` untouched.
 */
function canonicalEnvKey(env: unknown): string {
  const normalize = (value: unknown): unknown => {
    if (Array.isArray(value)) {
      return value.map(normalize)
    }
    if (value === null || typeof value !== 'object') {
      return value
    }
    const proto: unknown = Object.getPrototypeOf(value)
    if (proto !== Object.prototype && proto !== null) {
      return value
    }
    const source = value as Record<string, unknown>
    const sorted: Record<string, unknown> = {}
    for (const key of Object.keys(source).sort()) {
      sorted[key] = normalize(source[key])
    }
    return sorted
  }
  return JSON.stringify(normalize(env))
}
|