/**
 * Plan deduplication and batching.
 *
 * Multiple environment cases, state snapshots, or timeline modes may produce
 * identical extraction needs. This module collapses duplicate plans and groups
 * compatible requests into batches so the runtime can issue bulk calls.
 */

import type { Environment, StateSnapshot, TimelineSnapshot } from 'imhotep-core'
import type { RequiredFacts, Diagnostic } from './requirements.js'
import type { SelectorPlan } from './selector.js'

// ---------------------------------------------------------------------------
// Extraction Request Shape
// ---------------------------------------------------------------------------

/**
 * A single unit of work for the extractor runtime.
 *
 * One request corresponds to one env case + one state plan + one timeline plan.
 * The runtime may still merge multiple requests into a single browser session
 * if their env and subject sets are compatible.
 */
export interface ExtractionRequest {
  /** Unique id for this request. */
  requestId: string
  /** Environment case id. */
  envCaseId: string
  /** Environment parameters (viewport, colorScheme, etc). */
  env: Partial<Environment>
  /** State snapshots to capture under this environment. */
  stateSnapshots: Array<{ id: string; kind: string; name?: string }>
  /** Timeline sampling plan. */
  timeline: { mode: string; samples?: number[] }
  /** Subjects whose selectors must be resolved. */
  subjects: SelectorPlan[]
  /** Union of all facts needed by active clauses in this context. */
  requiredFacts: RequiredFacts
  /** Diagnostics accumulated while building this request. */
  diagnostics: Diagnostic[]
}

// ---------------------------------------------------------------------------
// Deduplication
// ---------------------------------------------------------------------------

/**
 * Remove duplicate extraction requests.
 *
 * Two requests are duplicates when their env, state snapshot ids, timeline mode,
 * subject selectors, and required facts are identical. The first requestId is kept.
 * The comparison ignores object property order and the order of snapshot ids
 * and subject selectors.
 *
 * @param requests - Requests to deduplicate; the array is not modified.
 * @returns A new array holding the first occurrence of each distinct request,
 *   in input order.
 */
export function deduplicateRequests(requests: ExtractionRequest[]): ExtractionRequest[] {
  const seenKeys = new Set<string>()
  const deduped: ExtractionRequest[] = []

  for (const req of requests) {
    const key = requestKey(req)
    if (seenKeys.has(key)) continue
    seenKeys.add(key)
    deduped.push(req)
  }

  return deduped
}

/**
 * JSON-serialize a value with object keys sorted at every nesting level, so
 * that objects holding the same properties in a different insertion order
 * produce the same string. Array element order is preserved.
 */
function stableStringify(value: unknown): string {
  return JSON.stringify(value, (_key: string, val: unknown): unknown => {
    if (val === null || typeof val !== 'object' || Array.isArray(val)) return val
    const source = val as Record<string, unknown>
    const sorted: Record<string, unknown> = {}
    for (const name of Object.keys(source).sort()) {
      sorted[name] = source[name]
    }
    return sorted
  })
}

/**
 * Build a deterministic string key for an extraction request.
 *
 * Snapshot ids and subject selectors are sorted so their order does not affect
 * the key. Only `timeline.mode` contributes to the key; `timeline.samples` is
 * not included, so requests that differ only in their samples share a key.
 */
function requestKey(req: ExtractionRequest): string {
  return stableStringify({
    env: req.env,
    // `map` returns a fresh array, so sorting does not mutate the request.
    stateSnapshotIds: req.stateSnapshots.map((s) => s.id).sort(),
    timelineMode: req.timeline.mode,
    subjectSelectors: req.subjects.map((s) => s.selector).sort(),
    requiredFacts: serializeRequiredFacts(req.requiredFacts),
  })
}

/**
 * Serialize RequiredFacts into a plain, sortable object.
 *
 * `styles` and `topology` are copied into sorted arrays; every other field is
 * passed through unchanged.
 */
function serializeRequiredFacts(facts: RequiredFacts): Record<string, unknown> {
  return {
    geometry: facts.geometry,
    fragments: facts.fragments,
    styles: Array.from(facts.styles).sort(),
    topology: Array.from(facts.topology).sort(),
    text: facts.text,
    scroll: facts.scroll,
    clipping: facts.clipping,
    paint: facts.paint,
    visibility: facts.visibility,
    transforms: facts.transforms,
  }
}

// ---------------------------------------------------------------------------
// Batching
// ---------------------------------------------------------------------------

/**
 * A batch is a group of extraction requests that can share a single browser
 * session because their environments are compatible.
 *
 * For V1 the batching rule is simple: requests with the exact same env
 * (viewport, colorScheme, pointer, etc) can be batched.
 */
export interface ExtractionBatch {
  /** Id for the batch. */
  batchId: string
  /** Shared environment for every request in the batch. */
  env: Partial<Environment>
  /** Requests grouped into this batch. */
  requests: ExtractionRequest[]
}

/**
 * Group deduplicated requests into batches by environment compatibility.
 *
 * Requests with identical env objects end up in the same batch; property order
 * inside the env does not matter. Batches are emitted in order of first
 * appearance of their env and are numbered `batch_1`, `batch_2`, ...
 *
 * @param requests - Requests to group; the array is not modified.
 * @returns One batch per distinct env. Each batch's `env` is a copy produced by
 *   a JSON round-trip, so values JSON cannot represent are not preserved in it.
 */
export function batchRequests(requests: ExtractionRequest[]): ExtractionBatch[] {
  const groups = new Map<string, ExtractionRequest[]>()

  for (const req of requests) {
    const envKey = stableStringify(req.env)
    const group = groups.get(envKey)
    if (group) {
      group.push(req)
    } else {
      groups.set(envKey, [req])
    }
  }

  let batchCounter = 0
  const batches: ExtractionBatch[] = []
  for (const [envKey, group] of groups) {
    batches.push({
      batchId: `batch_${++batchCounter}`,
      // The key is the env's own canonical JSON, so parsing it yields a fresh copy.
      env: JSON.parse(envKey) as Partial<Environment>,
      requests: group,
    })
  }

  return batches
}