From 8ac69254d4cf4109477cef2d462e56bed5113bd2 Mon Sep 17 00:00:00 2001
From: John Dvorak
Date: Fri, 22 May 2026 15:02:20 -0700
Subject: [PATCH] feat: fact planning derived from predicate specs with fulfillment tracking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

predicate-specs.ts:
- Add FactCategory enum (geometry, topology, styles, fragments, domAncestry,
  clipping, scroll, visibility, transforms, text)
- Add FactPlan interface with predicateFacts provenance map and per-category
  fulfillment tracking (fulfilled/failed/approximated/skipped)
- Add requiredFactToCategory() — centralized mapping from required-fact
  strings to FactCategory values, replacing scattered inline checks
- Add planRequiredFacts() — builds a complete FactPlan from collected
  predicate facts + AST structural analysis (CssLengthMetrics, domAncestry)
- Add createEmptyFactPlan() factory

extraction.ts:
- computeRequiredFacts returns FactPlan (was inline record), uses
  planRequiredFacts from spec infrastructure
- extractWorld/extractWorldFastGeometry/extractWorldCdp accept FactPlan
  instead of inline requiredFacts record
- After successful extraction, populate FactPlan.fulfillment with
  per-category 'fulfilled' status, providing a structured audit trail of
  which facts were requested and obtained

Adding a new predicate with new required facts now requires only a spec
entry — the fact planner, extraction engine, and fulfillment tracker all
derive behavior from the spec table automatically.

658 tests pass.
---
Review notes (ignored by `git am`; not part of the commit message):

  * requiredFactToCategory() now maps 'styles' / 'computedStyle' to Styles
    and 'subject.fragmentCount' / 'subject.firstFragmentId' to Fragments.
    The inline planner removed from computeRequiredFacts below checked
    exactly these strings; without the mapping they fell through to null
    and plan.styles / plan.fragments silently became false.
  * planRequiredFacts() sets plan.topology whenever a clipping or scroll
    fact is requested. The removed `needsTopology` expression was true for
    every `topology.*` and `*.clipChain` fact, and the extraction entry
    points in this patch only changed the parameter type, not how the
    `topology` flag is consumed.
  * The category switch gained a DomAncestry case and a `never` guard so a
    future FactCategory member cannot be dropped silently.
  * This copy of the patch had its type arguments stripped. They are
    restored on the added predicate-specs.ts lines
    (Map<string, string[]>, Partial<Record<FactCategory, FactStatus>>,
    Set<string>); the string[] element type is taken from the doc comment
    and should be confirmed against getPredicateRequiredFacts. Context and
    removed lines in extraction.ts still show the stripped form (bare
    `Map` / `new Set()`); restore them from the target file before applying.
  * Indentation was reconstructed from a whitespace-collapsed copy; use
    `git apply --ignore-whitespace` if context lines do not match. The
    post-image blob id on the predicate-specs.ts `index` line is stale.
  * Hunk header and diffstat for predicate-specs.ts were updated for the
    15 extra added lines (116 -> 131).

 packages/imhotep-core/src/predicate-specs.ts  | 131 ++++++++++++++++++
 packages/imhotep-playwright/src/extraction.ts |  56 +++++----
 2 files changed, 162 insertions(+), 25 deletions(-)

diff --git a/packages/imhotep-core/src/predicate-specs.ts b/packages/imhotep-core/src/predicate-specs.ts
index 9c9c6e8..f1bea6f 100644
--- a/packages/imhotep-core/src/predicate-specs.ts
+++ b/packages/imhotep-core/src/predicate-specs.ts
@@ -575,3 +575,134 @@ export function collectTopologyPredicateNames(): string[] {
 export function collectQuantifierCompatiblePredicateNames(): string[] {
   return PREDICATE_SPECS.filter(s => s.quantifierCompatible).flatMap(s => [s.name, ...s.aliases])
 }
+
+// ---------------------------------------------------------------------------
+// Fact Planning
+// ---------------------------------------------------------------------------
+
+/** Closed set of extraction fact categories requested by predicates. */
+export enum FactCategory {
+  Geometry = 'geometry',
+  Topology = 'topology',
+  Styles = 'styles',
+  Fragments = 'fragments',
+  DomAncestry = 'domAncestry',
+  Clipping = 'clipping',
+  Scroll = 'scroll',
+  Visibility = 'visibility',
+  Transforms = 'transforms',
+  Text = 'text',
+}
+
+/** Outcome of a requested fact category after extraction. */
+export type FactStatus = 'fulfilled' | 'failed' | 'approximated' | 'skipped'
+
+/** Plan: which facts are needed + per-category fulfillment tracking. */
+export interface FactPlan {
+  geometry: boolean
+  topology: boolean
+  styles: boolean
+  fragments: boolean
+  domAncestry: boolean
+  clipping: boolean
+  scroll: boolean
+  visibility: boolean
+  transforms: boolean
+  text: boolean
+  /** Maps each predicate name to the required-fact strings that triggered it. */
+  predicateFacts: Map<string, string[]>
+  /** Per-category fulfillment status populated after extraction. */
+  fulfillment: Partial<Record<FactCategory, FactStatus>>
+}
+
+export function createEmptyFactPlan(): FactPlan {
+  return {
+    geometry: true,
+    topology: false,
+    styles: false,
+    fragments: false,
+    domAncestry: false,
+    clipping: false,
+    scroll: false,
+    visibility: false,
+    transforms: false,
+    text: false,
+    predicateFacts: new Map(),
+    fulfillment: {},
+  }
+}
+
+/**
+ * Map a required-fact string to the FactCategory it implies.
+ * Centralizes the mapping previously scattered across computeRequiredFacts.
+ * Returns null for fact strings that imply no category.
+ */
+export function requiredFactToCategory(fact: string): FactCategory | null {
+  if (fact === 'subject.primaryBox' || fact === 'reference.primaryBox') {
+    return FactCategory.Geometry
+  }
+  // Style and fragment facts were honored by the previous inline planner.
+  if (fact === 'styles' || fact === 'computedStyle') {
+    return FactCategory.Styles
+  }
+  if (fact === 'subject.fragmentCount' || fact === 'subject.firstFragmentId') {
+    return FactCategory.Fragments
+  }
+  if (fact.endsWith('.clipChain')) {
+    return FactCategory.Clipping
+  }
+  if (fact.startsWith('topology.')) {
+    const sub = fact.slice('topology.'.length)
+    if (sub === 'scrollContainerOf') return FactCategory.Scroll
+    if (sub === 'clippingRootOf') return FactCategory.Clipping
+    return FactCategory.Topology
+  }
+  return null
+}
+
+/**
+ * Build a FactPlan from the required-fact strings collected from all
+ * predicates in the formula AST.
+ *
+ * Clipping and scroll facts also enable `topology`: the previous planner
+ * reported every `topology.*` and `.clipChain` requirement under that flag.
+ */
+export function planRequiredFacts(
+  predicateFacts: Map<string, string[]>,
+  needsCssLengthMetrics: boolean,
+  needsDomAncestry: boolean,
+): FactPlan {
+  const plan = createEmptyFactPlan()
+  plan.predicateFacts = predicateFacts
+  plan.domAncestry = needsDomAncestry
+
+  const allFacts = new Set<string>()
+  for (const facts of predicateFacts.values()) {
+    for (const f of facts) allFacts.add(f)
+  }
+
+  for (const f of allFacts) {
+    const cat = requiredFactToCategory(f)
+    if (cat === null) continue
+    switch (cat) {
+      case FactCategory.Geometry: break
+      case FactCategory.Topology: plan.topology = true; break
+      case FactCategory.Clipping: plan.clipping = true; plan.topology = true; break
+      case FactCategory.Scroll: plan.scroll = true; plan.topology = true; break
+      case FactCategory.Styles: plan.styles = true; break
+      case FactCategory.Fragments: plan.fragments = true; break
+      case FactCategory.DomAncestry: plan.domAncestry = true; break
+      case FactCategory.Visibility: plan.visibility = true; break
+      case FactCategory.Transforms: plan.transforms = true; break
+      case FactCategory.Text: plan.text = true; break
+      default: {
+        const unhandled: never = cat
+        throw new Error(`Unhandled fact category: ${String(unhandled)}`)
+      }
+    }
+  }
+
+  if (needsCssLengthMetrics) plan.styles = true
+
+  return plan
+}
diff --git a/packages/imhotep-playwright/src/extraction.ts b/packages/imhotep-playwright/src/extraction.ts
index c04f933..09ea5a0 100644
--- a/packages/imhotep-playwright/src/extraction.ts
+++ b/packages/imhotep-playwright/src/extraction.ts
@@ -58,6 +58,10 @@ import {
   isUnaryPredicate,
   isVariableArityPredicate,
   getPredicateSpec,
+  planRequiredFacts,
+  FactCategory,
+  type FactPlan,
+  type FactStatus,
 } from 'imhotep-core'
 import { buildGeometryWorld } from './world-builder.js'
 import {
@@ -201,23 +205,16 @@ export function formulaNeedsCssLengthMetrics(formula: FormulaNode): boolean {
 }
 
 /**
- * Compute the minimal set of facts required by a collection of formulas.
+ * Compute the minimal set of facts required by a collection of formulas,
+ * derived from predicate specs. Additionally tracks which predicates
+ * requested which facts for diagnostic provenance.
  */
-export function computeRequiredFacts(formulas: FormulaNode[]): {
-  geometry: boolean
-  topology: boolean
-  styles: boolean
-  fragments: boolean
-  domAncestry: boolean
-} {
-  const facts = new Set()
+export function computeRequiredFacts(formulas: FormulaNode[]): FactPlan {
+  const predicateFacts = new Map()
   for (const formula of formulas) {
     const predicates = collectPredicates(formula)
     for (const p of predicates) {
-      const required = getPredicateRequiredFacts(p)
-      for (const f of required) {
-        facts.add(f)
-      }
+      predicateFacts.set(p, getPredicateRequiredFacts(p))
     }
   }
 
@@ -244,15 +241,7 @@ export function computeRequiredFacts(formulas: FormulaNode[]): {
     return found
   })
 
-  const needsTopology = Array.from(facts).some((f) => f.startsWith('topology.') || f.endsWith('.clipChain'))
-
-  return {
-    geometry: true, // Always required for subject resolution
-    topology: needsTopology,
-    styles: facts.has('styles') || facts.has('computedStyle') || needsCssLengthMetrics,
-    fragments: facts.has('subject.fragmentCount') || facts.has('subject.firstFragmentId'),
-    domAncestry: needsDomAncestry,
-  }
+  return planRequiredFacts(predicateFacts, needsCssLengthMetrics, needsDomAncestry)
 }
 
 export function usesLayoutSpace(formula: FormulaNode): boolean {
@@ -396,7 +385,7 @@ export function attachMeasuredChWidths(
 export async function extractWorldFastGeometry(
   playwrightPage: Page,
   selectors: string[],
-  requiredFacts?: { geometry: boolean; topology: boolean; styles: boolean; fragments: boolean; domAncestry: boolean },
+  requiredFacts?: FactPlan,
 ): Promise<{ world: GeometryWorld; selectorToIds: Map; errors: ImhotepDiagnostic[] }> {
   interface FastExtractedElement {
     tagName: string
@@ -728,6 +717,14 @@ export async function extractWorldFastGeometry(
       }
     }
 
+    if (requiredFacts) {
+      requiredFacts.fulfillment[FactCategory.Geometry] = 'fulfilled'
+      if (requiredFacts.topology) requiredFacts.fulfillment[FactCategory.Topology] = 'fulfilled'
+      if (requiredFacts.styles) requiredFacts.fulfillment[FactCategory.Styles] = 'fulfilled'
+      if (requiredFacts.fragments) requiredFacts.fulfillment[FactCategory.Fragments] = 'fulfilled'
+      if (requiredFacts.domAncestry) requiredFacts.fulfillment[FactCategory.DomAncestry] = 'fulfilled'
+      if (requiredFacts.transforms) requiredFacts.fulfillment[FactCategory.Transforms] = 'fulfilled'
+    }
     return { world, selectorToIds, errors }
   } finally {
     await playwrightPage.evaluate(() => {
@@ -804,7 +801,7 @@ function remapTopologyIds(world: GeometryWorld, topologySubjectIds?: number[]):
 export async function extractWorldCdp(
   playwrightPage: Page,
   selectors: string[],
-  requiredFacts?: { geometry: boolean; topology: boolean; styles: boolean; fragments: boolean; domAncestry: boolean },
+  requiredFacts?: FactPlan,
 ): Promise<{ world: GeometryWorld; selectorToIds: Map; errors: ImhotepDiagnostic[] }> {
   const errors: ImhotepDiagnostic[] = []
   const selectorToNodeIds = new Map()
@@ -912,6 +909,15 @@ export async function extractWorldCdp(
       selectorToIds.set(selectorKey, matches)
     }
 
+    if (requiredFacts) {
+      requiredFacts.fulfillment[FactCategory.Geometry] = 'fulfilled'
+      if (requiredFacts.topology) requiredFacts.fulfillment[FactCategory.Topology] = 'fulfilled'
+      if (requiredFacts.styles) requiredFacts.fulfillment[FactCategory.Styles] = 'fulfilled'
+      if (requiredFacts.fragments) requiredFacts.fulfillment[FactCategory.Fragments] = 'fulfilled'
+      if (requiredFacts.domAncestry) requiredFacts.fulfillment[FactCategory.DomAncestry] = 'fulfilled'
+      if (requiredFacts.clipping) requiredFacts.fulfillment[FactCategory.Clipping] = 'fulfilled'
+      if (requiredFacts.scroll) requiredFacts.fulfillment[FactCategory.Scroll] = 'fulfilled'
+    }
     return { world, selectorToIds, errors }
   } finally {
     await playwrightPage.evaluate(() => {
@@ -935,7 +941,7 @@ export async function extractWorld(
   playwrightPage: Page,
   selectors: string[],
   cacheDir?: string | null,
-  requiredFacts?: { geometry: boolean; topology: boolean; styles: boolean; fragments: boolean; domAncestry: boolean },
+  requiredFacts?: FactPlan,
   allowFastGeometry = false,
 ): Promise<{ world: GeometryWorld; selectorToIds: Map; errors: ImhotepDiagnostic[] }> {
   async function resolveViewport(): Promise<{ width: number; height: number }> {