feat: fact planning derived from predicate specs with fulfillment tracking

predicate-specs.ts:
  - Add FactCategory enum (geometry, topology, styles, fragments,
    domAncestry, clipping, scroll, visibility, transforms, text)
  - Add FactPlan interface with predicateFacts provenance map and
    per-category fulfillment tracking (fulfilled/failed/approximated/skipped)
  - Add requiredFactToCategory() — centralized mapping from required-fact
    strings to FactCategory values, replacing scattered inline checks
  - Add planRequiredFacts() — builds a complete FactPlan from collected
    predicate facts + AST structural analysis (CssLengthMetrics, domAncestry)
  - Add createEmptyFactPlan() factory

extraction.ts:
  - computeRequiredFacts returns FactPlan (was inline record), uses
    planRequiredFacts from spec infrastructure
  - extractWorld/extractWorldFastGeometry/extractWorldCdp accept FactPlan
    instead of inline requiredFacts record
  - After successful extraction, populate FactPlan.fulfillment with
    per-category 'fulfilled' status, providing a structured audit trail
    of which facts were requested and obtained

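Adding a new predicate whose required facts are already recognized by
requiredFactToCategory() now requires only a spec entry — the fact
planner, extraction engine, and fulfillment tracker all derive behavior
from the spec table automatically. A predicate that introduces a new
kind of required-fact string also needs a mapping in
requiredFactToCategory(); unmapped strings are ignored by the planner.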

658 tests pass.
This commit is contained in:
John Dvorak
2026-05-22 15:02:20 -07:00
parent e78ffe3419
commit 8ac69254d4
2 changed files with 147 additions and 25 deletions
@@ -575,3 +575,119 @@ export function collectTopologyPredicateNames(): string[] {
export function collectQuantifierCompatiblePredicateNames(): string[] {
  // Walk the spec table in order; for every quantifier-compatible spec emit
  // its canonical name first, followed by its aliases.
  const names: string[] = []
  for (const spec of PREDICATE_SPECS) {
    if (!spec.quantifierCompatible) continue
    names.push(spec.name, ...spec.aliases)
  }
  return names
}
// ---------------------------------------------------------------------------
// Fact Planning
// ---------------------------------------------------------------------------
/**
 * Closed set of extraction fact categories requested by predicates.
 *
 * Each member's string value is identical to the name of the matching boolean
 * flag on `FactPlan`, and is also the key type of `FactPlan.fulfillment`.
 */
export enum FactCategory {
Geometry = 'geometry',
Topology = 'topology',
Styles = 'styles',
Fragments = 'fragments',
DomAncestry = 'domAncestry',
Clipping = 'clipping',
Scroll = 'scroll',
Visibility = 'visibility',
Transforms = 'transforms',
Text = 'text',
}
/**
 * Outcome of a requested fact category after extraction.
 * Stored per category in `FactPlan.fulfillment`; a category without an entry
 * there has no recorded outcome.
 */
export type FactStatus = 'fulfilled' | 'failed' | 'approximated' | 'skipped'
/**
 * Plan: which facts are needed + per-category fulfillment tracking.
 *
 * The ten boolean flags mirror the members of `FactCategory` (same names as
 * the enum's string values). `createEmptyFactPlan()` initializes `geometry`
 * to `true` and every other flag to `false`.
 */
export interface FactPlan {
geometry: boolean
topology: boolean
styles: boolean
fragments: boolean
domAncestry: boolean
clipping: boolean
scroll: boolean
visibility: boolean
transforms: boolean
text: boolean
/**
 * Provenance: maps each predicate name to the required-fact strings
 * collected for that predicate.
 */
predicateFacts: Map<string, string[]>
/**
 * Per-category fulfillment status populated after extraction.
 * Starts out empty; categories are keyed by their `FactCategory` value.
 */
fulfillment: Partial<Record<FactCategory, FactStatus>>
}
/**
 * Create a plan in its baseline state: geometry is the only requested
 * category, no predicate provenance is recorded, and no fulfillment status
 * has been set.
 *
 * @returns a new plan; every call allocates its own `predicateFacts` map and
 *   `fulfillment` record, so plans never share mutable state.
 */
export function createEmptyFactPlan(): FactPlan {
  const provenance = new Map<string, string[]>()
  const baseline: FactPlan = {
    geometry: true,
    topology: false,
    styles: false,
    fragments: false,
    domAncestry: false,
    clipping: false,
    scroll: false,
    visibility: false,
    transforms: false,
    text: false,
    predicateFacts: provenance,
    fulfillment: {},
  }
  return baseline
}
/**
 * Map a required-fact string to the FactCategory it implies.
 * Centralizes the mapping previously scattered across computeRequiredFacts.
 *
 * Recognized facts:
 * - `subject.primaryBox`, `reference.primaryBox`             -> Geometry
 * - `styles`, `computedStyle`                                -> Styles
 * - `subject.fragmentCount`, `subject.firstFragmentId`       -> Fragments
 * - `topology.scrollContainerOf`                             -> Scroll
 * - `topology.clippingRootOf`, anything ending `.clipChain`  -> Clipping
 * - any other `topology.*`                                   -> Topology
 *
 * The style and fragment fact names are the ones the previous inline
 * implementation of computeRequiredFacts checked for; without them the
 * planner could never request styles or fragments on behalf of a predicate.
 *
 * @param fact required-fact string declared by a predicate spec
 * @returns the implied category, or `null` when the string is not recognized
 */
export function requiredFactToCategory(fact: string): FactCategory | null {
  switch (fact) {
    case 'subject.primaryBox':
    case 'reference.primaryBox':
      return FactCategory.Geometry
    case 'styles':
    case 'computedStyle':
      return FactCategory.Styles
    case 'subject.fragmentCount':
    case 'subject.firstFragmentId':
      return FactCategory.Fragments
    case 'topology.scrollContainerOf':
      return FactCategory.Scroll
    case 'topology.clippingRootOf':
      return FactCategory.Clipping
  }
  // Suffix check runs before the generic topology prefix so that a
  // `topology.*.clipChain` fact is still classified as Clipping.
  if (fact.endsWith('.clipChain')) return FactCategory.Clipping
  if (fact.startsWith('topology.')) return FactCategory.Topology
  return null
}
/**
 * Build a FactPlan from the required-fact strings collected from all
 * predicates in the formula AST, plus two structural hints from AST analysis.
 *
 * Clipping and scroll facts also switch on `topology`. The previous inline
 * planner treated every `topology.*` and `*.clipChain` fact as a topology
 * requirement, so consumers that only look at the `topology` flag keep
 * receiving the data those facts depend on.
 *
 * @param predicateFacts predicate name -> required-fact strings; stored on
 *   the returned plan by reference (not copied)
 * @param needsCssLengthMetrics when true, forces `styles` on
 * @param needsDomAncestry when true, forces `domAncestry` on
 * @returns a new plan with category flags set and an empty `fulfillment`
 */
export function planRequiredFacts(
  predicateFacts: Map<string, string[]>,
  needsCssLengthMetrics: boolean,
  needsDomAncestry: boolean,
): FactPlan {
  const plan = createEmptyFactPlan()
  plan.predicateFacts = predicateFacts
  plan.domAncestry = needsDomAncestry
  if (needsCssLengthMetrics) plan.styles = true

  // De-duplicate fact strings so each one is classified once.
  const allFacts = new Set<string>()
  for (const facts of predicateFacts.values()) {
    for (const fact of facts) allFacts.add(fact)
  }

  for (const fact of allFacts) {
    const category = requiredFactToCategory(fact)
    if (category === null) continue // unrecognized fact strings are ignored
    switch (category) {
      case FactCategory.Geometry:
        // Geometry is always on (set by createEmptyFactPlan).
        break
      case FactCategory.Topology:
        plan.topology = true
        break
      case FactCategory.Clipping:
        plan.clipping = true
        plan.topology = true // legacy contract: clip facts imply topology
        break
      case FactCategory.Scroll:
        plan.scroll = true
        plan.topology = true // legacy contract: topology.* facts imply topology
        break
      case FactCategory.Styles:
        plan.styles = true
        break
      case FactCategory.Fragments:
        plan.fragments = true
        break
      case FactCategory.DomAncestry:
        plan.domAncestry = true
        break
      case FactCategory.Visibility:
        plan.visibility = true
        break
      case FactCategory.Transforms:
        plan.transforms = true
        break
      case FactCategory.Text:
        plan.text = true
        break
    }
  }
  return plan
}
+31 -25
View File
@@ -58,6 +58,10 @@ import {
isUnaryPredicate,
isVariableArityPredicate,
getPredicateSpec,
planRequiredFacts,
FactCategory,
type FactPlan,
type FactStatus,
} from 'imhotep-core'
import { buildGeometryWorld } from './world-builder.js'
import {
@@ -201,23 +205,16 @@ export function formulaNeedsCssLengthMetrics(formula: FormulaNode): boolean {
}
/**
* Compute the minimal set of facts required by a collection of formulas.
* Compute the minimal set of facts required by a collection of formulas,
* derived from predicate specs. Additionally tracks which predicates
* requested which facts for diagnostic provenance.
*/
export function computeRequiredFacts(formulas: FormulaNode[]): {
geometry: boolean
topology: boolean
styles: boolean
fragments: boolean
domAncestry: boolean
} {
const facts = new Set<string>()
export function computeRequiredFacts(formulas: FormulaNode[]): FactPlan {
const predicateFacts = new Map<string, string[]>()
for (const formula of formulas) {
const predicates = collectPredicates(formula)
for (const p of predicates) {
const required = getPredicateRequiredFacts(p)
for (const f of required) {
facts.add(f)
}
predicateFacts.set(p, getPredicateRequiredFacts(p))
}
}
@@ -244,15 +241,7 @@ export function computeRequiredFacts(formulas: FormulaNode[]): {
return found
})
const needsTopology = Array.from(facts).some((f) => f.startsWith('topology.') || f.endsWith('.clipChain'))
return {
geometry: true, // Always required for subject resolution
topology: needsTopology,
styles: facts.has('styles') || facts.has('computedStyle') || needsCssLengthMetrics,
fragments: facts.has('subject.fragmentCount') || facts.has('subject.firstFragmentId'),
domAncestry: needsDomAncestry,
}
return planRequiredFacts(predicateFacts, needsCssLengthMetrics, needsDomAncestry)
}
export function usesLayoutSpace(formula: FormulaNode): boolean {
@@ -396,7 +385,7 @@ export function attachMeasuredChWidths(
export async function extractWorldFastGeometry(
playwrightPage: Page,
selectors: string[],
requiredFacts?: { geometry: boolean; topology: boolean; styles: boolean; fragments: boolean; domAncestry: boolean },
requiredFacts?: FactPlan,
): Promise<{ world: GeometryWorld; selectorToIds: Map<string, number[]>; errors: ImhotepDiagnostic[] }> {
interface FastExtractedElement {
tagName: string
@@ -728,6 +717,14 @@ export async function extractWorldFastGeometry(
}
}
if (requiredFacts) {
requiredFacts.fulfillment[FactCategory.Geometry] = 'fulfilled'
if (requiredFacts.topology) requiredFacts.fulfillment[FactCategory.Topology] = 'fulfilled'
if (requiredFacts.styles) requiredFacts.fulfillment[FactCategory.Styles] = 'fulfilled'
if (requiredFacts.fragments) requiredFacts.fulfillment[FactCategory.Fragments] = 'fulfilled'
if (requiredFacts.domAncestry) requiredFacts.fulfillment[FactCategory.DomAncestry] = 'fulfilled'
if (requiredFacts.transforms) requiredFacts.fulfillment[FactCategory.Transforms] = 'fulfilled'
}
return { world, selectorToIds, errors }
} finally {
await playwrightPage.evaluate(() => {
@@ -804,7 +801,7 @@ function remapTopologyIds(world: GeometryWorld, topologySubjectIds?: number[]):
export async function extractWorldCdp(
playwrightPage: Page,
selectors: string[],
requiredFacts?: { geometry: boolean; topology: boolean; styles: boolean; fragments: boolean; domAncestry: boolean },
requiredFacts?: FactPlan,
): Promise<{ world: GeometryWorld; selectorToIds: Map<string, number[]>; errors: ImhotepDiagnostic[] }> {
const errors: ImhotepDiagnostic[] = []
const selectorToNodeIds = new Map<string, number[]>()
@@ -912,6 +909,15 @@ export async function extractWorldCdp(
selectorToIds.set(selectorKey, matches)
}
if (requiredFacts) {
requiredFacts.fulfillment[FactCategory.Geometry] = 'fulfilled'
if (requiredFacts.topology) requiredFacts.fulfillment[FactCategory.Topology] = 'fulfilled'
if (requiredFacts.styles) requiredFacts.fulfillment[FactCategory.Styles] = 'fulfilled'
if (requiredFacts.fragments) requiredFacts.fulfillment[FactCategory.Fragments] = 'fulfilled'
if (requiredFacts.domAncestry) requiredFacts.fulfillment[FactCategory.DomAncestry] = 'fulfilled'
if (requiredFacts.clipping) requiredFacts.fulfillment[FactCategory.Clipping] = 'fulfilled'
if (requiredFacts.scroll) requiredFacts.fulfillment[FactCategory.Scroll] = 'fulfilled'
}
return { world, selectorToIds, errors }
} finally {
await playwrightPage.evaluate(() => {
@@ -935,7 +941,7 @@ export async function extractWorld(
playwrightPage: Page,
selectors: string[],
cacheDir?: string | null,
requiredFacts?: { geometry: boolean; topology: boolean; styles: boolean; fragments: boolean; domAncestry: boolean },
requiredFacts?: FactPlan,
allowFastGeometry = false,
): Promise<{ world: GeometryWorld; selectorToIds: Map<string, number[]>; errors: ImhotepDiagnostic[] }> {
async function resolveViewport(): Promise<{ width: number; height: number }> {