feat: fact planning derived from predicate specs with fulfillment tracking
predicate-specs.ts:
- Add FactCategory enum (geometry, topology, styles, fragments,
domAncestry, clipping, scroll, visibility, transforms, text)
- Add FactPlan interface with predicateFacts provenance map and
per-category fulfillment tracking (fulfilled/failed/approximated/skipped)
- Add requiredFactToCategory() — centralized mapping from required-fact
strings to FactCategory values, replacing scattered inline checks
- Add planRequiredFacts() — builds a complete FactPlan from collected
predicate facts + AST structural analysis (CssLengthMetrics, domAncestry)
- Add createEmptyFactPlan() factory
extraction.ts:
- computeRequiredFacts returns FactPlan (was inline record), uses
planRequiredFacts from spec infrastructure
- extractWorld/extractWorldFastGeometry/extractWorldCdp accept FactPlan
instead of inline requiredFacts record
- After successful extraction, populate FactPlan.fulfillment with
per-category 'fulfilled' status, providing a structured audit trail
of which facts were requested and obtained
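Adding a new predicate whose required facts use already-mapped fact
strings now requires only a spec entry — the fact planner, extraction
engine, and fulfillment tracker all derive behavior from the spec table
automatically. A fact string that requiredFactToCategory() does not
recognize maps to no category and is ignored by the planner, so a new
kind of fact also needs a mapping added there.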
658 tests pass.
This commit is contained in:
@@ -575,3 +575,119 @@ export function collectTopologyPredicateNames(): string[] {
|
||||
/**
 * List every name (canonical name first, then its aliases) of each predicate
 * spec flagged `quantifierCompatible`, in `PREDICATE_SPECS` order.
 *
 * @returns A flat array of predicate names and aliases.
 */
export function collectQuantifierCompatiblePredicateNames(): string[] {
  const names: string[] = []
  for (const spec of PREDICATE_SPECS) {
    if (!spec.quantifierCompatible) continue
    names.push(spec.name, ...spec.aliases)
  }
  return names
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Fact Planning
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Closed set of extraction fact categories requested by predicates.
 *
 * Each member's string value is identical to the name of the matching
 * boolean request flag on `FactPlan`, and is the key used in
 * `FactPlan.fulfillment`.
 */
export enum FactCategory {
  Geometry = 'geometry',
  Topology = 'topology',
  Styles = 'styles',
  Fragments = 'fragments',
  DomAncestry = 'domAncestry',
  Clipping = 'clipping',
  Scroll = 'scroll',
  Visibility = 'visibility',
  Transforms = 'transforms',
  Text = 'text',
}
|
||||
|
||||
/**
 * Outcome recorded for a requested fact category after extraction.
 * Stored per category in `FactPlan.fulfillment`.
 */
export type FactStatus = 'fulfilled' | 'failed' | 'approximated' | 'skipped'
|
||||
|
||||
/**
 * Plan describing which fact categories must be extracted, plus a record of
 * how each requested category turned out.
 *
 * The boolean flags are the request side (one per `FactCategory` value);
 * `fulfillment` is the result side, filled in after extraction.
 */
export interface FactPlan {
  /** Geometry facts requested; `createEmptyFactPlan()` defaults this to true. */
  geometry: boolean
  /** Topology facts requested. */
  topology: boolean
  /** Style facts requested. */
  styles: boolean
  /** Fragment facts requested. */
  fragments: boolean
  /** DOM ancestry facts requested. */
  domAncestry: boolean
  /** Clipping facts requested. */
  clipping: boolean
  /** Scroll facts requested. */
  scroll: boolean
  /** Visibility facts requested. */
  visibility: boolean
  /** Transform facts requested. */
  transforms: boolean
  /** Text facts requested. */
  text: boolean
  /** Maps each predicate name to the required-fact strings it requested. */
  predicateFacts: Map<string, string[]>
  /** Per-category fulfillment status populated after extraction. */
  fulfillment: Partial<Record<FactCategory, FactStatus>>
}
|
||||
|
||||
/**
 * Create a fresh baseline `FactPlan`.
 *
 * Geometry is requested by default; every other category starts unrequested.
 * Each call returns new `predicateFacts` and `fulfillment` containers, so
 * plans never share mutable state.
 *
 * @returns A new plan with only `geometry` enabled and nothing fulfilled yet.
 */
export function createEmptyFactPlan(): FactPlan {
  return {
    geometry: true,
    topology: false,
    styles: false,
    fragments: false,
    domAncestry: false,
    clipping: false,
    scroll: false,
    visibility: false,
    transforms: false,
    text: false,
    predicateFacts: new Map(),
    fulfillment: {},
  }
}
|
||||
|
||||
/**
 * Map a required-fact string to the FactCategory it implies.
 * Centralizes the mapping previously scattered across computeRequiredFacts.
 *
 * Recognized facts:
 * - `subject.primaryBox`, `reference.primaryBox` -> Geometry
 * - `styles`, `computedStyle` -> Styles
 * - `subject.fragmentCount`, `subject.firstFragmentId` -> Fragments
 * - anything ending in `.clipChain` -> Clipping
 * - `topology.scrollContainerOf` -> Scroll
 * - `topology.clippingRootOf` -> Clipping
 * - any other `topology.*` -> Topology
 *
 * @param fact Required-fact string declared by a predicate spec.
 * @returns The implied category, or `null` when the string is not recognized.
 */
export function requiredFactToCategory(fact: string): FactCategory | null {
  switch (fact) {
    case 'subject.primaryBox':
    case 'reference.primaryBox':
      return FactCategory.Geometry
    // The style and fragment fact strings were honoured by the inline checks
    // this function replaces; without them those categories are never planned.
    case 'styles':
    case 'computedStyle':
      return FactCategory.Styles
    case 'subject.fragmentCount':
    case 'subject.firstFragmentId':
      return FactCategory.Fragments
  }

  // Checked before the `topology.` prefix so a clip-chain fact always maps to
  // Clipping regardless of its prefix.
  if (fact.endsWith('.clipChain')) {
    return FactCategory.Clipping
  }

  if (fact.startsWith('topology.')) {
    switch (fact.slice('topology.'.length)) {
      case 'scrollContainerOf':
        return FactCategory.Scroll
      case 'clippingRootOf':
        return FactCategory.Clipping
      default:
        // Includes `stackingContextOf` and any other topology relation.
        return FactCategory.Topology
    }
  }

  return null
}
|
||||
|
||||
/**
 * Build a FactPlan from the required-fact strings collected from all
 * predicates in the formula AST.
 *
 * Starts from `createEmptyFactPlan()` (geometry on, everything else off) and
 * switches on each category implied by a fact, as decided by
 * `requiredFactToCategory()`. Facts that map to no category are ignored.
 *
 * @param predicateFacts Predicate name -> required-fact strings. Stored on the
 *   returned plan by reference (not copied).
 * @param needsCssLengthMetrics When true, styles are requested regardless of
 *   the predicate facts.
 * @param needsDomAncestry Initial value of the plan's `domAncestry` flag.
 * @returns The populated plan, with an empty `fulfillment` record.
 */
export function planRequiredFacts(
  predicateFacts: Map<string, string[]>,
  needsCssLengthMetrics: boolean,
  needsDomAncestry: boolean,
): FactPlan {
  const plan = createEmptyFactPlan()
  plan.predicateFacts = predicateFacts
  plan.domAncestry = needsDomAncestry

  // Setting a flag is idempotent, so facts shared by several predicates need
  // no de-duplication before being visited.
  for (const facts of predicateFacts.values()) {
    for (const fact of facts) {
      switch (requiredFactToCategory(fact)) {
        case FactCategory.Topology:
          plan.topology = true
          break
        // Clip-chain and scroll-container facts used to switch on `topology`
        // (any `topology.*` or `*.clipChain` fact did). Keep requesting it so
        // code that only consults `topology` still extracts the data these
        // categories depend on.
        // NOTE(review): confirm the extractors gate on `topology` for these.
        case FactCategory.Clipping:
          plan.clipping = true
          plan.topology = true
          break
        case FactCategory.Scroll:
          plan.scroll = true
          plan.topology = true
          break
        case FactCategory.Styles:
          plan.styles = true
          break
        case FactCategory.Fragments:
          plan.fragments = true
          break
        case FactCategory.DomAncestry:
          plan.domAncestry = true
          break
        case FactCategory.Visibility:
          plan.visibility = true
          break
        case FactCategory.Transforms:
          plan.transforms = true
          break
        case FactCategory.Text:
          plan.text = true
          break
        default:
          // Geometry is already requested by the empty plan; null means the
          // fact string implies no category.
          break
      }
    }
  }

  if (needsCssLengthMetrics) plan.styles = true

  return plan
}
|
||||
|
||||
@@ -58,6 +58,10 @@ import {
|
||||
isUnaryPredicate,
|
||||
isVariableArityPredicate,
|
||||
getPredicateSpec,
|
||||
planRequiredFacts,
|
||||
FactCategory,
|
||||
type FactPlan,
|
||||
type FactStatus,
|
||||
} from 'imhotep-core'
|
||||
import { buildGeometryWorld } from './world-builder.js'
|
||||
import {
|
||||
@@ -201,23 +205,16 @@ export function formulaNeedsCssLengthMetrics(formula: FormulaNode): boolean {
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the minimal set of facts required by a collection of formulas.
|
||||
* Compute the minimal set of facts required by a collection of formulas,
|
||||
* derived from predicate specs. Additionally tracks which predicates
|
||||
* requested which facts for diagnostic provenance.
|
||||
*/
|
||||
export function computeRequiredFacts(formulas: FormulaNode[]): {
|
||||
geometry: boolean
|
||||
topology: boolean
|
||||
styles: boolean
|
||||
fragments: boolean
|
||||
domAncestry: boolean
|
||||
} {
|
||||
const facts = new Set<string>()
|
||||
export function computeRequiredFacts(formulas: FormulaNode[]): FactPlan {
|
||||
const predicateFacts = new Map<string, string[]>()
|
||||
for (const formula of formulas) {
|
||||
const predicates = collectPredicates(formula)
|
||||
for (const p of predicates) {
|
||||
const required = getPredicateRequiredFacts(p)
|
||||
for (const f of required) {
|
||||
facts.add(f)
|
||||
}
|
||||
predicateFacts.set(p, getPredicateRequiredFacts(p))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -244,15 +241,7 @@ export function computeRequiredFacts(formulas: FormulaNode[]): {
|
||||
return found
|
||||
})
|
||||
|
||||
const needsTopology = Array.from(facts).some((f) => f.startsWith('topology.') || f.endsWith('.clipChain'))
|
||||
|
||||
return {
|
||||
geometry: true, // Always required for subject resolution
|
||||
topology: needsTopology,
|
||||
styles: facts.has('styles') || facts.has('computedStyle') || needsCssLengthMetrics,
|
||||
fragments: facts.has('subject.fragmentCount') || facts.has('subject.firstFragmentId'),
|
||||
domAncestry: needsDomAncestry,
|
||||
}
|
||||
return planRequiredFacts(predicateFacts, needsCssLengthMetrics, needsDomAncestry)
|
||||
}
|
||||
|
||||
export function usesLayoutSpace(formula: FormulaNode): boolean {
|
||||
@@ -396,7 +385,7 @@ export function attachMeasuredChWidths(
|
||||
export async function extractWorldFastGeometry(
|
||||
playwrightPage: Page,
|
||||
selectors: string[],
|
||||
requiredFacts?: { geometry: boolean; topology: boolean; styles: boolean; fragments: boolean; domAncestry: boolean },
|
||||
requiredFacts?: FactPlan,
|
||||
): Promise<{ world: GeometryWorld; selectorToIds: Map<string, number[]>; errors: ImhotepDiagnostic[] }> {
|
||||
interface FastExtractedElement {
|
||||
tagName: string
|
||||
@@ -728,6 +717,14 @@ export async function extractWorldFastGeometry(
|
||||
}
|
||||
}
|
||||
|
||||
if (requiredFacts) {
|
||||
requiredFacts.fulfillment[FactCategory.Geometry] = 'fulfilled'
|
||||
if (requiredFacts.topology) requiredFacts.fulfillment[FactCategory.Topology] = 'fulfilled'
|
||||
if (requiredFacts.styles) requiredFacts.fulfillment[FactCategory.Styles] = 'fulfilled'
|
||||
if (requiredFacts.fragments) requiredFacts.fulfillment[FactCategory.Fragments] = 'fulfilled'
|
||||
if (requiredFacts.domAncestry) requiredFacts.fulfillment[FactCategory.DomAncestry] = 'fulfilled'
|
||||
if (requiredFacts.transforms) requiredFacts.fulfillment[FactCategory.Transforms] = 'fulfilled'
|
||||
}
|
||||
return { world, selectorToIds, errors }
|
||||
} finally {
|
||||
await playwrightPage.evaluate(() => {
|
||||
@@ -804,7 +801,7 @@ function remapTopologyIds(world: GeometryWorld, topologySubjectIds?: number[]):
|
||||
export async function extractWorldCdp(
|
||||
playwrightPage: Page,
|
||||
selectors: string[],
|
||||
requiredFacts?: { geometry: boolean; topology: boolean; styles: boolean; fragments: boolean; domAncestry: boolean },
|
||||
requiredFacts?: FactPlan,
|
||||
): Promise<{ world: GeometryWorld; selectorToIds: Map<string, number[]>; errors: ImhotepDiagnostic[] }> {
|
||||
const errors: ImhotepDiagnostic[] = []
|
||||
const selectorToNodeIds = new Map<string, number[]>()
|
||||
@@ -912,6 +909,15 @@ export async function extractWorldCdp(
|
||||
selectorToIds.set(selectorKey, matches)
|
||||
}
|
||||
|
||||
if (requiredFacts) {
|
||||
requiredFacts.fulfillment[FactCategory.Geometry] = 'fulfilled'
|
||||
if (requiredFacts.topology) requiredFacts.fulfillment[FactCategory.Topology] = 'fulfilled'
|
||||
if (requiredFacts.styles) requiredFacts.fulfillment[FactCategory.Styles] = 'fulfilled'
|
||||
if (requiredFacts.fragments) requiredFacts.fulfillment[FactCategory.Fragments] = 'fulfilled'
|
||||
if (requiredFacts.domAncestry) requiredFacts.fulfillment[FactCategory.DomAncestry] = 'fulfilled'
|
||||
if (requiredFacts.clipping) requiredFacts.fulfillment[FactCategory.Clipping] = 'fulfilled'
|
||||
if (requiredFacts.scroll) requiredFacts.fulfillment[FactCategory.Scroll] = 'fulfilled'
|
||||
}
|
||||
return { world, selectorToIds, errors }
|
||||
} finally {
|
||||
await playwrightPage.evaluate(() => {
|
||||
@@ -935,7 +941,7 @@ export async function extractWorld(
|
||||
playwrightPage: Page,
|
||||
selectors: string[],
|
||||
cacheDir?: string | null,
|
||||
requiredFacts?: { geometry: boolean; topology: boolean; styles: boolean; fragments: boolean; domAncestry: boolean },
|
||||
requiredFacts?: FactPlan,
|
||||
allowFastGeometry = false,
|
||||
): Promise<{ world: GeometryWorld; selectorToIds: Map<string, number[]>; errors: ImhotepDiagnostic[] }> {
|
||||
async function resolveViewport(): Promise<{ width: number; height: number }> {
|
||||
|
||||
Reference in New Issue
Block a user