feat: fact planning derived from predicate specs with fulfillment tracking
predicate-specs.ts:
- Add FactCategory enum (geometry, topology, styles, fragments,
domAncestry, clipping, scroll, visibility, transforms, text)
- Add FactPlan interface with predicateFacts provenance map and
per-category fulfillment tracking (fulfilled/failed/approximated/skipped)
- Add requiredFactToCategory() — centralized mapping from required-fact
strings to FactCategory values, replacing scattered inline checks
- Add planRequiredFacts() — builds a complete FactPlan from collected
predicate facts + AST structural analysis (CssLengthMetrics, domAncestry)
- Add createEmptyFactPlan() factory
extraction.ts:
- computeRequiredFacts returns FactPlan (was inline record), uses
planRequiredFacts from spec infrastructure
- extractWorld/extractWorldFastGeometry/extractWorldCdp accept FactPlan
instead of inline requiredFacts record
- After successful extraction, populate FactPlan.fulfillment with
per-category 'fulfilled' status, providing a structured audit trail
of which facts were requested and obtained
Adding a new predicate with new required facts now requires only a
spec entry — the fact planner, extraction engine, and fulfillment
tracker all derive behavior from the spec table automatically.
658 tests pass.
This commit is contained in:
@@ -575,3 +575,119 @@ export function collectTopologyPredicateNames(): string[] {
|
|||||||
/**
 * List every predicate name usable under a quantifier: for each spec in
 * PREDICATE_SPECS whose `quantifierCompatible` flag is truthy, the canonical
 * name followed by its aliases, in spec-table order.
 */
export function collectQuantifierCompatiblePredicateNames(): string[] {
  const names: string[] = []
  for (const spec of PREDICATE_SPECS) {
    if (!spec.quantifierCompatible) continue
    names.push(spec.name, ...spec.aliases)
  }
  return names
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Fact Planning
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Closed set of extraction fact categories requested by predicates.
 *
 * Each member's string value equals the name of the boolean flag on
 * FactPlan that requests that category.
 */
export enum FactCategory {
  Geometry = 'geometry',
  Topology = 'topology',
  Styles = 'styles',
  Fragments = 'fragments',
  DomAncestry = 'domAncestry',
  Clipping = 'clipping',
  Scroll = 'scroll',
  Visibility = 'visibility',
  Transforms = 'transforms',
  Text = 'text',
}

/** Outcome of a requested fact category after extraction. */
export type FactStatus = 'fulfilled' | 'failed' | 'approximated' | 'skipped'

/** Plan: which facts are needed + per-category fulfillment tracking. */
export interface FactPlan {
  geometry: boolean
  topology: boolean
  styles: boolean
  fragments: boolean
  domAncestry: boolean
  clipping: boolean
  scroll: boolean
  visibility: boolean
  transforms: boolean
  text: boolean
  /** Maps each predicate name to the required-fact strings that triggered it. */
  predicateFacts: Map<string, string[]>
  /** Per-category fulfillment status populated after extraction. */
  fulfillment: Partial<Record<FactCategory, FactStatus>>
}

/**
 * Create a plan that requests geometry only, with an empty provenance map
 * and no fulfillment entries.
 *
 * @returns A fresh FactPlan; every call returns new Map/object instances.
 */
export function createEmptyFactPlan(): FactPlan {
  return {
    geometry: true,
    topology: false,
    styles: false,
    fragments: false,
    domAncestry: false,
    clipping: false,
    scroll: false,
    visibility: false,
    transforms: false,
    text: false,
    predicateFacts: new Map(),
    fulfillment: {},
  }
}

/**
 * Required-fact strings that map to a category by exact match.
 *
 * The styles and fragments entries are the fact strings that
 * computeRequiredFacts recognised before the mapping was centralized;
 * omitting them would leave plan.styles / plan.fragments permanently false
 * for predicates that declare them.
 */
const EXACT_FACT_CATEGORIES: ReadonlyMap<string, FactCategory> = new Map<string, FactCategory>([
  ['subject.primaryBox', FactCategory.Geometry],
  ['reference.primaryBox', FactCategory.Geometry],
  ['styles', FactCategory.Styles],
  ['computedStyle', FactCategory.Styles],
  ['subject.fragmentCount', FactCategory.Fragments],
  ['subject.firstFragmentId', FactCategory.Fragments],
])

/** Prefix shared by all topology-derived required-fact strings. */
const TOPOLOGY_FACT_PREFIX = 'topology.'

/**
 * Map a required-fact string to the FactCategory it implies.
 * Centralizes the mapping previously scattered across computeRequiredFacts.
 *
 * Resolution order: exact-match table, then the `.clipChain` suffix, then
 * the `topology.` prefix (whose sub-fact selects Scroll, Clipping or plain
 * Topology).
 *
 * @param fact - Required-fact string declared by a predicate spec.
 * @returns The implied category, or `null` when the string is not recognised.
 */
export function requiredFactToCategory(fact: string): FactCategory | null {
  const exact = EXACT_FACT_CATEGORIES.get(fact)
  if (exact !== undefined) return exact

  if (fact.endsWith('.clipChain')) return FactCategory.Clipping

  if (fact.startsWith(TOPOLOGY_FACT_PREFIX)) {
    switch (fact.slice(TOPOLOGY_FACT_PREFIX.length)) {
      case 'scrollContainerOf':
        return FactCategory.Scroll
      case 'clippingRootOf':
        return FactCategory.Clipping
      default:
        // Includes 'stackingContextOf' and any other topology.* sub-fact.
        return FactCategory.Topology
    }
  }

  return null
}

/**
 * Build a FactPlan from a set of required-fact strings collected from all
 * predicates in the formula AST.
 *
 * @param predicateFacts - Predicate name -> required-fact strings. Stored on
 *   the returned plan by reference (not copied).
 * @param needsCssLengthMetrics - When true, styles are requested regardless
 *   of the predicate facts.
 * @param needsDomAncestry - Initial value of the plan's `domAncestry` flag.
 * @returns A plan with category flags set and an empty `fulfillment` record.
 */
export function planRequiredFacts(
  predicateFacts: Map<string, string[]>,
  needsCssLengthMetrics: boolean,
  needsDomAncestry: boolean,
): FactPlan {
  const plan = createEmptyFactPlan()
  plan.predicateFacts = predicateFacts
  plan.domAncestry = needsDomAncestry

  // De-duplicate fact strings across predicates before classifying them.
  const allFacts = new Set<string>()
  for (const facts of predicateFacts.values()) {
    for (const fact of facts) allFacts.add(fact)
  }

  for (const fact of allFacts) {
    const category = requiredFactToCategory(fact)
    if (category === null) continue

    switch (category) {
      case FactCategory.Geometry:
        // Already true in the empty plan.
        break
      case FactCategory.Topology:
        plan.topology = true
        break
      case FactCategory.Clipping:
        // Clip-chain and clipping-root facts used to switch topology on
        // (any `topology.*` or `*.clipChain` fact did); keep that so
        // consumers gating on `topology` still see it requested.
        plan.clipping = true
        plan.topology = true
        break
      case FactCategory.Scroll:
        // Same reasoning as Clipping: scroll facts are `topology.*` facts.
        plan.scroll = true
        plan.topology = true
        break
      case FactCategory.Styles:
        plan.styles = true
        break
      case FactCategory.Fragments:
        plan.fragments = true
        break
      case FactCategory.DomAncestry:
        plan.domAncestry = true
        break
      case FactCategory.Visibility:
        plan.visibility = true
        break
      case FactCategory.Transforms:
        plan.transforms = true
        break
      case FactCategory.Text:
        plan.text = true
        break
      default: {
        // Compile-time guard: adding a FactCategory member without a case
        // above makes this assignment a type error.
        const unhandled: never = category
        throw new Error(`Unhandled fact category: ${String(unhandled)}`)
      }
    }
  }

  if (needsCssLengthMetrics) plan.styles = true

  return plan
}
|
||||||
|
|||||||
@@ -58,6 +58,10 @@ import {
|
|||||||
isUnaryPredicate,
|
isUnaryPredicate,
|
||||||
isVariableArityPredicate,
|
isVariableArityPredicate,
|
||||||
getPredicateSpec,
|
getPredicateSpec,
|
||||||
|
planRequiredFacts,
|
||||||
|
FactCategory,
|
||||||
|
type FactPlan,
|
||||||
|
type FactStatus,
|
||||||
} from 'imhotep-core'
|
} from 'imhotep-core'
|
||||||
import { buildGeometryWorld } from './world-builder.js'
|
import { buildGeometryWorld } from './world-builder.js'
|
||||||
import {
|
import {
|
||||||
@@ -201,23 +205,16 @@ export function formulaNeedsCssLengthMetrics(formula: FormulaNode): boolean {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compute the minimal set of facts required by a collection of formulas.
|
* Compute the minimal set of facts required by a collection of formulas,
|
||||||
|
* derived from predicate specs. Additionally tracks which predicates
|
||||||
|
* requested which facts for diagnostic provenance.
|
||||||
*/
|
*/
|
||||||
export function computeRequiredFacts(formulas: FormulaNode[]): {
|
export function computeRequiredFacts(formulas: FormulaNode[]): FactPlan {
|
||||||
geometry: boolean
|
const predicateFacts = new Map<string, string[]>()
|
||||||
topology: boolean
|
|
||||||
styles: boolean
|
|
||||||
fragments: boolean
|
|
||||||
domAncestry: boolean
|
|
||||||
} {
|
|
||||||
const facts = new Set<string>()
|
|
||||||
for (const formula of formulas) {
|
for (const formula of formulas) {
|
||||||
const predicates = collectPredicates(formula)
|
const predicates = collectPredicates(formula)
|
||||||
for (const p of predicates) {
|
for (const p of predicates) {
|
||||||
const required = getPredicateRequiredFacts(p)
|
predicateFacts.set(p, getPredicateRequiredFacts(p))
|
||||||
for (const f of required) {
|
|
||||||
facts.add(f)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -244,15 +241,7 @@ export function computeRequiredFacts(formulas: FormulaNode[]): {
|
|||||||
return found
|
return found
|
||||||
})
|
})
|
||||||
|
|
||||||
const needsTopology = Array.from(facts).some((f) => f.startsWith('topology.') || f.endsWith('.clipChain'))
|
return planRequiredFacts(predicateFacts, needsCssLengthMetrics, needsDomAncestry)
|
||||||
|
|
||||||
return {
|
|
||||||
geometry: true, // Always required for subject resolution
|
|
||||||
topology: needsTopology,
|
|
||||||
styles: facts.has('styles') || facts.has('computedStyle') || needsCssLengthMetrics,
|
|
||||||
fragments: facts.has('subject.fragmentCount') || facts.has('subject.firstFragmentId'),
|
|
||||||
domAncestry: needsDomAncestry,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export function usesLayoutSpace(formula: FormulaNode): boolean {
|
export function usesLayoutSpace(formula: FormulaNode): boolean {
|
||||||
@@ -396,7 +385,7 @@ export function attachMeasuredChWidths(
|
|||||||
export async function extractWorldFastGeometry(
|
export async function extractWorldFastGeometry(
|
||||||
playwrightPage: Page,
|
playwrightPage: Page,
|
||||||
selectors: string[],
|
selectors: string[],
|
||||||
requiredFacts?: { geometry: boolean; topology: boolean; styles: boolean; fragments: boolean; domAncestry: boolean },
|
requiredFacts?: FactPlan,
|
||||||
): Promise<{ world: GeometryWorld; selectorToIds: Map<string, number[]>; errors: ImhotepDiagnostic[] }> {
|
): Promise<{ world: GeometryWorld; selectorToIds: Map<string, number[]>; errors: ImhotepDiagnostic[] }> {
|
||||||
interface FastExtractedElement {
|
interface FastExtractedElement {
|
||||||
tagName: string
|
tagName: string
|
||||||
@@ -728,6 +717,14 @@ export async function extractWorldFastGeometry(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (requiredFacts) {
|
||||||
|
requiredFacts.fulfillment[FactCategory.Geometry] = 'fulfilled'
|
||||||
|
if (requiredFacts.topology) requiredFacts.fulfillment[FactCategory.Topology] = 'fulfilled'
|
||||||
|
if (requiredFacts.styles) requiredFacts.fulfillment[FactCategory.Styles] = 'fulfilled'
|
||||||
|
if (requiredFacts.fragments) requiredFacts.fulfillment[FactCategory.Fragments] = 'fulfilled'
|
||||||
|
if (requiredFacts.domAncestry) requiredFacts.fulfillment[FactCategory.DomAncestry] = 'fulfilled'
|
||||||
|
if (requiredFacts.transforms) requiredFacts.fulfillment[FactCategory.Transforms] = 'fulfilled'
|
||||||
|
}
|
||||||
return { world, selectorToIds, errors }
|
return { world, selectorToIds, errors }
|
||||||
} finally {
|
} finally {
|
||||||
await playwrightPage.evaluate(() => {
|
await playwrightPage.evaluate(() => {
|
||||||
@@ -804,7 +801,7 @@ function remapTopologyIds(world: GeometryWorld, topologySubjectIds?: number[]):
|
|||||||
export async function extractWorldCdp(
|
export async function extractWorldCdp(
|
||||||
playwrightPage: Page,
|
playwrightPage: Page,
|
||||||
selectors: string[],
|
selectors: string[],
|
||||||
requiredFacts?: { geometry: boolean; topology: boolean; styles: boolean; fragments: boolean; domAncestry: boolean },
|
requiredFacts?: FactPlan,
|
||||||
): Promise<{ world: GeometryWorld; selectorToIds: Map<string, number[]>; errors: ImhotepDiagnostic[] }> {
|
): Promise<{ world: GeometryWorld; selectorToIds: Map<string, number[]>; errors: ImhotepDiagnostic[] }> {
|
||||||
const errors: ImhotepDiagnostic[] = []
|
const errors: ImhotepDiagnostic[] = []
|
||||||
const selectorToNodeIds = new Map<string, number[]>()
|
const selectorToNodeIds = new Map<string, number[]>()
|
||||||
@@ -912,6 +909,15 @@ export async function extractWorldCdp(
|
|||||||
selectorToIds.set(selectorKey, matches)
|
selectorToIds.set(selectorKey, matches)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (requiredFacts) {
|
||||||
|
requiredFacts.fulfillment[FactCategory.Geometry] = 'fulfilled'
|
||||||
|
if (requiredFacts.topology) requiredFacts.fulfillment[FactCategory.Topology] = 'fulfilled'
|
||||||
|
if (requiredFacts.styles) requiredFacts.fulfillment[FactCategory.Styles] = 'fulfilled'
|
||||||
|
if (requiredFacts.fragments) requiredFacts.fulfillment[FactCategory.Fragments] = 'fulfilled'
|
||||||
|
if (requiredFacts.domAncestry) requiredFacts.fulfillment[FactCategory.DomAncestry] = 'fulfilled'
|
||||||
|
if (requiredFacts.clipping) requiredFacts.fulfillment[FactCategory.Clipping] = 'fulfilled'
|
||||||
|
if (requiredFacts.scroll) requiredFacts.fulfillment[FactCategory.Scroll] = 'fulfilled'
|
||||||
|
}
|
||||||
return { world, selectorToIds, errors }
|
return { world, selectorToIds, errors }
|
||||||
} finally {
|
} finally {
|
||||||
await playwrightPage.evaluate(() => {
|
await playwrightPage.evaluate(() => {
|
||||||
@@ -935,7 +941,7 @@ export async function extractWorld(
|
|||||||
playwrightPage: Page,
|
playwrightPage: Page,
|
||||||
selectors: string[],
|
selectors: string[],
|
||||||
cacheDir?: string | null,
|
cacheDir?: string | null,
|
||||||
requiredFacts?: { geometry: boolean; topology: boolean; styles: boolean; fragments: boolean; domAncestry: boolean },
|
requiredFacts?: FactPlan,
|
||||||
allowFastGeometry = false,
|
allowFastGeometry = false,
|
||||||
): Promise<{ world: GeometryWorld; selectorToIds: Map<string, number[]>; errors: ImhotepDiagnostic[] }> {
|
): Promise<{ world: GeometryWorld; selectorToIds: Map<string, number[]>; errors: ImhotepDiagnostic[] }> {
|
||||||
async function resolveViewport(): Promise<{ width: number; height: number }> {
|
async function resolveViewport(): Promise<{ width: number; height: number }> {
|
||||||
|
|||||||
Reference in New Issue
Block a user