v1.1.0: pooled runtime, 959 tests, production hardening (0 squash)

This commit is contained in:
John Dvorak
2025-08-15 10:00:00 -07:00
commit 92deb689cd
321 changed files with 79170 additions and 0 deletions
+31
View File
@@ -0,0 +1,31 @@
{
"name": "imhotep-reporter",
"version": "1.0.0",
"type": "module",
"license": "MIT",
"repository": {
"type": "git",
"url": "https://github.com/anomalyco/imhotep.git"
},
"engines": {
"node": ">=18.0.0"
},
"main": "dist/index.js",
"types": "dist/index.d.ts",
"files": [
"dist"
],
"scripts": {
"build": "tsc",
"test": "node --test dist/**/*.test.js"
},
"exports": {
".": {
"types": "./dist/index.d.ts",
"default": "./dist/index.js"
}
},
"dependencies": {
"imhotep-core": "^1.0.0"
}
}
@@ -0,0 +1,335 @@
/**
* Canonical failure formatter.
*
* Transforms raw ImhotepResult diagnostics into narrative reports with
* five sections per failing clause:
* - Expected: human-readable assertion intent
* - Observed: measured metrics that caused failure
* - Why this matters: one-sentence semantic impact
* - Replay: reproduction command or seed/case info
* - Next: concrete next steps to investigate/fix
*
* Supports both human-readable text and structured JSON output.
*/
import type { ImhotepResult, ClauseResult, ClauseStatus } from 'imhotep-core';
import type { Diagnostic } from './diagnostics.js';
/**
* A single canonical failure entry with all five required sections.
*/
export interface CanonicalFailureEntry {
/** Human-readable assertion intent (from clauseLabel). */
expected: string;
/** Measured metrics that caused the failure. */
observed: string;
/** One-sentence explanation of the semantic impact. */
why: string;
/** Reproduction command or seed/case info. */
replay: string;
/** Concrete next steps to investigate or fix. */
next: string;
}
/**
 * Structured canonical report assembled from an ImhotepResult.
 *
 * This is the data shape that the JSON renderer serializes and that the text
 * renderer walks; it carries no pre-rendered text itself.
 */
export interface CanonicalReport {
/** Overall pass/fail status, copied from the source result's `passed` flag. */
passed: boolean;
/** Number of entries in `failures`. */
failureCount: number;
/** One entry per clause whose status is 'fail' or 'error'. */
failures: CanonicalFailureEntry[];
}
/**
 * Options controlling canonical formatter output.
 */
export interface CanonicalFormatOptions {
/** Output format: 'text' for terminal, 'json' for machine. Treated as 'text' when omitted. */
format?: 'text' | 'json';
/** Max line width for text wrapping and separator rules; 80 when omitted. */
maxWidth?: number;
/** Seed value printed in each entry's Replay section; left out of the section when undefined. */
seed?: number;
}
// ---------------------------------------------------------------------------
// Failure Type Detection
// ---------------------------------------------------------------------------
/**
 * Classify a failing clause by the code of its primary diagnostic.
 *
 * @param clause - The failing clause (not consulted by the classification).
 * @param diagnostic - Primary diagnostic for the clause, if one was found.
 * @returns The failure family; 'unknown' when there is no diagnostic or its
 *   code belongs to no known family.
 */
function detectFailureType(clause: ClauseResult, diagnostic: Diagnostic | undefined): 'relation' | 'size' | 'cardinality' | 'parse' | 'selector' | 'unknown' {
  const code = diagnostic?.code ?? '';

  // Two non-prefixed codes are treated as relation failures as well.
  const relationAliases = ['IMH_ALIGNMENT_FAILED', 'IMH_PREDICATE_FAILED'];
  if (code.startsWith('IMH_RELATION_') || relationAliases.includes(code)) {
    return 'relation';
  }

  const prefixFamilies = [
    ['IMH_SIZE_', 'size'],
    ['IMH_CARDINALITY_', 'cardinality'],
    ['IMH_PARSE_', 'parse'],
  ] as const;
  for (const [prefix, family] of prefixFamilies) {
    if (code.startsWith(prefix)) return family;
  }

  return code === 'IMH_SELECTOR_ZERO_MATCHES' ? 'selector' : 'unknown';
}
// ---------------------------------------------------------------------------
// Section Builders
// ---------------------------------------------------------------------------
/**
 * Build the "Expected" section: the clause label when present, otherwise a
 * generic `Clause <id>` placeholder.
 */
function buildExpected(clause: ClauseResult, _diagnostic: Diagnostic | undefined, _type: string): string {
  const label = clause.clauseLabel;
  if (label === undefined || label === null) {
    return `Clause ${clause.clauseId}`;
  }
  return label;
}
/**
 * Build the "Observed" section from the clause and diagnostic metrics.
 *
 * Clause-level metrics override diagnostic-level metrics on key collisions.
 * Each failure type reads its own well-known metric keys; when none of them
 * are present the diagnostic message (or a generic placeholder) is used, so
 * the section is never rendered empty.
 *
 * @param clause - The failing clause; its `metrics` are consulted.
 * @param diagnostic - Primary diagnostic for the clause, if any.
 * @param type - Failure family as returned by the failure-type detection.
 * @returns A '; '-separated, human-readable summary of what was measured.
 */
function buildObserved(clause: ClauseResult, diagnostic: Diagnostic | undefined, type: string): string {
  const metrics = clause.metrics ?? {};
  const diagMetrics = diagnostic?.metrics ?? {};
  const allMetrics = { ...diagMetrics, ...metrics };
  const parts: string[] = [];

  if (type === 'relation') {
    const gap = allMetrics.observedGap ?? allMetrics.gap;
    const minGap = allMetrics.minGap;
    if (gap !== undefined) parts.push(`measured gap is ${gap}px`);
    if (minGap !== undefined) parts.push(`minimum required gap is ${minGap}px`);
  } else if (type === 'size') {
    const observed = allMetrics.observedWidth ?? allMetrics.observedHeight ?? allMetrics.observedSize;
    const expected = allMetrics.minWidth ?? allMetrics.minHeight ?? allMetrics.minSize ?? allMetrics.expected;
    const prop = allMetrics.observedWidth !== undefined ? 'width' : allMetrics.observedHeight !== undefined ? 'height' : 'size';
    if (observed !== undefined) parts.push(`${prop} is ${observed}px`);
    if (expected !== undefined) parts.push(`expected ${prop} is ${expected}px`);
  } else if (type === 'cardinality') {
    const observed = allMetrics.observedCount;
    const expected = allMetrics.expectedCount;
    if (observed !== undefined) parts.push(`found ${observed} element(s)`);
    if (expected !== undefined) parts.push(`expected ${expected} element(s)`);
  } else if (type === 'parse') {
    parts.push(diagnostic?.message ?? 'parse error occurred');
  } else if (type === 'selector') {
    parts.push(`selector resolved to 0 elements`);
  } else if (Object.keys(allMetrics).length > 0) {
    // Unknown family: dump whatever metrics exist as key=value pairs.
    parts.push(
      Object.entries(allMetrics)
        .map(([k, v]) => `${k}=${v}`)
        .join(', '),
    );
  }

  // Fallback for every family: a typed failure without its expected metrics
  // must still say something rather than render a blank section.
  if (parts.length === 0) {
    parts.push(diagnostic?.message ?? 'failure details unavailable');
  }
  return parts.join('; ');
}
/**
 * Build the "Why this matters" sentence for a failure family.
 *
 * The selector quoted in the sentence comes from the diagnostic's source
 * reference when available, otherwise from the first single-quoted token in
 * the clause label.
 */
function buildWhy(clause: ClauseResult, diagnostic: Diagnostic | undefined, type: string): string {
  const selector = diagnostic?.sourceRef?.selector ?? extractSelectorFromLabel(clause.clauseLabel);

  const sentenceByType = new Map<string, string>([
    ['relation', `The spatial relationship between elements is violated, breaking layout expectations for "${selector}".`],
    ['size', `The element "${selector}" does not meet size constraints, which may cause overflow or clipping.`],
    ['cardinality', `The expected number of elements for "${selector}" was not found, indicating a missing or duplicate component.`],
    ['parse', `The assertion could not be parsed, so Imhotep cannot evaluate the intended contract.`],
    ['selector', `The selector "${selector}" matched nothing, so the assertion has no subject to evaluate.`],
  ]);

  return sentenceByType.get(type) ?? `The assertion failed, indicating a contract violation or extraction problem.`;
}
/**
 * Build the "Replay" section: a comma-separated list of the seed (when
 * given), the clause id, the source line/column (when known) and, for
 * selector and cardinality failures, the selector involved.
 */
function buildReplay(clause: ClauseResult, _diagnostic: Diagnostic | undefined, type: string, seed?: number): string {
  const ref = _diagnostic?.sourceRef;
  const fields: string[] = seed === undefined ? [] : [`seed: ${seed}`];

  fields.push(`clause: ${clause.clauseId}`);
  if (ref?.line !== undefined) {
    fields.push(`line: ${ref.line}`);
  }
  if (ref?.column !== undefined) {
    fields.push(`column: ${ref.column}`);
  }

  const needsSelector = type === 'selector' || type === 'cardinality';
  if (needsSelector) {
    const selector = ref?.selector ?? extractSelectorFromLabel(clause.clauseLabel);
    fields.push(`selector: "${selector}"`);
  }
  return fields.join(', ');
}
/**
 * Build the "Next" section.
 *
 * Fix hints carried by the diagnostic take priority and are joined with
 * spaces; without hints a canned suggestion for the failure family is used.
 */
function buildNext(clause: ClauseResult, diagnostic: Diagnostic | undefined, type: string): string {
  const hints = diagnostic?.fixHints ?? [];
  if (hints.length > 0) {
    return hints.join(' ');
  }

  const adviceByType = new Map<string, string>([
    ['relation', `Inspect the layout in the browser devtools and adjust element positions or gap thresholds.`],
    ['size', `Check the element dimensions with ui.extract() and adjust the expected size or CSS.`],
    ['cardinality', `Verify the selector matches the intended elements; use ui.extract() to debug.`],
    ['parse', `Fix the assertion syntax: use single-quoted selectors and valid relation keywords.`],
    ['selector', `Verify the selector is correct and the element exists in the DOM at evaluation time.`],
  ]);

  return adviceByType.get(type) ?? `Review the diagnostic message and metrics to determine the root cause.`;
}
/**
 * Pull the first single-quoted, non-empty token out of a clause label.
 *
 * @returns The token without its quotes, or 'unknown' when the label is
 *   missing, empty, or contains no such token.
 */
function extractSelectorFromLabel(label: string | undefined): string {
  if (!label) {
    return 'unknown';
  }
  const quoted = /'([^']+)'/.exec(label);
  return quoted?.[1] ?? 'unknown';
}
// ---------------------------------------------------------------------------
// Report Assembly
// ---------------------------------------------------------------------------
/**
 * Assemble the structured canonical report for a result.
 *
 * Every clause whose status is 'fail' or 'error' yields one entry. The
 * primary diagnostic for a clause is looked up first by the clause's first
 * listed diagnostic code (accepting diagnostics bound to this clause or to no
 * clause at all), then by clause id alone.
 *
 * @param result - Evaluation result to summarize.
 * @param options - Formatter options; only `seed` is consulted here.
 */
function buildCanonicalReport(result: ImhotepResult, options?: CanonicalFormatOptions): CanonicalReport {
  const primaryDiagnosticFor = (clause: ClauseResult): Diagnostic | undefined => {
    const primaryCode = clause.diagnostics?.[0];
    let found: Diagnostic | undefined;
    if (primaryCode) {
      found = result.diagnostics.find(
        (d: any) => d.code === primaryCode && (d.clauseId === clause.clauseId || !d.clauseId),
      ) as Diagnostic | undefined;
    }
    if (found) {
      return found;
    }
    // No code-based hit: settle for any diagnostic tied to this clause.
    return result.diagnostics.find((d: any) => d.clauseId === clause.clauseId) as Diagnostic | undefined;
  };

  const failures: CanonicalFailureEntry[] = [];
  for (const clause of result.clauseResults) {
    const failed = clause.status === 'fail' || clause.status === 'error';
    if (!failed) {
      continue;
    }
    const diagnostic = primaryDiagnosticFor(clause);
    const type = detectFailureType(clause, diagnostic);
    const entry: CanonicalFailureEntry = {
      expected: buildExpected(clause, diagnostic, type),
      observed: buildObserved(clause, diagnostic, type),
      why: buildWhy(clause, diagnostic, type),
      replay: buildReplay(clause, diagnostic, type, options?.seed),
      next: buildNext(clause, diagnostic, type),
    };
    failures.push(entry);
  }

  return { passed: result.passed, failureCount: failures.length, failures };
}
// ---------------------------------------------------------------------------
// Text Rendering (80-char wrapping)
// ---------------------------------------------------------------------------
/**
 * Greedy word-wrap of a single line on space boundaries.
 *
 * Words are never split: a word longer than `maxWidth` is emitted on a line
 * of its own. No empty line is produced in front of such a word.
 *
 * @param line - Text to wrap.
 * @param maxWidth - Maximum number of characters per output line.
 * @returns The wrapped lines; `[line]` unchanged when it already fits.
 */
function wrapLine(line: string, maxWidth: number): string[] {
  if (line.length <= maxWidth) return [line];

  const lines: string[] = [];
  let current = '';
  for (const word of line.split(' ')) {
    const candidate = current ? `${current} ${word}` : word;
    // Only break when there is something to flush; otherwise an over-long
    // word at the start of a line would leave a blank line behind it.
    if (current && candidate.length > maxWidth) {
      lines.push(current);
      current = word;
    } else {
      current = candidate;
    }
  }
  if (current) lines.push(current);
  return lines;
}
/**
 * Render one failure entry as text: a horizontal rule followed by the five
 * labelled sections, each wrapped to `maxWidth - 2` columns and prefixed with
 * a space, with a blank line after every section.
 */
function renderEntryText(entry: CanonicalFailureEntry, maxWidth: number): string {
  const rule = '─'.repeat(maxWidth);

  const sections: Array<[string, string]> = [
    ['Expected', entry.expected],
    ['Observed', entry.observed],
    ['Why this matters', entry.why],
    ['Replay', entry.replay],
    ['Next', entry.next],
  ];

  const body = sections.flatMap(([label, value]) => [
    `${label}:`,
    ...wrapLine(value, maxWidth - 2).map((piece) => ` ${piece}`),
    '',
  ]);

  return ['', rule, '', ...body].join('\n');
}
/**
 * Render the canonical failure report as terminal text.
 *
 * @param result - Evaluation result to report on.
 * @param options - Formatter options. `maxWidth` sets the width of the
 *   separator rules and the wrap column; values that are negative or not
 *   finite fall back to the default of 80, because `String.prototype.repeat`
 *   throws a RangeError for such counts.
 * @returns A fixed one-line message when no clause failed or errored,
 *   otherwise a banner followed by one block per failure.
 */
export function renderCanonicalText(result: ImhotepResult, options?: CanonicalFormatOptions): string {
  const report = buildCanonicalReport(result, options);
  // NOTE(review): this message is keyed on failureCount only; a result with
  // passed === false but no failing/erroring clause still reports success.
  if (report.failureCount === 0) {
    return 'All checks passed. No failures to report.';
  }

  const requestedWidth = options?.maxWidth;
  const maxWidth =
    requestedWidth !== undefined && Number.isFinite(requestedWidth) && requestedWidth >= 0
      ? requestedWidth
      : 80;

  const lines: string[] = [];
  lines.push('='.repeat(maxWidth));
  lines.push('IMHOTEP CANONICAL FAILURE REPORT');
  lines.push(`${report.failureCount} failure(s) across ${result.clauseResults.length} clause(s)`);
  lines.push('='.repeat(maxWidth));
  for (const entry of report.failures) {
    lines.push(renderEntryText(entry, maxWidth));
  }
  return lines.join('\n');
}
// ---------------------------------------------------------------------------
// JSON Rendering
// ---------------------------------------------------------------------------
/**
 * Render the canonical report as pretty-printed JSON (two-space indent).
 */
export function renderCanonicalJson(result: ImhotepResult, options?: CanonicalFormatOptions): string {
  return JSON.stringify(buildCanonicalReport(result, options), null, 2);
}
// ---------------------------------------------------------------------------
// Main Entry Point
// ---------------------------------------------------------------------------
/**
 * Format an ImhotepResult into a canonical failure report.
 *
 * Dispatches to the JSON renderer when `options.format` is 'json' and to the
 * text renderer in every other case, including when no format is given.
 *
 * @param result - The evaluation result from checkAll or property runs.
 * @param options - Format options (text or json, wrapping, seed).
 * @returns A string containing the formatted report.
 */
export function formatCanonical(result: ImhotepResult, options?: CanonicalFormatOptions): string {
  return options?.format === 'json'
    ? renderCanonicalJson(result, options)
    : renderCanonicalText(result, options);
}
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,423 @@
/**
* Diagnostic object creation and formatting.
*
* Diagnostics carry codes, positions, related facts, and fix hints.
* They map proof outcomes back to author source spans.
*
* Extended for V1.1 with property-run diagnostics (sampled runs,
* seeds, counterexamples) to support Riley's 2 AM replay workflow.
*/
import type { TraceEvent, TraceRefs } from './traces.js';
import {
IMH_RELATION_LEFT_OF_FAILED,
IMH_RELATION_RIGHT_OF_FAILED,
IMH_RELATION_ABOVE_FAILED,
IMH_RELATION_BELOW_FAILED,
IMH_RELATION_ALIGNED_FAILED,
IMH_RELATION_CENTERED_FAILED,
IMH_FACT_OBSERVED_GAP,
lookupCode,
} from './codes.js';
/**
* Severity of a diagnostic.
*/
export type Severity = 'error' | 'warning' | 'info';
/**
* High-level category for routing and suggestion logic.
*/
export type DiagnosticCategory =
| 'parse-error'
| 'validation-error'
| 'resolution-error'
| 'extraction-error'
| 'contract-failure'
| 'indeterminate-result'
| 'internal-error';
/**
* Source position inside the original authoring document.
*/
export interface Position {
start: { line: number; column: number; offset: number };
end: { line: number; column: number; offset: number };
}
/**
* A fact that is related to the diagnostic but not the primary cause.
*/
export interface RelatedFact {
code?: string;
message: string;
position?: Position;
}
/**
* A fix hint is an actionable string the user can follow.
*/
export type FixHint = string;
/**
* Canonical source reference for a diagnostic.
*/
export interface SourceRef {
file?: string;
line?: number;
column?: number;
selector?: string;
}
/**
* Automated fix suggestion attached to a diagnostic.
*/
export interface SuggestedFix {
action: string;
target: string;
value: string;
rationale: string;
}
/**
* A rich diagnostic object.
*/
export interface Diagnostic {
diagnosticId: string;
code: string;
severity: Severity;
category: DiagnosticCategory;
message: string;
position?: Position;
source?: string;
clauseId?: string;
sceneId?: string;
snapshotId?: string;
envCaseId?: string;
/** Subject resolution origin for semantic selectors (Stream 7). */
subjectOrigin?: string;
related: RelatedFact[];
traceRef?: string;
fixHints: FixHint[];
/** V1 required: structured metrics associated with this diagnostic. */
metrics: Record<string, number>;
/** V1 required: canonical source reference. */
sourceRef: SourceRef;
/** V1 optional: automated fix suggestion. */
suggestedFix?: SuggestedFix;
}
// ---------------------------------------------------------------------------
// Sampled-Run Diagnostic Types (V1.1)
// ---------------------------------------------------------------------------
/**
* A diagnostic produced during a sampled or enumerated property run.
* Captures the seed, run index, input snapshot, and scene result so
* Riley can replay the exact failing input with a single seed number.
*/
export interface PropertyDiagnostic extends Diagnostic {
seed?: number;
runIndex?: number;
inputSnapshot?: unknown;
runResult?: 'pass' | 'fail' | 'error';
counterexample?: unknown;
shrunkCounterexample?: unknown;
}
/**
* Blueprint for building a PropertyDiagnostic.
*/
export interface PropertyDiagnosticBlueprint extends DiagnosticBlueprint {
seed?: number;
runIndex?: number;
inputSnapshot?: unknown;
runResult?: 'pass' | 'fail' | 'error';
counterexample?: unknown;
shrunkCounterexample?: unknown;
}
/**
 * Build a PropertyDiagnostic: a regular diagnostic created from the blueprint
 * plus the property-run fields (seed, run index, input snapshot, run result,
 * counterexamples) copied over as-is.
 */
export function makePropertyDiagnostic(
  blueprint: PropertyDiagnosticBlueprint,
  deps: { idGen(): string },
): PropertyDiagnostic {
  const diagnostic = makeDiagnostic(blueprint, deps);
  const { seed, runIndex, inputSnapshot, runResult, counterexample, shrunkCounterexample } = blueprint;
  return {
    ...diagnostic,
    seed,
    runIndex,
    inputSnapshot,
    runResult,
    counterexample,
    shrunkCounterexample,
  };
}
/**
* Inputs needed to build a diagnostic from a proof or system error.
* All optional fields are injected by the caller.
*/
export interface DiagnosticBlueprint {
code: string;
severity?: Severity;
category: DiagnosticCategory;
message: string;
position?: Position;
source?: string;
clauseId?: string;
sceneId?: string;
snapshotId?: string;
envCaseId?: string;
related?: RelatedFact[];
traceRef?: string;
fixHints?: FixHint[];
metrics?: Record<string, number>;
sourceRef?: SourceRef;
suggestedFix?: SuggestedFix;
}
/**
 * Build a Diagnostic from a blueprint, applying safe defaults.
 *
 * Defaults: severity 'error'; `related` and `fixHints` empty arrays;
 * `metrics` and `sourceRef` empty objects. A `suggestedFix` on the blueprint
 * is carried over to the diagnostic; the key is left off entirely when the
 * blueprint has none.
 *
 * @param blueprint - Caller-supplied diagnostic fields.
 * @param deps - Injected dependencies; `idGen` supplies the diagnostic id.
 * @returns The populated diagnostic.
 */
export function makeDiagnostic(
  blueprint: DiagnosticBlueprint,
  deps: { idGen(): string },
): Diagnostic {
  const diagnostic: Diagnostic = {
    diagnosticId: deps.idGen(),
    code: blueprint.code,
    severity: blueprint.severity ?? 'error',
    category: blueprint.category,
    message: blueprint.message,
    position: blueprint.position,
    source: blueprint.source,
    clauseId: blueprint.clauseId,
    sceneId: blueprint.sceneId,
    snapshotId: blueprint.snapshotId,
    envCaseId: blueprint.envCaseId,
    related: blueprint.related ?? [],
    traceRef: blueprint.traceRef,
    fixHints: blueprint.fixHints ?? [],
    metrics: blueprint.metrics ?? {},
    sourceRef: blueprint.sourceRef ?? {},
  };
  // Previously dropped: the blueprint accepts a suggested fix, so forward it.
  if (blueprint.suggestedFix !== undefined) {
    diagnostic.suggestedFix = blueprint.suggestedFix;
  }
  return diagnostic;
}
/**
 * One-line summary of a diagnostic: `[SEVERITY code @ line:column] message`.
 * The location reads `unknown` when the diagnostic has no position.
 */
export function formatDiagnosticCompact(d: Diagnostic): string {
  let location = 'unknown';
  if (d.position) {
    const { line, column } = d.position.start;
    location = `${line}:${column}`;
  }
  const severity = d.severity.toUpperCase();
  return `[${severity} ${d.code} @ ${location}] ${d.message}`;
}
/**
 * Multi-line rendering of a diagnostic: severity and message, code, then the
 * position, clause id, related facts and fix hints when each is present.
 */
export function formatDiagnosticFull(d: Diagnostic): string {
  const out: string[] = [`${d.severity.toUpperCase()}: ${d.message}`, ` code: ${d.code}`];

  if (d.position) {
    const { line, column } = d.position.start;
    out.push(` at: line ${line}, column ${column}`);
  }
  if (d.clauseId) {
    out.push(` clause: ${d.clauseId}`);
  }
  if (d.related.length > 0) {
    out.push(' related:', ...d.related.map((fact) => ` - ${fact.message}`));
  }
  if (d.fixHints.length > 0) {
    out.push(' fix hints:', ...d.fixHints.map((hint) => ` - ${hint}`));
  }

  return out.join('\n');
}
/**
* Minimal proof shape expected by the reporter.
* The solver contract defines the full proof object.
*/
export interface ProofLike {
proofId: string;
clauseId: string;
outcome: 'pass' | 'fail';
truth: 'determinate' | 'indeterminate' | 'approximate';
failedPredicate?: { op: string; left: number; right: number };
witness?: {
subjectId?: number;
referenceId?: number;
envCaseId?: string;
snapshotId?: string;
};
}
/**
* Rich relation proof carrying all metadata needed for a detailed
* contract-failure diagnostic. Produced by the solver / pipeline
* when a spatial-relation clause fails.
*/
export interface RelationProof extends ProofLike {
/** CSS selector that identified the subject element. */
subjectSelector: string;
/** CSS selector that identified the reference element. */
referenceSelector: string;
/** Relation name, e.g. "leftOf", "above". */
relation: string;
/** Expected bound values (minGap, maxGap, etc.). */
expected: Record<string, number>;
/** Observed / measured values (observedGap, etc.). */
observed: Record<string, number>;
/** Border-box rect of the subject element. */
subjectRect: { left: number; top: number; right: number; bottom: number };
/** Border-box rect of the reference element. */
referenceRect: { left: number; top: number; right: number; bottom: number };
/** Frame identifier (viewport, containing-block, etc.). */
frame: string;
}
/**
 * Maps a relation name (the `relation` field of a RelationProof) to the
 * failure code reported when that relation does not hold. Relations missing
 * from this table are handled by the caller's fallback.
 */
const RELATION_TO_CODE: Record<string, string> = {
leftOf: IMH_RELATION_LEFT_OF_FAILED,
rightOf: IMH_RELATION_RIGHT_OF_FAILED,
above: IMH_RELATION_ABOVE_FAILED,
below: IMH_RELATION_BELOW_FAILED,
alignedWith: IMH_RELATION_ALIGNED_FAILED,
centeredWithin: IMH_RELATION_CENTERED_FAILED,
};
/**
 * Build a rich relation-specific diagnostic from a RelationProof.
 *
 * Includes measured values, expected bounds, element rects, and frame
 * context so the author knows exactly what failed and why.
 *
 * - Code: the relation's own failure code; relations without a dedicated
 *   code get the generic 'IMH_RELATION_FAILED' instead of being reported as
 *   a left-of failure.
 * - Metrics: the proof's expected bounds merged with its observed values
 *   (observed wins on key collisions), so consumers can read numbers
 *   directly instead of parsing the message.
 * - Source reference: carries the subject selector.
 *
 * @param proof - Failed relation proof.
 * @param deps - Injected dependencies; `idGen` supplies the diagnostic id.
 * @returns A 'contract-failure' diagnostic for the proof's clause.
 */
export function buildRelationDiagnostic(
  proof: RelationProof,
  deps: { idGen(): string },
): Diagnostic {
  // Own-property check keeps names such as "constructor" from resolving to
  // inherited Object.prototype members.
  const mappedCode = Object.prototype.hasOwnProperty.call(RELATION_TO_CODE, proof.relation)
    ? RELATION_TO_CODE[proof.relation]
    : undefined;
  const code = mappedCode ?? 'IMH_RELATION_FAILED';

  const parts: string[] = [
    `Relation "${proof.relation}" failed for subject "${proof.subjectSelector}" vs reference "${proof.referenceSelector}".`,
  ];
  if (proof.observed.observedGap !== undefined && proof.expected.minGap !== undefined) {
    parts.push(`Measured gap is ${proof.observed.observedGap}px, but minimum required gap is ${proof.expected.minGap}px.`);
  }
  if (proof.observed.observedGap !== undefined && proof.expected.maxGap !== undefined) {
    parts.push(`Maximum allowed gap is ${proof.expected.maxGap}px.`);
  }

  const related: RelatedFact[] = [
    {
      code: IMH_FACT_OBSERVED_GAP,
      message: `Subject rect: [${fmtRect(proof.subjectRect)}]`,
    },
    {
      code: IMH_FACT_OBSERVED_GAP,
      message: `Reference rect: [${fmtRect(proof.referenceRect)}]`,
    },
    {
      code: 'IMH_FRAME_CONTEXT',
      message: `Frame: ${proof.frame}`,
    },
  ];
  if (proof.failedPredicate) {
    related.push({
      code: 'IMH_PREDICATE_FAILED',
      message: `Predicate ${proof.failedPredicate.op} failed: ${proof.failedPredicate.left} vs ${proof.failedPredicate.right}`,
    });
  }

  return makeDiagnostic(
    {
      code,
      category: 'contract-failure',
      message: parts.join(' '),
      clauseId: proof.clauseId,
      related,
      fixHints: [
        `Check the layout in frame "${proof.frame}".`,
        `Subject: ${proof.subjectSelector} at [${fmtRect(proof.subjectRect)}]`,
        `Reference: ${proof.referenceSelector} at [${fmtRect(proof.referenceRect)}]`,
      ],
      metrics: { ...proof.expected, ...proof.observed },
      sourceRef: { selector: proof.subjectSelector },
    },
    deps,
  );
}
/** Format a rect as `left=… top=… right=… bottom=…`. */
function fmtRect(r: { left: number; top: number; right: number; bottom: number }): string {
  const { left, top, right, bottom } = r;
  return [`left=${left}`, `top=${top}`, `right=${right}`, `bottom=${bottom}`].join(' ');
}
/**
 * Create a diagnostic from a proof object.
 *
 * Only failing proofs produce a diagnostic. An indeterminate proof is
 * categorized as 'indeterminate-result', any other as 'contract-failure'.
 * The failed predicate and the witness's environment case / snapshot, when
 * present, are attached as related facts.
 *
 * @param proof - Proof to convert.
 * @param deps - Injected id generator and per-clause code, message and
 *   fix-hint providers.
 * @returns The diagnostic, or null when the proof did not fail.
 */
export function diagnosticFromProof(
  proof: ProofLike,
  deps: {
    idGen(): string;
    codeForClause(clauseId: string): string;
    messageForClause(clauseId: string, proof: ProofLike): string;
    fixHintsForClause(clauseId: string, proof: ProofLike): string[];
  },
): Diagnostic | null {
  if (proof.outcome !== 'fail') return null;

  const { clauseId, failedPredicate, witness } = proof;

  const related: RelatedFact[] = [];
  if (failedPredicate) {
    const { op, left, right } = failedPredicate;
    related.push({
      code: 'IMH_PREDICATE_FAILED',
      message: `Predicate ${op} failed: ${left} vs ${right}`,
    });
  }
  if (witness?.envCaseId) {
    related.push({
      code: 'IMH_WITNESS_ENV',
      message: `Environment case: ${witness.envCaseId}`,
    });
  }
  if (witness?.snapshotId) {
    related.push({
      code: 'IMH_WITNESS_SNAPSHOT',
      message: `Snapshot: ${witness.snapshotId}`,
    });
  }

  let category: DiagnosticCategory = 'contract-failure';
  if (proof.truth === 'indeterminate') {
    category = 'indeterminate-result';
  }

  return makeDiagnostic(
    {
      code: deps.codeForClause(clauseId),
      category,
      message: deps.messageForClause(clauseId, proof),
      clauseId,
      envCaseId: witness?.envCaseId,
      snapshotId: witness?.snapshotId,
      related,
      fixHints: deps.fixHintsForClause(clauseId, proof),
    },
    deps,
  );
}
@@ -0,0 +1,260 @@
/**
* Pattern-matching failure analyzer for Imhotep diagnostics.
*
* Maps failure codes + observed metrics to likely causes and concrete fixes.
* Fail-closed: unknown failures return undefined (no analysis attached).
*
* Inspired by Apophis cross-pollination research (analyzeFailure pipeline).
*/
/**
* Structured analysis attached to a diagnostic when a pattern rule matches.
*/
export interface FailureAnalysis {
/** Human-readable classification of the root cause. */
likelyCause: string;
/** Ordered list of concrete fixes the user can apply. */
suggestedFixes: string[];
/** Severity override from the rule (may differ from diagnostic severity). */
severity: 'error' | 'warning' | 'info';
}
/**
* Input context passed to each rule. Rules inspect the failure code,
* the diagnostic message, and any metrics extracted from the proof.
*/
export interface FailureContext {
code: string;
message: string;
metrics: Record<string, number>;
category?: string;
}
/**
* A single pattern rule: predicate + analyzer factory.
*/
export interface FailureRule {
/** Unique rule identifier for debugging and telemetry. */
ruleId: string;
/** Returns true when this rule applies to the given context. */
matches(ctx: FailureContext): boolean;
/** Produces the analysis for a matched context. */
analyze(ctx: FailureContext): FailureAnalysis;
}
// ---------------------------------------------------------------------------
// Built-in pattern rules
// ---------------------------------------------------------------------------
/** Read a metric by key; yields undefined when it is absent, not a number, or NaN. */
function getMetric(metrics: Record<string, number>, key: string): number | undefined {
  const value = metrics[key];
  const usable = typeof value === 'number' && !Number.isNaN(value);
  return usable ? value : undefined;
}
/**
 * Fallback for missing metrics: read the number out of a "gap is <n>px"
 * phrase (case-insensitive) in a diagnostic message.
 */
function parseGapFromMessage(message: string): number | undefined {
  const found = /gap\s+is\s+(-?[\d.]+)px/i.exec(message);
  if (found === null) {
    return undefined;
  }
  const gap = parseFloat(found[1]);
  if (Number.isNaN(gap)) {
    return undefined;
  }
  return gap;
}
/**
 * Fallback for missing metrics: read the number out of a "width is <n>px"
 * phrase (case-insensitive) in a diagnostic message.
 */
function parseWidthFromMessage(message: string): number | undefined {
  const found = /width\s+is\s+(-?[\d.]+)px/i.exec(message);
  if (found === null) {
    return undefined;
  }
  const width = parseFloat(found[1]);
  if (Number.isNaN(width)) {
    return undefined;
  }
  return width;
}
/**
 * Read the element count out of a "resolved to <n> element" phrase
 * (case-insensitive) in a cardinality message.
 */
function parseCountFromMessage(message: string): number | undefined {
  const found = /resolved to\s+(\d+)\s+element/i.exec(message);
  return found === null ? undefined : parseInt(found[1], 10);
}
/**
 * Matches a failed left-of relation whose observed gap is negative. The gap
 * is taken from the `observedGap` metric, or parsed from the message when the
 * metric is missing.
 */
const RULE_LEFT_OF_OVERLAP: FailureRule = {
  ruleId: 'leftOfOverlap',
  matches: (ctx) => {
    if (ctx.code !== 'IMH_RELATION_LEFT_OF_FAILED') {
      return false;
    }
    const observedGap = getMetric(ctx.metrics, 'observedGap') ?? parseGapFromMessage(ctx.message);
    if (observedGap === undefined) {
      return false;
    }
    return observedGap < 0;
  },
  analyze: (_ctx) => ({
    likelyCause: 'Horizontal overlap',
    suggestedFixes: [
      'Check that the subject element has a positive horizontal margin from the reference.',
      'Verify neither element is using negative margins or float that causes overlap.',
      'Consider adding clearfix or adjusting flex/grid gap settings.',
    ],
    severity: 'error',
  }),
};
/**
 * Matches a failed inside relation where the subject is wider or taller than
 * its container according to the metrics, or — failing that — where the
 * message mentions "overflow" or "larger".
 */
const RULE_INSIDE_OVERFLOW: FailureRule = {
  ruleId: 'insideOverflow',
  matches: (ctx) => {
    if (ctx.code !== 'IMH_RELATION_INSIDE_FAILED') {
      return false;
    }

    // True only when both metrics exist and the subject exceeds the container.
    const exceeds = (subjectKey: string, containerKey: string): boolean => {
      const subject = getMetric(ctx.metrics, subjectKey);
      const container = getMetric(ctx.metrics, containerKey);
      return subject !== undefined && container !== undefined && subject > container;
    };
    if (exceeds('subjectWidth', 'containerWidth') || exceeds('subjectHeight', 'containerHeight')) {
      return true;
    }

    // Fallback: detect overflow from message heuristics
    const lowered = ctx.message.toLowerCase();
    return lowered.includes('overflow') || lowered.includes('larger');
  },
  analyze: (_ctx) => ({
    likelyCause: 'Overflow or box-sizing issue',
    suggestedFixes: [
      'Check padding on the container — subject may be larger than content-box allows.',
      'Ensure box-sizing: border-box is applied so padding does not increase total size.',
      'Verify the subject dimensions do not exceed the container inner width/height.',
    ],
    severity: 'error',
  }),
};
/**
 * Matches a failed at-least size check whose observed width is below 44px.
 * The width is taken from the `observedWidth` metric, or parsed from the
 * message when the metric is missing. Reported as a warning.
 */
const RULE_SIZE_TOUCH_TARGET: FailureRule = {
  ruleId: 'sizeTouchTarget',
  matches: (ctx) => {
    if (ctx.code !== 'IMH_SIZE_AT_LEAST_FAILED') {
      return false;
    }
    const observedWidth = getMetric(ctx.metrics, 'observedWidth') ?? parseWidthFromMessage(ctx.message);
    if (observedWidth === undefined) {
      return false;
    }
    return observedWidth < 44;
  },
  analyze: (_ctx) => ({
    likelyCause: 'Touch target too small',
    suggestedFixes: [
      'Increase min-width to at least 44px to meet WCAG 2.5.5 / mobile accessibility guidelines.',
      'Check that padding or border is not collapsing the clickable area.',
      'Consider using a larger font size or icon scale if the element is text-based.',
    ],
    severity: 'warning',
  }),
};
/**
 * Matches a failed exactly-one check where zero elements were observed. The
 * count is taken from the `observedCount` metric, or parsed from the message
 * when the metric is missing.
 */
const RULE_CARDINALITY_MISSING: FailureRule = {
  ruleId: 'cardinalityMissing',
  matches: (ctx) => {
    if (ctx.code !== 'IMH_CARDINALITY_EXACTLYONE_FAILED') {
      return false;
    }
    const observedCount = getMetric(ctx.metrics, 'observedCount') ?? parseCountFromMessage(ctx.message);
    return observedCount === 0;
  },
  analyze: (_ctx) => ({
    likelyCause: 'Missing element',
    suggestedFixes: [
      'Verify the selector matches an element that is actually rendered in the DOM.',
      'Check for conditional rendering that may hide the element in this state.',
      'Use ui.extract(selector) to debug what the page currently contains.',
    ],
    severity: 'error',
  }),
};
/**
 * Matches a failed exactly-one check where more than one element was
 * observed. The count is taken from the `observedCount` metric, or parsed
 * from the message when the metric is missing.
 */
const RULE_CARDINALITY_DUPLICATE: FailureRule = {
  ruleId: 'cardinalityDuplicate',
  matches: (ctx) => {
    if (ctx.code !== 'IMH_CARDINALITY_EXACTLYONE_FAILED') {
      return false;
    }
    const observedCount = getMetric(ctx.metrics, 'observedCount') ?? parseCountFromMessage(ctx.message);
    if (observedCount === undefined) {
      return false;
    }
    return observedCount > 1;
  },
  analyze: (_ctx) => ({
    likelyCause: 'Duplicate matches',
    suggestedFixes: [
      'Check for repeated data-testid or class names across sibling elements.',
      'Scope the selector more tightly (e.g. add a parent prefix).',
      'Use a semantic selector (getByRole, getByLabelText) for disambiguation.',
    ],
    severity: 'error',
  }),
};
/** Matches any diagnostic carrying the zero-matches selector code. */
const RULE_SELECTOR_ZERO_MATCHES: FailureRule = {
  ruleId: 'selectorZeroMatches',
  matches: (ctx) => ctx.code === 'IMH_SELECTOR_ZERO_MATCHES',
  analyze: (_ctx) => ({
    likelyCause: 'Selector not found',
    suggestedFixes: [
      'Use ui.extract(selector) to verify the selector resolves to at least one element.',
      'Check that the selector is valid CSS and the element is in the DOM.',
      'For semantic selectors, confirm the accessible name or role is correct.',
    ],
    severity: 'error',
  }),
};
/**
 * Default rule set shipped with V1.0.
 *
 * Order is significant: analyzeFailure returns the analysis of the first rule
 * whose `matches` returns true, so earlier entries take precedence.
 */
export const DEFAULT_FAILURE_RULES: FailureRule[] = [
RULE_LEFT_OF_OVERLAP,
RULE_INSIDE_OVERFLOW,
RULE_SIZE_TOUCH_TARGET,
RULE_CARDINALITY_MISSING,
RULE_CARDINALITY_DUPLICATE,
RULE_SELECTOR_ZERO_MATCHES,
];
// ---------------------------------------------------------------------------
// Analyzer engine
// ---------------------------------------------------------------------------
/**
 * Analyze a failure context against a set of rules.
 *
 * Rules are tried in array order and the first one that matches produces the
 * analysis; later rules are not consulted.
 *
 * @param ctx — failure context (code, message, metrics)
 * @param rules — rule set to evaluate (defaults to built-in rules)
 * @returns FailureAnalysis if a rule matches, undefined otherwise (fail-closed)
 */
export function analyzeFailure(
  ctx: FailureContext,
  rules: FailureRule[] = DEFAULT_FAILURE_RULES,
): FailureAnalysis | undefined {
  const firstMatch = rules.find((candidate) => candidate.matches(ctx));
  return firstMatch?.analyze(ctx);
}
/**
 * Convenience: analyze a raw diagnostic-like object and attach the result
 * as an `analysis` property.
 *
 * Missing `metrics` are treated as an empty record. The input is never
 * mutated: a shallow copy carrying `analysis` is returned on a match, and the
 * input object itself is returned otherwise.
 *
 * @param diagnostic — any object with code, message, metrics, and optional category
 * @param rules — rule set to evaluate
 * @returns a copy with `analysis` added when a rule matches, else the input
 */
export function attachFailureAnalysis<T extends { code: string; message: string; metrics?: Record<string, number>; category?: string }>(
  diagnostic: T,
  rules: FailureRule[] = DEFAULT_FAILURE_RULES,
): T & { analysis?: FailureAnalysis } {
  const { code, message, category } = diagnostic;
  const analysis = analyzeFailure(
    { code, message, metrics: diagnostic.metrics ?? {}, category },
    rules,
  );
  return analysis ? { ...diagnostic, analysis } : diagnostic;
}
+120
View File
@@ -0,0 +1,120 @@
/**
* Human-readable reporter output.
*
* Turns diagnostics, traces, and shrink results into plain text
* suitable for terminal reading.
*/
import type { Diagnostic } from './diagnostics.js';
import type { TraceEvent } from './traces.js';
import type { ShrinkResult } from './shrink.js';
/**
 * Options for human formatting.
 * Injected so callers control how much detail is printed (traces, shrink
 * summaries, number of related facts). Every field is optional.
 */
export interface HumanFormatOptions {
/** Show matching trace events after each diagnostic. */
showTraces?: boolean;
/** Show the shrink summary when one is available for the diagnostic's clause. */
showShrink?: boolean;
/** Max related facts to print per diagnostic; the renderer falls back to 5 when omitted. */
maxRelated?: number;
}
/**
 * Render a list of diagnostics into one newline-joined, human-readable string.
 *
 * For every diagnostic, in order:
 *  1. the diagnostic itself is rendered;
 *  2. with `opts.showShrink`, the shrink summary registered under the
 *     diagnostic's `clauseId` is appended (if any);
 *  3. with `opts.showTraces` and a `traceRef` on the diagnostic, every trace
 *     whose id equals that `traceRef`, or whose refs point at the diagnostic
 *     id, is listed under a `trace:` header.
 *
 * @param diagnostics - diagnostics to print
 * @param traces - pool of trace events to search for related entries
 * @param shrinkResults - shrink results keyed by clause id
 * @param opts - verbosity switches
 * @returns the rendered report (empty string for no diagnostics)
 */
export function renderHumanReport(
  diagnostics: Diagnostic[],
  traces: readonly TraceEvent[],
  shrinkResults: Map<string, ShrinkResult>,
  opts: HumanFormatOptions = {},
): string {
  const out: string[] = [];

  for (const diag of diagnostics) {
    out.push(renderDiagnostic(diag, opts));

    const wantsShrink = opts.showShrink && diag.clauseId && shrinkResults.has(diag.clauseId);
    if (wantsShrink) {
      out.push(renderShrink(shrinkResults.get(diag.clauseId)!));
    }

    // Trace lookup is gated on the diagnostic carrying a traceRef at all.
    if (!opts.showTraces || !diag.traceRef) {
      continue;
    }
    const matching = traces.filter(
      (event) => event.traceEventId === diag.traceRef || event.refs.diagnosticId === diag.diagnosticId,
    );
    if (matching.length === 0) {
      continue;
    }
    out.push(' trace:');
    for (const event of matching) {
      out.push(` ${event.phase} at ${event.at}`);
    }
  }

  return out.join('\n');
}
/**
 * Render a single diagnostic as a newline-joined block of text.
 *
 * Layout, top to bottom: severity marker + message, the diagnostic code, the
 * start position (when present), up to `opts.maxRelated` related facts
 * (default 5) followed by a "… and N more" line when truncated, the fix
 * hints, and finally the suggested fix (action, target, value, rationale).
 *
 * @param d - diagnostic to render
 * @param opts - formatting options; only `maxRelated` is consulted here
 * @returns the rendered text
 */
export function renderDiagnostic(
  d: Diagnostic,
  opts: HumanFormatOptions = {},
): string {
  // Severities other than error/warning get an empty marker.
  let marker = '';
  if (d.severity === 'error') {
    marker = '✖';
  } else if (d.severity === 'warning') {
    marker = '⚠';
  }

  const out: string[] = [`${marker} ${d.message}`, ` ${d.code}`];

  if (d.position) {
    const { line, column } = d.position.start;
    out.push(` at line ${line}, column ${column}`);
  }

  const limit = opts.maxRelated ?? 5;
  const relatedTotal = d.related.length;
  if (relatedTotal > 0) {
    out.push(' related:');
    for (const fact of d.related.slice(0, limit)) {
      out.push(`${fact.message}`);
    }
    if (relatedTotal > limit) {
      out.push(` … and ${relatedTotal - limit} more`);
    }
  }

  if (d.fixHints.length > 0) {
    out.push(' hints:');
    for (const hint of d.fixHints) {
      out.push(`${hint}`);
    }
  }

  const fix = d.suggestedFix;
  if (fix) {
    out.push(
      ' suggested fix:',
      ` action: ${fix.action}`,
      ` target: ${fix.target}`,
      ` value: ${fix.value}`,
      ` rationale: ${fix.rationale}`,
    );
  }

  return out.join('\n');
}
/**
 * Render a shrink result summary.
 *
 * Emits a `shrink:` header followed by either the reduced axes and step count
 * (when `result.reduced` is truthy) or a "no reduction possible" line.
 *
 * @param result - outcome of a witness-shrinking pass
 * @returns the newline-joined summary
 */
export function renderShrink(result: ShrinkResult): string {
  const detail = result.reduced
    ? [` reduced across: ${result.axes.join(', ')}`, ` steps: ${result.steps}`]
    : [' no reduction possible'];
  return [' shrink:', ...detail].join('\n');
}
+133
View File
@@ -0,0 +1,133 @@
/**
* imhotep-reporter
*
* Diagnostics, traceability, and witness shrinking for Imhotep.
* Makes failures explainable with rich diagnostics, trace chains,
* and minimal failing witnesses.
*/
// Trace event model
export {
createTraceBuilder,
findClauseTraces,
traceChainForProof,
} from './traces.js';
export type {
TracePhase,
TraceRefs,
TraceEvent,
TraceBuilder,
} from './traces.js';
// Diagnostic objects and formatting
export {
makeDiagnostic,
formatDiagnosticCompact,
formatDiagnosticFull,
diagnosticFromProof,
} from './diagnostics.js';
export type {
Severity,
DiagnosticCategory,
Position,
RelatedFact,
FixHint,
Diagnostic,
DiagnosticBlueprint,
ProofLike,
PropertyDiagnostic,
PropertyDiagnosticBlueprint,
SourceRef,
SuggestedFix,
} from './diagnostics.js';
export {
makePropertyDiagnostic,
buildRelationDiagnostic,
} from './diagnostics.js';
export type {
RelationProof,
} from './diagnostics.js';
// Diagnostic code registry
export {
lookupCode,
listCodes,
formatMessage,
} from './codes.js';
export type {
CodeEntry,
Severity as CodeSeverity,
Category as CodeCategory,
} from './codes.js';
// Witness shrinking
export { shrinkWitness } from './shrink.js';
export type {
Witness,
ShrinkResult,
ShrinkAxis,
StillFails,
} from './shrink.js';
// Property-run diagnostics and replay (V1.1)
export {
buildPropertyDiagnostics,
buildEnumeratedPropertyDiagnostics,
buildShrunkPropertyDiagnostics,
} from './property-diagnostics.js';
export {
buildReplayMetadata,
buildReplayScript,
buildReplayFromResult,
replay,
} from './replay.js';
export type {
ReplayMetadata,
ReplayScript,
RunResult,
ReplayExecutor,
} from './replay.js';
// Human-readable reporter
export { renderHumanReport, renderDiagnostic, renderShrink } from './human.js';
export type { HumanFormatOptions } from './human.js';
// JSON reporter
export { renderJsonReport, buildJsonReport } from './json.js';
export type {
JsonFormatOptions,
JsonReport,
JsonDiagnostic,
} from './json.js';
// Suggestion engine
export {
createDefaultSuggestionRegistry,
attachSuggestions,
} from './suggestions.js';
export type { Suggestion, SuggestionRegistry } from './suggestions.js';
// Canonical failure formatter (Workstream J)
export {
formatCanonical,
renderCanonicalText,
renderCanonicalJson,
} from './canonical-formatter.js';
export type {
CanonicalFailureEntry,
CanonicalReport,
CanonicalFormatOptions,
} from './canonical-formatter.js';
// Failure analyzer (Workstream K)
export {
analyzeFailure,
attachFailureAnalysis,
DEFAULT_FAILURE_RULES,
} from './failure-analyzer.js';
export type {
FailureAnalysis,
FailureContext,
FailureRule,
} from './failure-analyzer.js';
+132
View File
@@ -0,0 +1,132 @@
/**
* JSON reporter output.
*
* Produces a structured JSON representation of the full evaluation
* result for CI systems, editors, and other tooling.
*/
import type { Diagnostic, SourceRef, SuggestedFix } from './diagnostics.js';
import type { TraceEvent } from './traces.js';
import type { ShrinkResult } from './shrink.js';
/**
 * Options for JSON formatting. Every field is optional.
 */
export interface JsonFormatOptions {
/** Indent size handed to JSON.stringify; null means no pretty-printing, omitted means 2. */
indent?: number | null;
/** Include trace events in the output. */
includeTraces?: boolean;
/** Include shrink details; they are only emitted when at least one shrink result exists. */
includeShrink?: boolean;
}
/**
 * The top-level JSON report shape.
 *
 * `traces` and `shrinkResults` are optional: the report builder only sets
 * them when the matching include-option is enabled.
 */
export interface JsonReport {
/** Report format version; currently the literal 1. */
version: 1;
/** Diagnostic counts: the total plus one counter per severity (error / warning / info). */
summary: {
totalDiagnostics: number;
errorCount: number;
warningCount: number;
infoCount: number;
};
/** Serialized diagnostics, in input order. */
diagnostics: JsonDiagnostic[];
/** Copy of the trace events supplied to the builder. */
traces?: TraceEvent[];
/** Shrink results keyed by the same string keys as the input map. */
shrinkResults?: Record<string, ShrinkResult>;
}
/**
 * A diagnostic serialized for JSON.
 *
 * Field-for-field projection of `Diagnostic`: each property here is copied
 * from the property of the same name on the source diagnostic. Optional
 * fields that are undefined are dropped by JSON.stringify.
 */
export interface JsonDiagnostic {
diagnosticId: string;
code: string;
/** Severity as a plain string; the report summary counts 'error', 'warning', and 'info'. */
severity: string;
category: string;
message: string;
position?: Diagnostic['position'];
source?: string;
clauseId?: string;
sceneId?: string;
snapshotId?: string;
envCaseId?: string;
/** Related facts attached to the diagnostic. */
related: { code?: string; message: string; position?: Diagnostic['position'] }[];
/** Identifier of an associated trace event, when there is one. */
traceRef?: string;
fixHints: string[];
metrics: Record<string, number>;
sourceRef: SourceRef;
suggestedFix?: SuggestedFix;
}
/**
 * Render the complete report as a JSON string.
 *
 * Builds the report object and serializes it. Indentation follows
 * `opts.indent`: `null` produces compact output, a number is used as-is, and
 * an omitted value falls back to 2 spaces.
 *
 * @param diagnostics - diagnostics to serialize
 * @param traces - trace events (emitted only with `includeTraces`)
 * @param shrinkResults - shrink results keyed by clause id (emitted only with `includeShrink`)
 * @param opts - JSON formatting options
 * @returns the serialized report
 */
export function renderJsonReport(
  diagnostics: Diagnostic[],
  traces: readonly TraceEvent[],
  shrinkResults: Map<string, ShrinkResult>,
  opts: JsonFormatOptions = {},
): string {
  const payload = buildJsonReport(diagnostics, traces, shrinkResults, opts);

  let spacing: number | undefined;
  if (opts.indent !== null) {
    spacing = opts.indent ?? 2;
  }

  return JSON.stringify(payload, null, spacing);
}
/**
 * Build the report object without stringifying.
 *
 * The summary counts diagnostics per severity ('error', 'warning', 'info').
 * Traces are copied in only when `opts.includeTraces` is set; shrink results
 * are converted from the map to a plain record only when `opts.includeShrink`
 * is set and the map is non-empty.
 *
 * @param diagnostics - diagnostics to include
 * @param traces - trace events available for inclusion
 * @param shrinkResults - shrink results keyed by clause id
 * @param opts - inclusion switches
 * @returns the structured report
 */
export function buildJsonReport(
  diagnostics: Diagnostic[],
  traces: readonly TraceEvent[],
  shrinkResults: Map<string, ShrinkResult>,
  opts: JsonFormatOptions = {},
): JsonReport {
  const countSeverity = (level: string): number =>
    diagnostics.reduce((total, d) => (d.severity === level ? total + 1 : total), 0);

  const summary = {
    totalDiagnostics: diagnostics.length,
    errorCount: countSeverity('error'),
    warningCount: countSeverity('warning'),
    infoCount: countSeverity('info'),
  };

  const withShrink = Boolean(opts.includeShrink) && shrinkResults.size > 0;

  return {
    version: 1,
    summary,
    diagnostics: diagnostics.map((d) => diagnosticToJson(d)),
    ...(opts.includeTraces ? { traces: traces.slice() } : {}),
    ...(withShrink ? { shrinkResults: Object.fromEntries(shrinkResults) } : {}),
  };
}
/**
 * Project a `Diagnostic` onto the `JsonDiagnostic` shape.
 *
 * Copies the serializable fields one-for-one (values are shared by reference,
 * not cloned) and leaves behind anything else the diagnostic may carry.
 */
function diagnosticToJson(d: Diagnostic): JsonDiagnostic {
  const {
    diagnosticId,
    code,
    severity,
    category,
    message,
    position,
    source,
    clauseId,
    sceneId,
    snapshotId,
    envCaseId,
    related,
    traceRef,
    fixHints,
    metrics,
    sourceRef,
    suggestedFix,
  } = d;

  return {
    diagnosticId,
    code,
    severity,
    category,
    message,
    position,
    source,
    clauseId,
    sceneId,
    snapshotId,
    envCaseId,
    related,
    traceRef,
    fixHints,
    metrics,
    sourceRef,
    suggestedFix,
  };
}
@@ -0,0 +1,247 @@
/**
* Tests for property-run diagnostics and render-input shrinking.
*
* Verifies that sampled and enumerated results produce rich diagnostics
* with replay metadata, and that shrinking works across render-input axes.
*/
import { describe, it } from 'node:test';
import assert from 'node:assert';
import type { SampledRunResult, EnumeratedRunResult } from 'imhotep-core/property-results';
import {
buildPropertyDiagnostics,
buildEnumeratedPropertyDiagnostics,
buildShrunkPropertyDiagnostics,
} from './property-diagnostics.js';
import { shrinkWitness, type Witness } from './shrink.js';
/** Monotonic counter backing the deterministic test id generator. */
let _id = 0;

/** Produces `id_1`, `id_2`, ... on successive calls. */
function fakeId() {
  _id += 1;
  return 'id_' + _id;
}
// ---------------------------------------------------------------------------
// Property Diagnostics Tests
// ---------------------------------------------------------------------------
// Suite: checks the diagnostic codes, run results, and attached inputs that the
// three property-diagnostics builders produce for sampled, enumerated, and
// shrunk runs. All ids come from the deterministic fakeId generator.
describe('property diagnostics', () => {
// A passing sampled run yields exactly one IMH_PROPERTY_PASSED diagnostic carrying the seed.
it('builds pass diagnostic for sampled run', () => {
const result: SampledRunResult = {
mode: 'sampled',
seed: 42,
numRuns: 100,
passed: true,
diagnostics: [],
};
const diagnostics = buildPropertyDiagnostics(result, { idGen: fakeId });
assert.strictEqual(diagnostics.length, 1);
assert.strictEqual(diagnostics[0].code, 'IMH_PROPERTY_PASSED');
assert.strictEqual(diagnostics[0].seed, 42);
assert.strictEqual(diagnostics[0].runResult, 'pass');
});
// A failing sampled run yields the primary failure plus a replay diagnostic.
it('builds failure diagnostic with counterexample', () => {
const result: SampledRunResult = {
mode: 'sampled',
seed: 123,
numRuns: 50,
passed: false,
counterexample: { size: 'sm', disabled: true },
shrunkCounterexample: { size: 'sm', disabled: true },
diagnostics: [],
};
const diagnostics = buildPropertyDiagnostics(result, { idGen: fakeId });
assert.strictEqual(diagnostics.length, 2); // primary + replay
const primary = diagnostics.find((d) => d.code === 'IMH_PROPERTY_FAILED');
assert.ok(primary);
assert.strictEqual(primary!.seed, 123);
assert.strictEqual(primary!.runResult, 'fail');
assert.deepStrictEqual(primary!.counterexample, { size: 'sm', disabled: true });
assert.ok(primary!.message.includes('seed 123'));
});
// A passing enumerated run yields a single IMH_ENUMERATED_PASSED diagnostic.
it('builds enumerated pass diagnostic', () => {
const result: EnumeratedRunResult = {
mode: 'enumerated-determinate',
totalCases: 6,
passed: true,
diagnostics: [],
};
const diagnostics = buildEnumeratedPropertyDiagnostics(result, { idGen: fakeId });
assert.strictEqual(diagnostics.length, 1);
assert.strictEqual(diagnostics[0].code, 'IMH_ENUMERATED_PASSED');
assert.strictEqual(diagnostics[0].runResult, 'pass');
});
// A failing enumerated run exposes the failing case both as inputSnapshot and in the message.
it('builds enumerated failure diagnostic with failing case', () => {
const result: EnumeratedRunResult = {
mode: 'enumerated-determinate',
totalCases: 6,
passed: false,
failingCase: { size: 'lg' },
diagnostics: [],
};
const diagnostics = buildEnumeratedPropertyDiagnostics(result, { idGen: fakeId });
assert.strictEqual(diagnostics.length, 1);
assert.strictEqual(diagnostics[0].code, 'IMH_ENUMERATED_FAILED');
assert.strictEqual(diagnostics[0].runResult, 'fail');
assert.deepStrictEqual(diagnostics[0].inputSnapshot, { size: 'lg' });
assert.ok(diagnostics[0].message.includes('lg'));
});
// The shrink-aware builder adds an IMH_PROPERTY_SHRUNK diagnostic holding the shrunk input.
it('builds shrunk diagnostic with shrunk input', () => {
const result: SampledRunResult = {
mode: 'sampled',
seed: 7,
numRuns: 100,
passed: false,
counterexample: { size: 'sm', disabled: true, label: 'hello world' },
diagnostics: [],
};
const shrunkInput = { size: 'sm', disabled: true };
const diagnostics = buildShrunkPropertyDiagnostics(result, shrunkInput, { idGen: fakeId });
const shrunkDiag = diagnostics.find((d) => d.code === 'IMH_PROPERTY_SHRUNK');
assert.ok(shrunkDiag);
assert.deepStrictEqual(shrunkDiag!.shrunkCounterexample, shrunkInput);
assert.ok(shrunkDiag!.message.includes('sm'));
});
});
// ---------------------------------------------------------------------------
// Render-Input Shrink Tests
// ---------------------------------------------------------------------------
// Suite: exercises shrinkWitness on the render-input axes (renderProps,
// renderArgs, queryParams, fixtureInputs). Each test supplies an oracle that
// depends on a single key and expects every other key on that axis to be removed.
describe('shrink across render inputs', () => {
it('shrinks render props when redundant', () => {
const witness: Witness = {
proof: {
proofId: 'p1',
clauseId: 'c1',
outcome: 'fail',
truth: 'determinate',
},
envCases: ['env_1'],
snapshots: ['default'],
subjects: [1],
facts: [100],
renderProps: { size: 'sm', disabled: true, variant: 'primary' },
};
// Only 'disabled' matters for failure
const stillFails = (w: Witness) =>
w.renderProps?.disabled === true;
const result = shrinkWitness(witness, stillFails);
assert.strictEqual(result.reduced, true);
assert.ok(result.axes.includes('prop'));
assert.deepStrictEqual(result.witness.renderProps, { disabled: true });
});
it('shrinks render args independently', () => {
const witness: Witness = {
proof: {
proofId: 'p1',
clauseId: 'c1',
outcome: 'fail',
truth: 'determinate',
},
envCases: ['env_1'],
snapshots: ['default'],
subjects: [1],
facts: [100],
renderArgs: { label: 'ok', icon: 'check', tone: 'positive' },
};
// Only the 'icon' arg matters for failure
const stillFails = (w: Witness) =>
w.renderArgs?.icon === 'check';
const result = shrinkWitness(witness, stillFails);
assert.strictEqual(result.reduced, true);
assert.ok(result.axes.includes('arg'));
assert.deepStrictEqual(result.witness.renderArgs, { icon: 'check' });
});
it('shrinks query params', () => {
const witness: Witness = {
proof: {
proofId: 'p1',
clauseId: 'c1',
outcome: 'fail',
truth: 'determinate',
},
envCases: ['env_1'],
snapshots: ['default'],
subjects: [1],
facts: [100],
queryParams: { theme: 'dark', debug: '1', version: '2' },
};
// Only the 'theme' query param matters for failure
const stillFails = (w: Witness) =>
w.queryParams?.theme === 'dark';
const result = shrinkWitness(witness, stillFails);
assert.strictEqual(result.reduced, true);
assert.ok(result.axes.includes('query-param'));
assert.deepStrictEqual(result.witness.queryParams, { theme: 'dark' });
});
it('shrinks fixture inputs', () => {
const witness: Witness = {
proof: {
proofId: 'p1',
clauseId: 'c1',
outcome: 'fail',
truth: 'determinate',
},
envCases: ['env_1'],
snapshots: ['default'],
subjects: [1],
facts: [100],
fixtureInputs: { count: 5, layout: 'grid', animated: true },
};
// Only the 'layout' fixture input matters for failure
const stillFails = (w: Witness) =>
w.fixtureInputs?.layout === 'grid';
const result = shrinkWitness(witness, stillFails);
assert.strictEqual(result.reduced, true);
assert.ok(result.axes.includes('fixture-input'));
assert.deepStrictEqual(result.witness.fixtureInputs, { layout: 'grid' });
});
// Negative case: the oracle pins the size of every axis, so no removal may be accepted.
it('does not shrink when all render props are required', () => {
const witness: Witness = {
proof: {
proofId: 'p1',
clauseId: 'c1',
outcome: 'fail',
truth: 'determinate',
},
envCases: ['env_1'],
snapshots: ['default'],
subjects: [1],
facts: [100],
renderProps: { a: 1, b: 2 },
};
// All axes are required: removing anything causes failure to disappear
const stillFails = (w: Witness) =>
w.envCases.length === 1 &&
w.snapshots.length === 1 &&
w.subjects.length === 1 &&
w.facts.length === 1 &&
Object.keys(w.renderProps ?? {}).length === 2;
const result = shrinkWitness(witness, stillFails);
assert.strictEqual(result.reduced, false);
assert.deepStrictEqual(result.witness.renderProps, { a: 1, b: 2 });
});
});
@@ -0,0 +1,151 @@
/**
* Property-run diagnostics builder.
*
* Converts sampled and enumerated run results into rich diagnostics
* with full replay metadata: seed, run index, input snapshot.
*
* These diagnostics are what Riley sees in the CI failure report.
* Every seed, every shrink step, every counterexample is inspectable.
*/
import type { SampledRunResult, EnumeratedRunResult } from 'imhotep-core/property-results'
import {
makePropertyDiagnostic,
type PropertyDiagnostic,
type PropertyDiagnosticBlueprint,
} from './diagnostics.js';
// ---------------------------------------------------------------------------
// Property Diagnostics Builder
// ---------------------------------------------------------------------------
/**
 * Options shared by the property-diagnostics builders in this file.
 * The whole object is also forwarded to makePropertyDiagnostic.
 */
export interface PropertyDiagnosticsOptions {
/** Id generator; presumably supplies the id of each diagnostic created — confirm in makePropertyDiagnostic. */
idGen(): string;
/** Optional formatter for input snapshots (e.g., JSON.stringify). Builders fall back to JSON.stringify when omitted. */
formatInput?(input: unknown): string;
}
/**
 * Turn a SampledRunResult into PropertyDiagnostics.
 *
 * - Passing run: a single `IMH_PROPERTY_PASSED` diagnostic carrying the seed.
 * - Failing run: two diagnostics, in this order:
 *     1. `IMH_PROPERTY_FAILED` with the counterexample and shrunk
 *        counterexample; when a counterexample exists it is also appended to
 *        the message (via `options.formatInput`, else `JSON.stringify`) and
 *        stored as `inputSnapshot`;
 *     2. `IMH_PROPERTY_REPLAY` with the seed and run count needed to replay.
 *
 * @param result - outcome of the sampled property run
 * @param options - id generator and optional input formatter
 * @returns the diagnostics describing the run
 */
export function buildPropertyDiagnostics(
  result: SampledRunResult,
  options: PropertyDiagnosticsOptions,
): PropertyDiagnostic[] {
  const { seed, numRuns } = result;

  if (result.passed) {
    const passed = makePropertyDiagnostic({
      code: 'IMH_PROPERTY_PASSED',
      category: 'contract-failure',
      message: `Property passed after ${numRuns} sampled runs (seed ${seed}).`,
      seed,
      runResult: 'pass',
    }, options);
    return [passed];
  }

  // Primary failure diagnostic
  const failure: PropertyDiagnosticBlueprint = {
    code: 'IMH_PROPERTY_FAILED',
    category: 'contract-failure',
    message: `Property failed after ${numRuns} sampled runs (seed ${seed}).`,
    seed,
    runResult: 'fail',
    counterexample: result.counterexample,
    shrunkCounterexample: result.shrunkCounterexample,
  };

  if (result.counterexample !== undefined) {
    let rendered: string;
    if (options.formatInput) {
      rendered = options.formatInput(result.counterexample);
    } else {
      rendered = JSON.stringify(result.counterexample);
    }
    failure.message += ` Counterexample: ${rendered}`;
    failure.inputSnapshot = result.counterexample;
  }

  // Array elements evaluate left to right, so the primary diagnostic is
  // created (and takes its id) before the replay diagnostic.
  return [
    makePropertyDiagnostic(failure, options),
    // Replay metadata diagnostic (always emitted on failure)
    makePropertyDiagnostic({
      code: 'IMH_PROPERTY_REPLAY',
      category: 'internal-error',
      message: `Replay: seed=${seed}, numRuns=${numRuns}`,
      seed,
      runResult: 'fail',
    }, options),
  ];
}
/**
 * Turn an EnumeratedRunResult into PropertyDiagnostics.
 *
 * - Passing run: a single `IMH_ENUMERATED_PASSED` diagnostic.
 * - Failing run: a single `IMH_ENUMERATED_FAILED` diagnostic whose message
 *   names the failing case value (rendered with `options.formatInput`, else
 *   `JSON.stringify`, or the word `unknown` when no failing case was
 *   recorded) and whose `inputSnapshot` is that failing case.
 *
 * @param result - outcome of the enumerated property run
 * @param options - id generator and optional input formatter
 * @returns a one-element array describing the run
 */
export function buildEnumeratedPropertyDiagnostics(
  result: EnumeratedRunResult,
  options: PropertyDiagnosticsOptions,
): PropertyDiagnostic[] {
  if (result.passed) {
    return [
      makePropertyDiagnostic({
        code: 'IMH_ENUMERATED_PASSED',
        category: 'contract-failure',
        message: `Enumerated property passed all ${result.totalCases} cases.`,
        runResult: 'pass',
      }, options),
    ];
  }

  let caseLabel = 'unknown';
  if (result.failingCase !== undefined) {
    caseLabel = options.formatInput
      ? options.formatInput(result.failingCase)
      : JSON.stringify(result.failingCase);
  }

  return [
    makePropertyDiagnostic({
      code: 'IMH_ENUMERATED_FAILED',
      category: 'contract-failure',
      message: `Enumerated property failed at case ${caseLabel} out of ${result.totalCases}.`,
      runResult: 'fail',
      inputSnapshot: result.failingCase,
    }, options),
  ];
}
// ---------------------------------------------------------------------------
// Shrink-Aware Diagnostics
// ---------------------------------------------------------------------------
/**
 * Build the regular sampled-run diagnostics and append one describing the
 * shrunk counterexample.
 *
 * The extra `IMH_PROPERTY_SHRUNK` diagnostic always comes last, is always
 * marked `runResult: 'fail'`, and carries `shrunkInput` both in its message
 * (rendered with `options.formatInput`, else `JSON.stringify`) and as
 * `shrunkCounterexample`.
 *
 * @param result - outcome of the sampled property run
 * @param shrunkInput - smallest still-failing input found by shrinking
 * @param options - id generator and optional input formatter
 * @returns base diagnostics followed by the shrink diagnostic
 */
export function buildShrunkPropertyDiagnostics(
  result: SampledRunResult,
  shrunkInput: unknown,
  options: PropertyDiagnosticsOptions,
): PropertyDiagnostic[] {
  // Base diagnostics are built first so they take their ids first.
  const baseDiagnostics = buildPropertyDiagnostics(result, options);

  let rendered: string;
  if (options.formatInput) {
    rendered = options.formatInput(shrunkInput);
  } else {
    rendered = JSON.stringify(shrunkInput);
  }

  const shrunk = makePropertyDiagnostic({
    code: 'IMH_PROPERTY_SHRUNK',
    category: 'contract-failure',
    message: `Shrunk counterexample: ${rendered}`,
    seed: result.seed,
    runResult: 'fail',
    shrunkCounterexample: shrunkInput,
  }, options);

  return baseDiagnostics.concat([shrunk]);
}
@@ -0,0 +1,148 @@
/**
* Tests for executable replay.
*
* Verifies that replay metadata can be fed back into a replay executor
* and reproduce the same failure deterministically.
*/
import { describe, it } from 'node:test';
import assert from 'node:assert';
import {
replay,
buildReplayMetadata,
type ReplayMetadata,
type ReplayExecutor,
type RunResult,
} from './replay.js';
import { makeDiagnostic, type Diagnostic, type ProofLike } from './diagnostics.js';
/** Monotonic counter backing the deterministic test id generator. */
let _id = 0;

/** Produces `id_1`, `id_2`, ... on successive calls. */
function fakeId() {
  _id += 1;
  return 'id_' + _id;
}
// Suite: drives replay() with hand-written ReplayExecutor stubs and checks the
// RunResult it assembles (passed flag, diagnostics, proofs, seed).
describe('replay', () => {
it('replays a failing run and produces the same failure', async () => {
const metadata: ReplayMetadata = {
version: 1,
seed: 42,
numRuns: 100,
sceneTarget: { kind: 'fixture', fixtureId: 'test.html' },
inputDomain: { mode: 'enumerated', values: [] },
counterexample: { size: 'sm' },
timestamp: new Date().toISOString(),
};
const mockProof: ProofLike = {
proofId: 'p1',
clauseId: 'c1',
outcome: 'fail',
truth: 'determinate',
failedPredicate: { op: '>=', left: 10, right: 24 },
witness: { subjectId: 0, referenceId: 1 },
};
const mockResult = { status: 'fail', clauseId: 'c1' };
// Stub executor: one failing result paired with one failing proof.
const executor: ReplayExecutor = {
buildWorld: async () => ({ sceneId: 'scene' }),
buildClauses: () => [{ clauseId: 'c1' }],
evaluate: () => ({
results: [mockResult],
proofs: [mockProof],
}),
isFailure: (r) => (r as any).status === 'fail',
toDiagnostic: (_r, proof) =>
makeDiagnostic(
{
code: 'IMH_REPLAY_FAIL',
category: 'contract-failure',
message: `Replayed failure for clause ${proof.clauseId}`,
},
{ idGen: fakeId },
),
};
const result: RunResult = await replay(metadata, executor);
assert.strictEqual(result.passed, false);
assert.strictEqual(result.seed, 42);
assert.strictEqual(result.diagnostics.length, 1);
assert.strictEqual(result.proofs.length, 1);
assert.strictEqual(result.proofs[0].outcome, 'fail');
assert.ok(
result.diagnostics[0].message.includes('Replayed failure'),
'diagnostic should mention replayed failure',
);
});
it('replays a passing run and reports pass', async () => {
const metadata: ReplayMetadata = {
version: 1,
seed: 7,
numRuns: 50,
sceneTarget: { kind: 'fixture', fixtureId: 'pass.html' },
inputDomain: { mode: 'enumerated', values: [] },
timestamp: new Date().toISOString(),
};
const mockProof: ProofLike = {
proofId: 'p2',
clauseId: 'c2',
outcome: 'pass',
truth: 'determinate',
};
// Stub executor: the only result is a pass, so toDiagnostic must never contribute output.
const executor: ReplayExecutor = {
buildWorld: async () => ({ sceneId: 'scene' }),
buildClauses: () => [{ clauseId: 'c2' }],
evaluate: () => ({
results: [{ status: 'pass', clauseId: 'c2' }],
proofs: [mockProof],
}),
isFailure: (r) => (r as any).status === 'fail',
toDiagnostic: (_r, proof) =>
makeDiagnostic(
{
code: 'IMH_REPLAY_FAIL',
category: 'contract-failure',
message: `Replayed failure for clause ${proof.clauseId}`,
},
{ idGen: fakeId },
),
};
const result = await replay(metadata, executor);
assert.strictEqual(result.passed, true);
assert.strictEqual(result.diagnostics.length, 0);
assert.strictEqual(result.proofs[0].outcome, 'pass');
});
// NOTE(review): this test only asserts that the metadata seed is echoed back on
// the result; it does not observe the deterministic context itself.
it('uses the same seed for deterministic reconstruction', async () => {
const metadata: ReplayMetadata = {
version: 1,
seed: 99,
numRuns: 10,
sceneTarget: { kind: 'fixture', fixtureId: 'seed.html' },
inputDomain: { mode: 'generated', arbitrary: null, seed: 99, numRuns: 10 },
counterexample: { label: 'x' },
timestamp: new Date().toISOString(),
};
const executor: ReplayExecutor = {
buildWorld: async () => ({}),
buildClauses: () => [],
evaluate: () => ({ results: [], proofs: [] }),
isFailure: () => false,
toDiagnostic: () =>
makeDiagnostic(
{ code: 'IMH_TEST', category: 'internal-error', message: 'test' },
{ idGen: fakeId },
),
};
const result = await replay(metadata, executor);
assert.strictEqual(result.seed, 99);
});
});
+218
View File
@@ -0,0 +1,218 @@
/**
* Replay metadata builder for sampled property runs.
*
* Given a SampledRunResult, produces a replay script or metadata object
* that can reproduce the exact run. Riley needs this for 2 AM debugging:
* she passes the seed to the runner and watches the identical failure.
*
* The replay payload includes everything needed to reconstruct the run:
* seed, numRuns, renderer, component, input domain.
*/
import type { SceneTarget, RenderCase } from 'imhotep-core/scene-target'
import type { InputDomain } from 'imhotep-core/property-contracts'
import type { SampledRunResult } from 'imhotep-core/property-results'
import { setDefaultContext, createDeterministicContext } from 'imhotep-core'
// ---------------------------------------------------------------------------
// Replay Metadata
// ---------------------------------------------------------------------------
/**
 * Receipt describing a sampled property run well enough to run it again.
 */
export interface ReplayMetadata {
/** Metadata format version; currently the literal 1. */
version: 1;
/** Seed of the original run; replay() installs a deterministic context built from it. */
seed: number;
/** Number of sampled runs in the original execution. */
numRuns: number;
/** Scene target of the original run, stored as given. */
sceneTarget: SceneTarget;
/** Input domain of the original run, stored as given. */
inputDomain: InputDomain;
/** The counterexample input that failed, if any. */
counterexample?: unknown;
/** The shrunk counterexample, if shrinking was performed. */
shrunkCounterexample?: unknown;
/** Optional renderer adapter identifier for reconstruction. */
rendererAdapterId?: string;
/** ISO timestamp of when the original run occurred. */
timestamp: string;
}
/**
 * Build replay metadata from a sampled run result.
 *
 * Copies the seed, run count, and both counterexamples from the result, and
 * stores the scene target and input domain as given.
 *
 * @param result - the sampled run to capture
 * @param sceneTarget - scene the run was executed against
 * @param inputDomain - input domain the run sampled from
 * @param options - optional `rendererAdapterId` and `timestamp`; the
 *                  timestamp defaults to the current time in ISO format
 * @returns a version-1 replay metadata object
 */
export function buildReplayMetadata(
  result: SampledRunResult,
  sceneTarget: SceneTarget,
  inputDomain: InputDomain,
  options?: {
    rendererAdapterId?: string;
    timestamp?: string;
  },
): ReplayMetadata {
  const { seed, numRuns, counterexample, shrunkCounterexample } = result;
  const rendererAdapterId = options?.rendererAdapterId;
  const timestamp = options?.timestamp ?? new Date().toISOString();

  return {
    version: 1,
    seed,
    numRuns,
    sceneTarget,
    inputDomain,
    counterexample,
    shrunkCounterexample,
    rendererAdapterId,
    timestamp,
  };
}
// ---------------------------------------------------------------------------
// Replay Script
// ---------------------------------------------------------------------------
/**
 * A replay receipt in three forms: prose for people, a command line for
 * shells, and the raw metadata for programs.
 */
export interface ReplayScript {
/** Human-readable description of how to replay. */
description: string;
/** Programmatic replay command (e.g., a shell command or JS snippet). The builder in this file emits `<runner> --seed <seed> --num-runs <numRuns>`. */
command: string;
/** The metadata object for machine consumption. */
metadata: ReplayMetadata;
}
/**
 * Build a replay script from metadata.
 *
 * Returns both a human-readable description (seed, run count, counterexample,
 * timestamp) and a programmatic command of the form
 * `<runner> --seed <seed> --num-runs <numRuns>`.
 *
 * The counterexample is rendered as `unknown` whenever the metadata carries
 * none. `formatInput` is only invoked for a counterexample that actually
 * exists, so a custom formatter never receives `undefined` and the fallback
 * text is the same with or without a formatter.
 *
 * @param metadata - replay receipt to describe
 * @param options - optional overrides: `runnerCommand` replaces the default
 *                  `imhotep replay` prefix; `formatInput` replaces
 *                  `JSON.stringify` for rendering the counterexample
 * @returns description, command, and the metadata object (same reference)
 */
export function buildReplayScript(
  metadata: ReplayMetadata,
  options?: {
    runnerCommand?: string;
    formatInput?(input: unknown): string;
  },
): ReplayScript {
  const { seed, numRuns, counterexample, timestamp } = metadata;

  // Decide on presence first; only then pick the formatter.
  let inputStr = 'unknown';
  if (counterexample !== undefined) {
    inputStr = options?.formatInput
      ? options.formatInput(counterexample)
      : JSON.stringify(counterexample);
  }

  const description = [
    `Replay sampled property run:`,
    ` seed: ${seed}`,
    ` numRuns: ${numRuns}`,
    ` counterexample: ${inputStr}`,
    ` timestamp: ${timestamp}`,
  ].join('\n');

  const runner = options?.runnerCommand ?? 'imhotep replay';
  const command = `${runner} --seed ${seed} --num-runs ${numRuns}`;

  return {
    description,
    command,
    metadata,
  };
}
// ---------------------------------------------------------------------------
// Replay from Result (Convenience)
// ---------------------------------------------------------------------------
/**
 * One-step convenience: capture replay metadata for a SampledRunResult and
 * wrap it in a replay script.
 *
 * `rendererAdapterId` is recorded in the metadata; the full options object is
 * handed on to the script builder, which reads `runnerCommand` and
 * `formatInput`. No timestamp is passed, so the metadata builder supplies its
 * default.
 *
 * @param result - the sampled run to capture
 * @param sceneTarget - scene the run was executed against
 * @param inputDomain - input domain the run sampled from
 * @param options - optional adapter id, runner command, and input formatter
 * @returns the replay script for the run
 */
export function buildReplayFromResult(
  result: SampledRunResult,
  sceneTarget: SceneTarget,
  inputDomain: InputDomain,
  options?: {
    rendererAdapterId?: string;
    runnerCommand?: string;
    formatInput?(input: unknown): string;
  },
): ReplayScript {
  const metadataOptions = { rendererAdapterId: options?.rendererAdapterId };
  const receipt = buildReplayMetadata(result, sceneTarget, inputDomain, metadataOptions);
  return buildReplayScript(receipt, options);
}
// ---------------------------------------------------------------------------
// Executable Replay
// ---------------------------------------------------------------------------
import type { Diagnostic } from './diagnostics.js';
import type { ProofLike } from './diagnostics.js';
/**
 * Result of executing a replay.
 */
export interface RunResult {
/** Whether the replayed run passed (no failures), i.e. the executor's isFailure flagged no evaluation result. */
passed: boolean;
/** Diagnostics emitted during the replay: one per failing result that has a proof at the same index. */
diagnostics: Diagnostic[];
/** Proofs generated during the replay, exactly as returned by the executor's evaluate step. */
proofs: ProofLike[];
/** Seed used for the replay (taken from the replay metadata). */
seed: number;
}
/**
 * Executor interface injected by the caller.
 *
 * The reporter does not know how to mount scenes or evaluate clauses;
 * it delegates those operations to the injected executor. This keeps
 * the reporter backend-agnostic and testable.
 *
 * replay() calls these in order: buildWorld (awaited), buildClauses,
 * evaluate, then isFailure for every result and toDiagnostic for every
 * failing result that has a proof.
 */
export interface ReplayExecutor {
/** Mount the scene described by metadata and extract a geometry world. */
buildWorld(metadata: ReplayMetadata): Promise<unknown>;
/** Build clause descriptors from the metadata. */
buildClauses(metadata: ReplayMetadata): unknown[];
/** Evaluate clauses against the world. replay() pairs results[i] with proofs[i], so the two arrays should be index-aligned. */
evaluate(world: unknown, clauses: unknown[]): { results: unknown[]; proofs: ProofLike[] };
/** Check whether an evaluation result represents a failure. */
isFailure(result: unknown): boolean;
/** Convert a failing result + proof into a diagnostic. */
toDiagnostic(result: unknown, proof: ProofLike): Diagnostic;
}
/**
 * Replay a property run from its metadata.
 *
 * Installs a deterministic default context derived from `metadata.seed`, then
 * has the executor rebuild the world and clauses and evaluate them. Each
 * evaluation result is paired with the proof at the same index; results the
 * executor flags as failures mark the run as failed and, when a proof is
 * present at that index, contribute one diagnostic.
 *
 * NOTE(review): the default context set here is not restored before
 * returning; confirm callers do not rely on the previous context.
 *
 * @param metadata - The replay metadata captured from the original run.
 * @param executor - Injected executor that knows how to mount and evaluate.
 * @returns pass/fail flag, collected diagnostics, the executor's proofs, and the seed
 */
export async function replay(
  metadata: ReplayMetadata,
  executor: ReplayExecutor,
): Promise<RunResult> {
  // Ensure deterministic context during replay so ids, clocks, and rng
  // match the original run exactly.
  setDefaultContext(createDeterministicContext(metadata.seed));

  const world = await executor.buildWorld(metadata);
  const clauses = executor.buildClauses(metadata);
  const { results, proofs } = executor.evaluate(world, clauses);

  const diagnostics: Diagnostic[] = [];
  let failureSeen = false;

  for (const [index, outcome] of results.entries()) {
    if (!executor.isFailure(outcome)) {
      continue;
    }
    failureSeen = true;

    // A failing result without a proof still fails the run, but yields no diagnostic.
    const proof = proofs[index];
    if (proof) {
      diagnostics.push(executor.toDiagnostic(outcome, proof));
    }
  }

  return {
    passed: !failureSeen,
    diagnostics,
    proofs,
    seed: metadata.seed,
  };
}
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,177 @@
/**
* Tests for oracle-preserving witness shrinking.
*
* Verifies that shrinkWitness only accepts a reduced witness if the
* injected oracle still reports failure. If the oracle returns false
* (meaning the shrunk witness no longer reproduces the bug), the
* shrink must be rejected and the original witness returned unchanged.
*/
import { describe, it } from 'node:test';
import assert from 'node:assert';
import { shrinkWitness, type Witness, type ShrinkResult } from './shrink.js';
// Every case below builds a Witness by hand, injects a synchronous oracle,
// and asserts which parts of the witness shrinkWitness is allowed to drop.
describe('shrink oracle preservation', () => {
it('accepts a shrunk witness when the oracle still fails', () => {
const witness: Witness = {
proof: {
proofId: 'p1',
clauseId: 'c1',
outcome: 'fail',
truth: 'determinate',
},
envCases: ['env_1', 'env_2', 'env_3'],
snapshots: ['default'],
subjects: [1, 2, 3],
facts: [100, 101, 102],
};
// The failure reproduces as long as env_2 and subject 2 are present.
// This oracle never inspects snapshots or facts, so their final contents
// are deliberately not asserted below.
const oracle = (w: Witness) =>
w.envCases.includes('env_2') && w.subjects.includes(2);
const result: ShrinkResult = shrinkWitness(witness, oracle);
assert.strictEqual(result.reduced, true);
assert.deepStrictEqual(result.witness.envCases, ['env_2']);
assert.deepStrictEqual(result.witness.subjects, [2]);
assert.ok(result.axes.includes('env-case'));
assert.ok(result.axes.includes('subject'));
// Oracle must still fail on the final witness
assert.strictEqual(
oracle(result.witness),
true,
'final shrunk witness must still fail the oracle',
);
});
it('rejects the shrink when the oracle no longer fails', () => {
const witness: Witness = {
proof: {
proofId: 'p1',
clauseId: 'c1',
outcome: 'fail',
truth: 'determinate',
},
envCases: ['env_1', 'env_2'],
snapshots: ['default'],
subjects: [1],
facts: [100],
};
// The failure ONLY reproduces with the exact original witness.
// Removing any axis item makes the failure disappear.
const oracle = (w: Witness) =>
w.envCases.length === 2 &&
w.envCases.includes('env_1') &&
w.snapshots.length === 1 &&
w.subjects.length === 1 &&
w.facts.length === 1;
const result: ShrinkResult = shrinkWitness(witness, oracle);
// Because every item is required, nothing can be removed.
// The validation step ensures the final witness still fails,
// and since the shrinker cannot remove anything without breaking
// the oracle, reduced must be false.
assert.strictEqual(result.reduced, false);
assert.deepStrictEqual(result.witness.envCases, ['env_1', 'env_2']);
assert.deepStrictEqual(result.witness.snapshots, ['default']);
assert.deepStrictEqual(result.witness.subjects, [1]);
assert.deepStrictEqual(result.witness.facts, [100]);
// No axis shrank, so no axis is reported and no steps are counted.
assert.deepStrictEqual(result.axes, []);
assert.strictEqual(result.steps, 0);
});
it('preserves failure across render-prop shrinking', () => {
const witness: Witness = {
proof: {
proofId: 'p1',
clauseId: 'c1',
outcome: 'fail',
truth: 'determinate',
},
envCases: ['env_1'],
snapshots: ['default'],
subjects: [1],
facts: [100],
renderProps: { size: 'lg', disabled: true, variant: 'primary' },
};
// Only the 'disabled' prop matters for the failure.
// The array axes are ignored by this oracle and are not asserted below.
const oracle = (w: Witness) => w.renderProps?.disabled === true;
const result: ShrinkResult = shrinkWitness(witness, oracle);
assert.strictEqual(result.reduced, true);
assert.ok(result.axes.includes('prop'));
assert.deepStrictEqual(result.witness.renderProps, { disabled: true });
assert.strictEqual(oracle(result.witness), true);
});
it('rejects render-prop shrink if oracle returns false', () => {
const witness: Witness = {
proof: {
proofId: 'p1',
clauseId: 'c1',
outcome: 'fail',
truth: 'determinate',
},
envCases: ['env_1'],
snapshots: ['default'],
subjects: [1],
facts: [100],
renderProps: { a: 1, b: 2 },
};
// Both props are required; removing either breaks reproduction.
// All other axes are also required.
const oracle = (w: Witness) =>
Object.keys(w.renderProps ?? {}).length === 2 &&
w.envCases.length === 1 &&
w.snapshots.length === 1 &&
w.subjects.length === 1 &&
w.facts.length === 1;
const result: ShrinkResult = shrinkWitness(witness, oracle);
assert.strictEqual(result.reduced, false);
assert.deepStrictEqual(result.witness.renderProps, { a: 1, b: 2 });
assert.strictEqual(oracle(result.witness), true);
});
it('original failure → shrink → smaller witness → prove still fails', () => {
const witness: Witness = {
proof: {
proofId: 'p1',
clauseId: 'c1',
outcome: 'fail',
truth: 'determinate',
},
envCases: ['desktop', 'tablet', 'mobile'],
snapshots: ['default', 'hover', 'focus'],
subjects: [10, 20, 30, 40],
facts: [1, 2, 3, 4, 5],
};
// The bug reproduces on desktop with subject 20 and fact 3.
// Snapshots are ignored by this oracle and are not asserted below.
const oracle = (w: Witness) =>
w.envCases.includes('desktop') &&
w.subjects.includes(20) &&
w.facts.includes(3);
const result: ShrinkResult = shrinkWitness(witness, oracle);
assert.strictEqual(result.reduced, true);
assert.deepStrictEqual(result.witness.envCases, ['desktop']);
assert.deepStrictEqual(result.witness.subjects, [20]);
assert.deepStrictEqual(result.witness.facts, [3]);
assert.strictEqual(
oracle(result.witness),
true,
'shrunk witness must still reproduce the original failure',
);
assert.ok(result.steps > 0);
});
});
+273
View File
@@ -0,0 +1,273 @@
/**
* Witness shrinking logic.
*
* Shrinking minimizes failing evidence across multiple axes:
* - environment case
* - state snapshot
* - timeline sample set
* - subject subset
* - contributing facts
* - clause group context
*
* Note: the timeline sample set and clause group context axes are named in
* ShrinkAxis but are not reduced by shrinkWitness in this file; the Witness
* shape carries no data for them.
*
* Goal: produce the smallest still-failing witness that preserves
* explanatory value. This is a diagnostic minimizer, not merely
* a test minimizer.
*/
import type { ProofLike } from './diagnostics.js';
/**
* A shrinkable witness bundles the proof with the full context
* needed to attempt reduction.
*
* V1.1 extension: render inputs (props, args, query params, fixture inputs)
* are now shrinkable axes so Riley can isolate bugs to the smallest
* still-failing render configuration.
*/
export interface Witness {
/** The failing proof this witness belongs to; copied by reference and never shrunk. */
proof: ProofLike;
/** Environment cases in play; reduced element-by-element under the 'env-case' axis. */
envCases: string[];
/** State snapshots in play; reduced element-by-element under the 'snapshot' axis. */
snapshots: string[];
/** Subjects in play; reduced under the 'subject' axis. NOTE(review): presumably subject ids — confirm. */
subjects: number[];
/** Contributing facts; reduced under the 'fact' axis. NOTE(review): presumably fact ids — confirm. */
facts: number[];
/** Render-input axes for property-run shrinking (V1.1). */
/** Render props; reduced key-by-key under the 'prop' axis when present. */
renderProps?: Record<string, unknown>;
/** Render args; reduced key-by-key under the 'arg' axis when present. */
renderArgs?: Record<string, unknown>;
/** Query params; reduced key-by-key under the 'query-param' axis when present. */
queryParams?: Record<string, unknown>;
/** Fixture inputs; reduced key-by-key under the 'fixture-input' axis when present. */
fixtureInputs?: Record<string, unknown>;
}
/**
* Result of a shrink attempt.
*/
export interface ShrinkResult {
/** True if at least one axis of the witness was reduced and the result was kept. */
reduced: boolean;
/** The minimized witness, or a copy of the original when the shrink was rejected. */
witness: Witness;
/** Which axes were shrunk, in the order they were processed; empty when nothing was reduced. */
axes: ShrinkAxis[];
/**
* Reduction step count. As computed by shrinkWitness this is the number of
* removal attempts (accepted or not) made on the axes that ended up reduced;
* attempts on axes that did not shrink are not counted, and it is 0 when
* nothing was reduced.
*/
steps: number;
}
// Labels reported in ShrinkResult.axes, one per kind of evidence that can shrink.
export type ShrinkAxis =
// Array-valued axes of Witness.
| 'env-case'
| 'snapshot'
| 'subject'
| 'fact'
// Declared for completeness; shrinkWitness in this file never reports these two.
| 'timeline'
| 'group-context'
// Key/value render-input axes of Witness (V1.1).
| 'prop'
| 'arg'
| 'query-param'
| 'fixture-input';
/**
* Predicate injected by the caller.
* Returns true if the reduced witness still reproduces the failure.
*
* shrinkWitness invokes it synchronously: once per removal candidate, and once
* more on the final witness when anything was reduced. Each candidate is a
* fresh copy of the working witness (the proof is shared by reference).
*/
export type StillFails = (w: Witness) => boolean;
/**
 * Minimise a failing witness one axis at a time.
 *
 * Axes are visited in a fixed order: env cases, snapshots, subjects, facts,
 * then the optional render-input maps (props, args, query params, fixture
 * inputs) when they are present. Within an axis, entries are tentatively
 * removed one at a time and a removal is kept only when `stillFails` accepts
 * the candidate. Later axes are probed against the already-reduced earlier
 * axes; the pass is not repeated, so the result is not guaranteed to be
 * globally minimal.
 *
 * All work happens on copies; the arrays and maps of `witness` are not
 * modified by this function.
 *
 * @param witness - The failing witness to reduce.
 * @param stillFails - Oracle returning true while a candidate still reproduces the failure.
 * @returns The reduced witness, the axes that shrank, and the accumulated step
 *   count. If the oracle rejects the final reduced witness, a copy of the
 *   original is returned with `reduced: false`, no axes and zero steps.
 */
export function shrinkWitness(
  witness: Witness,
  stillFails: StillFails,
): ShrinkResult {
  const working = cloneWitness(witness);
  const axes: ShrinkAxis[] = [];
  let steps = 0;

  // Ask the oracle about a copy of the working witness with one axis swapped out.
  const reproducesWith = (patch: Partial<Witness>): boolean =>
    stillFails({ ...cloneWitness(working), ...patch });

  // Book-keeping shared by every axis that managed to shrink.
  const recordAxis = (axis: ShrinkAxis, axisSteps: number): void => {
    axes.push(axis);
    steps += axisSteps;
  };

  // --- Array-valued axes -------------------------------------------------
  const envOutcome = shrinkArray(working.envCases, (envCases) =>
    reproducesWith({ envCases }),
  );
  if (envOutcome.reduced) {
    working.envCases = envOutcome.value;
    recordAxis('env-case', envOutcome.steps);
  }

  const snapshotOutcome = shrinkArray(working.snapshots, (snapshots) =>
    reproducesWith({ snapshots }),
  );
  if (snapshotOutcome.reduced) {
    working.snapshots = snapshotOutcome.value;
    recordAxis('snapshot', snapshotOutcome.steps);
  }

  const subjectOutcome = shrinkArray(working.subjects, (subjects) =>
    reproducesWith({ subjects }),
  );
  if (subjectOutcome.reduced) {
    working.subjects = subjectOutcome.value;
    recordAxis('subject', subjectOutcome.steps);
  }

  const factOutcome = shrinkArray(working.facts, (facts) =>
    reproducesWith({ facts }),
  );
  if (factOutcome.reduced) {
    working.facts = factOutcome.value;
    recordAxis('fact', factOutcome.steps);
  }

  // --- Render-input axes (V1.1), each only when the witness carries it ----
  if (working.renderProps) {
    const propOutcome = shrinkObject(working.renderProps, (renderProps) =>
      reproducesWith({ renderProps }),
    );
    if (propOutcome.reduced) {
      working.renderProps = propOutcome.value;
      recordAxis('prop', propOutcome.steps);
    }
  }

  if (working.renderArgs) {
    const argOutcome = shrinkObject(working.renderArgs, (renderArgs) =>
      reproducesWith({ renderArgs }),
    );
    if (argOutcome.reduced) {
      working.renderArgs = argOutcome.value;
      recordAxis('arg', argOutcome.steps);
    }
  }

  if (working.queryParams) {
    const queryOutcome = shrinkObject(working.queryParams, (queryParams) =>
      reproducesWith({ queryParams }),
    );
    if (queryOutcome.reduced) {
      working.queryParams = queryOutcome.value;
      recordAxis('query-param', queryOutcome.steps);
    }
  }

  if (working.fixtureInputs) {
    const fixtureOutcome = shrinkObject(working.fixtureInputs, (fixtureInputs) =>
      reproducesWith({ fixtureInputs }),
    );
    if (fixtureOutcome.reduced) {
      working.fixtureInputs = fixtureOutcome.value;
      recordAxis('fixture-input', fixtureOutcome.steps);
    }
  }

  // An axis is recorded exactly when it shrank, so a non-empty list means "reduced".
  const reduced = axes.length > 0;

  // Final oracle validation: if the reduced witness no longer reproduces the
  // failure, throw the whole shrink away and hand back a copy of the original.
  if (reduced && !stillFails(working)) {
    return { reduced: false, witness: cloneWitness(witness), axes: [], steps: 0 };
  }
  return { reduced, witness: working, axes, steps };
}
/**
 * Greedy single-pass element removal.
 *
 * Each position is tried exactly once, from the last element to the first.
 * A trial copy without that element is handed to `predicate`; when the
 * predicate accepts it, the trial becomes the new working array.
 *
 * @param arr - Input array; it is copied and never mutated.
 * @param predicate - Returns true when the trial array is still acceptable.
 * @returns `reduced` (whether anything was dropped), `value` (the surviving
 *   elements in original order) and `steps` (number of predicate calls, i.e.
 *   one per element of the input).
 */
function shrinkArray<T>(
  arr: T[],
  predicate: (reduced: T[]) => boolean,
): { reduced: boolean; value: T[]; steps: number } {
  let kept = arr.slice();
  let attempts = 0;
  let droppedAny = false;

  // Walking from the tail means an accepted removal never shifts the
  // positions that are still waiting to be tried.
  let index = kept.length;
  while (index-- > 0) {
    const trial = kept.slice();
    trial.splice(index, 1);
    attempts += 1;
    if (predicate(trial)) {
      kept = trial;
      droppedAny = true;
    }
  }

  return { reduced: droppedAny, value: kept, steps: attempts };
}
/**
 * Greedy single-pass key removal.
 *
 * The key list is captured once from a copy of `obj` and walked from the last
 * key to the first. For each key a trial copy without it is handed to
 * `predicate`; when the predicate accepts it, the trial becomes the new
 * working object.
 *
 * @param obj - Input object; it is shallow-copied and never mutated.
 * @param predicate - Returns true when the trial object is still acceptable.
 * @returns `reduced` (whether any key was dropped), `value` (the surviving
 *   entries) and `steps` (number of predicate calls, i.e. one per key).
 */
function shrinkObject<T extends Record<string, unknown>>(
  obj: T,
  predicate: (reduced: T) => boolean,
): { reduced: boolean; value: T; steps: number } {
  let kept = { ...obj } as T;
  let attempts = 0;
  let droppedAny = false;

  for (const key of Object.keys(kept).reverse()) {
    const trial: Record<string, unknown> = { ...kept };
    delete trial[key];
    attempts += 1;
    if (predicate(trial as T)) {
      kept = trial as T;
      droppedAny = true;
    }
  }

  return { reduced: droppedAny, value: kept, steps: attempts };
}
/**
 * Copy a witness so it can be modified without touching the source.
 *
 * The four array axes are copied element-wise and each render-input map that
 * is present is shallow-copied. The proof is shared by reference.
 *
 * Optional render-input maps that are absent (or otherwise falsy) on the
 * source are left off the copy entirely rather than being written as
 * explicit `undefined` properties, so the copy has the same key set as the
 * source and compares deep-equal to it.
 *
 * @param w - Witness to copy.
 * @returns A new witness with independent arrays and maps.
 */
function cloneWitness(w: Witness): Witness {
  const copy: Witness = {
    proof: w.proof,
    envCases: w.envCases.slice(),
    snapshots: w.snapshots.slice(),
    subjects: w.subjects.slice(),
    facts: w.facts.slice(),
  };
  if (w.renderProps) {
    copy.renderProps = { ...w.renderProps };
  }
  if (w.renderArgs) {
    copy.renderArgs = { ...w.renderArgs };
  }
  if (w.queryParams) {
    copy.queryParams = { ...w.queryParams };
  }
  if (w.fixtureInputs) {
    copy.fixtureInputs = { ...w.fixtureInputs };
  }
  return copy;
}
@@ -0,0 +1,120 @@
/**
* Usage error suggestion engine.
*
* When a system-use error occurs (parse, validation, resolution,
* extraction), this module produces actionable suggestions based on
* the diagnostic code and category.
*/
import type { Diagnostic, DiagnosticCategory } from './diagnostics.js';
import {
IMH_SELECTOR_ZERO_MATCHES,
IMH_FRAME_AMBIGUOUS,
IMH_VALID_INVALID_UNIT,
IMH_VALID_ILLEGAL_RELATION_OPTION,
IMH_EXTRACT_PARTIAL,
IMH_INDETERMINATE_MISSING_FACT,
} from './codes.js';
/**
* A suggestion carries a message and an optional example snippet.
*/
export interface Suggestion {
/** Actionable advice text; used verbatim as a fix hint by attachSuggestions. */
message: string;
/** Optional snippet; when non-empty, attachSuggestions appends it as " Example: <example>". */
example?: string;
}
/**
* Registry of code patterns to suggestion factories.
* Injected so tests can substitute or extend suggestions.
*/
export interface SuggestionRegistry {
/**
* Return the suggestions registered for a diagnostic.
* attachSuggestions treats an empty array as "no suggestions".
* The default registry keys on `code` only and ignores `category`.
*/
lookup(code: string, category: DiagnosticCategory): Suggestion[];
}
/**
 * Build the built-in suggestion knowledge base.
 *
 * The registry is keyed by diagnostic code only; the category passed to
 * `lookup` is ignored. Unknown codes yield an empty array. For known codes
 * the stored array itself is returned, so callers should treat it as
 * read-only.
 *
 * @returns A registry backed by a fresh map on every call.
 */
export function createDefaultSuggestionRegistry(): SuggestionRegistry {
  const knowledgeBase = new Map<string, Suggestion[]>([
    // Resolution errors
    [
      IMH_SELECTOR_ZERO_MATCHES,
      [
        {
          message: 'The selector matched no elements. Verify the selector is correct and the element is present in the DOM.',
          example: "await expect('.buy-button').to.be.visible()",
        },
        {
          message: 'If the element is rendered conditionally, add a wait or guard.',
        },
      ],
    ],
    [
      IMH_FRAME_AMBIGUOUS,
      [
        {
          message: 'Narrow the subject selector so it matches a single element.',
        },
        {
          message: 'Use expectAll(...) if multiple subjects are intended.',
        },
      ],
    ],
    // Validation errors
    [
      IMH_VALID_INVALID_UNIT,
      [
        {
          message: 'Use a supported unit: px, rem, em, %, vh, vw, or jnd.',
          example: "{ minGap: 16, unit: 'px' }",
        },
      ],
    ],
    [
      IMH_VALID_ILLEGAL_RELATION_OPTION,
      [
        {
          message: 'Check the allowed options for this relation in the documentation.',
        },
      ],
    ],
    // Extraction errors
    [
      IMH_EXTRACT_PARTIAL,
      [
        {
          message: 'Some facts were unavailable. Check that the page is fully loaded.',
        },
        {
          message: 'If the fact is unsupported for this element type, simplify the assertion.',
        },
      ],
    ],
    // Indeterminate results
    [
      IMH_INDETERMINATE_MISSING_FACT,
      [
        {
          message: 'A required fact was missing. Check extractor output for warnings.',
        },
      ],
    ],
  ]);

  return {
    lookup: (code, _category) => knowledgeBase.get(code) ?? [],
  };
}
/**
 * Enrich a diagnostic with fix hints from a suggestion registry.
 *
 * The registry is queried with the diagnostic's code and category. Each
 * suggestion becomes one hint: its message, followed by " Example: <example>"
 * when a non-empty example is present. New hints are appended after the
 * existing ones.
 *
 * @param diagnostic - Diagnostic to enrich; it is not mutated.
 * @param registry - Source of suggestions.
 * @returns The same diagnostic object when the registry has nothing to offer,
 *   otherwise a shallow copy with `fixHints` extended.
 */
export function attachSuggestions(
  diagnostic: Diagnostic,
  registry: SuggestionRegistry,
): Diagnostic {
  const matches = registry.lookup(diagnostic.code, diagnostic.category);
  if (matches.length === 0) {
    return diagnostic;
  }

  const extraHints: string[] = [];
  for (const { message, example } of matches) {
    extraHints.push(example ? `${message} Example: ${example}` : message);
  }

  return {
    ...diagnostic,
    fixHints: [...diagnostic.fixHints, ...extraHints],
  };
}
+115
View File
@@ -0,0 +1,115 @@
/**
* Trace event model for Imhotep.
*
* Every evaluation produces a chain of trace events that link
* source spans → AST → IR → execution → proof → diagnostic.
*
* Trace events are cheap to keep in compact form and expand on demand.
*/
/**
* Well-known phases in the evaluation pipeline.
*/
// Phase tags carried by TraceEvent.phase. Within this file only
// 'clause-evaluated' is given special treatment (by traceChainForProof);
// every other phase is treated as an opaque label.
export type TracePhase =
| 'parse-started'
| 'ast-created'
| 'ir-normalized'
| 'fact-requirements-computed'
| 'extraction-started'
| 'extraction-step-completed'
| 'world-normalized'
| 'clause-evaluated'
| 'proof-created'
| 'witness-shrunk'
| 'diagnostic-emitted';
/**
* Cross-references that tie a trace event to other entities.
*/
export interface TraceRefs {
/** Clause the event relates to; the key findClauseTraces filters on. */
clauseId?: string;
/** Proof the event relates to; the key traceChainForProof filters on. */
proofId?: string;
/** NOTE(review): presumably the state snapshot involved — not read in this file. */
snapshotId?: string;
/** NOTE(review): presumably the diagnostic emitted — not read in this file. */
diagnosticId?: string;
/** NOTE(review): presumably the originating AST node — not read in this file. */
astNodeId?: string;
/** NOTE(review): presumably the environment case involved — not read in this file. */
envCaseId?: string;
}
/**
* A single trace event.
*/
export interface TraceEvent {
/** Identifier for this event; createTraceBuilder fills it from deps.idGen(). */
traceEventId: string;
/** Pipeline phase that produced the event. */
phase: TracePhase;
/** Timestamp; createTraceBuilder fills it from deps.now(). */
at: number; // epoch ms
/** Links to the clause, proof, snapshot, etc. the event concerns. */
refs: TraceRefs;
/** Optional free-form data attached by the emitting stage. */
payload?: Record<string, unknown>;
}
/**
* A builder that accumulates trace events during evaluation.
* Injected into each pipeline stage so stages stay pure.
*/
export interface TraceBuilder {
/**
* Record an event. The caller supplies phase, refs and optional payload;
* the builder supplies traceEventId and at, and returns the completed event.
*/
emit(event: Omit<TraceEvent, 'traceEventId' | 'at'>): TraceEvent;
/**
* Events recorded so far. The builder from createTraceBuilder returns them
* in emission order and hands out its internal array (typed readonly),
* so later emits are visible through a previously returned reference.
*/
events(): readonly TraceEvent[];
}
/**
 * Create a TraceBuilder whose ids and timestamps come from injected functions.
 *
 * On every `emit`, `deps.idGen()` is called first and `deps.now()` second; the
 * resulting event is appended to an internal list and returned. `events()`
 * returns that same internal list (typed readonly), in emission order.
 *
 * @param deps - `idGen` supplies each event's traceEventId, `now` its `at` value.
 * @returns A builder that accumulates events for one evaluation.
 */
export function createTraceBuilder(deps: {
  idGen(): string;
  now(): number;
}): TraceBuilder {
  const recorded: TraceEvent[] = [];

  return {
    emit: (event) => {
      // Id before timestamp: keeps the call order of the injected functions stable.
      const traceEventId = deps.idGen();
      const at = deps.now();
      const stamped: TraceEvent = {
        traceEventId,
        phase: event.phase,
        at,
        refs: event.refs,
        payload: event.payload,
      };
      recorded.push(stamped);
      return stamped;
    },
    events: () => recorded,
  };
}
/**
 * Convenience: collect every trace event that references a given clause.
 *
 * @param events - Events to search; not modified.
 * @param clauseId - Clause id to match against `refs.clauseId`.
 * @returns All matching events, in their original order (empty if none).
 */
export function findClauseTraces(
  events: readonly TraceEvent[],
  clauseId: string,
): TraceEvent[] {
  const belongsToClause = (event: TraceEvent): boolean =>
    event.refs.clauseId === clauseId;
  return events.filter(belongsToClause);
}
/**
 * Convenience: reconstruct the evaluation chain for a proof.
 *
 * The chain contains, in original order:
 *  - every event whose `refs.proofId` equals `proofId`, and
 *  - every 'clause-evaluated' event whose `refs.clauseId` is shared with at
 *    least one event that references the proof.
 *
 * The second rule links a clause evaluation to the proof through the clause id
 * the two have in common. The previous implementation compared an event's
 * `traceEventId` with itself, so that branch could not match when ids are
 * unique, while still scanning the whole list for every clause event.
 *
 * @param events - Events to search; not modified.
 * @param proofId - Proof id to reconstruct the chain for.
 * @returns The matching events, in their original order (empty if none).
 */
export function traceChainForProof(
  events: readonly TraceEvent[],
  proofId: string,
): TraceEvent[] {
  // Clause ids mentioned by any event that references the proof.
  const linkedClauseIds = new Set<string>();
  for (const event of events) {
    const clauseId = event.refs.clauseId;
    if (event.refs.proofId === proofId && clauseId !== undefined) {
      linkedClauseIds.add(clauseId);
    }
  }

  return events.filter((event) => {
    if (event.refs.proofId === proofId) {
      return true;
    }
    // Events without a clause id can never be tied to the proof's clause.
    const clauseId = event.refs.clauseId;
    return (
      event.phase === 'clause-evaluated' &&
      clauseId !== undefined &&
      linkedClauseIds.has(clauseId)
    );
  });
}
+13
View File
@@ -0,0 +1,13 @@
{
"extends": "../../tsconfig.json",
"compilerOptions": {
"outDir": "./dist",
"rootDir": "./src",
"paths": {},
"composite": false,
"noEmitOnError": false
},
"include": [
"src/**/*"
]
}