v1.1.0: pooled runtime, 959 tests, production hardening (0 squash)
This commit is contained in:
@@ -0,0 +1,335 @@
|
||||
/**
|
||||
* Canonical failure formatter.
|
||||
*
|
||||
* Transforms raw ImhotepResult diagnostics into narrative reports with
|
||||
* five sections per failing clause:
|
||||
* - Expected: human-readable assertion intent
|
||||
* - Observed: measured metrics that caused failure
|
||||
* - Why this matters: one-sentence semantic impact
|
||||
* - Replay: reproduction command or seed/case info
|
||||
* - Next: concrete next steps to investigate/fix
|
||||
*
|
||||
* Supports both human-readable text and structured JSON output.
|
||||
*/
|
||||
|
||||
import type { ImhotepResult, ClauseResult, ClauseStatus } from 'imhotep-core';
|
||||
import type { Diagnostic } from './diagnostics.js';
|
||||
|
||||
/**
 * One failing clause, rendered as the five narrative sections of the
 * canonical report. Every section is a plain string.
 */
export interface CanonicalFailureEntry {
  /** Assertion intent in human-readable form (the clause label when present). */
  expected: string;

  /** The measured metrics behind the failure. */
  observed: string;

  /** Single-sentence statement of the semantic impact. */
  why: string;

  /** Seed / clause / position information for reproducing the failure. */
  replay: string;

  /** Concrete follow-up steps for investigating or fixing the failure. */
  next: string;
}
|
||||
|
||||
/**
 * Structured form of the canonical report: the overall verdict plus one
 * entry per failing clause.
 */
export interface CanonicalReport {
  /** Overall pass/fail status of the evaluated result. */
  passed: boolean;

  /** How many failing clauses are listed in `failures`. */
  failureCount: number;

  /** The per-clause failure entries. */
  failures: CanonicalFailureEntry[];
}
|
||||
|
||||
/**
 * Knobs accepted by the canonical formatter entry points.
 */
export interface CanonicalFormatOptions {
  /** Output flavour: `'text'` for terminals, `'json'` for machines. */
  format?: 'text' | 'json';

  /** Maximum line width used when wrapping text output. */
  maxWidth?: number;

  /** Seed to surface in the replay section when one is available. */
  seed?: number;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Failure Type Detection
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Classify a failure by the code of its primary diagnostic.
 *
 * A missing diagnostic is treated as an empty code and yields 'unknown'.
 * The `clause` argument is accepted for signature symmetry with the section
 * builders but is not consulted.
 */
function detectFailureType(clause: ClauseResult, diagnostic: Diagnostic | undefined): 'relation' | 'size' | 'cardinality' | 'parse' | 'selector' | 'unknown' {
  const diagnosticCode = diagnostic?.code ?? '';

  // Two exact codes count as relation failures alongside the IMH_RELATION_ family.
  if (diagnosticCode === 'IMH_ALIGNMENT_FAILED' || diagnosticCode === 'IMH_PREDICATE_FAILED') {
    return 'relation';
  }

  const prefixKinds = [
    ['IMH_RELATION_', 'relation'],
    ['IMH_SIZE_', 'size'],
    ['IMH_CARDINALITY_', 'cardinality'],
    ['IMH_PARSE_', 'parse'],
  ] as const;
  for (const [prefix, kind] of prefixKinds) {
    if (diagnosticCode.startsWith(prefix)) {
      return kind;
    }
  }

  return diagnosticCode === 'IMH_SELECTOR_ZERO_MATCHES' ? 'selector' : 'unknown';
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Section Builders
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * "Expected" section: the clause label, or a `Clause <id>` placeholder when
 * the clause carries no label. The diagnostic and type are not consulted.
 */
function buildExpected(clause: ClauseResult, _diagnostic: Diagnostic | undefined, _type: string): string {
  if (clause.clauseLabel == null) {
    return `Clause ${clause.clauseId}`;
  }
  return clause.clauseLabel;
}
|
||||
|
||||
/**
 * "Observed" section: describe the measurements that caused the failure.
 *
 * Clause metrics and diagnostic metrics are merged, with clause metrics
 * winning on key collisions. Each failure type reads its own metric keys;
 * unrecognised types dump every metric as `key=value`.
 *
 * Whatever the type, if nothing could be reported the diagnostic message
 * (or a generic placeholder) is used, so the section is never empty just
 * because the expected metric keys were missing.
 *
 * @param clause - The failing clause; its `metrics` are optional.
 * @param diagnostic - Primary diagnostic for the clause, if one was found.
 * @param type - Failure type as produced by `detectFailureType`.
 * @returns The observed facts joined with `'; '`.
 */
function buildObserved(clause: ClauseResult, diagnostic: Diagnostic | undefined, type: string): string {
  const metrics = clause.metrics ?? {};
  const diagMetrics = diagnostic?.metrics ?? {};
  const allMetrics = { ...diagMetrics, ...metrics };

  const parts: string[] = [];

  switch (type) {
    case 'relation': {
      const gap = allMetrics.observedGap ?? allMetrics.gap;
      const minGap = allMetrics.minGap;
      if (gap !== undefined) parts.push(`measured gap is ${gap}px`);
      if (minGap !== undefined) parts.push(`minimum required gap is ${minGap}px`);
      break;
    }
    case 'size': {
      const observed = allMetrics.observedWidth ?? allMetrics.observedHeight ?? allMetrics.observedSize;
      const expected = allMetrics.minWidth ?? allMetrics.minHeight ?? allMetrics.minSize ?? allMetrics.expected;
      // The label follows whichever observed dimension is present.
      const prop = allMetrics.observedWidth !== undefined ? 'width' : allMetrics.observedHeight !== undefined ? 'height' : 'size';
      if (observed !== undefined) parts.push(`${prop} is ${observed}px`);
      if (expected !== undefined) parts.push(`expected ${prop} is ${expected}px`);
      break;
    }
    case 'cardinality': {
      const observed = allMetrics.observedCount;
      const expected = allMetrics.expectedCount;
      if (observed !== undefined) parts.push(`found ${observed} element(s)`);
      if (expected !== undefined) parts.push(`expected ${expected} element(s)`);
      break;
    }
    case 'parse':
      parts.push(diagnostic?.message ?? 'parse error occurred');
      break;
    case 'selector':
      parts.push(`selector resolved to 0 elements`);
      break;
    default:
      if (Object.keys(allMetrics).length > 0) {
        parts.push(
          Object.entries(allMetrics)
            .map(([k, v]) => `${k}=${v}`)
            .join(', '),
        );
      }
      break;
  }

  // Shared fallback: previously only the default branch had one, so typed
  // failures without their metrics rendered a blank section.
  if (parts.length === 0) {
    parts.push(diagnostic?.message ?? 'failure details unavailable');
  }

  return parts.join('; ');
}
|
||||
|
||||
/**
 * "Why this matters" section: one sentence describing the impact of the
 * failure for the given type.
 *
 * The selector quoted in the sentence comes from the diagnostic's source
 * reference, falling back to the first single-quoted token of the clause label.
 */
function buildWhy(clause: ClauseResult, diagnostic: Diagnostic | undefined, type: string): string {
  const subject = diagnostic?.sourceRef?.selector ?? extractSelectorFromLabel(clause.clauseLabel);

  if (type === 'relation') {
    return `The spatial relationship between elements is violated, breaking layout expectations for "${subject}".`;
  }
  if (type === 'size') {
    return `The element "${subject}" does not meet size constraints, which may cause overflow or clipping.`;
  }
  if (type === 'cardinality') {
    return `The expected number of elements for "${subject}" was not found, indicating a missing or duplicate component.`;
  }
  if (type === 'parse') {
    return `The assertion could not be parsed, so Imhotep cannot evaluate the intended contract.`;
  }
  if (type === 'selector') {
    return `The selector "${subject}" matched nothing, so the assertion has no subject to evaluate.`;
  }
  return `The assertion failed, indicating a contract violation or extraction problem.`;
}
|
||||
|
||||
/**
 * "Replay" section: a comma-separated list of the facts needed to find the
 * failure again — optional seed, clause id, optional source line/column, and
 * (for selector and cardinality failures) the selector involved.
 */
function buildReplay(clause: ClauseResult, _diagnostic: Diagnostic | undefined, type: string, seed?: number): string {
  const ref = _diagnostic?.sourceRef;
  const fields: string[] = seed === undefined ? [] : [`seed: ${seed}`];

  fields.push(`clause: ${clause.clauseId}`);

  if (ref?.line !== undefined) {
    fields.push(`line: ${ref.line}`);
  }
  if (ref?.column !== undefined) {
    fields.push(`column: ${ref.column}`);
  }

  const needsSelector = type === 'selector' || type === 'cardinality';
  if (needsSelector) {
    const selector = ref?.selector ?? extractSelectorFromLabel(clause.clauseLabel);
    fields.push(`selector: "${selector}"`);
  }

  return fields.join(', ');
}
|
||||
|
||||
/**
 * "Next" section: what to do about the failure.
 *
 * Fix hints on the diagnostic take priority and are joined with single
 * spaces; without hints, a canned suggestion for the failure type is used.
 */
function buildNext(clause: ClauseResult, diagnostic: Diagnostic | undefined, type: string): string {
  const fixHints = diagnostic?.fixHints ?? [];
  if (fixHints.length > 0) {
    return fixHints.join(' ');
  }

  const cannedAdvice = new Map<string, string>([
    ['relation', `Inspect the layout in the browser devtools and adjust element positions or gap thresholds.`],
    ['size', `Check the element dimensions with ui.extract() and adjust the expected size or CSS.`],
    ['cardinality', `Verify the selector matches the intended elements; use ui.extract() to debug.`],
    ['parse', `Fix the assertion syntax: use single-quoted selectors and valid relation keywords.`],
    ['selector', `Verify the selector is correct and the element exists in the DOM at evaluation time.`],
  ]);

  return cannedAdvice.get(type) ?? `Review the diagnostic message and metrics to determine the root cause.`;
}
|
||||
|
||||
/**
 * Pull the first single-quoted, non-empty token out of a clause label.
 * Returns the literal 'unknown' for a missing/empty label or when no such
 * token exists.
 */
function extractSelectorFromLabel(label: string | undefined): string {
  const quoted = label ? /'([^']+)'/.exec(label) : null;
  return quoted?.[1] ?? 'unknown';
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Report Assembly
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Assemble the structured canonical report for a result.
 *
 * Only clauses whose status is 'fail' or 'error' produce an entry. For each
 * one a primary diagnostic is looked up in `result.diagnostics`, in order of
 * preference:
 *   1. same code as the clause's first listed diagnostic AND same clause id;
 *   2. same code with no clause id at all (an unscoped diagnostic);
 *   3. any diagnostic carrying the clause's id.
 *
 * Step 1 used to be merged with step 2 in a single `find`, which returns the
 * first hit: an unscoped diagnostic appearing earlier in the list could
 * shadow the clause's own diagnostic and feed the wrong metrics/source
 * reference into the section builders.
 *
 * @param result - Evaluation result to summarise.
 * @param options - Formatter options; only `seed` is used here.
 * @returns The report; `failureCount` always equals `failures.length`.
 */
function buildCanonicalReport(result: ImhotepResult, options?: CanonicalFormatOptions): CanonicalReport {
  const failures: CanonicalFailureEntry[] = [];

  for (const clause of result.clauseResults) {
    if (clause.status !== 'fail' && clause.status !== 'error') {
      continue;
    }

    // Find the primary diagnostic for this clause.
    let diagnostic: Diagnostic | undefined;
    const diagCode = clause.diagnostics?.[0];
    if (diagCode) {
      diagnostic =
        (result.diagnostics.find(
          (d: any) => d.code === diagCode && d.clauseId === clause.clauseId,
        ) as Diagnostic | undefined) ??
        (result.diagnostics.find(
          (d: any) => d.code === diagCode && !d.clauseId,
        ) as Diagnostic | undefined);
    }
    if (!diagnostic) {
      diagnostic = result.diagnostics.find((d: any) => d.clauseId === clause.clauseId) as Diagnostic | undefined;
    }

    const type = detectFailureType(clause, diagnostic);

    failures.push({
      expected: buildExpected(clause, diagnostic, type),
      observed: buildObserved(clause, diagnostic, type),
      why: buildWhy(clause, diagnostic, type),
      replay: buildReplay(clause, diagnostic, type, options?.seed),
      next: buildNext(clause, diagnostic, type),
    });
  }

  return {
    passed: result.passed,
    failureCount: failures.length,
    failures,
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Text Rendering (80-char wrapping)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Greedy word wrap on single spaces.
 *
 * A line that already fits is returned unchanged as a one-element array.
 * Otherwise words are packed into lines of at most `maxWidth` characters.
 * A single word longer than `maxWidth` is never split; it gets a line of
 * its own.
 *
 * Fix: a line break is only emitted when there is buffered text to flush.
 * The previous check added one character for a separator even when the
 * buffer was empty, so a first word of `maxWidth` or more characters
 * produced a spurious empty line ahead of it.
 *
 * @param line - Text to wrap; it is split on the space character only.
 * @param maxWidth - Maximum number of characters per output line.
 * @returns The wrapped lines, in order.
 */
function wrapLine(line: string, maxWidth: number): string[] {
  if (line.length <= maxWidth) return [line];

  const lines: string[] = [];
  let current = '';

  for (const word of line.split(' ')) {
    if (current === '') {
      // Nothing buffered: the word starts the line even if it is too long.
      current = word;
      continue;
    }
    const candidate = `${current} ${word}`;
    if (candidate.length > maxWidth) {
      lines.push(current);
      current = word;
    } else {
      current = candidate;
    }
  }

  if (current) lines.push(current);
  return lines;
}
|
||||
|
||||
/**
 * Render one failure entry as text: a blank line, a horizontal rule of
 * `maxWidth` characters, a blank line, then each of the five sections as a
 * `Label:` heading followed by its wrapped, indented value and a blank line.
 */
function renderEntryText(entry: CanonicalFailureEntry, maxWidth: number): string {
  const sections: Array<[string, string]> = [
    ['Expected', entry.expected],
    ['Observed', entry.observed],
    ['Why this matters', entry.why],
    ['Replay', entry.replay],
    ['Next', entry.next],
  ];

  const out: string[] = ['', '─'.repeat(maxWidth), ''];

  for (const [heading, text] of sections) {
    out.push(`${heading}:`);
    // Wrap two columns narrower than the page width to leave room for the indent.
    out.push(...wrapLine(text, maxWidth - 2).map((piece) => ` ${piece}`));
    out.push('');
  }

  return out.join('\n');
}
|
||||
|
||||
/**
 * Render the canonical failure report as terminal-friendly text.
 *
 * Two defects are addressed:
 *  - With no failing clauses the function used to claim "All checks passed"
 *    even when `result.passed` was false. That case now gets its own message
 *    instead of a false all-clear.
 *  - `options.maxWidth` is sanitised. A negative or infinite width made
 *    `String.prototype.repeat` throw a RangeError; any value that is not a
 *    finite number >= 1 now falls back to the default of 80, and fractional
 *    widths are floored.
 *
 * @param result - Evaluation result to report on.
 * @param options - Formatter options (`maxWidth`, `seed`).
 * @returns The rendered report.
 */
export function renderCanonicalText(result: ImhotepResult, options?: CanonicalFormatOptions): string {
  const report = buildCanonicalReport(result, options);

  const requestedWidth = options?.maxWidth;
  const maxWidth =
    requestedWidth !== undefined && Number.isFinite(requestedWidth) && requestedWidth >= 1
      ? Math.floor(requestedWidth)
      : 80;

  if (report.failureCount === 0) {
    return report.passed
      ? 'All checks passed. No failures to report.'
      : 'Run did not pass, but no failing clauses were reported. Inspect the result diagnostics for details.';
  }

  const lines: string[] = [];
  lines.push('='.repeat(maxWidth));
  lines.push('IMHOTEP CANONICAL FAILURE REPORT');
  lines.push(`${report.failureCount} failure(s) across ${result.clauseResults.length} clause(s)`);
  lines.push('='.repeat(maxWidth));

  for (const entry of report.failures) {
    lines.push(renderEntryText(entry, maxWidth));
  }

  return lines.join('\n');
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// JSON Rendering
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Render the canonical report as pretty-printed JSON (two-space indent).
 */
export function renderCanonicalJson(result: ImhotepResult, options?: CanonicalFormatOptions): string {
  return JSON.stringify(buildCanonicalReport(result, options), null, 2);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Main Entry Point
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Turn an ImhotepResult into a canonical failure report string.
 *
 * Dispatches on `options.format`: `'json'` selects the JSON renderer, and
 * anything else (including an omitted format) selects the text renderer.
 *
 * @param result - The evaluation result from checkAll or property runs.
 * @param options - Format options (text or json, wrapping, seed).
 * @returns A string containing the formatted report.
 */
export function formatCanonical(result: ImhotepResult, options?: CanonicalFormatOptions): string {
  const render = options?.format === 'json' ? renderCanonicalJson : renderCanonicalText;
  return render(result, options);
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,423 @@
|
||||
/**
|
||||
* Diagnostic object creation and formatting.
|
||||
*
|
||||
* Diagnostics carry codes, positions, related facts, and fix hints.
|
||||
* They map proof outcomes back to author source spans.
|
||||
*
|
||||
* Extended for V1.1 with property-run diagnostics (sampled runs,
|
||||
* seeds, counterexamples) to support Riley's 2 AM replay workflow.
|
||||
*/
|
||||
|
||||
import type { TraceEvent, TraceRefs } from './traces.js';
|
||||
import {
|
||||
IMH_RELATION_LEFT_OF_FAILED,
|
||||
IMH_RELATION_RIGHT_OF_FAILED,
|
||||
IMH_RELATION_ABOVE_FAILED,
|
||||
IMH_RELATION_BELOW_FAILED,
|
||||
IMH_RELATION_ALIGNED_FAILED,
|
||||
IMH_RELATION_CENTERED_FAILED,
|
||||
IMH_FACT_OBSERVED_GAP,
|
||||
lookupCode,
|
||||
} from './codes.js';
|
||||
|
||||
/**
 * How serious a diagnostic is.
 */
export type Severity =
  | 'error'
  | 'warning'
  | 'info';
|
||||
|
||||
/**
 * Coarse diagnostic category, used for routing and suggestion logic.
 */
export type DiagnosticCategory =
  'parse-error' | 'validation-error' | 'resolution-error' | 'extraction-error' |
  'contract-failure' | 'indeterminate-result' | 'internal-error';
|
||||
|
||||
/**
 * A span in the original authoring document, given as start and end points.
 * Each point carries a line, a column, and an offset.
 */
export interface Position {
  /** Where the span begins. */
  start: { line: number; column: number; offset: number };
  /** Where the span ends. */
  end: { line: number; column: number; offset: number };
}
|
||||
|
||||
/**
 * Supporting fact attached to a diagnostic: relevant context, but not the
 * primary cause.
 */
export interface RelatedFact {
  /** Optional code identifying the kind of fact. */
  code?: string;
  /** Human-readable statement of the fact. */
  message: string;
  /** Optional source span the fact refers to. */
  position?: Position;
}
|
||||
|
||||
/**
 * An actionable instruction, as plain text, that the user can follow to
 * address a diagnostic.
 */
export type FixHint = string;
|
||||
|
||||
/**
 * Canonical pointer from a diagnostic back to its source. Every field is
 * optional.
 */
export interface SourceRef {
  /** File the diagnostic refers to. */
  file?: string;
  /** Line within the source. */
  line?: number;
  /** Column within the source. */
  column?: number;
  /** Selector the diagnostic is about. */
  selector?: string;
}
|
||||
|
||||
/**
 * Machine-generated fix proposal that can be attached to a diagnostic.
 * All four fields are required strings.
 */
export interface SuggestedFix {
  /** What to do. */
  action: string;
  /** What the action applies to. */
  target: string;
  /** The value to apply. */
  value: string;
  /** Why the fix is suggested. */
  rationale: string;
}
|
||||
|
||||
/**
 * The full diagnostic record: identity, classification, message, optional
 * location and scoping ids, plus supporting facts, hints, metrics and a
 * source reference.
 */
export interface Diagnostic {
  /** Identifier of this diagnostic instance. */
  diagnosticId: string;
  /** Diagnostic code. */
  code: string;
  severity: Severity;
  category: DiagnosticCategory;
  /** Human-readable description. */
  message: string;

  /** Span in the authoring document, when known. */
  position?: Position;
  source?: string;

  /** Ids tying the diagnostic to a clause / scene / snapshot / environment case. */
  clauseId?: string;
  sceneId?: string;
  snapshotId?: string;
  envCaseId?: string;

  /** Origin of subject resolution for semantic selectors (Stream 7). */
  subjectOrigin?: string;

  /** Supporting facts; may be empty. */
  related: RelatedFact[];
  traceRef?: string;
  /** Actionable hints; may be empty. */
  fixHints: FixHint[];

  /** Required in V1: structured numeric metrics for this diagnostic. */
  metrics: Record<string, number>;
  /** Required in V1: canonical source reference. */
  sourceRef: SourceRef;
  /** Optional in V1: automated fix suggestion. */
  suggestedFix?: SuggestedFix;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Sampled-Run Diagnostic Types (V1.1)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Diagnostic emitted by a sampled or enumerated property run.
 *
 * Extends the base diagnostic with the seed, run index, input snapshot and
 * run result, plus optional counterexamples, so the failing input can be
 * replayed from the seed. All added fields are optional.
 */
export interface PropertyDiagnostic extends Diagnostic {
  seed?: number;
  runIndex?: number;
  inputSnapshot?: unknown;
  runResult?: 'pass' | 'fail' | 'error';
  counterexample?: unknown;
  shrunkCounterexample?: unknown;
}
|
||||
|
||||
/**
 * Input shape for `makePropertyDiagnostic`: a regular diagnostic blueprint
 * plus the optional property-run fields that are copied onto the result.
 */
export interface PropertyDiagnosticBlueprint extends DiagnosticBlueprint {
  seed?: number;
  runIndex?: number;
  inputSnapshot?: unknown;
  runResult?: 'pass' | 'fail' | 'error';
  counterexample?: unknown;
  shrunkCounterexample?: unknown;
}
|
||||
|
||||
/**
 * Create a PropertyDiagnostic.
 *
 * The base diagnostic is produced by `makeDiagnostic`; the six property-run
 * fields are then copied from the blueprint as-is. Fields the blueprint omits
 * are present on the result with the value `undefined`.
 */
export function makePropertyDiagnostic(
  blueprint: PropertyDiagnosticBlueprint,
  deps: { idGen(): string },
): PropertyDiagnostic {
  const diagnostic = makeDiagnostic(blueprint, deps);
  const { seed, runIndex, inputSnapshot, runResult, counterexample, shrunkCounterexample } = blueprint;

  return {
    ...diagnostic,
    seed,
    runIndex,
    inputSnapshot,
    runResult,
    counterexample,
    shrunkCounterexample,
  };
}
|
||||
|
||||
/**
 * Caller-supplied ingredients for a diagnostic built from a proof or system
 * error. Only `code`, `category` and `message` are mandatory; everything
 * else is injected by the caller when available.
 */
export interface DiagnosticBlueprint {
  code: string;
  /** Defaults to 'error' in `makeDiagnostic` when omitted. */
  severity?: Severity;
  category: DiagnosticCategory;
  message: string;

  position?: Position;
  source?: string;

  clauseId?: string;
  sceneId?: string;
  snapshotId?: string;
  envCaseId?: string;

  /** Defaults to an empty array in `makeDiagnostic`. */
  related?: RelatedFact[];
  traceRef?: string;
  /** Defaults to an empty array in `makeDiagnostic`. */
  fixHints?: FixHint[];
  /** Defaults to an empty object in `makeDiagnostic`. */
  metrics?: Record<string, number>;
  /** Defaults to an empty object in `makeDiagnostic`. */
  sourceRef?: SourceRef;
  suggestedFix?: SuggestedFix;
}
|
||||
|
||||
/**
 * Build a Diagnostic from a blueprint, applying safe defaults.
 *
 * Defaults: `severity` -> 'error', `related` / `fixHints` -> [],
 * `metrics` / `sourceRef` -> {}. The id comes from `deps.idGen()`.
 *
 * Fix: `blueprint.suggestedFix` is now carried over. It was declared on both
 * the blueprint and the Diagnostic but never copied, so suggested fixes were
 * silently lost. The key is only added when a fix was supplied, so
 * diagnostics built without one have exactly the same shape as before.
 *
 * @param blueprint - Caller-supplied diagnostic fields.
 * @param deps - Dependencies; `idGen` supplies the diagnostic id.
 * @returns The assembled diagnostic.
 */
export function makeDiagnostic(
  blueprint: DiagnosticBlueprint,
  deps: { idGen(): string },
): Diagnostic {
  return {
    diagnosticId: deps.idGen(),
    code: blueprint.code,
    severity: blueprint.severity ?? 'error',
    category: blueprint.category,
    message: blueprint.message,
    position: blueprint.position,
    source: blueprint.source,
    clauseId: blueprint.clauseId,
    sceneId: blueprint.sceneId,
    snapshotId: blueprint.snapshotId,
    envCaseId: blueprint.envCaseId,
    related: blueprint.related ?? [],
    traceRef: blueprint.traceRef,
    fixHints: blueprint.fixHints ?? [],
    metrics: blueprint.metrics ?? {},
    sourceRef: blueprint.sourceRef ?? {},
    ...(blueprint.suggestedFix !== undefined ? { suggestedFix: blueprint.suggestedFix } : {}),
  };
}
|
||||
|
||||
/**
 * One-line summary of a diagnostic for quick scanning:
 * `[SEVERITY code @ line:column] message`, with `unknown` in place of the
 * location when the diagnostic has no position.
 */
export function formatDiagnosticCompact(d: Diagnostic): string {
  let where = 'unknown';
  if (d.position) {
    const { line, column } = d.position.start;
    where = `${line}:${column}`;
  }
  const tag = d.severity.toUpperCase();
  return `[${tag} ${d.code} @ ${where}] ${d.message}`;
}
|
||||
|
||||
/**
 * Multi-line, human-readable rendering of a diagnostic.
 *
 * Always prints the severity/message line and the code. The position,
 * clause id, related facts and fix hints each appear only when present or
 * non-empty. Lines are joined with newlines.
 */
export function formatDiagnosticFull(d: Diagnostic): string {
  const out: string[] = [
    `${d.severity.toUpperCase()}: ${d.message}`,
    ` code: ${d.code}`,
  ];

  if (d.position) {
    out.push(` at: line ${d.position.start.line}, column ${d.position.start.column}`);
  }
  if (d.clauseId) {
    out.push(` clause: ${d.clauseId}`);
  }
  if (d.related.length > 0) {
    out.push(' related:', ...d.related.map((fact) => ` - ${fact.message}`));
  }
  if (d.fixHints.length > 0) {
    out.push(' fix hints:', ...d.fixHints.map((hint) => ` - ${hint}`));
  }

  return out.join('\n');
}
|
||||
|
||||
/**
 * The minimum a proof must expose for the reporter to work with it. The
 * complete proof object is defined by the solver contract.
 */
export interface ProofLike {
  proofId: string;
  clauseId: string;
  /** Whether the clause held. */
  outcome: 'pass' | 'fail';
  truth: 'determinate' | 'indeterminate' | 'approximate';
  /** The predicate that failed, with its operator and both operands. */
  failedPredicate?: { op: string; left: number; right: number };
  /** Optional ids describing the witness for the outcome. */
  witness?: {
    subjectId?: number;
    referenceId?: number;
    envCaseId?: string;
    snapshotId?: string;
  };
}
|
||||
|
||||
/**
 * Proof for a spatial-relation clause, carrying everything needed for a
 * detailed contract-failure diagnostic. Produced by the solver / pipeline
 * when such a clause fails.
 */
export interface RelationProof extends ProofLike {
  /** CSS selector that picked the subject element. */
  subjectSelector: string;
  /** CSS selector that picked the reference element. */
  referenceSelector: string;
  /** Name of the relation, e.g. "leftOf" or "above". */
  relation: string;
  /** Expected bounds (minGap, maxGap, etc.). */
  expected: Record<string, number>;
  /** Measured values (observedGap, etc.). */
  observed: Record<string, number>;
  /** Border-box rectangle of the subject element. */
  subjectRect: { left: number; top: number; right: number; bottom: number };
  /** Border-box rectangle of the reference element. */
  referenceRect: { left: number; top: number; right: number; bottom: number };
  /** Frame identifier (viewport, containing-block, etc.). */
  frame: string;
}
|
||||
|
||||
/** Lookup from relation name to the failure code reported for that relation. */
const RELATION_TO_CODE: Record<string, string> = {
  'leftOf': IMH_RELATION_LEFT_OF_FAILED,
  'rightOf': IMH_RELATION_RIGHT_OF_FAILED,
  'above': IMH_RELATION_ABOVE_FAILED,
  'below': IMH_RELATION_BELOW_FAILED,
  'alignedWith': IMH_RELATION_ALIGNED_FAILED,
  'centeredWithin': IMH_RELATION_CENTERED_FAILED,
};
|
||||
|
||||
/**
 * Build a rich relation-specific diagnostic from a RelationProof.
 *
 * The message names the relation and both selectors and, when available,
 * the measured gap against the minimum / maximum bounds. Element rects, the
 * frame and any failed predicate are attached as related facts, and the fix
 * hints point at the frame and both elements.
 *
 * Improvements over the previous version:
 *  - `metrics` is populated from the proof (expected bounds merged with
 *    observed values, observed winning on key collisions) and
 *    `sourceRef.selector` is set to the subject selector. Both were left at
 *    their empty defaults, so consumers that read `metrics.observedGap`,
 *    `metrics.minGap` or `sourceRef.selector` had nothing to work with.
 *  - The relation-to-code lookup only considers own keys of the table, so a
 *    relation named like an Object.prototype member ("constructor",
 *    "toString", ...) can no longer yield a non-string code.
 *
 * @param proof - The failed relation proof.
 * @param deps - Dependencies; `idGen` supplies the diagnostic id.
 * @returns A 'contract-failure' diagnostic scoped to the proof's clause.
 */
export function buildRelationDiagnostic(
  proof: RelationProof,
  deps: { idGen(): string },
): Diagnostic {
  const mappedCode = Object.prototype.hasOwnProperty.call(RELATION_TO_CODE, proof.relation)
    ? RELATION_TO_CODE[proof.relation]
    : undefined;
  // NOTE(review): relations missing from the table still fall back to the
  // left-of code, exactly as before — confirm that is intended.
  const code = mappedCode ?? lookupCode(IMH_RELATION_LEFT_OF_FAILED)?.code ?? 'IMH_RELATION_FAILED';

  const parts: string[] = [
    `Relation "${proof.relation}" failed for subject "${proof.subjectSelector}" vs reference "${proof.referenceSelector}".`,
  ];

  if (proof.observed.observedGap !== undefined && proof.expected.minGap !== undefined) {
    parts.push(`Measured gap is ${proof.observed.observedGap}px, but minimum required gap is ${proof.expected.minGap}px.`);
  }
  if (proof.observed.observedGap !== undefined && proof.expected.maxGap !== undefined) {
    parts.push(`Maximum allowed gap is ${proof.expected.maxGap}px.`);
  }

  const related: RelatedFact[] = [
    {
      code: IMH_FACT_OBSERVED_GAP,
      message: `Subject rect: [${fmtRect(proof.subjectRect)}]`,
    },
    {
      code: IMH_FACT_OBSERVED_GAP,
      message: `Reference rect: [${fmtRect(proof.referenceRect)}]`,
    },
    {
      code: 'IMH_FRAME_CONTEXT',
      message: `Frame: ${proof.frame}`,
    },
  ];

  if (proof.failedPredicate) {
    related.push({
      code: 'IMH_PREDICATE_FAILED',
      message: `Predicate ${proof.failedPredicate.op} failed: ${proof.failedPredicate.left} vs ${proof.failedPredicate.right}`,
    });
  }

  return makeDiagnostic(
    {
      code,
      category: 'contract-failure',
      message: parts.join(' '),
      clauseId: proof.clauseId,
      related,
      fixHints: [
        `Check the layout in frame "${proof.frame}".`,
        `Subject: ${proof.subjectSelector} at [${fmtRect(proof.subjectRect)}]`,
        `Reference: ${proof.referenceSelector} at [${fmtRect(proof.referenceRect)}]`,
      ],
      metrics: { ...proof.expected, ...proof.observed },
      sourceRef: { selector: proof.subjectSelector },
    },
    deps,
  );
}
|
||||
|
||||
/** Format a rect as `left=… top=… right=… bottom=…`, separated by spaces. */
function fmtRect(r: { left: number; top: number; right: number; bottom: number }): string {
  const { left, top, right, bottom } = r;
  return [`left=${left}`, `top=${top}`, `right=${right}`, `bottom=${bottom}`].join(' ');
}
|
||||
|
||||
/**
 * Turn a failed proof into a diagnostic; proofs whose outcome is not 'fail'
 * yield `null`.
 *
 * The category is 'indeterminate-result' when the proof's truth is
 * indeterminate and 'contract-failure' otherwise. The failed predicate and
 * the witness's environment case / snapshot ids, when present, become
 * related facts. Code, message and fix hints come from the injected `deps`
 * callbacks.
 */
export function diagnosticFromProof(
  proof: ProofLike,
  deps: {
    idGen(): string;
    codeForClause(clauseId: string): string;
    messageForClause(clauseId: string, proof: ProofLike): string;
    fixHintsForClause(clauseId: string, proof: ProofLike): string[];
  },
): Diagnostic | null {
  if (proof.outcome !== 'fail') return null;

  const { clauseId, failedPredicate, witness } = proof;
  const related: RelatedFact[] = [];

  if (failedPredicate) {
    const { op, left, right } = failedPredicate;
    related.push({
      code: 'IMH_PREDICATE_FAILED',
      message: `Predicate ${op} failed: ${left} vs ${right}`,
    });
  }
  if (witness?.envCaseId) {
    related.push({
      code: 'IMH_WITNESS_ENV',
      message: `Environment case: ${witness.envCaseId}`,
    });
  }
  if (witness?.snapshotId) {
    related.push({
      code: 'IMH_WITNESS_SNAPSHOT',
      message: `Snapshot: ${witness.snapshotId}`,
    });
  }

  // The callbacks are invoked in the same order as before: code, message, fix hints.
  const blueprint: DiagnosticBlueprint = {
    code: deps.codeForClause(clauseId),
    category: proof.truth === 'indeterminate' ? 'indeterminate-result' : 'contract-failure',
    message: deps.messageForClause(clauseId, proof),
    clauseId,
    envCaseId: witness?.envCaseId,
    snapshotId: witness?.snapshotId,
    related,
    fixHints: deps.fixHintsForClause(clauseId, proof),
  };

  return makeDiagnostic(blueprint, deps);
}
|
||||
@@ -0,0 +1,260 @@
|
||||
/**
|
||||
* Pattern-matching failure analyzer for Imhotep diagnostics.
|
||||
*
|
||||
* Maps failure codes + observed metrics to likely causes and concrete fixes.
|
||||
* Fail-closed: unknown failures return undefined (no analysis attached).
|
||||
*
|
||||
* Inspired by Apophis cross-pollination research (analyzeFailure pipeline).
|
||||
*/
|
||||
|
||||
/**
 * Result of a matched pattern rule, attached to the diagnostic it analysed.
 */
export interface FailureAnalysis {
  /** Human-readable root-cause classification. */
  likelyCause: string;

  /** Concrete fixes the user can apply, in order. */
  suggestedFixes: string[];

  /** Severity chosen by the rule; it may differ from the diagnostic's own. */
  severity: 'error' | 'warning' | 'info';
}
|
||||
|
||||
/**
 * What a rule gets to look at: the failure code, the diagnostic message,
 * any metrics extracted from the proof, and optionally a category.
 */
export interface FailureContext {
  code: string;
  message: string;
  metrics: Record<string, number>;
  category?: string;
}
|
||||
|
||||
/**
 * One pattern rule: a predicate paired with the factory that produces the
 * analysis for contexts the predicate accepts.
 */
export interface FailureRule {
  /** Unique id of the rule, for debugging and telemetry. */
  ruleId: string;

  /** True when the rule applies to `ctx`. */
  matches(ctx: FailureContext): boolean;

  /** Build the analysis for a context that matched. */
  analyze(ctx: FailureContext): FailureAnalysis;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Built-in pattern rules
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Read `metrics[key]`, yielding undefined unless the value is a non-NaN number. */
function getMetric(metrics: Record<string, number>, key: string): number | undefined {
  const value = metrics[key];
  return typeof value === 'number' && !Number.isNaN(value) ? value : undefined;
}
|
||||
|
||||
/**
 * Fallback for absent metrics: read the number out of a "gap is <n>px"
 * phrase (case-insensitive) in a diagnostic message.
 */
function parseGapFromMessage(message: string): number | undefined {
  const found = /gap\s+is\s+(-?[\d.]+)px/i.exec(message);
  if (found === null) return undefined;

  const parsed = Number.parseFloat(found[1]);
  if (Number.isNaN(parsed)) return undefined;
  return parsed;
}
|
||||
|
||||
/**
 * Fallback for absent metrics: read the number out of a "width is <n>px"
 * phrase (case-insensitive) in a diagnostic message.
 */
function parseWidthFromMessage(message: string): number | undefined {
  const found = /width\s+is\s+(-?[\d.]+)px/i.exec(message);
  if (found === null) return undefined;

  const parsed = Number.parseFloat(found[1]);
  if (Number.isNaN(parsed)) return undefined;
  return parsed;
}
|
||||
|
||||
/**
 * Read the observed count from a "resolved to <n> element" phrase
 * (case-insensitive) in a cardinality message.
 */
function parseCountFromMessage(message: string): number | undefined {
  const found = /resolved to\s+(\d+)\s+element/i.exec(message);
  return found === null ? undefined : Number.parseInt(found[1], 10);
}
|
||||
|
||||
/**
 * Rule: a left-of relation failed with a negative observed gap. The gap is
 * taken from `metrics.observedGap`, or parsed from the message as a fallback.
 */
const RULE_LEFT_OF_OVERLAP: FailureRule = {
  ruleId: 'leftOfOverlap',

  matches: (ctx) => {
    if (ctx.code === 'IMH_RELATION_LEFT_OF_FAILED') {
      const observedGap = getMetric(ctx.metrics, 'observedGap') ?? parseGapFromMessage(ctx.message);
      return observedGap !== undefined && observedGap < 0;
    }
    return false;
  },

  analyze: (_ctx) => ({
    likelyCause: 'Horizontal overlap',
    suggestedFixes: [
      'Check that the subject element has a positive horizontal margin from the reference.',
      'Verify neither element is using negative margins or float that causes overlap.',
      'Consider adding clearfix or adjusting flex/grid gap settings.',
    ],
    severity: 'error',
  }),
};
|
||||
|
||||
/**
 * Rule: an inside relation failed and the subject is wider or taller than
 * its container according to the metrics, or — failing that — the message
 * mentions "overflow" or "larger" (case-insensitive).
 */
const RULE_INSIDE_OVERFLOW: FailureRule = {
  ruleId: 'insideOverflow',

  matches: (ctx) => {
    if (ctx.code !== 'IMH_RELATION_INSIDE_FAILED') return false;

    const subjectWidth = getMetric(ctx.metrics, 'subjectWidth');
    const subjectHeight = getMetric(ctx.metrics, 'subjectHeight');
    const containerWidth = getMetric(ctx.metrics, 'containerWidth');
    const containerHeight = getMetric(ctx.metrics, 'containerHeight');

    const tooWide = subjectWidth !== undefined && containerWidth !== undefined && subjectWidth > containerWidth;
    const tooTall = subjectHeight !== undefined && containerHeight !== undefined && subjectHeight > containerHeight;
    if (tooWide || tooTall) return true;

    // Metrics were inconclusive: fall back to keywords in the message.
    const text = ctx.message.toLowerCase();
    return text.includes('overflow') || text.includes('larger');
  },

  analyze: (_ctx) => ({
    likelyCause: 'Overflow or box-sizing issue',
    suggestedFixes: [
      'Check padding on the container — subject may be larger than content-box allows.',
      'Ensure box-sizing: border-box is applied so padding does not increase total size.',
      'Verify the subject dimensions do not exceed the container inner width/height.',
    ],
    severity: 'error',
  }),
};
|
||||
|
||||
/**
 * Rule: a failed at-least size check where the observed width is below 44.
 *
 * The width comes from the `observedWidth` metric when present, otherwise it
 * is parsed out of the diagnostic message. Reported with 'warning' severity.
 */
const RULE_SIZE_TOUCH_TARGET: FailureRule = {
  ruleId: 'sizeTouchTarget',
  matches: (ctx) => {
    if (ctx.code === 'IMH_SIZE_AT_LEAST_FAILED') {
      const observedWidth =
        getMetric(ctx.metrics, 'observedWidth') ?? parseWidthFromMessage(ctx.message);
      return observedWidth !== undefined && observedWidth < 44;
    }
    return false;
  },
  analyze: (_ctx) => ({
    likelyCause: 'Touch target too small',
    suggestedFixes: [
      'Increase min-width to at least 44px to meet WCAG 2.5.5 / mobile accessibility guidelines.',
      'Check that padding or border is not collapsing the clickable area.',
      'Consider using a larger font size or icon scale if the element is text-based.',
    ],
    severity: 'warning',
  }),
};
|
||||
|
||||
/**
 * Rule: an exactly-one cardinality failure where zero elements were observed.
 *
 * The count comes from the `observedCount` metric when present, otherwise it
 * is parsed out of the diagnostic message.
 */
const RULE_CARDINALITY_MISSING: FailureRule = {
  ruleId: 'cardinalityMissing',
  matches: (ctx) => {
    if (ctx.code === 'IMH_CARDINALITY_EXACTLYONE_FAILED') {
      const observedCount =
        getMetric(ctx.metrics, 'observedCount') ?? parseCountFromMessage(ctx.message);
      return observedCount === 0;
    }
    return false;
  },
  analyze: (_ctx) => ({
    likelyCause: 'Missing element',
    suggestedFixes: [
      'Verify the selector matches an element that is actually rendered in the DOM.',
      'Check for conditional rendering that may hide the element in this state.',
      'Use ui.extract(selector) to debug what the page currently contains.',
    ],
    severity: 'error',
  }),
};
|
||||
|
||||
/**
 * Rule: an exactly-one cardinality failure where more than one element was observed.
 *
 * The count comes from the `observedCount` metric when present, otherwise it
 * is parsed out of the diagnostic message.
 */
const RULE_CARDINALITY_DUPLICATE: FailureRule = {
  ruleId: 'cardinalityDuplicate',
  matches: (ctx) => {
    if (ctx.code === 'IMH_CARDINALITY_EXACTLYONE_FAILED') {
      const observedCount =
        getMetric(ctx.metrics, 'observedCount') ?? parseCountFromMessage(ctx.message);
      return observedCount !== undefined && observedCount > 1;
    }
    return false;
  },
  analyze: (_ctx) => ({
    likelyCause: 'Duplicate matches',
    suggestedFixes: [
      'Check for repeated data-testid or class names across sibling elements.',
      'Scope the selector more tightly (e.g. add a parent prefix).',
      'Use a semantic selector (getByRole, getByLabelText) for disambiguation.',
    ],
    severity: 'error',
  }),
};
|
||||
|
||||
/**
 * Rule: the diagnostic code is IMH_SELECTOR_ZERO_MATCHES.
 *
 * Matching depends on the code alone; message and metrics are not inspected.
 */
const RULE_SELECTOR_ZERO_MATCHES: FailureRule = {
  ruleId: 'selectorZeroMatches',
  matches: (ctx) => ctx.code === 'IMH_SELECTOR_ZERO_MATCHES',
  analyze: (_ctx) => ({
    likelyCause: 'Selector not found',
    suggestedFixes: [
      'Use ui.extract(selector) to verify the selector resolves to at least one element.',
      'Check that the selector is valid CSS and the element is in the DOM.',
      'For semantic selectors, confirm the accessible name or role is correct.',
    ],
    severity: 'error',
  }),
};
|
||||
|
||||
/**
 * Default rule set shipped with V1.0.
 *
 * Order is significant: analyzeFailure evaluates rules front to back and
 * returns the analysis of the first rule that matches.
 */
export const DEFAULT_FAILURE_RULES: FailureRule[] = [
  // Relation failures
  RULE_LEFT_OF_OVERLAP,
  RULE_INSIDE_OVERFLOW,
  // Size failures
  RULE_SIZE_TOUCH_TARGET,
  // Cardinality failures
  RULE_CARDINALITY_MISSING,
  RULE_CARDINALITY_DUPLICATE,
  // Selector failures
  RULE_SELECTOR_ZERO_MATCHES,
];
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Analyzer engine
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Run a failure context through a rule set and report the first hit.
 *
 * Rules are tried in array order; evaluation stops at the first rule whose
 * `matches` returns a truthy value, and only that rule's `analyze` is called.
 *
 * @param ctx — failure context (code, message, metrics)
 * @param rules — rule set to evaluate (defaults to built-in rules)
 * @returns FailureAnalysis from the first matching rule, undefined when no
 *          rule matches (fail-closed)
 */
export function analyzeFailure(
  ctx: FailureContext,
  rules: FailureRule[] = DEFAULT_FAILURE_RULES,
): FailureAnalysis | undefined {
  const firstMatch = rules.find((candidate) => candidate.matches(ctx));
  return firstMatch?.analyze(ctx);
}
|
||||
|
||||
/**
 * Convenience wrapper: build a FailureContext from a diagnostic-like object,
 * analyze it, and surface the result as an `analysis` property.
 *
 * The input is never mutated. When a rule matches, a shallow copy carrying
 * `analysis` is returned; when nothing matches, the original object itself
 * is returned unchanged.
 *
 * @param diagnostic — any object with code, message, and optional metrics / category
 *                     (missing metrics are treated as an empty record)
 * @param rules — rule set to evaluate
 * @returns a copy with `analysis` added when a rule matches, otherwise the input object
 */
export function attachFailureAnalysis<T extends { code: string; message: string; metrics?: Record<string, number>; category?: string }>(
  diagnostic: T,
  rules: FailureRule[] = DEFAULT_FAILURE_RULES,
): T & { analysis?: FailureAnalysis } {
  const { code, message, metrics, category } = diagnostic;
  const ctx: FailureContext = { code, message, metrics: metrics ?? {}, category };

  const analysis = analyzeFailure(ctx, rules);
  return analysis ? { ...diagnostic, analysis } : diagnostic;
}
|
||||
@@ -0,0 +1,120 @@
|
||||
/**
|
||||
* Human-readable reporter output.
|
||||
*
|
||||
* Turns diagnostics, traces, and shrink results into plain text
|
||||
* suitable for terminal reading.
|
||||
*/
|
||||
|
||||
import type { Diagnostic } from './diagnostics.js';
|
||||
import type { TraceEvent } from './traces.js';
|
||||
import type { ShrinkResult } from './shrink.js';
|
||||
|
||||
/**
 * Options for human formatting.
 * Injected so callers control verbosity of the rendered report.
 */
export interface HumanFormatOptions {
  /** Show trace events after each diagnostic. */
  showTraces?: boolean;
  /** Show the shrink summary when one is available for the diagnostic's clause. */
  showShrink?: boolean;
  /** Max related facts to print per diagnostic (renderDiagnostic uses 5 when omitted). */
  maxRelated?: number;
}
|
||||
|
||||
/**
 * Render a list of diagnostics into a single newline-joined, human-readable string.
 *
 * For each diagnostic, in order:
 *   1. the diagnostic itself (via renderDiagnostic),
 *   2. its shrink summary, when `opts.showShrink` is set and `shrinkResults`
 *      has an entry for the diagnostic's clauseId,
 *   3. a trace listing, when `opts.showTraces` is set, the diagnostic has a
 *      traceRef, and at least one trace event matches.
 *
 * @param diagnostics - diagnostics to render, in output order
 * @param traces - pool of trace events to search for each diagnostic
 * @param shrinkResults - shrink results keyed by clauseId
 * @param opts - verbosity switches
 * @returns the rendered report
 */
export function renderHumanReport(
  diagnostics: Diagnostic[],
  traces: readonly TraceEvent[],
  shrinkResults: Map<string, ShrinkResult>,
  opts: HumanFormatOptions = {},
): string {
  const out: string[] = [];

  for (const diag of diagnostics) {
    out.push(renderDiagnostic(diag, opts));

    const wantsShrink = opts.showShrink && diag.clauseId && shrinkResults.has(diag.clauseId);
    if (wantsShrink) {
      out.push(renderShrink(shrinkResults.get(diag.clauseId as string) as ShrinkResult));
    }

    // NOTE(review): diagnostics without a traceRef never list traces, even if
    // a trace event points at them through refs.diagnosticId — confirm intended.
    if (!opts.showTraces || !diag.traceRef) {
      continue;
    }

    const matching = traces.filter(
      (event) =>
        event.traceEventId === diag.traceRef || event.refs.diagnosticId === diag.diagnosticId,
    );
    if (matching.length === 0) {
      continue;
    }

    out.push(' trace:');
    for (const event of matching) {
      out.push(` ${event.phase} at ${event.at}`);
    }
  }

  return out.join('\n');
}
|
||||
|
||||
/**
 * Render a single diagnostic in human form.
 *
 * Output lines, in order: a severity glyph plus message, the code, the start
 * position (when present), up to `maxRelated` related facts with an
 * "… and N more" overflow line, all fix hints, and the suggested fix
 * (when present). Lines are joined with '\n'.
 *
 * @param d - diagnostic to render
 * @param opts - formatting options; only `maxRelated` is read here. It is
 *               normalized to a non-negative integer: negative values are
 *               treated as 0, fractions are rounded down, and NaN falls back
 *               to the default of 5.
 * @returns the rendered multi-line string
 */
export function renderDiagnostic(
  d: Diagnostic,
  opts: HumanFormatOptions = {},
): string {
  const lines: string[] = [];
  const prefix = d.severity === 'error' ? '✖' : d.severity === 'warning' ? '⚠' : 'ℹ';
  lines.push(`${prefix} ${d.message}`);
  lines.push(` ${d.code}`);

  if (d.position) {
    lines.push(
      ` at line ${d.position.start.line}, column ${d.position.start.column}`,
    );
  }

  // A raw negative limit would reach slice(0, -n), which drops only the
  // trailing n items and inflates the "more" count; a fractional limit would
  // print a fractional count. Normalize before using it for both.
  const requestedMax = opts.maxRelated ?? 5;
  const maxRelated = Number.isNaN(requestedMax) ? 5 : Math.max(0, Math.floor(requestedMax));

  if (d.related.length > 0) {
    lines.push(' related:');
    for (const r of d.related.slice(0, maxRelated)) {
      lines.push(` • ${r.message}`);
    }
    if (d.related.length > maxRelated) {
      lines.push(` … and ${d.related.length - maxRelated} more`);
    }
  }

  if (d.fixHints.length > 0) {
    lines.push(' hints:');
    for (const h of d.fixHints) {
      lines.push(` → ${h}`);
    }
  }

  if (d.suggestedFix) {
    lines.push(' suggested fix:');
    lines.push(` action: ${d.suggestedFix.action}`);
    lines.push(` target: ${d.suggestedFix.target}`);
    lines.push(` value: ${d.suggestedFix.value}`);
    lines.push(` rationale: ${d.suggestedFix.rationale}`);
  }

  return lines.join('\n');
}
|
||||
|
||||
/**
 * Render a shrink result summary.
 *
 * Always starts with a " shrink:" header. A reduced result lists the axes
 * (comma-separated) and the step count; otherwise a single
 * "no reduction possible" line follows.
 *
 * @param result - shrink outcome to summarize
 * @returns the summary lines joined with '\n'
 */
export function renderShrink(result: ShrinkResult): string {
  const detail = result.reduced
    ? [` reduced across: ${result.axes.join(', ')}`, ` steps: ${result.steps}`]
    : [' no reduction possible'];
  return [' shrink:', ...detail].join('\n');
}
|
||||
@@ -0,0 +1,133 @@
|
||||
/**
|
||||
* imhotep-reporter
|
||||
*
|
||||
* Diagnostics, traceability, and witness shrinking for Imhotep.
|
||||
* Makes failures explainable with rich diagnostics, trace chains,
|
||||
* and minimal failing witnesses.
|
||||
*/
|
||||
|
||||
// Trace event model
|
||||
export {
|
||||
createTraceBuilder,
|
||||
findClauseTraces,
|
||||
traceChainForProof,
|
||||
} from './traces.js';
|
||||
export type {
|
||||
TracePhase,
|
||||
TraceRefs,
|
||||
TraceEvent,
|
||||
TraceBuilder,
|
||||
} from './traces.js';
|
||||
|
||||
// Diagnostic objects and formatting
|
||||
export {
|
||||
makeDiagnostic,
|
||||
formatDiagnosticCompact,
|
||||
formatDiagnosticFull,
|
||||
diagnosticFromProof,
|
||||
} from './diagnostics.js';
|
||||
export type {
|
||||
Severity,
|
||||
DiagnosticCategory,
|
||||
Position,
|
||||
RelatedFact,
|
||||
FixHint,
|
||||
Diagnostic,
|
||||
DiagnosticBlueprint,
|
||||
ProofLike,
|
||||
PropertyDiagnostic,
|
||||
PropertyDiagnosticBlueprint,
|
||||
SourceRef,
|
||||
SuggestedFix,
|
||||
} from './diagnostics.js';
|
||||
export {
|
||||
makePropertyDiagnostic,
|
||||
buildRelationDiagnostic,
|
||||
} from './diagnostics.js';
|
||||
export type {
|
||||
RelationProof,
|
||||
} from './diagnostics.js';
|
||||
|
||||
// Diagnostic code registry
|
||||
export {
|
||||
lookupCode,
|
||||
listCodes,
|
||||
formatMessage,
|
||||
} from './codes.js';
|
||||
export type {
|
||||
CodeEntry,
|
||||
Severity as CodeSeverity,
|
||||
Category as CodeCategory,
|
||||
} from './codes.js';
|
||||
|
||||
// Witness shrinking
|
||||
export { shrinkWitness } from './shrink.js';
|
||||
export type {
|
||||
Witness,
|
||||
ShrinkResult,
|
||||
ShrinkAxis,
|
||||
StillFails,
|
||||
} from './shrink.js';
|
||||
|
||||
// Property-run diagnostics and replay (V1.1)
|
||||
export {
|
||||
buildPropertyDiagnostics,
|
||||
buildEnumeratedPropertyDiagnostics,
|
||||
buildShrunkPropertyDiagnostics,
|
||||
} from './property-diagnostics.js';
|
||||
|
||||
export {
|
||||
buildReplayMetadata,
|
||||
buildReplayScript,
|
||||
buildReplayFromResult,
|
||||
replay,
|
||||
} from './replay.js';
|
||||
export type {
|
||||
ReplayMetadata,
|
||||
ReplayScript,
|
||||
RunResult,
|
||||
ReplayExecutor,
|
||||
} from './replay.js';
|
||||
|
||||
// Human-readable reporter
|
||||
export { renderHumanReport, renderDiagnostic, renderShrink } from './human.js';
|
||||
export type { HumanFormatOptions } from './human.js';
|
||||
|
||||
// JSON reporter
|
||||
export { renderJsonReport, buildJsonReport } from './json.js';
|
||||
export type {
|
||||
JsonFormatOptions,
|
||||
JsonReport,
|
||||
JsonDiagnostic,
|
||||
} from './json.js';
|
||||
|
||||
// Suggestion engine
|
||||
export {
|
||||
createDefaultSuggestionRegistry,
|
||||
attachSuggestions,
|
||||
} from './suggestions.js';
|
||||
export type { Suggestion, SuggestionRegistry } from './suggestions.js';
|
||||
|
||||
// Canonical failure formatter (Workstream J)
|
||||
export {
|
||||
formatCanonical,
|
||||
renderCanonicalText,
|
||||
renderCanonicalJson,
|
||||
} from './canonical-formatter.js';
|
||||
export type {
|
||||
CanonicalFailureEntry,
|
||||
CanonicalReport,
|
||||
CanonicalFormatOptions,
|
||||
} from './canonical-formatter.js';
|
||||
|
||||
// Failure analyzer (Workstream K)
|
||||
export {
|
||||
analyzeFailure,
|
||||
attachFailureAnalysis,
|
||||
DEFAULT_FAILURE_RULES,
|
||||
} from './failure-analyzer.js';
|
||||
export type {
|
||||
FailureAnalysis,
|
||||
FailureContext,
|
||||
FailureRule,
|
||||
} from './failure-analyzer.js';
|
||||
@@ -0,0 +1,132 @@
|
||||
/**
|
||||
* JSON reporter output.
|
||||
*
|
||||
* Produces a structured JSON representation of the full evaluation
|
||||
* result for CI systems, editors, and other tooling.
|
||||
*/
|
||||
|
||||
import type { Diagnostic, SourceRef, SuggestedFix } from './diagnostics.js';
|
||||
import type { TraceEvent } from './traces.js';
|
||||
import type { ShrinkResult } from './shrink.js';
|
||||
|
||||
/**
 * Options for JSON formatting.
 */
export interface JsonFormatOptions {
  /**
   * Indent size. `null` means no pretty-printing; when omitted,
   * renderJsonReport indents by 2.
   */
  indent?: number | null;
  /** Include trace events in the output. */
  includeTraces?: boolean;
  /** Include shrink details in the output. */
  includeShrink?: boolean;
}
|
||||
|
||||
/**
 * The top-level JSON report shape.
 */
export interface JsonReport {
  /** Report format version; currently always 1. */
  version: 1;
  /** Diagnostic totals, overall and per severity. */
  summary: {
    totalDiagnostics: number;
    errorCount: number;
    warningCount: number;
    infoCount: number;
  };
  /** Serialized diagnostics, in input order. */
  diagnostics: JsonDiagnostic[];
  /** Trace events; set by buildJsonReport only when `includeTraces` is on. */
  traces?: TraceEvent[];
  /**
   * Shrink results keyed by the entries of the input map; set by
   * buildJsonReport only when `includeShrink` is on and the map is non-empty.
   */
  shrinkResults?: Record<string, ShrinkResult>;
}
|
||||
|
||||
/**
 * A diagnostic serialized for JSON.
 *
 * Field-for-field projection of a Diagnostic; `severity` and `category` are
 * typed as plain strings here.
 */
export interface JsonDiagnostic {
  // Identity and classification
  diagnosticId: string;
  code: string;
  severity: string;
  category: string;
  message: string;

  // Location
  position?: Diagnostic['position'];
  source?: string;

  // Cross-references into the evaluation
  clauseId?: string;
  sceneId?: string;
  snapshotId?: string;
  envCaseId?: string;

  // Supporting detail
  related: Array<{ code?: string; message: string; position?: Diagnostic['position'] }>;
  traceRef?: string;
  fixHints: string[];
  metrics: Record<string, number>;
  sourceRef: SourceRef;
  suggestedFix?: SuggestedFix;
}
|
||||
|
||||
/**
 * Render the complete report as a JSON string.
 *
 * Builds the report object with buildJsonReport and stringifies it.
 * Indentation: `opts.indent === null` produces compact output, an omitted
 * indent defaults to 2, any other value is passed through to JSON.stringify.
 *
 * @param diagnostics - diagnostics to include
 * @param traces - trace events (included only when requested via opts)
 * @param shrinkResults - shrink results keyed by clause id
 * @param opts - formatting and inclusion options
 * @returns the serialized report
 */
export function renderJsonReport(
  diagnostics: Diagnostic[],
  traces: readonly TraceEvent[],
  shrinkResults: Map<string, ShrinkResult>,
  opts: JsonFormatOptions = {},
): string {
  const payload = buildJsonReport(diagnostics, traces, shrinkResults, opts);

  let spacing: number | undefined;
  if (opts.indent !== null) {
    spacing = opts.indent ?? 2;
  }

  return JSON.stringify(payload, null, spacing);
}
|
||||
|
||||
/**
 * Build the report object without stringifying.
 *
 * The summary counts diagnostics overall and per severity ('error',
 * 'warning', 'info'). `traces` is attached (as a copy) only when
 * `opts.includeTraces` is set; `shrinkResults` is attached only when
 * `opts.includeShrink` is set and the map has at least one entry.
 *
 * @param diagnostics - diagnostics to serialize, order preserved
 * @param traces - trace events to optionally include
 * @param shrinkResults - shrink results to optionally include
 * @param opts - inclusion options
 * @returns the structured report
 */
export function buildJsonReport(
  diagnostics: Diagnostic[],
  traces: readonly TraceEvent[],
  shrinkResults: Map<string, ShrinkResult>,
  opts: JsonFormatOptions = {},
): JsonReport {
  const countWithSeverity = (severity: string): number =>
    diagnostics.reduce((total, d) => (d.severity === severity ? total + 1 : total), 0);

  const report: JsonReport = {
    version: 1,
    summary: {
      totalDiagnostics: diagnostics.length,
      errorCount: countWithSeverity('error'),
      warningCount: countWithSeverity('warning'),
      infoCount: countWithSeverity('info'),
    },
    diagnostics: diagnostics.map((d) => diagnosticToJson(d)),
  };

  if (opts.includeTraces) {
    report.traces = traces.slice();
  }

  const hasShrinkData = shrinkResults.size > 0;
  if (opts.includeShrink && hasShrinkData) {
    report.shrinkResults = Object.fromEntries(shrinkResults);
  }

  return report;
}
|
||||
|
||||
/**
 * Project a Diagnostic onto the JsonDiagnostic shape.
 *
 * Copies exactly the listed fields (by reference, no deep clone) in a fixed
 * key order; any other properties on the input are left out.
 */
function diagnosticToJson(d: Diagnostic): JsonDiagnostic {
  const {
    diagnosticId,
    code,
    severity,
    category,
    message,
    position,
    source,
    clauseId,
    sceneId,
    snapshotId,
    envCaseId,
    related,
    traceRef,
    fixHints,
    metrics,
    sourceRef,
    suggestedFix,
  } = d;

  return {
    diagnosticId,
    code,
    severity,
    category,
    message,
    position,
    source,
    clauseId,
    sceneId,
    snapshotId,
    envCaseId,
    related,
    traceRef,
    fixHints,
    metrics,
    sourceRef,
    suggestedFix,
  };
}
|
||||
@@ -0,0 +1,247 @@
|
||||
/**
|
||||
* Tests for property-run diagnostics and render-input shrinking.
|
||||
*
|
||||
* Verifies that sampled and enumerated results produce rich diagnostics
|
||||
* with replay metadata, and that shrinking works across render-input axes.
|
||||
*/
|
||||
|
||||
import { describe, it } from 'node:test';
|
||||
import assert from 'node:assert';
|
||||
|
||||
import type { SampledRunResult, EnumeratedRunResult } from 'imhotep-core/property-results';
|
||||
import {
|
||||
buildPropertyDiagnostics,
|
||||
buildEnumeratedPropertyDiagnostics,
|
||||
buildShrunkPropertyDiagnostics,
|
||||
} from './property-diagnostics.js';
|
||||
import { shrinkWitness, type Witness } from './shrink.js';
|
||||
|
||||
// Monotonic counter backing fakeId; shared by every test in this file.
let _id = 0;

/** Deterministic id generator for tests: returns id_1, id_2, … on successive calls. */
function fakeId(): string {
  _id += 1;
  return `id_${_id}`;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Property Diagnostics Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('property diagnostics', () => {
  // Every builder call below uses the same deterministic id generator.
  const builderOptions = { idGen: fakeId };

  it('builds pass diagnostic for sampled run', () => {
    const passingRun: SampledRunResult = {
      mode: 'sampled',
      seed: 42,
      numRuns: 100,
      passed: true,
      diagnostics: [],
    };

    const built = buildPropertyDiagnostics(passingRun, builderOptions);

    assert.strictEqual(built.length, 1);
    const [only] = built;
    assert.strictEqual(only.code, 'IMH_PROPERTY_PASSED');
    assert.strictEqual(only.seed, 42);
    assert.strictEqual(only.runResult, 'pass');
  });

  it('builds failure diagnostic with counterexample', () => {
    const failingRun: SampledRunResult = {
      mode: 'sampled',
      seed: 123,
      numRuns: 50,
      passed: false,
      counterexample: { size: 'sm', disabled: true },
      shrunkCounterexample: { size: 'sm', disabled: true },
      diagnostics: [],
    };

    const built = buildPropertyDiagnostics(failingRun, builderOptions);
    assert.strictEqual(built.length, 2); // primary + replay

    const primary = built.find((entry) => entry.code === 'IMH_PROPERTY_FAILED');
    assert.ok(primary);
    assert.strictEqual(primary!.seed, 123);
    assert.strictEqual(primary!.runResult, 'fail');
    assert.deepStrictEqual(primary!.counterexample, { size: 'sm', disabled: true });
    assert.ok(primary!.message.includes('seed 123'));
  });

  it('builds enumerated pass diagnostic', () => {
    const passingRun: EnumeratedRunResult = {
      mode: 'enumerated-determinate',
      totalCases: 6,
      passed: true,
      diagnostics: [],
    };

    const built = buildEnumeratedPropertyDiagnostics(passingRun, builderOptions);

    assert.strictEqual(built.length, 1);
    const [only] = built;
    assert.strictEqual(only.code, 'IMH_ENUMERATED_PASSED');
    assert.strictEqual(only.runResult, 'pass');
  });

  it('builds enumerated failure diagnostic with failing case', () => {
    const failingRun: EnumeratedRunResult = {
      mode: 'enumerated-determinate',
      totalCases: 6,
      passed: false,
      failingCase: { size: 'lg' },
      diagnostics: [],
    };

    const built = buildEnumeratedPropertyDiagnostics(failingRun, builderOptions);

    assert.strictEqual(built.length, 1);
    const [only] = built;
    assert.strictEqual(only.code, 'IMH_ENUMERATED_FAILED');
    assert.strictEqual(only.runResult, 'fail');
    assert.deepStrictEqual(only.inputSnapshot, { size: 'lg' });
    assert.ok(only.message.includes('lg'));
  });

  it('builds shrunk diagnostic with shrunk input', () => {
    const failingRun: SampledRunResult = {
      mode: 'sampled',
      seed: 7,
      numRuns: 100,
      passed: false,
      counterexample: { size: 'sm', disabled: true, label: 'hello world' },
      diagnostics: [],
    };
    const shrunkInput = { size: 'sm', disabled: true };

    const built = buildShrunkPropertyDiagnostics(failingRun, shrunkInput, builderOptions);

    const shrunkDiag = built.find((entry) => entry.code === 'IMH_PROPERTY_SHRUNK');
    assert.ok(shrunkDiag);
    assert.deepStrictEqual(shrunkDiag!.shrunkCounterexample, shrunkInput);
    assert.ok(shrunkDiag!.message.includes('sm'));
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Render-Input Shrink Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('shrink across render inputs', () => {
  /**
   * Fresh failing witness with one env case, snapshot, subject and fact,
   * extended with the render-input axis the individual test exercises.
   */
  const makeWitness = (extra: Partial<Witness>): Witness => ({
    proof: {
      proofId: 'p1',
      clauseId: 'c1',
      outcome: 'fail',
      truth: 'determinate',
    },
    envCases: ['env_1'],
    snapshots: ['default'],
    subjects: [1],
    facts: [100],
    ...extra,
  });

  it('shrinks render props when redundant', () => {
    const witness = makeWitness({
      renderProps: { size: 'sm', disabled: true, variant: 'primary' },
    });

    // Only 'disabled' matters for failure
    const stillFails = (w: Witness) => w.renderProps?.disabled === true;

    const result = shrinkWitness(witness, stillFails);

    assert.strictEqual(result.reduced, true);
    assert.ok(result.axes.includes('prop'));
    assert.deepStrictEqual(result.witness.renderProps, { disabled: true });
  });

  it('shrinks render args independently', () => {
    const witness = makeWitness({
      renderArgs: { label: 'ok', icon: 'check', tone: 'positive' },
    });

    const stillFails = (w: Witness) => w.renderArgs?.icon === 'check';

    const result = shrinkWitness(witness, stillFails);

    assert.strictEqual(result.reduced, true);
    assert.ok(result.axes.includes('arg'));
    assert.deepStrictEqual(result.witness.renderArgs, { icon: 'check' });
  });

  it('shrinks query params', () => {
    const witness = makeWitness({
      queryParams: { theme: 'dark', debug: '1', version: '2' },
    });

    const stillFails = (w: Witness) => w.queryParams?.theme === 'dark';

    const result = shrinkWitness(witness, stillFails);

    assert.strictEqual(result.reduced, true);
    assert.ok(result.axes.includes('query-param'));
    assert.deepStrictEqual(result.witness.queryParams, { theme: 'dark' });
  });

  it('shrinks fixture inputs', () => {
    const witness = makeWitness({
      fixtureInputs: { count: 5, layout: 'grid', animated: true },
    });

    const stillFails = (w: Witness) => w.fixtureInputs?.layout === 'grid';

    const result = shrinkWitness(witness, stillFails);

    assert.strictEqual(result.reduced, true);
    assert.ok(result.axes.includes('fixture-input'));
    assert.deepStrictEqual(result.witness.fixtureInputs, { layout: 'grid' });
  });

  it('does not shrink when all render props are required', () => {
    const witness = makeWitness({ renderProps: { a: 1, b: 2 } });

    // All axes are required: removing anything causes failure to disappear
    const stillFails = (w: Witness) =>
      w.envCases.length === 1 &&
      w.snapshots.length === 1 &&
      w.subjects.length === 1 &&
      w.facts.length === 1 &&
      Object.keys(w.renderProps ?? {}).length === 2;

    const result = shrinkWitness(witness, stillFails);

    assert.strictEqual(result.reduced, false);
    assert.deepStrictEqual(result.witness.renderProps, { a: 1, b: 2 });
  });
});
|
||||
@@ -0,0 +1,151 @@
|
||||
/**
|
||||
* Property-run diagnostics builder.
|
||||
*
|
||||
* Converts sampled and enumerated run results into rich diagnostics
|
||||
* with full replay metadata: seed, run index, input snapshot.
|
||||
*
|
||||
* These diagnostics are what Riley sees in the CI failure report.
|
||||
* Every seed, every shrink step, every counterexample is inspectable.
|
||||
*/
|
||||
|
||||
import type { SampledRunResult, EnumeratedRunResult } from 'imhotep-core/property-results'
|
||||
import {
|
||||
makePropertyDiagnostic,
|
||||
type PropertyDiagnostic,
|
||||
type PropertyDiagnosticBlueprint,
|
||||
} from './diagnostics.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Property Diagnostics Builder
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Options shared by the property-diagnostics builders.
 */
export interface PropertyDiagnosticsOptions {
  /** Id generator; the options object is forwarded to makePropertyDiagnostic. */
  idGen(): string;
  /**
   * Optional formatter for input snapshots (e.g., JSON.stringify).
   * When omitted, the builders fall back to JSON.stringify.
   */
  formatInput?(input: unknown): string;
}
|
||||
|
||||
/**
 * Build the PropertyDiagnostics for a SampledRunResult.
 *
 * - Passing run: a single IMH_PROPERTY_PASSED diagnostic.
 * - Failing run: exactly two diagnostics — a primary IMH_PROPERTY_FAILED
 *   (carrying the counterexample and shrunk counterexample, and, when a
 *   counterexample exists, its formatted text in the message plus an
 *   inputSnapshot), followed by an IMH_PROPERTY_REPLAY diagnostic with the
 *   seed and run count.
 *
 * @param result - sampled run outcome
 * @param options - id generator and optional input formatter
 * @returns the diagnostics, in emission order
 */
export function buildPropertyDiagnostics(
  result: SampledRunResult,
  options: PropertyDiagnosticsOptions,
): PropertyDiagnostic[] {
  if (result.passed) {
    return [
      makePropertyDiagnostic({
        code: 'IMH_PROPERTY_PASSED',
        category: 'contract-failure',
        message: `Property passed after ${result.numRuns} sampled runs (seed ${result.seed}).`,
        seed: result.seed,
        runResult: 'pass',
      }, options),
    ];
  }

  // Primary failure diagnostic
  let failureMessage = `Property failed after ${result.numRuns} sampled runs (seed ${result.seed}).`;
  const hasCounterexample = result.counterexample !== undefined;
  if (hasCounterexample) {
    const rendered = options.formatInput
      ? options.formatInput(result.counterexample)
      : JSON.stringify(result.counterexample);
    failureMessage += ` Counterexample: ${rendered}`;
  }

  const primary: PropertyDiagnosticBlueprint = {
    code: 'IMH_PROPERTY_FAILED',
    category: 'contract-failure',
    message: failureMessage,
    seed: result.seed,
    runResult: 'fail',
    counterexample: result.counterexample,
    shrunkCounterexample: result.shrunkCounterexample,
    // inputSnapshot is only present when there is a counterexample to snapshot.
    ...(hasCounterexample ? { inputSnapshot: result.counterexample } : {}),
  };
  const primaryDiagnostic = makePropertyDiagnostic(primary, options);

  // Replay metadata diagnostic (always emitted on failure)
  const replayDiagnostic = makePropertyDiagnostic({
    code: 'IMH_PROPERTY_REPLAY',
    category: 'internal-error',
    message: `Replay: seed=${result.seed}, numRuns=${result.numRuns}`,
    seed: result.seed,
    runResult: 'fail',
  }, options);

  return [primaryDiagnostic, replayDiagnostic];
}
|
||||
|
||||
/**
 * Build diagnostics from an EnumeratedRunResult.
 *
 * - Passing run: a single IMH_ENUMERATED_PASSED diagnostic.
 * - Failing run: a single IMH_ENUMERATED_FAILED diagnostic whose message
 *   embeds the formatted failing case (or "unknown" when none was recorded)
 *   and the total case count, and whose inputSnapshot is the failing case.
 *
 * @param result - enumerated run outcome
 * @param options - id generator and optional input formatter
 * @returns a one-element array with the diagnostic
 */
export function buildEnumeratedPropertyDiagnostics(
  result: EnumeratedRunResult,
  options: PropertyDiagnosticsOptions,
): PropertyDiagnostic[] {
  if (result.passed) {
    return [
      makePropertyDiagnostic({
        code: 'IMH_ENUMERATED_PASSED',
        category: 'contract-failure',
        message: `Enumerated property passed all ${result.totalCases} cases.`,
        runResult: 'pass',
      }, options),
    ];
  }

  let failingCaseStr = 'unknown';
  if (result.failingCase !== undefined) {
    failingCaseStr = options.formatInput
      ? options.formatInput(result.failingCase)
      : JSON.stringify(result.failingCase);
  }

  return [
    makePropertyDiagnostic({
      code: 'IMH_ENUMERATED_FAILED',
      category: 'contract-failure',
      message: `Enumerated property failed at case ${failingCaseStr} out of ${result.totalCases}.`,
      runResult: 'fail',
      inputSnapshot: result.failingCase,
    }, options),
  ];
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Shrink-Aware Diagnostics
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Build diagnostics that include shrink results.
 *
 * Produces everything buildPropertyDiagnostics emits for `result`, followed
 * by one IMH_PROPERTY_SHRUNK diagnostic describing `shrunkInput` — the
 * smallest still-failing input found by shrinking.
 *
 * NOTE(review): the shrunk diagnostic is appended with runResult 'fail'
 * regardless of `result.passed` — confirm callers only pass failing runs.
 *
 * @param result - sampled run outcome
 * @param shrunkInput - shrunk counterexample to report
 * @param options - id generator and optional input formatter
 * @returns base diagnostics plus the trailing shrunk diagnostic
 */
export function buildShrunkPropertyDiagnostics(
  result: SampledRunResult,
  shrunkInput: unknown,
  options: PropertyDiagnosticsOptions,
): PropertyDiagnostic[] {
  const baseDiagnostics = buildPropertyDiagnostics(result, options);

  let rendered: string;
  if (options.formatInput) {
    rendered = options.formatInput(shrunkInput);
  } else {
    rendered = JSON.stringify(shrunkInput);
  }

  const shrinkDiagnostic = makePropertyDiagnostic({
    code: 'IMH_PROPERTY_SHRUNK',
    category: 'contract-failure',
    message: `Shrunk counterexample: ${rendered}`,
    seed: result.seed,
    runResult: 'fail',
    shrunkCounterexample: shrunkInput,
  }, options);

  return baseDiagnostics.concat([shrinkDiagnostic]);
}
|
||||
@@ -0,0 +1,148 @@
|
||||
/**
|
||||
* Tests for executable replay.
|
||||
*
|
||||
* Verifies that replay metadata can be fed back into a replay executor
|
||||
* and reproduce the same failure deterministically.
|
||||
*/
|
||||
|
||||
import { describe, it } from 'node:test';
|
||||
import assert from 'node:assert';
|
||||
import {
|
||||
replay,
|
||||
buildReplayMetadata,
|
||||
type ReplayMetadata,
|
||||
type ReplayExecutor,
|
||||
type RunResult,
|
||||
} from './replay.js';
|
||||
import { makeDiagnostic, type Diagnostic, type ProofLike } from './diagnostics.js';
|
||||
|
||||
let _id = 0;

/**
 * Deterministic id generator for the tests in this file.
 *
 * Each call bumps the module-level counter and returns `id_<n>`,
 * starting at `id_1`.
 */
function fakeId(): string {
  _id += 1;
  return 'id_' + String(_id);
}
|
||||
|
||||
describe('replay', () => {
  // Shared failure predicate: a result counts as failing when its
  // `status` field is the string 'fail'.
  const statusIsFail: ReplayExecutor['isFailure'] = (r) =>
    (r as { status?: unknown }).status === 'fail';

  // Shared diagnostic factory for executors that report replayed failures.
  const toReplayFailDiagnostic: ReplayExecutor['toDiagnostic'] = (_r, proof) =>
    makeDiagnostic(
      {
        code: 'IMH_REPLAY_FAIL',
        category: 'contract-failure',
        message: `Replayed failure for clause ${proof.clauseId}`,
      },
      { idGen: fakeId },
    );

  it('replays a failing run and produces the same failure', async () => {
    const failingProof: ProofLike = {
      proofId: 'p1',
      clauseId: 'c1',
      outcome: 'fail',
      truth: 'determinate',
      failedPredicate: { op: '>=', left: 10, right: 24 },
      witness: { subjectId: 0, referenceId: 1 },
    };
    const failingResult = { status: 'fail', clauseId: 'c1' };

    const metadata: ReplayMetadata = {
      version: 1,
      seed: 42,
      numRuns: 100,
      sceneTarget: { kind: 'fixture', fixtureId: 'test.html' },
      inputDomain: { mode: 'enumerated', values: [] },
      counterexample: { size: 'sm' },
      timestamp: new Date().toISOString(),
    };

    const executor: ReplayExecutor = {
      buildWorld: async () => ({ sceneId: 'scene' }),
      buildClauses: () => [{ clauseId: 'c1' }],
      evaluate: () => ({ results: [failingResult], proofs: [failingProof] }),
      isFailure: statusIsFail,
      toDiagnostic: toReplayFailDiagnostic,
    };

    const outcome: RunResult = await replay(metadata, executor);

    // The failure must surface with the original seed, one diagnostic
    // and the failing proof intact.
    assert.strictEqual(outcome.passed, false);
    assert.strictEqual(outcome.seed, 42);
    assert.strictEqual(outcome.diagnostics.length, 1);
    assert.strictEqual(outcome.proofs.length, 1);
    assert.strictEqual(outcome.proofs[0].outcome, 'fail');
    assert.ok(
      outcome.diagnostics[0].message.includes('Replayed failure'),
      'diagnostic should mention replayed failure',
    );
  });

  it('replays a passing run and reports pass', async () => {
    const passingProof: ProofLike = {
      proofId: 'p2',
      clauseId: 'c2',
      outcome: 'pass',
      truth: 'determinate',
    };

    const metadata: ReplayMetadata = {
      version: 1,
      seed: 7,
      numRuns: 50,
      sceneTarget: { kind: 'fixture', fixtureId: 'pass.html' },
      inputDomain: { mode: 'enumerated', values: [] },
      timestamp: new Date().toISOString(),
    };

    const executor: ReplayExecutor = {
      buildWorld: async () => ({ sceneId: 'scene' }),
      buildClauses: () => [{ clauseId: 'c2' }],
      evaluate: () => ({
        results: [{ status: 'pass', clauseId: 'c2' }],
        proofs: [passingProof],
      }),
      isFailure: statusIsFail,
      toDiagnostic: toReplayFailDiagnostic,
    };

    const outcome = await replay(metadata, executor);

    // No failing result means no diagnostics and a passing run.
    assert.strictEqual(outcome.passed, true);
    assert.strictEqual(outcome.diagnostics.length, 0);
    assert.strictEqual(outcome.proofs[0].outcome, 'pass');
  });

  it('uses the same seed for deterministic reconstruction', async () => {
    const metadata: ReplayMetadata = {
      version: 1,
      seed: 99,
      numRuns: 10,
      sceneTarget: { kind: 'fixture', fixtureId: 'seed.html' },
      inputDomain: { mode: 'generated', arbitrary: null, seed: 99, numRuns: 10 },
      counterexample: { label: 'x' },
      timestamp: new Date().toISOString(),
    };

    // An executor that evaluates nothing: only the seed round-trip matters here.
    const executor: ReplayExecutor = {
      buildWorld: async () => ({}),
      buildClauses: () => [],
      evaluate: () => ({ results: [], proofs: [] }),
      isFailure: () => false,
      toDiagnostic: () =>
        makeDiagnostic(
          { code: 'IMH_TEST', category: 'internal-error', message: 'test' },
          { idGen: fakeId },
        ),
    };

    const outcome = await replay(metadata, executor);
    assert.strictEqual(outcome.seed, 99);
  });
});
|
||||
@@ -0,0 +1,218 @@
|
||||
/**
|
||||
* Replay metadata builder for sampled property runs.
|
||||
*
|
||||
* Given a SampledRunResult, produces a replay script or metadata object
|
||||
* that can reproduce the exact run. Riley needs this for 2 AM debugging:
|
||||
* she passes the seed to the runner and watches the identical failure.
|
||||
*
|
||||
* The replay payload includes everything needed to reconstruct the run:
|
||||
* seed, numRuns, renderer, component, input domain.
|
||||
*/
|
||||
|
||||
import type { SceneTarget, RenderCase } from 'imhotep-core/scene-target'
|
||||
import type { InputDomain } from 'imhotep-core/property-contracts'
|
||||
import type { SampledRunResult } from 'imhotep-core/property-results'
|
||||
import { setDefaultContext, createDeterministicContext } from 'imhotep-core'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Replay Metadata
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Serializable description of a sampled property run, holding the values
 * needed to run it again.
 */
export interface ReplayMetadata {
  /** Schema version of this payload; currently always `1`. */
  version: 1;
  /** Seed of the original run. */
  seed: number;
  /** Number of runs requested in the original run. */
  numRuns: number;
  /** Scene the property was evaluated against. */
  sceneTarget: SceneTarget;
  /** Input domain the cases were drawn from. */
  inputDomain: InputDomain;
  /** Failing input, when the run produced one. */
  counterexample?: unknown;
  /** Shrunk failing input, when shrinking was performed. */
  shrunkCounterexample?: unknown;
  /** Identifier of the renderer adapter to use when reconstructing (optional). */
  rendererAdapterId?: string;
  /** ISO-8601 timestamp of the original run. */
  timestamp: string;
}
|
||||
|
||||
/**
 * Capture a sampled run as replay metadata.
 *
 * Copies the seed, run count and (shrunk) counterexample from `result`
 * and pairs them with the scene target and input domain supplied by the
 * caller.
 *
 * @param result - The sampled run to describe.
 * @param sceneTarget - Scene the run was evaluated against.
 * @param inputDomain - Input domain used by the run.
 * @param options - Optional renderer adapter id and timestamp override.
 *   When no timestamp is given, the current time is used (ISO string).
 * @returns A version-1 {@link ReplayMetadata} object.
 */
export function buildReplayMetadata(
  result: SampledRunResult,
  sceneTarget: SceneTarget,
  inputDomain: InputDomain,
  options?: {
    rendererAdapterId?: string;
    timestamp?: string;
  },
): ReplayMetadata {
  const metadata: ReplayMetadata = {
    version: 1,
    seed: result.seed,
    numRuns: result.numRuns,
    sceneTarget: sceneTarget,
    inputDomain: inputDomain,
    counterexample: result.counterexample,
    shrunkCounterexample: result.shrunkCounterexample,
    rendererAdapterId: options?.rendererAdapterId,
    // Fall back to "now" only when the caller did not pin a timestamp.
    timestamp: options?.timestamp ?? new Date().toISOString(),
  };
  return metadata;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Replay Script
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * A replay recipe in three forms: prose for humans, a command string,
 * and the underlying metadata for tools.
 */
export interface ReplayScript {
  /** Multi-line, human-readable summary of how to replay the run. */
  description: string;
  /** Command string for replaying the run (e.g. a shell command or JS snippet). */
  command: string;
  /** The metadata the script was built from, for machine consumption. */
  metadata: ReplayMetadata;
}
|
||||
|
||||
/**
 * Render a value as JSON without ever throwing.
 *
 * `JSON.stringify` throws on cyclic structures and BigInt values and
 * returns `undefined` for functions and symbols; in all of those cases
 * fall back to `String(value)` so the replay script can still be built.
 */
function stringifyCounterexample(value: unknown): string {
  try {
    return JSON.stringify(value) ?? String(value);
  } catch {
    return String(value);
  }
}

/**
 * Build a replay script from metadata.
 *
 * Returns a human-readable description, a command string and the
 * metadata itself.
 *
 * @param metadata - Replay metadata of the run to describe.
 * @param options - Optional overrides:
 *   - `runnerCommand`: command prefix (defaults to `imhotep replay`);
 *   - `formatInput`: custom renderer for the counterexample. It is only
 *     invoked when a counterexample is present.
 * @returns The description, the command (`<runner> --seed <n> --num-runs <n>`)
 *   and the metadata object that was passed in.
 */
export function buildReplayScript(
  metadata: ReplayMetadata,
  options?: {
    runnerCommand?: string;
    formatInput?(input: unknown): string;
  },
): ReplayScript {
  // Check for a missing counterexample first, so a caller-supplied
  // formatter is never handed `undefined`.
  let inputStr: string;
  if (metadata.counterexample === undefined) {
    inputStr = 'unknown';
  } else if (options?.formatInput) {
    inputStr = options.formatInput(metadata.counterexample);
  } else {
    inputStr = stringifyCounterexample(metadata.counterexample);
  }

  const description = [
    `Replay sampled property run:`,
    ` seed: ${metadata.seed}`,
    ` numRuns: ${metadata.numRuns}`,
    ` counterexample: ${inputStr}`,
    ` timestamp: ${metadata.timestamp}`,
  ].join('\n');

  const command = `${options?.runnerCommand ?? 'imhotep replay'} --seed ${metadata.seed} --num-runs ${metadata.numRuns}`;

  return {
    description,
    command,
    metadata,
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Replay from Result (Convenience)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * One-step convenience: turn a sampled run result straight into a replay
 * script.
 *
 * Builds the metadata with `buildReplayMetadata` (forwarding only the
 * renderer adapter id) and then renders it with `buildReplayScript`,
 * which receives the full options object.
 *
 * @param result - The sampled run to describe.
 * @param sceneTarget - Scene the run was evaluated against.
 * @param inputDomain - Input domain used by the run.
 * @param options - Optional renderer adapter id, runner command and input formatter.
 * @returns The replay script for the run.
 */
export function buildReplayFromResult(
  result: SampledRunResult,
  sceneTarget: SceneTarget,
  inputDomain: InputDomain,
  options?: {
    rendererAdapterId?: string;
    runnerCommand?: string;
    formatInput?(input: unknown): string;
  },
): ReplayScript {
  const metadataOptions = { rendererAdapterId: options?.rendererAdapterId };
  return buildReplayScript(
    buildReplayMetadata(result, sceneTarget, inputDomain, metadataOptions),
    options,
  );
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Executable Replay
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
import type { Diagnostic } from './diagnostics.js';
|
||||
import type { ProofLike } from './diagnostics.js';
|
||||
|
||||
/**
 * Outcome of one executed replay.
 */
export interface RunResult {
  /** `true` when no evaluated result was classified as a failure. */
  passed: boolean;
  /** Diagnostics produced for failing results during the replay. */
  diagnostics: Diagnostic[];
  /** Proofs returned by the evaluation step. */
  proofs: ProofLike[];
  /** Seed the replay ran with. */
  seed: number;
}
|
||||
|
||||
/**
 * Caller-supplied strategy object that performs the backend-specific
 * parts of a replay.
 *
 * The replay driver itself never mounts scenes or evaluates clauses; it
 * only sequences these hooks, which keeps it backend-agnostic and easy
 * to test with stubs.
 */
export interface ReplayExecutor {
  /** Mount the scene described by `metadata` and produce the world to evaluate. */
  buildWorld(metadata: ReplayMetadata): Promise<unknown>;
  /** Derive the clause descriptors to evaluate from `metadata`. */
  buildClauses(metadata: ReplayMetadata): unknown[];
  /** Evaluate `clauses` against `world`, returning results and their proofs. */
  evaluate(world: unknown, clauses: unknown[]): { results: unknown[]; proofs: ProofLike[] };
  /** Decide whether a single evaluation result is a failure. */
  isFailure(result: unknown): boolean;
  /** Turn a failing result and its proof into a diagnostic. */
  toDiagnostic(result: unknown, proof: ProofLike): Diagnostic;
}
|
||||
|
||||
/**
 * Re-execute a property run from its replay metadata.
 *
 * Installs a deterministic default context derived from the recorded
 * seed, then asks the executor to build the world, build the clauses and
 * evaluate them. Results and proofs are paired by index: every result
 * the executor classifies as a failure marks the run as failed, and a
 * diagnostic is emitted for it only when a proof exists at the same
 * index.
 *
 * Note: the default context set here is not restored afterwards.
 *
 * @param metadata - The replay metadata captured from the original run.
 * @param executor - Injected executor that knows how to mount and evaluate.
 * @returns Pass/fail flag, collected diagnostics, all proofs and the seed.
 */
export async function replay(
  metadata: ReplayMetadata,
  executor: ReplayExecutor,
): Promise<RunResult> {
  // Deterministic ids, clocks and rng, seeded as recorded in the metadata.
  setDefaultContext(createDeterministicContext(metadata.seed));

  const world = await executor.buildWorld(metadata);
  const clauses = executor.buildClauses(metadata);
  const evaluation = executor.evaluate(world, clauses);

  const diagnostics: Diagnostic[] = [];
  let failureSeen = false;

  for (const [index, outcome] of evaluation.results.entries()) {
    if (!executor.isFailure(outcome)) {
      continue;
    }
    failureSeen = true;

    // A failure without a matching proof still fails the run, but has
    // nothing to build a diagnostic from.
    const proof = evaluation.proofs[index];
    if (proof) {
      diagnostics.push(executor.toDiagnostic(outcome, proof));
    }
  }

  return {
    passed: !failureSeen,
    diagnostics,
    proofs: evaluation.proofs,
    seed: metadata.seed,
  };
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,177 @@
|
||||
/**
|
||||
* Tests for oracle-preserving witness shrinking.
|
||||
*
|
||||
* Verifies that shrinkWitness only accepts a reduced witness if the
|
||||
* injected oracle still reports failure. If the oracle returns false
|
||||
* (meaning the shrunk witness no longer reproduces the bug), the
|
||||
* shrink must be rejected and the original witness returned unchanged.
|
||||
*/
|
||||
|
||||
import { describe, it } from 'node:test';
|
||||
import assert from 'node:assert';
|
||||
import { shrinkWitness, type Witness, type ShrinkResult } from './shrink.js';
|
||||
|
||||
describe('shrink oracle preservation', () => {
  // Every scenario uses the same failing proof payload; hand out a fresh copy each time.
  const failingProof = (): Witness['proof'] => ({
    proofId: 'p1',
    clauseId: 'c1',
    outcome: 'fail',
    truth: 'determinate',
  });

  it('accepts a shrunk witness when the oracle still fails', () => {
    const witness: Witness = {
      proof: failingProof(),
      envCases: ['env_1', 'env_2', 'env_3'],
      snapshots: ['default'],
      subjects: [1, 2, 3],
      facts: [100, 101, 102],
    };

    // Failure needs only env_2 and subject 2 to be present.
    const oracle = (w: Witness) =>
      w.envCases.includes('env_2') && w.subjects.includes(2);

    const shrunk: ShrinkResult = shrinkWitness(witness, oracle);

    assert.strictEqual(shrunk.reduced, true);
    assert.deepStrictEqual(shrunk.witness.envCases, ['env_2']);
    assert.deepStrictEqual(shrunk.witness.subjects, [2]);
    assert.ok(shrunk.axes.includes('env-case'));
    assert.ok(shrunk.axes.includes('subject'));

    // The minimized witness has to keep failing.
    assert.strictEqual(
      oracle(shrunk.witness),
      true,
      'final shrunk witness must still fail the oracle',
    );
  });

  it('rejects the shrink when the oracle no longer fails', () => {
    const witness: Witness = {
      proof: failingProof(),
      envCases: ['env_1', 'env_2'],
      snapshots: ['default'],
      subjects: [1],
      facts: [100],
    };

    // Only the exact original witness reproduces the failure:
    // dropping any item from any axis makes it vanish.
    const oracle = (w: Witness) =>
      w.envCases.length === 2 &&
      w.envCases.includes('env_1') &&
      w.snapshots.length === 1 &&
      w.subjects.length === 1 &&
      w.facts.length === 1;

    const shrunk: ShrinkResult = shrinkWitness(witness, oracle);

    // Nothing is removable, so the result must report "not reduced"
    // and hand back a witness equal to the original on every axis.
    assert.strictEqual(shrunk.reduced, false);
    assert.deepStrictEqual(shrunk.witness.envCases, ['env_1', 'env_2']);
    assert.deepStrictEqual(shrunk.witness.snapshots, ['default']);
    assert.deepStrictEqual(shrunk.witness.subjects, [1]);
    assert.deepStrictEqual(shrunk.witness.facts, [100]);
    assert.deepStrictEqual(shrunk.axes, []);
    assert.strictEqual(shrunk.steps, 0);
  });

  it('preserves failure across render-prop shrinking', () => {
    const witness: Witness = {
      proof: failingProof(),
      envCases: ['env_1'],
      snapshots: ['default'],
      subjects: [1],
      facts: [100],
      renderProps: { size: 'lg', disabled: true, variant: 'primary' },
    };

    // The failure hinges solely on `disabled` being true.
    const oracle = (w: Witness) => w.renderProps?.disabled === true;

    const shrunk: ShrinkResult = shrinkWitness(witness, oracle);

    assert.strictEqual(shrunk.reduced, true);
    assert.ok(shrunk.axes.includes('prop'));
    assert.deepStrictEqual(shrunk.witness.renderProps, { disabled: true });
    assert.strictEqual(oracle(shrunk.witness), true);
  });

  it('rejects render-prop shrink if oracle returns false', () => {
    const witness: Witness = {
      proof: failingProof(),
      envCases: ['env_1'],
      snapshots: ['default'],
      subjects: [1],
      facts: [100],
      renderProps: { a: 1, b: 2 },
    };

    // Both props and every item on the other axes are needed to reproduce.
    const oracle = (w: Witness) =>
      Object.keys(w.renderProps ?? {}).length === 2 &&
      w.envCases.length === 1 &&
      w.snapshots.length === 1 &&
      w.subjects.length === 1 &&
      w.facts.length === 1;

    const shrunk: ShrinkResult = shrinkWitness(witness, oracle);

    assert.strictEqual(shrunk.reduced, false);
    assert.deepStrictEqual(shrunk.witness.renderProps, { a: 1, b: 2 });
    assert.strictEqual(oracle(shrunk.witness), true);
  });

  it('original failure → shrink → smaller witness → prove still fails', () => {
    const witness: Witness = {
      proof: failingProof(),
      envCases: ['desktop', 'tablet', 'mobile'],
      snapshots: ['default', 'hover', 'focus'],
      subjects: [10, 20, 30, 40],
      facts: [1, 2, 3, 4, 5],
    };

    // Reproduction requires desktop, subject 20 and fact 3.
    const oracle = (w: Witness) =>
      w.envCases.includes('desktop') &&
      w.subjects.includes(20) &&
      w.facts.includes(3);

    const shrunk: ShrinkResult = shrinkWitness(witness, oracle);

    assert.strictEqual(shrunk.reduced, true);
    assert.deepStrictEqual(shrunk.witness.envCases, ['desktop']);
    assert.deepStrictEqual(shrunk.witness.subjects, [20]);
    assert.deepStrictEqual(shrunk.witness.facts, [3]);
    assert.strictEqual(
      oracle(shrunk.witness),
      true,
      'shrunk witness must still reproduce the original failure',
    );
    assert.ok(shrunk.steps > 0);
  });
});
|
||||
@@ -0,0 +1,273 @@
|
||||
/**
|
||||
* Witness shrinking logic.
|
||||
*
|
||||
* Shrinking minimizes failing evidence across multiple axes:
|
||||
* - environment case
|
||||
* - state snapshot
|
||||
* - timeline sample set
|
||||
* - subject subset
|
||||
* - contributing facts
|
||||
* - clause group context
|
||||
*
|
||||
* Goal: produce the smallest still-failing witness that preserves
|
||||
* explanatory value. This is a diagnostic minimizer, not merely
|
||||
* a test minimizer.
|
||||
*/
|
||||
|
||||
import type { ProofLike } from './diagnostics.js';
|
||||
|
||||
/**
 * Failing evidence in shrinkable form: the proof plus the context lists
 * and render inputs the shrinker may try to reduce.
 *
 * The four render-input records were added in V1.1 and are all optional.
 */
export interface Witness {
  /** The proof this witness belongs to. */
  proof: ProofLike;
  /** Environment case identifiers. */
  envCases: string[];
  /** State snapshot identifiers. */
  snapshots: string[];
  /** Subject identifiers. */
  subjects: number[];
  /** Contributing fact identifiers. */
  facts: number[];
  /** Render props, shrunk key by key (V1.1). */
  renderProps?: Record<string, unknown>;
  /** Render args, shrunk key by key (V1.1). */
  renderArgs?: Record<string, unknown>;
  /** Query parameters, shrunk key by key (V1.1). */
  queryParams?: Record<string, unknown>;
  /** Fixture inputs, shrunk key by key (V1.1). */
  fixtureInputs?: Record<string, unknown>;
}
|
||||
|
||||
/**
 * What a shrink attempt produced.
 */
export interface ShrinkResult {
  /** `true` when the witness was reduced at all. */
  reduced: boolean;
  /** The minimized witness. */
  witness: Witness;
  /** The axes that were shrunk. */
  axes: ShrinkAxis[];
  /** Number of reduction steps that were applied. */
  steps: number;
}
|
||||
|
||||
/**
 * Name of a dimension along which a witness can be reduced.
 */
export type ShrinkAxis =
  // evidence-context axes
  | 'env-case'
  | 'snapshot'
  | 'subject'
  | 'fact'
  | 'timeline'
  | 'group-context'
  // render-input axes
  | 'prop'
  | 'arg'
  | 'query-param'
  | 'fixture-input';
|
||||
|
||||
/**
 * Oracle supplied by the caller of the shrinker.
 *
 * Receives a candidate witness and returns `true` when that candidate
 * still reproduces the failure.
 */
export type StillFails = (w: Witness) => boolean;
|
||||
|
||||
/**
 * Minimize a failing witness one axis at a time.
 *
 * Axes are processed in a fixed order: env cases, snapshots, subjects,
 * facts, then (when present) render props, render args, query params and
 * fixture inputs. For each axis the per-axis helper proposes smaller
 * candidates; a candidate is accepted only if `stillFails` returns `true`
 * for a copy of the current witness with that candidate substituted.
 *
 * After all axes have been processed, the final witness is checked
 * against the oracle once more. If it no longer fails, the whole shrink
 * is discarded and a copy of the original witness is returned.
 *
 * @param witness - The failing witness to reduce. It is not mutated.
 * @param stillFails - Oracle: `true` if a candidate still reproduces the failure.
 * @returns Whether anything was reduced, the resulting witness, the axes
 *   that shrank, and the sum of the step counts reported by the helpers
 *   for those axes.
 */
export function shrinkWitness(
  witness: Witness,
  stillFails: StillFails,
): ShrinkResult {
  const current = cloneWitness(witness);
  const axes: ShrinkAxis[] = [];
  let steps = 0;

  // Run one axis: shrink `value` with `shrinker`, testing each candidate
  // on a fresh clone of the current witness, and commit the result to
  // `current` only when the helper reports a reduction.
  function attempt<V>(
    axis: ShrinkAxis,
    value: V,
    shrinker: (
      input: V,
      keep: (candidate: V) => boolean,
    ) => { reduced: boolean; value: V; steps: number },
    assign: (target: Witness, next: V) => void,
  ): void {
    const outcome = shrinker(value, (candidate) => {
      const trial = cloneWitness(current);
      assign(trial, candidate);
      return stillFails(trial);
    });
    if (!outcome.reduced) {
      return;
    }
    assign(current, outcome.value);
    axes.push(axis);
    steps += outcome.steps;
  }

  // List-valued axes.
  attempt('env-case', current.envCases, shrinkArray, (w, next) => {
    w.envCases = next;
  });
  attempt('snapshot', current.snapshots, shrinkArray, (w, next) => {
    w.snapshots = next;
  });
  attempt('subject', current.subjects, shrinkArray, (w, next) => {
    w.subjects = next;
  });
  attempt('fact', current.facts, shrinkArray, (w, next) => {
    w.facts = next;
  });

  // Record-valued render-input axes (V1.1); skipped when absent.
  if (current.renderProps) {
    attempt('prop', current.renderProps, shrinkObject, (w, next) => {
      w.renderProps = next;
    });
  }
  if (current.renderArgs) {
    attempt('arg', current.renderArgs, shrinkObject, (w, next) => {
      w.renderArgs = next;
    });
  }
  if (current.queryParams) {
    attempt('query-param', current.queryParams, shrinkObject, (w, next) => {
      w.queryParams = next;
    });
  }
  if (current.fixtureInputs) {
    attempt('fixture-input', current.fixtureInputs, shrinkObject, (w, next) => {
      w.fixtureInputs = next;
    });
  }

  const reduced = axes.length > 0;

  // Final oracle validation: a reduced witness that no longer fails is
  // worthless, so fall back to an untouched copy of the original.
  if (reduced && !stillFails(current)) {
    return { reduced: false, witness: cloneWitness(witness), axes: [], steps: 0 };
  }

  return { reduced, witness: current, axes, steps };
}
|
||||
|
||||
/**
 * Single backwards pass that tries to drop each element of `arr`.
 *
 * Starting from the last index, a copy without that element is offered
 * to `predicate`; when the predicate accepts it, the element stays
 * removed for the rest of the pass. The input array is not mutated.
 *
 * @param arr - The list to reduce.
 * @param predicate - Returns `true` if the reduced list is still acceptable.
 * @returns `reduced` (whether anything was dropped), the resulting list,
 *   and `steps`, the number of candidates offered to the predicate.
 */
function shrinkArray<T>(
  arr: T[],
  predicate: (reduced: T[]) => boolean,
): { reduced: boolean; value: T[]; steps: number } {
  let kept = arr.slice();
  let steps = 0;

  // Walking right-to-left means a removal never shifts an index that is
  // still waiting to be visited.
  for (let idx = kept.length - 1; idx >= 0; idx -= 1) {
    const trial = kept.slice();
    trial.splice(idx, 1);
    steps += 1;
    if (predicate(trial)) {
      kept = trial;
    }
  }

  // Every accepted candidate is exactly one element shorter.
  return { reduced: kept.length !== arr.length, value: kept, steps };
}
|
||||
|
||||
/**
 * Single pass that tries to drop each key of `obj`, last key first.
 *
 * For every key (taken from the object's initial key list, in reverse),
 * a shallow copy without that key is offered to `predicate`; when the
 * predicate accepts it, the key stays removed for the rest of the pass.
 * The input object is not mutated.
 *
 * @param obj - The record to reduce.
 * @param predicate - Returns `true` if the reduced record is still acceptable.
 * @returns `reduced` (whether any key was dropped), the resulting record,
 *   and `steps`, the number of candidates offered to the predicate.
 */
function shrinkObject<T extends Record<string, unknown>>(
  obj: T,
  predicate: (reduced: T) => boolean,
): { reduced: boolean; value: T; steps: number } {
  let kept: T = { ...obj };
  let steps = 0;
  let removals = 0;

  for (const key of Object.keys(kept).reverse()) {
    const trial: Record<string, unknown> = { ...kept };
    delete trial[key];
    steps += 1;
    if (predicate(trial as T)) {
      kept = trial as T;
      removals += 1;
    }
  }

  return { reduced: removals > 0, value: kept, steps };
}
|
||||
|
||||
/**
 * Copy a witness one level deep.
 *
 * The four list axes and the four optional render-input records are
 * shallow-copied so they can be reassigned or edited independently of
 * the source; the `proof` reference is shared. Absent render-input
 * records come out as `undefined`.
 */
function cloneWitness(w: Witness): Witness {
  const copyRecord = (
    record: Record<string, unknown> | undefined,
  ): Record<string, unknown> | undefined => {
    if (!record) {
      return undefined;
    }
    return { ...record };
  };

  const copy: Witness = {
    proof: w.proof,
    envCases: w.envCases.slice(),
    snapshots: w.snapshots.slice(),
    subjects: w.subjects.slice(),
    facts: w.facts.slice(),
    renderProps: copyRecord(w.renderProps),
    renderArgs: copyRecord(w.renderArgs),
    queryParams: copyRecord(w.queryParams),
    fixtureInputs: copyRecord(w.fixtureInputs),
  };
  return copy;
}
|
||||
@@ -0,0 +1,120 @@
|
||||
/**
|
||||
* Usage error suggestion engine.
|
||||
*
|
||||
* When a system-use error occurs (parse, validation, resolution,
|
||||
* extraction), this module produces actionable suggestions based on
|
||||
* the diagnostic code and category.
|
||||
*/
|
||||
|
||||
import type { Diagnostic, DiagnosticCategory } from './diagnostics.js';
|
||||
import {
|
||||
IMH_SELECTOR_ZERO_MATCHES,
|
||||
IMH_FRAME_AMBIGUOUS,
|
||||
IMH_VALID_INVALID_UNIT,
|
||||
IMH_VALID_ILLEGAL_RELATION_OPTION,
|
||||
IMH_EXTRACT_PARTIAL,
|
||||
IMH_INDETERMINATE_MISSING_FACT,
|
||||
} from './codes.js';
|
||||
|
||||
/**
 * One actionable hint for the user.
 */
export interface Suggestion {
  /** The advice itself. */
  message: string;
  /** Optional example snippet illustrating the advice. */
  example?: string;
}
|
||||
|
||||
/**
 * Lookup service mapping a diagnostic code (and category) to suggestions.
 *
 * It is an interface so that tests can substitute or extend the
 * suggestion set.
 */
export interface SuggestionRegistry {
  /** Return the suggestions registered for `code` / `category`. */
  lookup(code: string, category: DiagnosticCategory): Suggestion[];
}
|
||||
|
||||
/**
 * Create the built-in suggestion registry.
 *
 * Suggestions are keyed by diagnostic code only; the category passed to
 * `lookup` is ignored. Codes without an entry yield an empty list.
 */
export function createDefaultSuggestionRegistry(): SuggestionRegistry {
  const knowledgeBase: Array<[string, Suggestion[]]> = [
    // Resolution errors
    [
      IMH_SELECTOR_ZERO_MATCHES,
      [
        {
          message: 'The selector matched no elements. Verify the selector is correct and the element is present in the DOM.',
          example: "await expect('.buy-button').to.be.visible()",
        },
        {
          message: 'If the element is rendered conditionally, add a wait or guard.',
        },
      ],
    ],
    [
      IMH_FRAME_AMBIGUOUS,
      [
        {
          message: 'Narrow the subject selector so it matches a single element.',
        },
        {
          message: 'Use expectAll(...) if multiple subjects are intended.',
        },
      ],
    ],

    // Validation errors
    [
      IMH_VALID_INVALID_UNIT,
      [
        {
          message: 'Use a supported unit: px, rem, em, %, vh, vw, or jnd.',
          example: "{ minGap: 16, unit: 'px' }",
        },
      ],
    ],
    [
      IMH_VALID_ILLEGAL_RELATION_OPTION,
      [
        {
          message: 'Check the allowed options for this relation in the documentation.',
        },
      ],
    ],

    // Extraction errors
    [
      IMH_EXTRACT_PARTIAL,
      [
        {
          message: 'Some facts were unavailable. Check that the page is fully loaded.',
        },
        {
          message: 'If the fact is unsupported for this element type, simplify the assertion.',
        },
      ],
    ],

    // Indeterminate results
    [
      IMH_INDETERMINATE_MISSING_FACT,
      [
        {
          message: 'A required fact was missing. Check extractor output for warnings.',
        },
      ],
    ],
  ];

  const byCode = new Map<string, Suggestion[]>(knowledgeBase);

  return {
    lookup: (code, _category) => byCode.get(code) ?? [],
  };
}
|
||||
|
||||
/**
 * Enrich a diagnostic with fix hints from the registry.
 *
 * Looks up suggestions by the diagnostic's code and category. When none
 * are found the same diagnostic object is returned unchanged; otherwise
 * a shallow copy is returned whose `fixHints` are the existing hints
 * followed by one hint per suggestion. A suggestion with an example is
 * rendered as `<message> Example: <example>`.
 *
 * @param diagnostic - The diagnostic to enrich. It is not mutated.
 * @param registry - Source of suggestions.
 * @returns The original diagnostic or an extended copy.
 */
export function attachSuggestions(
  diagnostic: Diagnostic,
  registry: SuggestionRegistry,
): Diagnostic {
  const matches = registry.lookup(diagnostic.code, diagnostic.category);
  if (matches.length === 0) {
    return diagnostic;
  }

  const extraHints = matches.map((suggestion) => {
    if (suggestion.example) {
      return `${suggestion.message} Example: ${suggestion.example}`;
    }
    return suggestion.message;
  });

  return {
    ...diagnostic,
    fixHints: [...diagnostic.fixHints, ...extraHints],
  };
}
|
||||
@@ -0,0 +1,115 @@
|
||||
/**
|
||||
* Trace event model for Imhotep.
|
||||
*
|
||||
* Every evaluation produces a chain of trace events that link
|
||||
* source spans → AST → IR → execution → proof → diagnostic.
|
||||
*
|
||||
* Trace events are cheap to keep in compact form and expand on demand.
|
||||
*/
|
||||
|
||||
/**
 * The named stages of the evaluation pipeline that a trace event can
 * be tagged with.
 */
export type TracePhase =
  // front end
  | 'parse-started'
  | 'ast-created'
  | 'ir-normalized'
  | 'fact-requirements-computed'
  // extraction
  | 'extraction-started'
  | 'extraction-step-completed'
  | 'world-normalized'
  // evaluation and reporting
  | 'clause-evaluated'
  | 'proof-created'
  | 'witness-shrunk'
  | 'diagnostic-emitted';
|
||||
|
||||
/**
 * Optional cross-references linking a trace event to other entities.
 *
 * Every field is an optional string identifier; an event sets only the ones
 * that apply to it and may leave the object empty.
 */
export interface TraceRefs {
  /** Identifier of the related clause, if any. */
  clauseId?: string;
  /** Identifier of the related proof, if any. */
  proofId?: string;
  /** Identifier of the related snapshot, if any. */
  snapshotId?: string;
  /** Identifier of the related diagnostic, if any. */
  diagnosticId?: string;
  /** Identifier of the related AST node, if any. */
  astNodeId?: string;
  /** Identifier of the related environment case, if any. */
  envCaseId?: string;
}
|
||||
|
||||
/**
 * One recorded step of an evaluation.
 */
export interface TraceEvent {
  /** Identifier of this event. */
  traceEventId: string;
  /** Pipeline phase this event was recorded for. */
  phase: TracePhase;
  /** Time the event was recorded, in milliseconds since the Unix epoch. */
  at: number;
  /** Cross-references to the entities this event relates to. */
  refs: TraceRefs;
  /** Optional free-form data attached to the event. */
  payload?: Record<string, unknown>;
}
|
||||
|
||||
/**
 * Collector of trace events produced during an evaluation.
 *
 * A builder is injected into each pipeline stage so the stages themselves
 * stay pure: they describe what happened and the builder does the recording.
 */
export interface TraceBuilder {
  /**
   * Record an event.
   *
   * @param event - The event without `traceEventId` and `at`; the builder
   *   supplies those two fields.
   * @returns The completed event.
   */
  emit(event: Omit<TraceEvent, 'traceEventId' | 'at'>): TraceEvent;

  /**
   * @returns The events recorded so far, as a read-only array.
   */
  events(): readonly TraceEvent[];
}
|
||||
|
||||
/**
 * Build a `TraceBuilder` backed by an in-memory list.
 *
 * Identifier generation and the clock are injected rather than hard-wired,
 * so callers (tests in particular) can make both deterministic.
 *
 * @param deps - Collaborators used to stamp each event:
 *   `idGen()` supplies the event's `traceEventId` and `now()` supplies its
 *   `at` timestamp. For every emitted event `idGen` is called before `now`.
 * @returns A builder whose `emit` stamps, stores and returns the event, and
 *   whose `events` returns the backing list itself (not a copy), so a
 *   previously obtained reference also reflects later emits.
 */
export function createTraceBuilder(deps: {
  idGen(): string;
  now(): number;
}): TraceBuilder {
  const recorded: TraceEvent[] = [];

  function emit(event: Omit<TraceEvent, 'traceEventId' | 'at'>): TraceEvent {
    // `payload` is always written, even when absent on the input, so the
    // stored event carries an explicit `payload: undefined` in that case.
    const stamped: TraceEvent = {
      traceEventId: deps.idGen(),
      phase: event.phase,
      at: deps.now(),
      refs: event.refs,
      payload: event.payload,
    };
    recorded.push(stamped);
    return stamped;
  }

  function events(): readonly TraceEvent[] {
    return recorded;
  }

  return { emit, events };
}
|
||||
|
||||
/**
 * Convenience: collect every trace event that references a given clause.
 *
 * @param events - Trace events to search; the array is not modified.
 * @param clauseId - Clause identifier to match against `refs.clauseId`.
 * @returns A new array holding all matching events in their original order;
 *   empty when no event references the clause.
 */
export function findClauseTraces(
  events: readonly TraceEvent[],
  clauseId: string,
): TraceEvent[] {
  return events.filter((event) => event.refs.clauseId === clauseId);
}
|
||||
|
||||
/**
 * Convenience: reconstruct the evaluation chain for a proof.
 *
 * The chain consists of:
 *  - every event whose `refs.proofId` equals `proofId`, and
 *  - every `'clause-evaluated'` event whose `refs.clauseId` matches a clause
 *    referenced by one of those proof events. Such events may carry no
 *    `proofId` of their own, so the clause id is the link to the proof.
 *
 * Events without a `clauseId` are never linked through the second rule.
 *
 * @param events - Trace events to search; the array is not modified.
 * @param proofId - Identifier of the proof whose chain is wanted.
 * @returns A new array with the chain's events in their original order;
 *   empty when no event references the proof.
 */
export function traceChainForProof(
  events: readonly TraceEvent[],
  proofId: string,
): TraceEvent[] {
  // First pass: learn which clauses the proof is tied to, from events that
  // carry both the proof id and a clause id.
  const linkedClauseIds = new Set<string>();
  for (const { refs } of events) {
    if (refs.proofId === proofId && refs.clauseId !== undefined) {
      linkedClauseIds.add(refs.clauseId);
    }
  }

  // Second pass: keep direct proof references plus the clause evaluations
  // for the linked clauses. Set lookups keep the whole function linear.
  return events.filter(
    ({ phase, refs }) =>
      refs.proofId === proofId ||
      (phase === 'clause-evaluated' &&
        refs.clauseId !== undefined &&
        linkedClauseIds.has(refs.clauseId)),
  );
}
|
||||
Reference in New Issue
Block a user