v1.1.0: pooled runtime, 959 tests, production hardening (0 squash)

2025-08-15 10:00:00 -07:00
commit 92deb689cd
321 changed files with 79170 additions and 0 deletions
@@ -0,0 +1,335 @@
+/**
+ * Canonical failure formatter.
+ *
+ * Transforms raw ImhotepResult diagnostics into narrative reports with
+ * five sections per failing clause:
+ *   - Expected: human-readable assertion intent
+ *   - Observed: measured metrics that caused failure
+ *   - Why this matters: one-sentence semantic impact
+ *   - Replay: reproduction command or seed/case info
+ *   - Next: concrete next steps to investigate/fix
+ *
+ * Supports both human-readable text and structured JSON output.
+ */
+
+import type { ImhotepResult, ClauseResult, ClauseStatus } from 'imhotep-core';
+import type { Diagnostic } from './diagnostics.js';
+
+/**
+ * One failing clause rendered as the five canonical sections.
+ *
+ * Every field is a plain display string; the text renderer wraps it and the
+ * JSON renderer serializes it unchanged.
+ */
+export interface CanonicalFailureEntry {
+  /** Human-readable assertion intent: the clause label, or `Clause <clauseId>` when no label is set. */
+  expected: string;
+  /** Measured metrics that caused the failure, or the diagnostic message when that is all that is available. */
+  observed: string;
+  /** One-sentence explanation of the semantic impact. */
+  why: string;
+  /** Replay info: optional seed, clause id, source line/column, and selector when known. */
+  replay: string;
+  /** Concrete next steps: the diagnostic's fix hints when present, otherwise a default per failure type. */
+  next: string;
+}
+
+/**
+ * The complete canonical report as structured data.
+ *
+ * The text and JSON renderers in this module both start from this shape.
+ */
+export interface CanonicalReport {
+  /** Overall pass/fail status, copied from the source result. */
+  passed: boolean;
+  /** Number of failing clauses; always equal to `failures.length`. */
+  failureCount: number;
+  /** One entry per clause whose status is 'fail' or 'error', in clause order. */
+  failures: CanonicalFailureEntry[];
+}
+
+/**
+ * Options controlling canonical formatter output.
+ */
+export interface CanonicalFormatOptions {
+  /** Output format: 'text' for terminal, 'json' for machine. Defaults to 'text'. */
+  format?: 'text' | 'json';
+  /** Max line width for text wrapping and rule lines. Defaults to 80. */
+  maxWidth?: number;
+  /** Seed to print in each entry's replay section; omitted from the output when undefined. */
+  seed?: number;
+}
+
+// ---------------------------------------------------------------------------
+// Failure Type Detection
+// ---------------------------------------------------------------------------
+
+/**
+ * Classify a failing clause by the code of its primary diagnostic.
+ *
+ * @param clause - The failing clause (not consulted; classification is code-based).
+ * @param diagnostic - Primary diagnostic for the clause, if one was found.
+ * @returns The failure family, or 'unknown' when there is no diagnostic or the
+ *   code is not recognized.
+ */
+function detectFailureType(clause: ClauseResult, diagnostic: Diagnostic | undefined): 'relation' | 'size' | 'cardinality' | 'parse' | 'selector' | 'unknown' {
+  const code = diagnostic?.code ?? '';
+
+  // Codes that are matched exactly rather than by family prefix.
+  switch (code) {
+    case 'IMH_ALIGNMENT_FAILED':
+    case 'IMH_PREDICATE_FAILED':
+      return 'relation';
+    case 'IMH_SELECTOR_ZERO_MATCHES':
+      return 'selector';
+  }
+
+  // Remaining codes are grouped by their prefix.
+  const families = [
+    ['IMH_RELATION_', 'relation'],
+    ['IMH_SIZE_', 'size'],
+    ['IMH_CARDINALITY_', 'cardinality'],
+    ['IMH_PARSE_', 'parse'],
+  ] as const;
+  for (const [prefix, family] of families) {
+    if (code.startsWith(prefix)) return family;
+  }
+  return 'unknown';
+}
+
+// ---------------------------------------------------------------------------
+// Section Builders
+// ---------------------------------------------------------------------------
+
+/**
+ * Build the "Expected" section: the clause label, or a generic
+ * `Clause <clauseId>` placeholder when the label is null or undefined.
+ */
+function buildExpected(clause: ClauseResult, _diagnostic: Diagnostic | undefined, _type: string): string {
+  const label = clause.clauseLabel;
+  if (label == null) {
+    return `Clause ${clause.clauseId}`;
+  }
+  return label;
+}
+
+/**
+ * Build the "Observed" section from clause and diagnostic metrics.
+ *
+ * Clause-level metrics override diagnostic-level metrics with the same key.
+ * Whenever a failure type yields no metric text (for example a relation
+ * diagnostic that carries no structured metrics), the diagnostic message is
+ * used instead so the section is never rendered empty.
+ *
+ * @param clause - The failing clause; its `metrics` are optional.
+ * @param diagnostic - Primary diagnostic for the clause, if any.
+ * @param type - Failure family as returned by `detectFailureType`.
+ * @returns A '; '-separated description of what was measured.
+ */
+function buildObserved(clause: ClauseResult, diagnostic: Diagnostic | undefined, type: string): string {
+  const metrics = clause.metrics ?? {};
+  const diagMetrics = diagnostic?.metrics ?? {};
+  const allMetrics = { ...diagMetrics, ...metrics };
+
+  const parts: string[] = [];
+
+  if (type === 'relation') {
+    const gap = allMetrics.observedGap ?? allMetrics.gap;
+    const minGap = allMetrics.minGap;
+    const maxGap = allMetrics.maxGap;
+    if (gap !== undefined) parts.push(`measured gap is ${gap}px`);
+    if (minGap !== undefined) parts.push(`minimum required gap is ${minGap}px`);
+    // Upper bounds matter as much as lower bounds when the gap is too large.
+    if (maxGap !== undefined) parts.push(`maximum allowed gap is ${maxGap}px`);
+  } else if (type === 'size') {
+    const observed = allMetrics.observedWidth ?? allMetrics.observedHeight ?? allMetrics.observedSize;
+    const expected = allMetrics.minWidth ?? allMetrics.minHeight ?? allMetrics.minSize ?? allMetrics.expected;
+    const prop = allMetrics.observedWidth !== undefined ? 'width' : allMetrics.observedHeight !== undefined ? 'height' : 'size';
+    if (observed !== undefined) parts.push(`${prop} is ${observed}px`);
+    if (expected !== undefined) parts.push(`expected ${prop} is ${expected}px`);
+  } else if (type === 'cardinality') {
+    const observed = allMetrics.observedCount;
+    const expected = allMetrics.expectedCount;
+    if (observed !== undefined) parts.push(`found ${observed} element(s)`);
+    if (expected !== undefined) parts.push(`expected ${expected} element(s)`);
+  } else if (type === 'parse') {
+    parts.push(diagnostic?.message ?? 'parse error occurred');
+  } else if (type === 'selector') {
+    parts.push(`selector resolved to 0 elements`);
+  } else if (Object.keys(allMetrics).length > 0) {
+    parts.push(
+      Object.entries(allMetrics)
+        .map(([k, v]) => `${k}=${v}`)
+        .join(', '),
+    );
+  }
+
+  // Shared fallback: previously only the 'unknown' branch had one, so typed
+  // failures without metrics produced a blank "Observed" section.
+  if (parts.length === 0) {
+    parts.push(diagnostic?.message ?? 'failure details unavailable');
+  }
+
+  return parts.join('; ');
+}
+
+/**
+ * Build the "Why this matters" sentence for a failure family.
+ *
+ * The selector comes from the diagnostic's source reference when present,
+ * otherwise from the first single-quoted token of the clause label.
+ */
+function buildWhy(clause: ClauseResult, diagnostic: Diagnostic | undefined, type: string): string {
+  const selector = diagnostic?.sourceRef?.selector ?? extractSelectorFromLabel(clause.clauseLabel);
+
+  if (type === 'relation') {
+    return `The spatial relationship between elements is violated, breaking layout expectations for "${selector}".`;
+  }
+  if (type === 'size') {
+    return `The element "${selector}" does not meet size constraints, which may cause overflow or clipping.`;
+  }
+  if (type === 'cardinality') {
+    return `The expected number of elements for "${selector}" was not found, indicating a missing or duplicate component.`;
+  }
+  if (type === 'parse') {
+    return `The assertion could not be parsed, so Imhotep cannot evaluate the intended contract.`;
+  }
+  if (type === 'selector') {
+    return `The selector "${selector}" matched nothing, so the assertion has no subject to evaluate.`;
+  }
+  return `The assertion failed, indicating a contract violation or extraction problem.`;
+}
+
+/**
+ * Build the "Replay" section: the details needed to locate and re-run the
+ * failing clause.
+ *
+ * @param clause - The failing clause; its id is always included.
+ * @param diagnostic - Primary diagnostic, used for source line/column and selector.
+ * @param type - Failure family; the selector is only listed for 'selector' and 'cardinality'.
+ * @param seed - Optional run seed, listed first when provided.
+ * @returns A ', '-separated list such as `seed: 42, clause: c1, line: 3`.
+ */
+function buildReplay(clause: ClauseResult, diagnostic: Diagnostic | undefined, type: string, seed?: number): string {
+  // The parameter was previously named `_diagnostic`, which by convention
+  // marks an unused argument even though it is read below.
+  const sourceRef = diagnostic?.sourceRef;
+  const parts: string[] = [];
+
+  if (seed !== undefined) {
+    parts.push(`seed: ${seed}`);
+  }
+  parts.push(`clause: ${clause.clauseId}`);
+  if (sourceRef?.line !== undefined) {
+    parts.push(`line: ${sourceRef.line}`);
+  }
+  if (sourceRef?.column !== undefined) {
+    parts.push(`column: ${sourceRef.column}`);
+  }
+  if (type === 'selector' || type === 'cardinality') {
+    const selector = sourceRef?.selector ?? extractSelectorFromLabel(clause.clauseLabel);
+    parts.push(`selector: "${selector}"`);
+  }
+  return parts.join(', ');
+}
+
+/**
+ * Build the "Next" section.
+ *
+ * Fix hints attached to the diagnostic take priority and are joined with a
+ * single space; otherwise a default suggestion for the failure family is used.
+ */
+function buildNext(clause: ClauseResult, diagnostic: Diagnostic | undefined, type: string): string {
+  const hints = diagnostic?.fixHints ?? [];
+  if (hints.length > 0) {
+    return hints.join(' ');
+  }
+
+  const defaults = new Map<string, string>([
+    ['relation', `Inspect the layout in the browser devtools and adjust element positions or gap thresholds.`],
+    ['size', `Check the element dimensions with ui.extract() and adjust the expected size or CSS.`],
+    ['cardinality', `Verify the selector matches the intended elements; use ui.extract() to debug.`],
+    ['parse', `Fix the assertion syntax: use single-quoted selectors and valid relation keywords.`],
+    ['selector', `Verify the selector is correct and the element exists in the DOM at evaluation time.`],
+  ]);
+  return defaults.get(type) ?? `Review the diagnostic message and metrics to determine the root cause.`;
+}
+
+/**
+ * Pull the first single-quoted, non-empty token out of a clause label.
+ *
+ * @returns The text between the quotes, or 'unknown' when the label is
+ *   missing/empty or contains no such token.
+ */
+function extractSelectorFromLabel(label: string | undefined): string {
+  const quoted = label ? /'([^']+)'/.exec(label) : null;
+  return quoted?.[1] ?? 'unknown';
+}
+
+// ---------------------------------------------------------------------------
+// Report Assembly
+// ---------------------------------------------------------------------------
+
+/**
+ * Assemble the structured canonical report for a result.
+ *
+ * Only clauses with status 'fail' or 'error' produce an entry. For each one
+ * the primary diagnostic is resolved in this order:
+ *   1. same code as the clause's first diagnostic code AND same clause id;
+ *   2. same code with no clause id (a run-level diagnostic);
+ *   3. any diagnostic attached to the clause id.
+ * Step 1 is tried before step 2 so a run-level diagnostic that happens to
+ * appear earlier in the list cannot shadow the clause-specific one.
+ *
+ * @param result - Evaluation result to summarize.
+ * @param options - Formatter options; only `seed` is consulted here.
+ * @returns The report with one entry per failing clause, in clause order.
+ */
+function buildCanonicalReport(result: ImhotepResult, options?: CanonicalFormatOptions): CanonicalReport {
+  const failures: CanonicalFailureEntry[] = [];
+
+  for (const clause of result.clauseResults) {
+    if (clause.status !== 'fail' && clause.status !== 'error') {
+      continue;
+    }
+
+    // Find the primary diagnostic for this clause.
+    let diagnostic: Diagnostic | undefined;
+    const diagCode = clause.diagnostics?.[0];
+    if (diagCode) {
+      diagnostic =
+        (result.diagnostics.find(
+          (d: any) => d.code === diagCode && d.clauseId === clause.clauseId,
+        ) as Diagnostic | undefined) ??
+        (result.diagnostics.find(
+          (d: any) => d.code === diagCode && !d.clauseId,
+        ) as Diagnostic | undefined);
+    }
+    if (!diagnostic) {
+      diagnostic = result.diagnostics.find((d: any) => d.clauseId === clause.clauseId) as Diagnostic | undefined;
+    }
+
+    const type = detectFailureType(clause, diagnostic);
+
+    failures.push({
+      expected: buildExpected(clause, diagnostic, type),
+      observed: buildObserved(clause, diagnostic, type),
+      why: buildWhy(clause, diagnostic, type),
+      replay: buildReplay(clause, diagnostic, type, options?.seed),
+      next: buildNext(clause, diagnostic, type),
+    });
+  }
+
+  return {
+    passed: result.passed,
+    failureCount: failures.length,
+    failures,
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Text Rendering (80-char wrapping)
+// ---------------------------------------------------------------------------
+
+/**
+ * Greedily wrap a line at spaces so each output line fits in `maxWidth`.
+ *
+ * A single word longer than `maxWidth` is kept intact on its own line rather
+ * than being split. Lines that already fit are returned unchanged.
+ *
+ * @param line - Text to wrap (treated as one paragraph; words are split on ' ').
+ * @param maxWidth - Maximum number of characters per output line.
+ * @returns The wrapped lines, never containing a spurious empty leading line.
+ */
+function wrapLine(line: string, maxWidth: number): string[] {
+  if (line.length <= maxWidth) return [line];
+
+  const lines: string[] = [];
+  let current = '';
+  for (const word of line.split(' ')) {
+    if (current === '') {
+      // Nothing buffered yet: start the line with this word even if it is
+      // over-long. Flushing here is what used to emit an empty first line.
+      current = word;
+    } else if (current.length + 1 + word.length > maxWidth) {
+      lines.push(current);
+      current = word;
+    } else {
+      current = `${current} ${word}`;
+    }
+  }
+  if (current) lines.push(current);
+  return lines;
+}
+
+/**
+ * Render one failure entry as text: a blank line, a horizontal rule, a blank
+ * line, then each of the five sections as a `Label:` heading followed by its
+ * wrapped value indented by two spaces and a trailing blank line.
+ */
+function renderEntryText(entry: CanonicalFailureEntry, maxWidth: number): string {
+  const sections: Array<[string, string]> = [
+    ['Expected', entry.expected],
+    ['Observed', entry.observed],
+    ['Why this matters', entry.why],
+    ['Replay', entry.replay],
+    ['Next', entry.next],
+  ];
+
+  // Two columns are reserved for the indent applied to every wrapped line.
+  const body = sections.flatMap(([label, value]) => [
+    `${label}:`,
+    ...wrapLine(value, maxWidth - 2).map((wrapped) => `  ${wrapped}`),
+    '',
+  ]);
+
+  return ['', '─'.repeat(maxWidth), '', ...body].join('\n');
+}
+
+/**
+ * Render the canonical failure report as terminal-friendly text.
+ *
+ * @param result - Evaluation result to report on.
+ * @param options - Formatter options; `maxWidth` defaults to 80 and falls
+ *   back to 80 when it is not a finite number of at least 1 (String.repeat
+ *   would otherwise throw a RangeError).
+ * @returns A banner plus one block per failing clause, or a one-line summary
+ *   when no clause failed.
+ */
+export function renderCanonicalText(result: ImhotepResult, options?: CanonicalFormatOptions): string {
+  const report = buildCanonicalReport(result, options);
+  const requestedWidth = options?.maxWidth ?? 80;
+  const maxWidth = Number.isFinite(requestedWidth) && requestedWidth >= 1 ? requestedWidth : 80;
+
+  if (report.failureCount === 0) {
+    if (report.passed) {
+      return 'All checks passed. No failures to report.';
+    }
+    // The run failed without any failing clause (e.g. a run-level error);
+    // claiming success here would hide the failure.
+    return 'Run did not pass, but no failing clauses were reported. Inspect the result diagnostics for run-level errors.';
+  }
+
+  const lines: string[] = [];
+  lines.push('='.repeat(maxWidth));
+  lines.push('IMHOTEP CANONICAL FAILURE REPORT');
+  lines.push(`${report.failureCount} failure(s) across ${result.clauseResults.length} clause(s)`);
+  lines.push('='.repeat(maxWidth));
+
+  for (const entry of report.failures) {
+    lines.push(renderEntryText(entry, maxWidth));
+  }
+
+  return lines.join('\n');
+}
+
+// ---------------------------------------------------------------------------
+// JSON Rendering
+// ---------------------------------------------------------------------------
+
+/**
+ * Render the canonical report as pretty-printed JSON (two-space indent).
+ */
+export function renderCanonicalJson(result: ImhotepResult, options?: CanonicalFormatOptions): string {
+  return JSON.stringify(buildCanonicalReport(result, options), null, 2);
+}
+
+// ---------------------------------------------------------------------------
+// Main Entry Point
+// ---------------------------------------------------------------------------
+
+/**
+ * Format an ImhotepResult into a canonical failure report.
+ *
+ * Dispatches to the JSON renderer when `options.format` is 'json' and to the
+ * text renderer otherwise (including when no format is given).
+ *
+ * @param result - The evaluation result from checkAll or property runs.
+ * @param options - Format options (text or json, wrapping, seed).
+ * @returns A string containing the formatted report.
+ */
+export function formatCanonical(result: ImhotepResult, options?: CanonicalFormatOptions): string {
+  const wantsJson = (options?.format ?? 'text') === 'json';
+  return wantsJson ? renderCanonicalJson(result, options) : renderCanonicalText(result, options);
+}
@@ -0,0 +1,423 @@
+/**
+ * Diagnostic object creation and formatting.
+ *
+ * Diagnostics carry codes, positions, related facts, and fix hints.
+ * They map proof outcomes back to author source spans.
+ *
+ * Extended for V1.1 with property-run diagnostics (sampled runs,
+ * seeds, counterexamples) to support Riley's 2 AM replay workflow.
+ */
+
+import type { TraceEvent, TraceRefs } from './traces.js';
+import {
+  IMH_RELATION_LEFT_OF_FAILED,
+  IMH_RELATION_RIGHT_OF_FAILED,
+  IMH_RELATION_ABOVE_FAILED,
+  IMH_RELATION_BELOW_FAILED,
+  IMH_RELATION_ALIGNED_FAILED,
+  IMH_RELATION_CENTERED_FAILED,
+  IMH_FACT_OBSERVED_GAP,
+  lookupCode,
+} from './codes.js';
+
+/**
+ * Severity of a diagnostic.
+ */
+export type Severity = 'error' | 'warning' | 'info';
+
+/**
+ * High-level category for routing and suggestion logic.
+ */
+export type DiagnosticCategory =
+  | 'parse-error'
+  | 'validation-error'
+  | 'resolution-error'
+  | 'extraction-error'
+  | 'contract-failure'
+  | 'indeterminate-result'
+  | 'internal-error';
+
+/**
+ * Source position inside the original authoring document.
+ */
+export interface Position {
+  start: { line: number; column: number; offset: number };
+  end: { line: number; column: number; offset: number };
+}
+
+/**
+ * A fact that is related to the diagnostic but not the primary cause.
+ */
+export interface RelatedFact {
+  code?: string;
+  message: string;
+  position?: Position;
+}
+
+/**
+ * A fix hint is an actionable string the user can follow.
+ */
+export type FixHint = string;
+
+/**
+ * Canonical source reference for a diagnostic.
+ */
+export interface SourceRef {
+  file?: string;
+  line?: number;
+  column?: number;
+  selector?: string;
+}
+
+/**
+ * Automated fix suggestion attached to a diagnostic.
+ */
+export interface SuggestedFix {
+  action: string;
+  target: string;
+  value: string;
+  rationale: string;
+}
+
+/**
+ * A rich diagnostic object.
+ *
+ * Instances are normally produced by `makeDiagnostic`, which fills in the
+ * defaults noted on the individual fields.
+ */
+export interface Diagnostic {
+  /** Identifier obtained from the injected `idGen()` when built via `makeDiagnostic`. */
+  diagnosticId: string;
+  /** Diagnostic code, e.g. one of the `IMH_*` constants. */
+  code: string;
+  /** Severity; `makeDiagnostic` defaults this to 'error'. */
+  severity: Severity;
+  /** High-level category used for routing and suggestion logic. */
+  category: DiagnosticCategory;
+  /** Human-readable description of the problem. */
+  message: string;
+  /** Span inside the original authoring document, when known. */
+  position?: Position;
+  source?: string;
+  /** Id of the clause this diagnostic belongs to, when it is clause-specific. */
+  clauseId?: string;
+  sceneId?: string;
+  snapshotId?: string;
+  envCaseId?: string;
+  /** Subject resolution origin for semantic selectors (Stream 7). */
+  subjectOrigin?: string;
+  /** Secondary facts; `makeDiagnostic` defaults this to an empty array. */
+  related: RelatedFact[];
+  traceRef?: string;
+  /** Actionable hints; `makeDiagnostic` defaults this to an empty array. */
+  fixHints: FixHint[];
+  /** V1 required: structured metrics associated with this diagnostic (defaults to `{}`). */
+  metrics: Record<string, number>;
+  /** V1 required: canonical source reference (defaults to `{}`). */
+  sourceRef: SourceRef;
+  /** V1 optional: automated fix suggestion. */
+  suggestedFix?: SuggestedFix;
+}
+
+// ---------------------------------------------------------------------------
+// Sampled-Run Diagnostic Types (V1.1)
+// ---------------------------------------------------------------------------
+
+/**
+ * A diagnostic produced during a sampled or enumerated property run.
+ * Captures the seed, run index, input snapshot, and scene result so
+ * Riley can replay the exact failing input with a single seed number.
+ *
+ * All property-run fields are optional; `makePropertyDiagnostic` copies them
+ * verbatim from the blueprint.
+ */
+export interface PropertyDiagnostic extends Diagnostic {
+  /** Seed of the run that produced this diagnostic. */
+  seed?: number;
+  /** Index of the run within the property run — NOTE(review): zero- or one-based is not visible here; confirm with the runner. */
+  runIndex?: number;
+  /** Snapshot of the input used for the run (opaque to this module). */
+  inputSnapshot?: unknown;
+  /** Outcome of the individual run. */
+  runResult?: 'pass' | 'fail' | 'error';
+  /** Failing input as found (opaque to this module). */
+  counterexample?: unknown;
+  /** Presumably the counterexample after shrinking — confirm with the shrinker. */
+  shrunkCounterexample?: unknown;
+}
+
+/**
+ * Blueprint for building a PropertyDiagnostic.
+ *
+ * Extends DiagnosticBlueprint with the property-run fields; each one is
+ * copied unchanged onto the resulting diagnostic by `makePropertyDiagnostic`.
+ */
+export interface PropertyDiagnosticBlueprint extends DiagnosticBlueprint {
+  seed?: number;
+  runIndex?: number;
+  inputSnapshot?: unknown;
+  runResult?: 'pass' | 'fail' | 'error';
+  counterexample?: unknown;
+  shrunkCounterexample?: unknown;
+}
+
+/**
+ * Build a PropertyDiagnostic from a blueprint.
+ *
+ * The base diagnostic comes from `makeDiagnostic`; the property-run fields
+ * are then layered on top, copied as-is from the blueprint.
+ *
+ * @param blueprint - Base diagnostic inputs plus property-run fields.
+ * @param deps - Injected dependencies; `idGen` supplies the diagnostic id.
+ * @returns A new PropertyDiagnostic object.
+ */
+export function makePropertyDiagnostic(
+  blueprint: PropertyDiagnosticBlueprint,
+  deps: { idGen(): string },
+): PropertyDiagnostic {
+  const base = makeDiagnostic(blueprint, deps);
+  const propertyFields = {
+    seed: blueprint.seed,
+    runIndex: blueprint.runIndex,
+    inputSnapshot: blueprint.inputSnapshot,
+    runResult: blueprint.runResult,
+    counterexample: blueprint.counterexample,
+    shrunkCounterexample: blueprint.shrunkCounterexample,
+  };
+  return Object.assign({}, base, propertyFields);
+}
+
+/**
+ * Inputs needed to build a diagnostic from a proof or system error.
+ * All optional fields are injected by the caller.
+ *
+ * Only `code`, `category`, and `message` are required; `makeDiagnostic`
+ * supplies the defaults noted below for the rest.
+ */
+export interface DiagnosticBlueprint {
+  code: string;
+  /** Defaults to 'error' when omitted. */
+  severity?: Severity;
+  category: DiagnosticCategory;
+  message: string;
+  position?: Position;
+  source?: string;
+  clauseId?: string;
+  sceneId?: string;
+  snapshotId?: string;
+  envCaseId?: string;
+  /** Defaults to an empty array when omitted. */
+  related?: RelatedFact[];
+  traceRef?: string;
+  /** Defaults to an empty array when omitted. */
+  fixHints?: FixHint[];
+  /** Defaults to an empty object when omitted. */
+  metrics?: Record<string, number>;
+  /** Defaults to an empty object when omitted. */
+  sourceRef?: SourceRef;
+  suggestedFix?: SuggestedFix;
+}
+
+/**
+ * Build a Diagnostic from a blueprint, applying safe defaults.
+ *
+ * Defaults: `severity` 'error', `related` and `fixHints` empty arrays,
+ * `metrics` and `sourceRef` empty objects. `suggestedFix` is carried over
+ * when the blueprint provides one (it used to be dropped silently); the key
+ * is left off entirely otherwise so existing diagnostics keep their shape.
+ *
+ * @param blueprint - Caller-supplied diagnostic inputs.
+ * @param deps - Injected dependencies; `idGen` is called exactly once to
+ *   produce `diagnosticId`.
+ * @returns A new Diagnostic object.
+ */
+export function makeDiagnostic(
+  blueprint: DiagnosticBlueprint,
+  deps: { idGen(): string },
+): Diagnostic {
+  const diagnostic: Diagnostic = {
+    diagnosticId: deps.idGen(),
+    code: blueprint.code,
+    severity: blueprint.severity ?? 'error',
+    category: blueprint.category,
+    message: blueprint.message,
+    position: blueprint.position,
+    source: blueprint.source,
+    clauseId: blueprint.clauseId,
+    sceneId: blueprint.sceneId,
+    snapshotId: blueprint.snapshotId,
+    envCaseId: blueprint.envCaseId,
+    related: blueprint.related ?? [],
+    traceRef: blueprint.traceRef,
+    fixHints: blueprint.fixHints ?? [],
+    metrics: blueprint.metrics ?? {},
+    sourceRef: blueprint.sourceRef ?? {},
+  };
+  if (blueprint.suggestedFix !== undefined) {
+    diagnostic.suggestedFix = blueprint.suggestedFix;
+  }
+  return diagnostic;
+}
+
+/**
+ * Format a diagnostic as one line for quick scanning:
+ * `[SEVERITY code @ line:column] message`, with `unknown` in place of the
+ * location when the diagnostic has no position.
+ */
+export function formatDiagnosticCompact(d: Diagnostic): string {
+  let location = 'unknown';
+  if (d.position) {
+    const { line, column } = d.position.start;
+    location = `${line}:${column}`;
+  }
+  const level = d.severity.toUpperCase();
+  return `[${level} ${d.code} @ ${location}] ${d.message}`;
+}
+
+/**
+ * Format a diagnostic as a multi-line, human-readable block.
+ *
+ * The first two lines (severity + message, code) are always present; the
+ * position, clause, related facts, and fix hints follow only when set or
+ * non-empty.
+ */
+export function formatDiagnosticFull(d: Diagnostic): string {
+  const out = [`${d.severity.toUpperCase()}: ${d.message}`, `  code: ${d.code}`];
+
+  if (d.position) {
+    const { line, column } = d.position.start;
+    out.push(`  at: line ${line}, column ${column}`);
+  }
+  if (d.clauseId) {
+    out.push(`  clause: ${d.clauseId}`);
+  }
+  if (d.related.length > 0) {
+    out.push('  related:', ...d.related.map((fact) => `    - ${fact.message}`));
+  }
+  if (d.fixHints.length > 0) {
+    out.push('  fix hints:', ...d.fixHints.map((hint) => `    - ${hint}`));
+  }
+
+  return out.join('\n');
+}
+
+/**
+ * Minimal proof shape expected by the reporter.
+ * The solver contract defines the full proof object.
+ */
+export interface ProofLike {
+  proofId: string;
+  /** Clause the proof belongs to; copied onto diagnostics built from it. */
+  clauseId: string;
+  /** Only 'fail' proofs yield a diagnostic in `diagnosticFromProof`. */
+  outcome: 'pass' | 'fail';
+  /** 'indeterminate' maps to the 'indeterminate-result' category; other values to 'contract-failure'. */
+  truth: 'determinate' | 'indeterminate' | 'approximate';
+  /** Failing comparison, reported as a related fact: `Predicate <op> failed: <left> vs <right>`. */
+  failedPredicate?: { op: string; left: number; right: number };
+  /** Witness of the failure; env case and snapshot ids are surfaced as related facts when set. */
+  witness?: {
+    subjectId?: number;
+    referenceId?: number;
+    envCaseId?: string;
+    snapshotId?: string;
+  };
+}
+
+/**
+ * Rich relation proof carrying all metadata needed for a detailed
+ * contract-failure diagnostic.  Produced by the solver / pipeline
+ * when a spatial-relation clause fails.
+ */
+export interface RelationProof extends ProofLike {
+  /** CSS selector that identified the subject element. */
+  subjectSelector: string;
+  /** CSS selector that identified the reference element. */
+  referenceSelector: string;
+  /** Relation name, e.g. "leftOf", "above". */
+  relation: string;
+  /** Expected bound values (minGap, maxGap, etc.). */
+  expected: Record<string, number>;
+  /** Observed / measured values (observedGap, etc.). */
+  observed: Record<string, number>;
+  /** Border-box rect of the subject element. */
+  subjectRect: { left: number; top: number; right: number; bottom: number };
+  /** Border-box rect of the reference element. */
+  referenceRect: { left: number; top: number; right: number; bottom: number };
+  /** Frame identifier (viewport, containing-block, etc.). */
+  frame: string;
+}
+
+/**
+ * Maps a `RelationProof.relation` name to its relation-failure code.
+ * Relations not listed here are handled by the fallback in
+ * `buildRelationDiagnostic`.
+ */
+const RELATION_TO_CODE: Record<string, string> = {
+  leftOf: IMH_RELATION_LEFT_OF_FAILED,
+  rightOf: IMH_RELATION_RIGHT_OF_FAILED,
+  above: IMH_RELATION_ABOVE_FAILED,
+  below: IMH_RELATION_BELOW_FAILED,
+  alignedWith: IMH_RELATION_ALIGNED_FAILED,
+  centeredWithin: IMH_RELATION_CENTERED_FAILED,
+};
+
+/**
+ * Build a rich relation-specific diagnostic from a RelationProof.
+ *
+ * Includes measured values, expected bounds, element rects, and
+ * frame context so the author knows exactly what failed and why.
+ *
+ * Behavior notes:
+ *   - Relations without a dedicated code get the generic
+ *     'IMH_RELATION_FAILED' code instead of being reported as a left-of
+ *     failure.
+ *   - The measured gap is stated whenever it is known, including when only a
+ *     maximum bound is present, so the first "gap is Npx" in the message is
+ *     always the observed value.
+ *   - Expected and observed values are attached as structured `metrics`
+ *     (observed wins on key collisions) and the subject selector as
+ *     `sourceRef.selector`, so consumers need not parse the message.
+ *
+ * @param proof - Failed relation proof from the solver / pipeline.
+ * @param deps - Injected dependencies; `idGen` supplies the diagnostic id.
+ * @returns A 'contract-failure' diagnostic for the proof's clause.
+ */
+export function buildRelationDiagnostic(
+  proof: RelationProof,
+  deps: { idGen(): string },
+): Diagnostic {
+  // Own-property lookup: names like "toString" must not resolve to members
+  // inherited from Object.prototype.
+  const mappedCode = Object.prototype.hasOwnProperty.call(RELATION_TO_CODE, proof.relation)
+    ? RELATION_TO_CODE[proof.relation]
+    : undefined;
+  const code = mappedCode ?? 'IMH_RELATION_FAILED';
+
+  const parts: string[] = [
+    `Relation "${proof.relation}" failed for subject "${proof.subjectSelector}" vs reference "${proof.referenceSelector}".`,
+  ];
+
+  const observedGap = proof.observed.observedGap;
+  const minGap = proof.expected.minGap;
+  const maxGap = proof.expected.maxGap;
+  if (observedGap !== undefined) {
+    if (minGap !== undefined) {
+      parts.push(`Measured gap is ${observedGap}px, but minimum required gap is ${minGap}px.`);
+    } else if (maxGap !== undefined) {
+      parts.push(`Measured gap is ${observedGap}px.`);
+    }
+    if (maxGap !== undefined) {
+      parts.push(`Maximum allowed gap is ${maxGap}px.`);
+    }
+  }
+
+  const subjectRect = fmtRect(proof.subjectRect);
+  const referenceRect = fmtRect(proof.referenceRect);
+
+  const related: RelatedFact[] = [
+    {
+      code: IMH_FACT_OBSERVED_GAP,
+      message: `Subject rect: [${subjectRect}]`,
+    },
+    {
+      code: IMH_FACT_OBSERVED_GAP,
+      message: `Reference rect: [${referenceRect}]`,
+    },
+    {
+      code: 'IMH_FRAME_CONTEXT',
+      message: `Frame: ${proof.frame}`,
+    },
+  ];
+
+  if (proof.failedPredicate) {
+    related.push({
+      code: 'IMH_PREDICATE_FAILED',
+      message: `Predicate ${proof.failedPredicate.op} failed: ${proof.failedPredicate.left} vs ${proof.failedPredicate.right}`,
+    });
+  }
+
+  return makeDiagnostic(
+    {
+      code,
+      category: 'contract-failure',
+      message: parts.join(' '),
+      clauseId: proof.clauseId,
+      related,
+      fixHints: [
+        `Check the layout in frame "${proof.frame}".`,
+        `Subject: ${proof.subjectSelector} at [${subjectRect}]`,
+        `Reference: ${proof.referenceSelector} at [${referenceRect}]`,
+      ],
+      metrics: { ...proof.expected, ...proof.observed },
+      sourceRef: { selector: proof.subjectSelector },
+    },
+    deps,
+  );
+}
+
+/** Format a rect as `left=… top=… right=… bottom=…` for diagnostic messages. */
+function fmtRect(r: { left: number; top: number; right: number; bottom: number }): string {
+  const { left, top, right, bottom } = r;
+  return [`left=${left}`, `top=${top}`, `right=${right}`, `bottom=${bottom}`].join(' ');
+}
+
+/**
+ * Create a diagnostic from a proof object.
+ * The proof shape is defined by the solver contract.
+ *
+ * Passing proofs produce no diagnostic. For failing proofs the category is
+ * 'indeterminate-result' when the proof's truth is 'indeterminate' and
+ * 'contract-failure' otherwise. The failed predicate and the witness's
+ * environment case / snapshot are attached as related facts when present.
+ *
+ * @param proof - Proof to convert.
+ * @param deps - Injected id generator plus per-clause code, message, and
+ *   fix-hint providers.
+ * @returns The diagnostic, or null when `proof.outcome` is not 'fail'.
+ */
+export function diagnosticFromProof(
+  proof: ProofLike,
+  deps: {
+    idGen(): string;
+    codeForClause(clauseId: string): string;
+    messageForClause(clauseId: string, proof: ProofLike): string;
+    fixHintsForClause(clauseId: string, proof: ProofLike): string[];
+  },
+): Diagnostic | null {
+  if (proof.outcome !== 'fail') {
+    return null;
+  }
+
+  const { clauseId, failedPredicate } = proof;
+  const envCaseId = proof.witness?.envCaseId;
+  const snapshotId = proof.witness?.snapshotId;
+
+  const related: RelatedFact[] = [];
+  if (failedPredicate) {
+    related.push({
+      code: 'IMH_PREDICATE_FAILED',
+      message: `Predicate ${failedPredicate.op} failed: ${failedPredicate.left} vs ${failedPredicate.right}`,
+    });
+  }
+  if (envCaseId) {
+    related.push({
+      code: 'IMH_WITNESS_ENV',
+      message: `Environment case: ${envCaseId}`,
+    });
+  }
+  if (snapshotId) {
+    related.push({
+      code: 'IMH_WITNESS_SNAPSHOT',
+      message: `Snapshot: ${snapshotId}`,
+    });
+  }
+
+  let category: DiagnosticCategory = 'contract-failure';
+  if (proof.truth === 'indeterminate') {
+    category = 'indeterminate-result';
+  }
+
+  return makeDiagnostic(
+    {
+      code: deps.codeForClause(clauseId),
+      category,
+      message: deps.messageForClause(clauseId, proof),
+      clauseId,
+      envCaseId,
+      snapshotId,
+      related,
+      fixHints: deps.fixHintsForClause(clauseId, proof),
+    },
+    deps,
+  );
+}
@@ -0,0 +1,260 @@
+/**
+ * Pattern-matching failure analyzer for Imhotep diagnostics.
+ *
+ * Maps failure codes + observed metrics to likely causes and concrete fixes.
+ * Fail-closed: unknown failures return undefined (no analysis attached).
+ *
+ * Inspired by Apophis cross-pollination research (analyzeFailure pipeline).
+ */
+
+/**
+ * Structured analysis attached to a diagnostic when a pattern rule matches.
+ */
+export interface FailureAnalysis {
+  /** Human-readable classification of the root cause. */
+  likelyCause: string;
+  /** Ordered list of concrete fixes the user can apply. */
+  suggestedFixes: string[];
+  /** Severity override from the rule (may differ from diagnostic severity). */
+  severity: 'error' | 'warning' | 'info';
+}
+
+/**
+ * Input context passed to each rule.  Rules inspect the failure code,
+ * the diagnostic message, and any metrics extracted from the proof.
+ */
+export interface FailureContext {
+  code: string;
+  message: string;
+  metrics: Record<string, number>;
+  category?: string;
+}
+
+/**
+ * A single pattern rule: predicate + analyzer factory.
+ */
+export interface FailureRule {
+  /** Unique rule identifier for debugging and telemetry. */
+  ruleId: string;
+  /** Returns true when this rule applies to the given context. */
+  matches(ctx: FailureContext): boolean;
+  /** Produces the analysis for a matched context. */
+  analyze(ctx: FailureContext): FailureAnalysis;
+}
+
+// ---------------------------------------------------------------------------
+// Built-in pattern rules
+// ---------------------------------------------------------------------------
+
+/**
+ * Read a metric by key.
+ *
+ * @returns The value when it is a number other than NaN; undefined when the
+ *   key is missing, the value is not a number, or it is NaN.
+ */
+function getMetric(metrics: Record<string, number>, key: string): number | undefined {
+  const value = metrics[key];
+  return typeof value === 'number' && !Number.isNaN(value) ? value : undefined;
+}
+
+/**
+ * Fallback for when metrics are absent: read the first `gap is <n>px` value
+ * (case-insensitive) out of a diagnostic message.
+ *
+ * @returns The parsed number, or undefined when the phrase is missing or the
+ *   captured text is not numeric.
+ */
+function parseGapFromMessage(message: string): number | undefined {
+  const captured = /gap\s+is\s+(-?[\d.]+)px/i.exec(message)?.[1];
+  if (captured === undefined) return undefined;
+  const gap = parseFloat(captured);
+  return Number.isNaN(gap) ? undefined : gap;
+}
+
+/**
+ * Fallback for when metrics are absent: read the first `width is <n>px`
+ * value (case-insensitive) out of a diagnostic message.
+ *
+ * @returns The parsed number, or undefined when the phrase is missing or the
+ *   captured text is not numeric.
+ */
+function parseWidthFromMessage(message: string): number | undefined {
+  const captured = /width\s+is\s+(-?[\d.]+)px/i.exec(message)?.[1];
+  if (captured === undefined) return undefined;
+  const width = parseFloat(captured);
+  return Number.isNaN(width) ? undefined : width;
+}
+
+/**
+ * Read the observed element count from a cardinality message of the form
+ * `resolved to <n> element…` (case-insensitive).
+ *
+ * @returns The count as an integer, or undefined when the phrase is absent.
+ */
+function parseCountFromMessage(message: string): number | undefined {
+  const digits = /resolved to\s+(\d+)\s+element/i.exec(message)?.[1];
+  return digits === undefined ? undefined : parseInt(digits, 10);
+}
+
+/**
+ * Left-of failures whose observed gap is negative, i.e. the two elements
+ * overlap horizontally. The gap comes from the `observedGap` metric, with the
+ * diagnostic message as fallback.
+ */
+const RULE_LEFT_OF_OVERLAP: FailureRule = {
+  ruleId: 'leftOfOverlap',
+  matches: ({ code, metrics, message }) => {
+    if (code !== 'IMH_RELATION_LEFT_OF_FAILED') return false;
+    const observedGap = getMetric(metrics, 'observedGap') ?? parseGapFromMessage(message);
+    return observedGap !== undefined && observedGap < 0;
+  },
+  analyze: () => ({
+    likelyCause: 'Horizontal overlap',
+    suggestedFixes: [
+      'Check that the subject element has a positive horizontal margin from the reference.',
+      'Verify neither element is using negative margins or float that causes overlap.',
+      'Consider adding clearfix or adjusting flex/grid gap settings.',
+    ],
+    severity: 'error',
+  }),
+};
+
+/**
+ * Inside-relation failures where the subject is wider or taller than its
+ * container according to the metrics, or where the message mentions
+ * "overflow" or "larger" (case-insensitive heuristic).
+ */
+const RULE_INSIDE_OVERFLOW: FailureRule = {
+  ruleId: 'insideOverflow',
+  matches: ({ code, metrics, message }) => {
+    if (code !== 'IMH_RELATION_INSIDE_FAILED') return false;
+
+    const exceeds = (subjectKey: string, containerKey: string): boolean => {
+      const subject = getMetric(metrics, subjectKey);
+      const container = getMetric(metrics, containerKey);
+      return subject !== undefined && container !== undefined && subject > container;
+    };
+    if (exceeds('subjectWidth', 'containerWidth') || exceeds('subjectHeight', 'containerHeight')) {
+      return true;
+    }
+
+    // Fallback: detect overflow from message heuristics
+    const lowered = message.toLowerCase();
+    return lowered.includes('overflow') || lowered.includes('larger');
+  },
+  analyze: () => ({
+    likelyCause: 'Overflow or box-sizing issue',
+    suggestedFixes: [
+      'Check padding on the container — subject may be larger than content-box allows.',
+      'Ensure box-sizing: border-box is applied so padding does not increase total size.',
+      'Verify the subject dimensions do not exceed the container inner width/height.',
+    ],
+    severity: 'error',
+  }),
+};
+
+/**
+ * Size-at-least failures where the observed width is below 44px. The width
+ * comes from the `observedWidth` metric, with the diagnostic message as
+ * fallback. Reported as a warning rather than an error.
+ */
+const RULE_SIZE_TOUCH_TARGET: FailureRule = {
+  ruleId: 'sizeTouchTarget',
+  matches: ({ code, metrics, message }) => {
+    if (code !== 'IMH_SIZE_AT_LEAST_FAILED') return false;
+    const observedWidth = getMetric(metrics, 'observedWidth') ?? parseWidthFromMessage(message);
+    return observedWidth !== undefined && observedWidth < 44;
+  },
+  analyze: () => ({
+    likelyCause: 'Touch target too small',
+    suggestedFixes: [
+      'Increase min-width to at least 44px to meet WCAG 2.5.5 / mobile accessibility guidelines.',
+      'Check that padding or border is not collapsing the clickable area.',
+      'Consider using a larger font size or icon scale if the element is text-based.',
+    ],
+    severity: 'warning',
+  }),
+};
+
+/**
+ * Exactly-one failures where nothing matched (observed count is 0). The count
+ * comes from the `observedCount` metric, with the diagnostic message as
+ * fallback.
+ */
+const RULE_CARDINALITY_MISSING: FailureRule = {
+  ruleId: 'cardinalityMissing',
+  matches: ({ code, metrics, message }) => {
+    if (code !== 'IMH_CARDINALITY_EXACTLYONE_FAILED') return false;
+    const observedCount = getMetric(metrics, 'observedCount') ?? parseCountFromMessage(message);
+    return observedCount === 0;
+  },
+  analyze: () => ({
+    likelyCause: 'Missing element',
+    suggestedFixes: [
+      'Verify the selector matches an element that is actually rendered in the DOM.',
+      'Check for conditional rendering that may hide the element in this state.',
+      'Use ui.extract(selector) to debug what the page currently contains.',
+    ],
+    severity: 'error',
+  }),
+};
+
+/**
+ * Exactly-one failures where more than one element matched. The count comes
+ * from the `observedCount` metric, with the diagnostic message as fallback.
+ */
+const RULE_CARDINALITY_DUPLICATE: FailureRule = {
+  ruleId: 'cardinalityDuplicate',
+  matches: ({ code, metrics, message }) => {
+    if (code !== 'IMH_CARDINALITY_EXACTLYONE_FAILED') return false;
+    const observedCount = getMetric(metrics, 'observedCount') ?? parseCountFromMessage(message);
+    return observedCount !== undefined && observedCount > 1;
+  },
+  analyze: () => ({
+    likelyCause: 'Duplicate matches',
+    suggestedFixes: [
+      'Check for repeated data-testid or class names across sibling elements.',
+      'Scope the selector more tightly (e.g. add a parent prefix).',
+      'Use a semantic selector (getByRole, getByLabelText) for disambiguation.',
+    ],
+    severity: 'error',
+  }),
+};
+
+/**
+ * Any diagnostic with code IMH_SELECTOR_ZERO_MATCHES; metrics and message
+ * are not consulted.
+ */
+const RULE_SELECTOR_ZERO_MATCHES: FailureRule = {
+  ruleId: 'selectorZeroMatches',
+  matches: ({ code }) => code === 'IMH_SELECTOR_ZERO_MATCHES',
+  analyze: () => ({
+    likelyCause: 'Selector not found',
+    suggestedFixes: [
+      'Use ui.extract(selector) to verify the selector resolves to at least one element.',
+      'Check that the selector is valid CSS and the element is in the DOM.',
+      'For semantic selectors, confirm the accessible name or role is correct.',
+    ],
+    severity: 'error',
+  }),
+};
+
+/**
+ * Default rule set shipped with V1.0.
+ *
+ * Order matters: `analyzeFailure` returns the analysis of the first rule
+ * whose `matches` returns true, so earlier entries take precedence.
+ */
+export const DEFAULT_FAILURE_RULES: FailureRule[] = [
+  RULE_LEFT_OF_OVERLAP,
+  RULE_INSIDE_OVERFLOW,
+  RULE_SIZE_TOUCH_TARGET,
+  RULE_CARDINALITY_MISSING,
+  RULE_CARDINALITY_DUPLICATE,
+  RULE_SELECTOR_ZERO_MATCHES,
+];
+
+// ---------------------------------------------------------------------------
+// Analyzer engine
+// ---------------------------------------------------------------------------
+
+/**
+ * Analyze a failure context against a set of rules.
+ *
+ * Rules are tried in array order and the first one that matches produces the
+ * analysis; later rules are not evaluated.
+ *
+ * @param ctx - Failure context (code, message, metrics).
+ * @param rules - Rule set to evaluate (defaults to the built-in rules).
+ * @returns The FailureAnalysis of the first matching rule, or undefined when
+ *   none matches (fail-closed).
+ */
+export function analyzeFailure(
+  ctx: FailureContext,
+  rules: FailureRule[] = DEFAULT_FAILURE_RULES,
+): FailureAnalysis | undefined {
+  const firstMatch = rules.find((rule) => rule.matches(ctx));
+  return firstMatch?.analyze(ctx);
+}
+
+/**
+ * Convenience wrapper: analyze a diagnostic-like object and attach the result
+ * as an `analysis` property.
+ *
+ * Missing `metrics` are treated as an empty object. The input is never
+ * mutated: a shallow copy carrying `analysis` is returned when a rule
+ * matches, and the original object itself is returned otherwise.
+ *
+ * @param diagnostic - Any object with code, message, and optional metrics/category.
+ * @param rules - Rule set to evaluate (defaults to the built-in rules).
+ * @returns The diagnostic, with `analysis` added when a rule matches.
+ */
+export function attachFailureAnalysis<T extends { code: string; message: string; metrics?: Record<string, number>; category?: string }>(
+  diagnostic: T,
+  rules: FailureRule[] = DEFAULT_FAILURE_RULES,
+): T & { analysis?: FailureAnalysis } {
+  const analysis = analyzeFailure(
+    {
+      code: diagnostic.code,
+      message: diagnostic.message,
+      metrics: diagnostic.metrics ?? {},
+      category: diagnostic.category,
+    },
+    rules,
+  );
+  return analysis ? { ...diagnostic, analysis } : diagnostic;
+}
@@ -0,0 +1,120 @@
+/**
+ * Human-readable reporter output.
+ *
+ * Turns diagnostics, traces, and shrink results into plain text
+ * suitable for terminal reading.
+ */
+
+import type { Diagnostic } from './diagnostics.js';
+import type { TraceEvent } from './traces.js';
+import type { ShrinkResult } from './shrink.js';
+
+/**
+ * Options for human formatting.
+ * Every field is optional; callers opt in to the extra sections they want.
+ */
+export interface HumanFormatOptions {
+  /**
+   * List matching trace events under each diagnostic. Only diagnostics that
+   * carry a `traceRef` get a trace section.
+   */
+  showTraces?: boolean;
+  /**
+   * Append the shrink summary for a diagnostic whose `clauseId` has an entry
+   * in the shrink-result map.
+   */
+  showShrink?: boolean;
+  /**
+   * Max related facts to print per diagnostic (defaults to 5); any excess is
+   * summarised as a count.
+   */
+  maxRelated?: number;
+}
+
+/**
+ * Render a list of diagnostics into one newline-joined, human-readable string.
+ *
+ * Each diagnostic is rendered in input order. When enabled through `opts`,
+ * its shrink summary and a `trace:` section follow it. Trace events are
+ * selected by `traceEventId` equal to the diagnostic's `traceRef`, or by
+ * `refs.diagnosticId` equal to its `diagnosticId`; the lookup is only
+ * attempted for diagnostics that have a `traceRef`.
+ */
+export function renderHumanReport(
+  diagnostics: Diagnostic[],
+  traces: readonly TraceEvent[],
+  shrinkResults: Map<string, ShrinkResult>,
+  opts: HumanFormatOptions = {},
+): string {
+  const sections: string[] = [];
+
+  for (const diag of diagnostics) {
+    sections.push(renderDiagnostic(diag, opts));
+
+    if (opts.showShrink && diag.clauseId && shrinkResults.has(diag.clauseId)) {
+      sections.push(renderShrink(shrinkResults.get(diag.clauseId)!));
+    }
+
+    if (!opts.showTraces || !diag.traceRef) {
+      continue;
+    }
+
+    const matching = traces.filter(
+      (event) =>
+        event.traceEventId === diag.traceRef ||
+        event.refs.diagnosticId === diag.diagnosticId,
+    );
+    if (matching.length === 0) {
+      continue;
+    }
+
+    sections.push(
+      '  trace:',
+      ...matching.map((event) => `    ${event.phase} at ${event.at}`),
+    );
+  }
+
+  return sections.join('\n');
+}
+
+/**
+ * Render a single diagnostic in human form.
+ *
+ * Layout: a severity icon with the message, the code, then optional
+ * position, related facts (capped at `opts.maxRelated`, default 5, with an
+ * overflow count), fix hints, and the suggested fix.
+ */
+export function renderDiagnostic(
+  d: Diagnostic,
+  opts: HumanFormatOptions = {},
+): string {
+  // Anything that is neither an error nor a warning gets the info icon.
+  let icon = 'ℹ';
+  if (d.severity === 'error') {
+    icon = '✖';
+  } else if (d.severity === 'warning') {
+    icon = '⚠';
+  }
+
+  const out: string[] = [`${icon} ${d.message}`, `  ${d.code}`];
+
+  if (d.position) {
+    const { line, column } = d.position.start;
+    out.push(`  at line ${line}, column ${column}`);
+  }
+
+  const limit = opts.maxRelated ?? 5;
+  const relatedCount = d.related.length;
+  if (relatedCount > 0) {
+    out.push('  related:');
+    out.push(...d.related.slice(0, limit).map((fact) => `    • ${fact.message}`));
+    if (relatedCount > limit) {
+      out.push(`    … and ${relatedCount - limit} more`);
+    }
+  }
+
+  if (d.fixHints.length > 0) {
+    out.push('  hints:', ...d.fixHints.map((hint) => `    → ${hint}`));
+  }
+
+  const fix = d.suggestedFix;
+  if (fix) {
+    out.push(
+      '  suggested fix:',
+      `    action: ${fix.action}`,
+      `    target: ${fix.target}`,
+      `    value: ${fix.value}`,
+      `    rationale: ${fix.rationale}`,
+    );
+  }
+
+  return out.join('\n');
+}
+
+/**
+ * Render a shrink result summary: the reduced axes and step count when the
+ * witness was reduced, otherwise a single "no reduction" line.
+ */
+export function renderShrink(result: ShrinkResult): string {
+  const detail = result.reduced
+    ? [
+        `    reduced across: ${result.axes.join(', ')}`,
+        `    steps: ${result.steps}`,
+      ]
+    : ['    no reduction possible'];
+  return ['  shrink:', ...detail].join('\n');
+}
@@ -0,0 +1,133 @@
+/**
+ * imhotep-reporter
+ *
+ * Diagnostics, traceability, and witness shrinking for Imhotep.
+ * Makes failures explainable with rich diagnostics, trace chains,
+ * and minimal failing witnesses.
+ */
+
+// Trace event model
+export {
+  createTraceBuilder,
+  findClauseTraces,
+  traceChainForProof,
+} from './traces.js';
+export type {
+  TracePhase,
+  TraceRefs,
+  TraceEvent,
+  TraceBuilder,
+} from './traces.js';
+
+// Diagnostic objects and formatting
+export {
+  makeDiagnostic,
+  formatDiagnosticCompact,
+  formatDiagnosticFull,
+  diagnosticFromProof,
+} from './diagnostics.js';
+export type {
+  Severity,
+  DiagnosticCategory,
+  Position,
+  RelatedFact,
+  FixHint,
+  Diagnostic,
+  DiagnosticBlueprint,
+  ProofLike,
+  PropertyDiagnostic,
+  PropertyDiagnosticBlueprint,
+  SourceRef,
+  SuggestedFix,
+} from './diagnostics.js';
+export {
+  makePropertyDiagnostic,
+  buildRelationDiagnostic,
+} from './diagnostics.js';
+export type {
+  RelationProof,
+} from './diagnostics.js';
+
+// Diagnostic code registry
+export {
+  lookupCode,
+  listCodes,
+  formatMessage,
+} from './codes.js';
+export type {
+  CodeEntry,
+  Severity as CodeSeverity,
+  Category as CodeCategory,
+} from './codes.js';
+
+// Witness shrinking
+export { shrinkWitness } from './shrink.js';
+export type {
+  Witness,
+  ShrinkResult,
+  ShrinkAxis,
+  StillFails,
+} from './shrink.js';
+
+// Property-run diagnostics and replay (V1.1)
+export {
+  buildPropertyDiagnostics,
+  buildEnumeratedPropertyDiagnostics,
+  buildShrunkPropertyDiagnostics,
+} from './property-diagnostics.js';
+
+export {
+  buildReplayMetadata,
+  buildReplayScript,
+  buildReplayFromResult,
+  replay,
+} from './replay.js';
+export type {
+  ReplayMetadata,
+  ReplayScript,
+  RunResult,
+  ReplayExecutor,
+} from './replay.js';
+
+// Human-readable reporter
+export { renderHumanReport, renderDiagnostic, renderShrink } from './human.js';
+export type { HumanFormatOptions } from './human.js';
+
+// JSON reporter
+export { renderJsonReport, buildJsonReport } from './json.js';
+export type {
+  JsonFormatOptions,
+  JsonReport,
+  JsonDiagnostic,
+} from './json.js';
+
+// Suggestion engine
+export {
+  createDefaultSuggestionRegistry,
+  attachSuggestions,
+} from './suggestions.js';
+export type { Suggestion, SuggestionRegistry } from './suggestions.js';
+
+// Canonical failure formatter (Workstream J)
+export {
+  formatCanonical,
+  renderCanonicalText,
+  renderCanonicalJson,
+} from './canonical-formatter.js';
+export type {
+  CanonicalFailureEntry,
+  CanonicalReport,
+  CanonicalFormatOptions,
+} from './canonical-formatter.js';
+
+// Failure analyzer (Workstream K)
+export {
+  analyzeFailure,
+  attachFailureAnalysis,
+  DEFAULT_FAILURE_RULES,
+} from './failure-analyzer.js';
+export type {
+  FailureAnalysis,
+  FailureContext,
+  FailureRule,
+} from './failure-analyzer.js';
@@ -0,0 +1,132 @@
+/**
+ * JSON reporter output.
+ *
+ * Produces a structured JSON representation of the full evaluation
+ * result for CI systems, editors, and other tooling.
+ */
+
+import type { Diagnostic, SourceRef, SuggestedFix } from './diagnostics.js';
+import type { TraceEvent } from './traces.js';
+import type { ShrinkResult } from './shrink.js';
+
+/**
+ * Options for JSON formatting.
+ */
+export interface JsonFormatOptions {
+  /**
+   * Indent size passed to `JSON.stringify`; `null` means no pretty-printing.
+   * When omitted, `renderJsonReport` uses 2.
+   */
+  indent?: number | null;
+  /** Include trace events in the output (as the `traces` field). */
+  includeTraces?: boolean;
+  /**
+   * Include shrink details (as the `shrinkResults` field). The field is only
+   * emitted when the shrink-result map is non-empty.
+   */
+  includeShrink?: boolean;
+}
+
+/**
+ * The top-level JSON report shape.
+ */
+export interface JsonReport {
+  /** Report format version; currently always the literal 1. */
+  version: 1;
+  /** Diagnostic counts: the total, plus one tally per severity value. */
+  summary: {
+    totalDiagnostics: number;
+    /** Diagnostics whose severity is 'error'. */
+    errorCount: number;
+    /** Diagnostics whose severity is 'warning'. */
+    warningCount: number;
+    /** Diagnostics whose severity is 'info'. */
+    infoCount: number;
+  };
+  /** One serialized entry per input diagnostic, in input order. */
+  diagnostics: JsonDiagnostic[];
+  /** Copy of the trace events; present only when `includeTraces` is set. */
+  traces?: TraceEvent[];
+  /**
+   * Shrink results keyed by the same keys as the input map; present only
+   * when `includeShrink` is set and the map is non-empty.
+   */
+  shrinkResults?: Record<string, ShrinkResult>;
+}
+
+/**
+ * A diagnostic serialized for JSON.
+ *
+ * Every field is copied one-for-one from the `Diagnostic` field of the same
+ * name by `diagnosticToJson`; nothing is renamed or reformatted.
+ */
+export interface JsonDiagnostic {
+  diagnosticId: string;
+  code: string;
+  // Widened to string here; the summary counts recognise 'error', 'warning' and 'info'.
+  severity: string;
+  category: string;
+  message: string;
+  position?: Diagnostic['position'];
+  source?: string;
+  // Optional identifiers copied from the diagnostic when present.
+  clauseId?: string;
+  sceneId?: string;
+  snapshotId?: string;
+  envCaseId?: string;
+  related: { code?: string; message: string; position?: Diagnostic['position'] }[];
+  traceRef?: string;
+  fixHints: string[];
+  metrics: Record<string, number>;
+  sourceRef: SourceRef;
+  suggestedFix?: SuggestedFix;
+}
+
+/**
+ * Render the complete report as a JSON string.
+ *
+ * Indentation follows `opts.indent`: `null` produces compact output, a
+ * number is used as given, and an omitted value falls back to 2 spaces.
+ */
+export function renderJsonReport(
+  diagnostics: Diagnostic[],
+  traces: readonly TraceEvent[],
+  shrinkResults: Map<string, ShrinkResult>,
+  opts: JsonFormatOptions = {},
+): string {
+  let space: number | undefined;
+  if (opts.indent !== null) {
+    space = opts.indent ?? 2;
+  }
+  return JSON.stringify(
+    buildJsonReport(diagnostics, traces, shrinkResults, opts),
+    null,
+    space,
+  );
+}
+
+/**
+ * Build the report object without stringifying.
+ *
+ * The summary tallies diagnostics per severity. `traces` (a copy of the
+ * input) is attached only when `opts.includeTraces` is set, and
+ * `shrinkResults` only when `opts.includeShrink` is set and the map has
+ * at least one entry.
+ */
+export function buildJsonReport(
+  diagnostics: Diagnostic[],
+  traces: readonly TraceEvent[],
+  shrinkResults: Map<string, ShrinkResult>,
+  opts: JsonFormatOptions = {},
+): JsonReport {
+  let errorCount = 0;
+  let warningCount = 0;
+  let infoCount = 0;
+  for (const { severity } of diagnostics) {
+    if (severity === 'error') {
+      errorCount += 1;
+    } else if (severity === 'warning') {
+      warningCount += 1;
+    } else if (severity === 'info') {
+      infoCount += 1;
+    }
+  }
+
+  const wantShrink = opts.includeShrink && shrinkResults.size > 0;
+
+  return {
+    version: 1,
+    summary: {
+      totalDiagnostics: diagnostics.length,
+      errorCount,
+      warningCount,
+      infoCount,
+    },
+    diagnostics: diagnostics.map((entry) => diagnosticToJson(entry)),
+    ...(opts.includeTraces ? { traces: traces.slice() } : {}),
+    ...(wantShrink ? { shrinkResults: Object.fromEntries(shrinkResults) } : {}),
+  };
+}
+
+/**
+ * Project a Diagnostic onto the JsonDiagnostic shape.
+ *
+ * Only the listed fields are carried over, in a fixed key order. Values are
+ * passed through by reference (arrays and records are not cloned).
+ */
+function diagnosticToJson(d: Diagnostic): JsonDiagnostic {
+  const {
+    diagnosticId,
+    code,
+    severity,
+    category,
+    message,
+    position,
+    source,
+    clauseId,
+    sceneId,
+    snapshotId,
+    envCaseId,
+    related,
+    traceRef,
+    fixHints,
+    metrics,
+    sourceRef,
+    suggestedFix,
+  } = d;
+
+  return {
+    diagnosticId,
+    code,
+    severity,
+    category,
+    message,
+    position,
+    source,
+    clauseId,
+    sceneId,
+    snapshotId,
+    envCaseId,
+    related,
+    traceRef,
+    fixHints,
+    metrics,
+    sourceRef,
+    suggestedFix,
+  };
+}
@@ -0,0 +1,247 @@
+/**
+ * Tests for property-run diagnostics and render-input shrinking.
+ *
+ * Verifies that sampled and enumerated results produce rich diagnostics
+ * with replay metadata, and that shrinking works across render-input axes.
+ */
+
+import { describe, it } from 'node:test';
+import assert from 'node:assert';
+
+import type { SampledRunResult, EnumeratedRunResult } from 'imhotep-core/property-results';
+import {
+  buildPropertyDiagnostics,
+  buildEnumeratedPropertyDiagnostics,
+  buildShrunkPropertyDiagnostics,
+} from './property-diagnostics.js';
+import { shrinkWitness, type Witness } from './shrink.js';
+
+// Counter behind the deterministic fake ids handed to the builders under test.
+let _id = 0;
+/** Returns `id_1`, `id_2`, … on successive calls. */
+function fakeId(): string {
+  _id += 1;
+  return `id_${_id}`;
+}
+
+// ---------------------------------------------------------------------------
+// Property Diagnostics Tests
+// ---------------------------------------------------------------------------
+
+describe('property diagnostics', () => {
+  // A passing sampled run yields exactly one pass diagnostic carrying the seed.
+  it('builds pass diagnostic for sampled run', () => {
+    const result: SampledRunResult = {
+      mode: 'sampled',
+      seed: 42,
+      numRuns: 100,
+      passed: true,
+      diagnostics: [],
+    };
+
+    const diagnostics = buildPropertyDiagnostics(result, { idGen: fakeId });
+    assert.strictEqual(diagnostics.length, 1);
+    assert.strictEqual(diagnostics[0].code, 'IMH_PROPERTY_PASSED');
+    assert.strictEqual(diagnostics[0].seed, 42);
+    assert.strictEqual(diagnostics[0].runResult, 'pass');
+  });
+
+  // A failing sampled run yields the primary failure plus a replay receipt.
+  it('builds failure diagnostic with counterexample', () => {
+    const result: SampledRunResult = {
+      mode: 'sampled',
+      seed: 123,
+      numRuns: 50,
+      passed: false,
+      counterexample: { size: 'sm', disabled: true },
+      shrunkCounterexample: { size: 'sm', disabled: true },
+      diagnostics: [],
+    };
+
+    const diagnostics = buildPropertyDiagnostics(result, { idGen: fakeId });
+    assert.strictEqual(diagnostics.length, 2); // primary + replay
+
+    const primary = diagnostics.find((d) => d.code === 'IMH_PROPERTY_FAILED');
+    assert.ok(primary);
+    assert.strictEqual(primary!.seed, 123);
+    assert.strictEqual(primary!.runResult, 'fail');
+    assert.deepStrictEqual(primary!.counterexample, { size: 'sm', disabled: true });
+    assert.ok(primary!.message.includes('seed 123'));
+
+    // The second entry must be the replay receipt, carrying the same seed.
+    const replayDiag = diagnostics.find((d) => d.code === 'IMH_PROPERTY_REPLAY');
+    assert.ok(replayDiag);
+    assert.strictEqual(replayDiag!.seed, 123);
+    assert.strictEqual(replayDiag!.runResult, 'fail');
+  });
+
+  // Enumerated runs carry no seed; a pass produces one summary diagnostic.
+  it('builds enumerated pass diagnostic', () => {
+    const result: EnumeratedRunResult = {
+      mode: 'enumerated-determinate',
+      totalCases: 6,
+      passed: true,
+      diagnostics: [],
+    };
+
+    const diagnostics = buildEnumeratedPropertyDiagnostics(result, { idGen: fakeId });
+    assert.strictEqual(diagnostics.length, 1);
+    assert.strictEqual(diagnostics[0].code, 'IMH_ENUMERATED_PASSED');
+    assert.strictEqual(diagnostics[0].runResult, 'pass');
+  });
+
+  // The failing case is exposed both as inputSnapshot and inside the message.
+  it('builds enumerated failure diagnostic with failing case', () => {
+    const result: EnumeratedRunResult = {
+      mode: 'enumerated-determinate',
+      totalCases: 6,
+      passed: false,
+      failingCase: { size: 'lg' },
+      diagnostics: [],
+    };
+
+    const diagnostics = buildEnumeratedPropertyDiagnostics(result, { idGen: fakeId });
+    assert.strictEqual(diagnostics.length, 1);
+    assert.strictEqual(diagnostics[0].code, 'IMH_ENUMERATED_FAILED');
+    assert.strictEqual(diagnostics[0].runResult, 'fail');
+    assert.deepStrictEqual(diagnostics[0].inputSnapshot, { size: 'lg' });
+    assert.ok(diagnostics[0].message.includes('lg'));
+  });
+
+  // The shrunk diagnostic is appended to the regular failure diagnostics.
+  it('builds shrunk diagnostic with shrunk input', () => {
+    const result: SampledRunResult = {
+      mode: 'sampled',
+      seed: 7,
+      numRuns: 100,
+      passed: false,
+      counterexample: { size: 'sm', disabled: true, label: 'hello world' },
+      diagnostics: [],
+    };
+
+    const shrunkInput = { size: 'sm', disabled: true };
+    const diagnostics = buildShrunkPropertyDiagnostics(result, shrunkInput, { idGen: fakeId });
+
+    // The base failure diagnostic must survive alongside the shrink entry.
+    assert.ok(diagnostics.some((d) => d.code === 'IMH_PROPERTY_FAILED'));
+
+    const shrunkDiag = diagnostics.find((d) => d.code === 'IMH_PROPERTY_SHRUNK');
+    assert.ok(shrunkDiag);
+    assert.deepStrictEqual(shrunkDiag!.shrunkCounterexample, shrunkInput);
+    assert.ok(shrunkDiag!.message.includes('sm'));
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Render-Input Shrink Tests
+// ---------------------------------------------------------------------------
+
+describe('shrink across render inputs', () => {
+  /** Baseline single-element witness; `extra` supplies the axis under test. */
+  const witnessWith = (extra: Partial<Witness>): Witness => ({
+    proof: {
+      proofId: 'p1',
+      clauseId: 'c1',
+      outcome: 'fail',
+      truth: 'determinate',
+    },
+    envCases: ['env_1'],
+    snapshots: ['default'],
+    subjects: [1],
+    facts: [100],
+    ...extra,
+  });
+
+  it('shrinks render props when redundant', () => {
+    const start = witnessWith({
+      renderProps: { size: 'sm', disabled: true, variant: 'primary' },
+    });
+
+    // Only 'disabled' matters for failure
+    const { reduced, axes, witness } = shrinkWitness(
+      start,
+      (w: Witness) => w.renderProps?.disabled === true,
+    );
+
+    assert.strictEqual(reduced, true);
+    assert.ok(axes.includes('prop'));
+    assert.deepStrictEqual(witness.renderProps, { disabled: true });
+  });
+
+  it('shrinks render args independently', () => {
+    const start = witnessWith({
+      renderArgs: { label: 'ok', icon: 'check', tone: 'positive' },
+    });
+
+    // Only the 'icon' arg keeps the failure alive
+    const { reduced, axes, witness } = shrinkWitness(
+      start,
+      (w: Witness) => w.renderArgs?.icon === 'check',
+    );
+
+    assert.strictEqual(reduced, true);
+    assert.ok(axes.includes('arg'));
+    assert.deepStrictEqual(witness.renderArgs, { icon: 'check' });
+  });
+
+  it('shrinks query params', () => {
+    const start = witnessWith({
+      queryParams: { theme: 'dark', debug: '1', version: '2' },
+    });
+
+    // Only the 'theme' param keeps the failure alive
+    const { reduced, axes, witness } = shrinkWitness(
+      start,
+      (w: Witness) => w.queryParams?.theme === 'dark',
+    );
+
+    assert.strictEqual(reduced, true);
+    assert.ok(axes.includes('query-param'));
+    assert.deepStrictEqual(witness.queryParams, { theme: 'dark' });
+  });
+
+  it('shrinks fixture inputs', () => {
+    const start = witnessWith({
+      fixtureInputs: { count: 5, layout: 'grid', animated: true },
+    });
+
+    // Only the 'layout' input keeps the failure alive
+    const { reduced, axes, witness } = shrinkWitness(
+      start,
+      (w: Witness) => w.fixtureInputs?.layout === 'grid',
+    );
+
+    assert.strictEqual(reduced, true);
+    assert.ok(axes.includes('fixture-input'));
+    assert.deepStrictEqual(witness.fixtureInputs, { layout: 'grid' });
+  });
+
+  it('does not shrink when all render props are required', () => {
+    const start = witnessWith({ renderProps: { a: 1, b: 2 } });
+
+    // All axes are required: removing anything causes failure to disappear
+    const everythingRequired = (w: Witness) =>
+      w.envCases.length === 1 &&
+      w.snapshots.length === 1 &&
+      w.subjects.length === 1 &&
+      w.facts.length === 1 &&
+      Object.keys(w.renderProps ?? {}).length === 2;
+
+    const { reduced, witness } = shrinkWitness(start, everythingRequired);
+
+    assert.strictEqual(reduced, false);
+    assert.deepStrictEqual(witness.renderProps, { a: 1, b: 2 });
+  });
+});
@@ -0,0 +1,151 @@
+/**
+ * Property-run diagnostics builder.
+ *
+ * Converts sampled and enumerated run results into rich diagnostics
+ * with full replay metadata: seed, run index, input snapshot.
+ *
+ * These diagnostics are what Riley sees in the CI failure report.
+ * Every seed, every shrink step, every counterexample is inspectable.
+ */
+
+import type { SampledRunResult, EnumeratedRunResult } from 'imhotep-core/property-results'
+import {
+  makePropertyDiagnostic,
+  type PropertyDiagnostic,
+  type PropertyDiagnosticBlueprint,
+} from './diagnostics.js';
+
+// ---------------------------------------------------------------------------
+// Property Diagnostics Builder
+// ---------------------------------------------------------------------------
+
+/**
+ * Options shared by the property-diagnostics builders in this module.
+ * The whole object is forwarded unchanged to `makePropertyDiagnostic`.
+ */
+export interface PropertyDiagnosticsOptions {
+  /** Id generator; presumably called once per diagnostic created — confirm in diagnostics.js. */
+  idGen(): string;
+  /**
+   * Optional formatter for input snapshots (e.g., JSON.stringify).
+   * When omitted, the builders fall back to `JSON.stringify`.
+   */
+  formatInput?(input: unknown): string;
+}
+
+/**
+ * Build the PropertyDiagnostics for a SampledRunResult.
+ *
+ * A passing run yields a single `IMH_PROPERTY_PASSED` diagnostic. A failing
+ * run yields two, in this order:
+ *   1. `IMH_PROPERTY_FAILED`: the primary diagnostic. When a counterexample
+ *      is present it is appended to the message (via `options.formatInput`,
+ *      else `JSON.stringify`) and stored as `inputSnapshot`.
+ *   2. `IMH_PROPERTY_REPLAY`: a receipt with the seed and run count.
+ *
+ * NOTE(review): the pass diagnostic is filed under 'contract-failure' and
+ * the replay receipt under 'internal-error'; confirm these categories are
+ * intended.
+ */
+export function buildPropertyDiagnostics(
+  result: SampledRunResult,
+  options: PropertyDiagnosticsOptions,
+): PropertyDiagnostic[] {
+  const { seed, numRuns } = result;
+
+  if (result.passed) {
+    return [
+      makePropertyDiagnostic({
+        code: 'IMH_PROPERTY_PASSED',
+        category: 'contract-failure',
+        message: `Property passed after ${numRuns} sampled runs (seed ${seed}).`,
+        seed,
+        runResult: 'pass',
+      }, options),
+    ];
+  }
+
+  const failure: PropertyDiagnosticBlueprint = {
+    code: 'IMH_PROPERTY_FAILED',
+    category: 'contract-failure',
+    message: `Property failed after ${numRuns} sampled runs (seed ${seed}).`,
+    seed,
+    runResult: 'fail',
+    counterexample: result.counterexample,
+    shrunkCounterexample: result.shrunkCounterexample,
+  };
+
+  // Only enrich the message and snapshot when a counterexample was captured.
+  if (result.counterexample !== undefined) {
+    const rendered = options.formatInput
+      ? options.formatInput(result.counterexample)
+      : JSON.stringify(result.counterexample);
+    failure.message += ` Counterexample: ${rendered}`;
+    failure.inputSnapshot = result.counterexample;
+  }
+
+  // Array literal elements evaluate left to right, so the primary
+  // diagnostic is created (and receives its id) before the replay receipt.
+  return [
+    makePropertyDiagnostic(failure, options),
+    makePropertyDiagnostic({
+      code: 'IMH_PROPERTY_REPLAY',
+      category: 'internal-error',
+      message: `Replay: seed=${seed}, numRuns=${numRuns}`,
+      seed,
+      runResult: 'fail',
+    }, options),
+  ];
+}
+
+/**
+ * Build diagnostics from an EnumeratedRunResult.
+ *
+ * Always returns exactly one diagnostic: `IMH_ENUMERATED_PASSED` on
+ * success, otherwise `IMH_ENUMERATED_FAILED`. The failure carries the
+ * failing case value as `inputSnapshot` and embeds its formatted form in
+ * the message (`'unknown'` when no failing case was recorded). No case
+ * index is reported.
+ */
+export function buildEnumeratedPropertyDiagnostics(
+  result: EnumeratedRunResult,
+  options: PropertyDiagnosticsOptions,
+): PropertyDiagnostic[] {
+  if (result.passed) {
+    return [
+      makePropertyDiagnostic({
+        code: 'IMH_ENUMERATED_PASSED',
+        category: 'contract-failure',
+        message: `Enumerated property passed all ${result.totalCases} cases.`,
+        runResult: 'pass',
+      }, options),
+    ];
+  }
+
+  let failingCaseStr = 'unknown';
+  if (result.failingCase !== undefined) {
+    failingCaseStr = options.formatInput
+      ? options.formatInput(result.failingCase)
+      : JSON.stringify(result.failingCase);
+  }
+
+  return [
+    makePropertyDiagnostic({
+      code: 'IMH_ENUMERATED_FAILED',
+      category: 'contract-failure',
+      message: `Enumerated property failed at case ${failingCaseStr} out of ${result.totalCases}.`,
+      runResult: 'fail',
+      inputSnapshot: result.failingCase,
+    }, options),
+  ];
+}
+
+// ---------------------------------------------------------------------------
+// Shrink-Aware Diagnostics
+// ---------------------------------------------------------------------------
+
+/**
+ * Build diagnostics that include shrink results.
+ *
+ * Returns everything `buildPropertyDiagnostics` produces for `result`,
+ * followed by one `IMH_PROPERTY_SHRUNK` diagnostic describing the smallest
+ * still-failing input.
+ *
+ * The shrink diagnostic is appended unconditionally with `runResult: 'fail'`;
+ * `result.passed` is not consulted here, so callers should only pass
+ * failing results.
+ */
+export function buildShrunkPropertyDiagnostics(
+  result: SampledRunResult,
+  shrunkInput: unknown,
+  options: PropertyDiagnosticsOptions,
+): PropertyDiagnostic[] {
+  const baseDiagnostics = buildPropertyDiagnostics(result, options);
+
+  let rendered: string;
+  if (options.formatInput) {
+    rendered = options.formatInput(shrunkInput);
+  } else {
+    rendered = JSON.stringify(shrunkInput);
+  }
+
+  return baseDiagnostics.concat(
+    makePropertyDiagnostic({
+      code: 'IMH_PROPERTY_SHRUNK',
+      category: 'contract-failure',
+      message: `Shrunk counterexample: ${rendered}`,
+      seed: result.seed,
+      runResult: 'fail',
+      shrunkCounterexample: shrunkInput,
+    }, options),
+  );
+}
@@ -0,0 +1,148 @@
+/**
+ * Tests for executable replay.
+ *
+ * Verifies that replay metadata can be fed back into a replay executor
+ * and reproduce the same failure deterministically.
+ */
+
+import { describe, it } from 'node:test';
+import assert from 'node:assert';
+import {
+  replay,
+  buildReplayMetadata,
+  type ReplayMetadata,
+  type ReplayExecutor,
+  type RunResult,
+} from './replay.js';
+import { makeDiagnostic, type Diagnostic, type ProofLike } from './diagnostics.js';
+
+// Counter behind the deterministic fake ids used by the replay diagnostics.
+let _id = 0;
+/** Returns `id_1`, `id_2`, … on successive calls. */
+function fakeId(): string {
+  _id += 1;
+  return `id_${_id}`;
+}
+
+describe('replay', () => {
+  /** Narrow an opaque evaluation result to "has status 'fail'" without `any`. */
+  const hasFailStatus = (r: unknown): boolean =>
+    typeof r === 'object' &&
+    r !== null &&
+    (r as { status?: unknown }).status === 'fail';
+
+  /** Failure-to-diagnostic mapping shared by the fail and pass scenarios. */
+  const toReplayFailure: ReplayExecutor['toDiagnostic'] = (_r, proof) =>
+    makeDiagnostic(
+      {
+        code: 'IMH_REPLAY_FAIL',
+        category: 'contract-failure',
+        message: `Replayed failure for clause ${proof.clauseId}`,
+      },
+      { idGen: fakeId },
+    );
+
+  // One failing result paired with one proof → one diagnostic, passed=false.
+  it('replays a failing run and produces the same failure', async () => {
+    const metadata: ReplayMetadata = {
+      version: 1,
+      seed: 42,
+      numRuns: 100,
+      sceneTarget: { kind: 'fixture', fixtureId: 'test.html' },
+      inputDomain: { mode: 'enumerated', values: [] },
+      counterexample: { size: 'sm' },
+      timestamp: new Date().toISOString(),
+    };
+
+    const mockProof: ProofLike = {
+      proofId: 'p1',
+      clauseId: 'c1',
+      outcome: 'fail',
+      truth: 'determinate',
+      failedPredicate: { op: '>=', left: 10, right: 24 },
+      witness: { subjectId: 0, referenceId: 1 },
+    };
+
+    const mockResult = { status: 'fail', clauseId: 'c1' };
+
+    const executor: ReplayExecutor = {
+      buildWorld: async () => ({ sceneId: 'scene' }),
+      buildClauses: () => [{ clauseId: 'c1' }],
+      evaluate: () => ({
+        results: [mockResult],
+        proofs: [mockProof],
+      }),
+      isFailure: hasFailStatus,
+      toDiagnostic: toReplayFailure,
+    };
+
+    const result: RunResult = await replay(metadata, executor);
+
+    assert.strictEqual(result.passed, false);
+    assert.strictEqual(result.seed, 42);
+    assert.strictEqual(result.diagnostics.length, 1);
+    assert.strictEqual(result.proofs.length, 1);
+    assert.strictEqual(result.proofs[0].outcome, 'fail');
+    assert.ok(
+      result.diagnostics[0].message.includes('Replayed failure'),
+      'diagnostic should mention replayed failure',
+    );
+  });
+
+  // A passing result never reaches toDiagnostic, so no diagnostics come back.
+  it('replays a passing run and reports pass', async () => {
+    const metadata: ReplayMetadata = {
+      version: 1,
+      seed: 7,
+      numRuns: 50,
+      sceneTarget: { kind: 'fixture', fixtureId: 'pass.html' },
+      inputDomain: { mode: 'enumerated', values: [] },
+      timestamp: new Date().toISOString(),
+    };
+
+    const mockProof: ProofLike = {
+      proofId: 'p2',
+      clauseId: 'c2',
+      outcome: 'pass',
+      truth: 'determinate',
+    };
+
+    const executor: ReplayExecutor = {
+      buildWorld: async () => ({ sceneId: 'scene' }),
+      buildClauses: () => [{ clauseId: 'c2' }],
+      evaluate: () => ({
+        results: [{ status: 'pass', clauseId: 'c2' }],
+        proofs: [mockProof],
+      }),
+      isFailure: hasFailStatus,
+      toDiagnostic: toReplayFailure,
+    };
+
+    const result = await replay(metadata, executor);
+
+    assert.strictEqual(result.passed, true);
+    assert.strictEqual(result.diagnostics.length, 0);
+    assert.strictEqual(result.proofs[0].outcome, 'pass');
+  });
+
+  // The seed in the result is the one taken from the metadata.
+  it('uses the same seed for deterministic reconstruction', async () => {
+    const metadata: ReplayMetadata = {
+      version: 1,
+      seed: 99,
+      numRuns: 10,
+      sceneTarget: { kind: 'fixture', fixtureId: 'seed.html' },
+      inputDomain: { mode: 'generated', arbitrary: null, seed: 99, numRuns: 10 },
+      counterexample: { label: 'x' },
+      timestamp: new Date().toISOString(),
+    };
+
+    const executor: ReplayExecutor = {
+      buildWorld: async () => ({}),
+      buildClauses: () => [],
+      evaluate: () => ({ results: [], proofs: [] }),
+      isFailure: () => false,
+      toDiagnostic: () =>
+        makeDiagnostic(
+          { code: 'IMH_TEST', category: 'internal-error', message: 'test' },
+          { idGen: fakeId },
+        ),
+    };
+
+    const result = await replay(metadata, executor);
+    assert.strictEqual(result.seed, 99);
+  });
+});
@@ -0,0 +1,218 @@
+/**
+ * Replay metadata builder for sampled property runs.
+ *
+ * Given a SampledRunResult, produces a replay script or metadata object
+ * that can reproduce the exact run. Riley needs this for 2 AM debugging:
+ * she passes the seed to the runner and watches the identical failure.
+ *
+ * The replay payload includes everything needed to reconstruct the run:
+ * seed, numRuns, renderer, component, input domain.
+ */
+
+import type { SceneTarget, RenderCase } from 'imhotep-core/scene-target'
+import type { InputDomain } from 'imhotep-core/property-contracts'
+import type { SampledRunResult } from 'imhotep-core/property-results'
+import { setDefaultContext, createDeterministicContext } from 'imhotep-core'
+
+// ---------------------------------------------------------------------------
+// Replay Metadata
+// ---------------------------------------------------------------------------
+
+/**
+ * Serializable receipt describing one sampled property run, sufficient
+ * input for `buildReplayScript` and `replay`.
+ */
+export interface ReplayMetadata {
+  /** Metadata format version; currently always the literal 1. */
+  version: 1;
+  /** Seed of the original run; `replay` builds its deterministic context from it. */
+  seed: number;
+  /** Number of sampled runs in the original execution. */
+  numRuns: number;
+  /** Scene the property was evaluated against. */
+  sceneTarget: SceneTarget;
+  /** Input domain the samples were drawn from. */
+  inputDomain: InputDomain;
+  /** The counterexample input that failed, if any. */
+  counterexample?: unknown;
+  /** The shrunk counterexample, if shrinking was performed. */
+  shrunkCounterexample?: unknown;
+  /** Optional renderer adapter identifier for reconstruction. */
+  rendererAdapterId?: string;
+  /** ISO timestamp of when the original run occurred. */
+  timestamp: string;
+}
+
+/**
+ * Build replay metadata from a sampled run result.
+ *
+ * Copies the seed, run count and (shrunk) counterexample from the result,
+ * and pairs them with the scene target and input domain supplied by the
+ * caller. The timestamp defaults to the current time when
+ * `options.timestamp` is not given.
+ */
+export function buildReplayMetadata(
+  result: SampledRunResult,
+  sceneTarget: SceneTarget,
+  inputDomain: InputDomain,
+  options?: {
+    rendererAdapterId?: string;
+    timestamp?: string;
+  },
+): ReplayMetadata {
+  const { seed, numRuns, counterexample, shrunkCounterexample } = result;
+  const timestamp = options?.timestamp ?? new Date().toISOString();
+
+  return {
+    version: 1,
+    seed,
+    numRuns,
+    sceneTarget,
+    inputDomain,
+    counterexample,
+    shrunkCounterexample,
+    rendererAdapterId: options?.rendererAdapterId,
+    timestamp,
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Replay Script
+// ---------------------------------------------------------------------------
+
+/**
+ * Replay instructions in three forms: prose for humans, a command line for
+ * shells, and the raw metadata for tools.
+ */
+export interface ReplayScript {
+  /** Human-readable description of how to replay. */
+  description: string;
+  /**
+   * Programmatic replay command (e.g., a shell command or JS snippet).
+   * `buildReplayScript` emits `<runner> --seed <seed> --num-runs <numRuns>`.
+   */
+  command: string;
+  /** The metadata object for machine consumption. */
+  metadata: ReplayMetadata;
+}
+
+/**
+ * Build a replay script from metadata.
+ *
+ * Returns a multi-line human-readable description, a programmatic command,
+ * and the metadata itself.
+ *
+ * The counterexample is rendered with `options.formatInput` when supplied,
+ * otherwise with `JSON.stringify`. A missing counterexample is always
+ * rendered as `'unknown'` and never handed to the formatter, which matches
+ * how the property-diagnostics builders treat a missing failing case.
+ *
+ * The command carries only the seed and run count; the counterexample is
+ * not part of it.
+ *
+ * @param metadata - Replay receipt to describe.
+ * @param options - `runnerCommand` replaces the default `imhotep replay`
+ *   prefix; `formatInput` customises counterexample rendering.
+ * @returns The description, the command and the untouched metadata.
+ */
+export function buildReplayScript(
+  metadata: ReplayMetadata,
+  options?: {
+    runnerCommand?: string;
+    formatInput?(input: unknown): string;
+  },
+): ReplayScript {
+  const { seed, numRuns, counterexample, timestamp } = metadata;
+
+  // Decide "is there anything to format?" before picking a formatter, so a
+  // custom formatter is never invoked with undefined.
+  let inputStr = 'unknown';
+  if (counterexample !== undefined) {
+    inputStr = options?.formatInput
+      ? options.formatInput(counterexample)
+      : JSON.stringify(counterexample);
+  }
+
+  const description = [
+    `Replay sampled property run:`,
+    `  seed: ${seed}`,
+    `  numRuns: ${numRuns}`,
+    `  counterexample: ${inputStr}`,
+    `  timestamp: ${timestamp}`,
+  ].join('\n');
+
+  const runner = options?.runnerCommand ?? 'imhotep replay';
+  const command = `${runner} --seed ${seed} --num-runs ${numRuns}`;
+
+  return {
+    description,
+    command,
+    metadata,
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Replay from Result (Convenience)
+// ---------------------------------------------------------------------------
+
+/**
+ * Build a full replay script directly from a SampledRunResult.
+ *
+ * Shorthand for `buildReplayMetadata` followed by `buildReplayScript`.
+ * Only `rendererAdapterId` reaches the metadata step; no timestamp can be
+ * supplied through this entry point, so the metadata is stamped with the
+ * current time.
+ */
+export function buildReplayFromResult(
+  result: SampledRunResult,
+  sceneTarget: SceneTarget,
+  inputDomain: InputDomain,
+  options?: {
+    rendererAdapterId?: string;
+    runnerCommand?: string;
+    formatInput?(input: unknown): string;
+  },
+): ReplayScript {
+  const adapterOptions = { rendererAdapterId: options?.rendererAdapterId };
+  return buildReplayScript(
+    buildReplayMetadata(result, sceneTarget, inputDomain, adapterOptions),
+    options,
+  );
+}
+
+// ---------------------------------------------------------------------------
+// Executable Replay
+// ---------------------------------------------------------------------------
+
+import type { Diagnostic } from './diagnostics.js';
+import type { ProofLike } from './diagnostics.js';
+
+/**
+ * Result of executing a replay.
+ */
+export interface RunResult {
+  /**
+   * Whether the replayed run passed (no failures). This is false as soon as
+   * the executor flags any result as a failure, even if that failure had no
+   * proof and therefore produced no diagnostic.
+   */
+  passed: boolean;
+  /** Diagnostics emitted during the replay: one per failing result that has a proof. */
+  diagnostics: Diagnostic[];
+  /** Proofs generated during the replay, exactly as returned by the executor. */
+  proofs: ProofLike[];
+  /** Seed used for the replay (taken from the metadata). */
+  seed: number;
+}
+
+/**
+ * Executor interface injected by the caller.
+ *
+ * The reporter does not know how to mount scenes or evaluate clauses;
+ * it delegates those operations to the injected executor.  This keeps
+ * the reporter backend-agnostic and testable.
+ *
+ * `replay` calls `buildWorld`, `buildClauses` and `evaluate` once each, in
+ * that order, then `isFailure` per result and `toDiagnostic` per failing
+ * result that has a proof.
+ */
+export interface ReplayExecutor {
+  /** Mount the scene described by metadata and extract a geometry world. */
+  buildWorld(metadata: ReplayMetadata): Promise<unknown>;
+  /** Build clause descriptors from the metadata. */
+  buildClauses(metadata: ReplayMetadata): unknown[];
+  /**
+   * Evaluate clauses against the world.
+   * `replay` pairs `results[i]` with `proofs[i]`, so the two arrays are
+   * expected to be index-aligned.
+   */
+  evaluate(world: unknown, clauses: unknown[]): { results: unknown[]; proofs: ProofLike[] };
+  /** Check whether an evaluation result represents a failure. */
+  isFailure(result: unknown): boolean;
+  /** Convert a failing result + proof into a diagnostic. */
+  toDiagnostic(result: unknown, proof: ProofLike): Diagnostic;
+}
+
+/**
+ * Replay a property run from its metadata.
+ *
+ * Installs a deterministic default context derived from `metadata.seed`,
+ * then asks the executor to rebuild the world and clauses and re-evaluate
+ * them. Each result is paired with the proof at the same index. Every
+ * failing result marks the run as failed; a diagnostic is produced only
+ * when that result also has a proof.
+ *
+ * NOTE(review): the default context set here is not restored in this
+ * function; confirm callers do not rely on the previous context.
+ *
+ * @param metadata - The replay metadata captured from the original run.
+ * @param executor - Injected executor that knows how to mount and evaluate.
+ * @returns Pass/fail flag, collected diagnostics, the executor's proofs and the seed.
+ */
+export async function replay(
+  metadata: ReplayMetadata,
+  executor: ReplayExecutor,
+): Promise<RunResult> {
+  const { seed } = metadata;
+
+  // Seeded context first, presumably so ids, clocks and rng line up with
+  // the original run before any executor code runs.
+  setDefaultContext(createDeterministicContext(seed));
+
+  const world = await executor.buildWorld(metadata);
+  const clauses = executor.buildClauses(metadata);
+  const { results, proofs } = executor.evaluate(world, clauses);
+
+  const diagnostics: Diagnostic[] = [];
+  let sawFailure = false;
+
+  for (const [index, outcome] of results.entries()) {
+    if (!executor.isFailure(outcome)) {
+      continue;
+    }
+    sawFailure = true;
+
+    const pairedProof = proofs[index];
+    if (pairedProof) {
+      diagnostics.push(executor.toDiagnostic(outcome, pairedProof));
+    }
+  }
+
+  return {
+    passed: !sawFailure,
+    diagnostics,
+    proofs,
+    seed,
+  };
+}
@@ -0,0 +1,177 @@
+/**
+ * Tests for oracle-preserving witness shrinking.
+ *
+ * Verifies that shrinkWitness only accepts a reduced witness if the
+ * injected oracle still reports failure.  If the oracle returns false
+ * (meaning the shrunk witness no longer reproduces the bug), the
+ * shrink must be rejected and the original witness returned unchanged.
+ */
+
+import { describe, it } from 'node:test';
+import assert from 'node:assert';
+import { shrinkWitness, type Witness, type ShrinkResult } from './shrink.js';
+
+describe('shrink oracle preservation', () => {
+  // Every scenario shares the same failing proof; only the shrinkable
+  // axes differ, so the proof literal lives in one factory.
+  const witnessWith = (axes: Omit<Witness, 'proof'>): Witness => ({
+    proof: {
+      proofId: 'p1',
+      clauseId: 'c1',
+      outcome: 'fail',
+      truth: 'determinate',
+    },
+    ...axes,
+  });
+
+  it('accepts a shrunk witness when the oracle still fails', () => {
+    const start = witnessWith({
+      envCases: ['env_1', 'env_2', 'env_3'],
+      snapshots: ['default'],
+      subjects: [1, 2, 3],
+      facts: [100, 101, 102],
+    });
+
+    // Reproduction needs only env_2 together with subject 2.
+    const reproduces = (w: Witness): boolean =>
+      w.envCases.includes('env_2') && w.subjects.includes(2);
+
+    const outcome: ShrinkResult = shrinkWitness(start, reproduces);
+    const { witness: minimal, axes } = outcome;
+
+    assert.strictEqual(outcome.reduced, true);
+    assert.deepStrictEqual(minimal.envCases, ['env_2']);
+    assert.deepStrictEqual(minimal.subjects, [2]);
+    assert.ok(axes.includes('env-case'));
+    assert.ok(axes.includes('subject'));
+
+    // The minimized witness has to satisfy the oracle as well.
+    assert.strictEqual(
+      reproduces(minimal),
+      true,
+      'final shrunk witness must still fail the oracle',
+    );
+  });
+
+  it('rejects the shrink when the oracle no longer fails', () => {
+    const start = witnessWith({
+      envCases: ['env_1', 'env_2'],
+      snapshots: ['default'],
+      subjects: [1],
+      facts: [100],
+    });
+
+    // Only the untouched witness reproduces: dropping any single item
+    // from any axis makes the failure vanish.
+    const reproduces = (w: Witness): boolean =>
+      w.envCases.length === 2 &&
+      w.envCases.includes('env_1') &&
+      w.snapshots.length === 1 &&
+      w.subjects.length === 1 &&
+      w.facts.length === 1;
+
+    const outcome: ShrinkResult = shrinkWitness(start, reproduces);
+    const { witness: unchanged } = outcome;
+
+    // No removal is ever accepted, so nothing is reported as reduced,
+    // no axis is listed, and no steps are counted.
+    assert.strictEqual(outcome.reduced, false);
+    assert.deepStrictEqual(unchanged.envCases, ['env_1', 'env_2']);
+    assert.deepStrictEqual(unchanged.snapshots, ['default']);
+    assert.deepStrictEqual(unchanged.subjects, [1]);
+    assert.deepStrictEqual(unchanged.facts, [100]);
+    assert.deepStrictEqual(outcome.axes, []);
+    assert.strictEqual(outcome.steps, 0);
+  });
+
+  it('preserves failure across render-prop shrinking', () => {
+    const start = witnessWith({
+      envCases: ['env_1'],
+      snapshots: ['default'],
+      subjects: [1],
+      facts: [100],
+      renderProps: { size: 'lg', disabled: true, variant: 'primary' },
+    });
+
+    // The failure depends on nothing but the 'disabled' prop.
+    const reproduces = (w: Witness): boolean =>
+      w.renderProps?.disabled === true;
+
+    const outcome: ShrinkResult = shrinkWitness(start, reproduces);
+
+    assert.strictEqual(outcome.reduced, true);
+    assert.ok(outcome.axes.includes('prop'));
+    assert.deepStrictEqual(outcome.witness.renderProps, { disabled: true });
+    assert.strictEqual(reproduces(outcome.witness), true);
+  });
+
+  it('rejects render-prop shrink if oracle returns false', () => {
+    const start = witnessWith({
+      envCases: ['env_1'],
+      snapshots: ['default'],
+      subjects: [1],
+      facts: [100],
+      renderProps: { a: 1, b: 2 },
+    });
+
+    // Both props and every item on the other axes are required.
+    const reproduces = (w: Witness): boolean =>
+      Object.keys(w.renderProps ?? {}).length === 2 &&
+      w.envCases.length === 1 &&
+      w.snapshots.length === 1 &&
+      w.subjects.length === 1 &&
+      w.facts.length === 1;
+
+    const outcome: ShrinkResult = shrinkWitness(start, reproduces);
+
+    assert.strictEqual(outcome.reduced, false);
+    assert.deepStrictEqual(outcome.witness.renderProps, { a: 1, b: 2 });
+    assert.strictEqual(reproduces(outcome.witness), true);
+  });
+
+  it('original failure → shrink → smaller witness → prove still fails', () => {
+    const start = witnessWith({
+      envCases: ['desktop', 'tablet', 'mobile'],
+      snapshots: ['default', 'hover', 'focus'],
+      subjects: [10, 20, 30, 40],
+      facts: [1, 2, 3, 4, 5],
+    });
+
+    // Desktop + subject 20 + fact 3 is all the bug needs.
+    const reproduces = (w: Witness): boolean =>
+      w.envCases.includes('desktop') &&
+      w.subjects.includes(20) &&
+      w.facts.includes(3);
+
+    const outcome: ShrinkResult = shrinkWitness(start, reproduces);
+    const { witness: minimal } = outcome;
+
+    assert.strictEqual(outcome.reduced, true);
+    assert.deepStrictEqual(minimal.envCases, ['desktop']);
+    assert.deepStrictEqual(minimal.subjects, [20]);
+    assert.deepStrictEqual(minimal.facts, [3]);
+    assert.strictEqual(
+      reproduces(minimal),
+      true,
+      'shrunk witness must still reproduce the original failure',
+    );
+    assert.ok(outcome.steps > 0);
+  });
+});
@@ -0,0 +1,273 @@
+/**
+ * Witness shrinking logic.
+ *
+ * Shrinking minimizes failing evidence along several axes. The axes
+ * reduced by shrinkWitness are:
+ *   - environment case
+ *   - state snapshot
+ *   - subject subset
+ *   - contributing facts
+ *   - render inputs: props, args, query params, fixture inputs (V1.1)
+ *
+ * Two further axes are named in ShrinkAxis but are not reduced here:
+ *   - timeline sample set
+ *   - clause group context
+ *
+ * Goal: produce the smallest still-failing witness that preserves
+ * explanatory value. This is a diagnostic minimizer, not merely
+ * a test minimizer.
+ */
+
+import type { ProofLike } from './diagnostics.js';
+
+/**
+ * A shrinkable witness bundles the proof with the full context
+ * needed to attempt reduction.
+ *
+ * V1.1 extension: render inputs (props, args, query params, fixture inputs)
+ * are now shrinkable axes so Riley can isolate bugs to the smallest
+ * still-failing render configuration.
+ */
+export interface Witness {
+  /** The failing proof. shrinkWitness never reduces it; copies share it by reference. */
+  proof: ProofLike;
+  /** Items reduced under the 'env-case' axis. */
+  envCases: string[];
+  /** Items reduced under the 'snapshot' axis. */
+  snapshots: string[];
+  /** Items reduced under the 'subject' axis (presumably subject ids — confirm with producer). */
+  subjects: number[];
+  /** Items reduced under the 'fact' axis (presumably fact ids — confirm with producer). */
+  facts: number[];
+  /** Render-input axes for property-run shrinking (V1.1). */
+  // Each record is shrunk key by key; an absent record is skipped entirely.
+  renderProps?: Record<string, unknown>;
+  renderArgs?: Record<string, unknown>;
+  queryParams?: Record<string, unknown>;
+  fixtureInputs?: Record<string, unknown>;
+}
+
+/**
+ * Result of a shrink attempt.
+ */
+export interface ShrinkResult {
+  /** True if at least one axis of the witness was reduced. */
+  reduced: boolean;
+  /**
+   * The minimized witness. Always a copy of the input, with the same
+   * content as the input when nothing was reduced.
+   */
+  witness: Witness;
+  /** Which axes were shrunk, in the order shrinkWitness processed them. */
+  axes: ShrinkAxis[];
+  /**
+   * Number of removal attempts made on the axes that ended up reduced.
+   * Attempts on axes where nothing could be removed are not counted, so
+   * this is 0 whenever `reduced` is false.
+   */
+  steps: number;
+}
+
+/**
+ * Names of the dimensions along which a witness can be reduced.
+ *
+ * shrinkWitness in this module reports every member except 'timeline'
+ * and 'group-context', which it never produces.
+ */
+export type ShrinkAxis =
+  | 'env-case'
+  | 'snapshot'
+  | 'subject'
+  | 'fact'
+  | 'timeline'
+  | 'group-context'
+  | 'prop'
+  | 'arg'
+  | 'query-param'
+  | 'fixture-input';
+
+/**
+ * Predicate injected by the caller.
+ * Returns true if the reduced witness still reproduces the failure.
+ *
+ * shrinkWitness calls it once per removal candidate and once more on the
+ * final witness when anything was reduced.
+ */
+export type StillFails = (w: Witness) => boolean;
+
+/**
+ * Minimize a failing witness one axis at a time.
+ *
+ * Axes are handled in a fixed order: env cases, snapshots, subjects,
+ * facts, then the optional render-input records (props, args, query
+ * params, fixture inputs). Within an axis each element is tentatively
+ * removed, and the removal is kept only when `stillFails` accepts the
+ * resulting witness. All work happens on a copy; this function does not
+ * modify the witness it was given.
+ *
+ * @param witness - The original failing witness.
+ * @param stillFails - Oracle returning true while the failure reproduces.
+ * @returns The reduced witness, the axes that shrank, and the number of
+ *   removal attempts spent on those axes. If the final witness does not
+ *   satisfy the oracle, a copy of the original is returned with
+ *   `reduced: false`, no axes, and zero steps.
+ */
+export function shrinkWitness(
+  witness: Witness,
+  stillFails: StillFails,
+): ShrinkResult {
+  const working = cloneWitness(witness);
+  const shrunkAxes: ShrinkAxis[] = [];
+  let totalSteps = 0;
+
+  // Reduce one array-valued axis. `assign` writes a list into a witness;
+  // it is used both for trial candidates and to commit the result.
+  const reduceList = <T>(
+    axis: ShrinkAxis,
+    items: T[],
+    assign: (target: Witness, next: T[]) => void,
+  ): void => {
+    const outcome = shrinkArray(items, (next) => {
+      const trial = cloneWitness(working);
+      assign(trial, next);
+      return stillFails(trial);
+    });
+    if (!outcome.reduced) {
+      return;
+    }
+    assign(working, outcome.value);
+    shrunkAxes.push(axis);
+    totalSteps += outcome.steps;
+  };
+
+  // Reduce one record-valued axis; a missing record is left alone.
+  const reduceRecord = (
+    axis: ShrinkAxis,
+    bag: Record<string, unknown> | undefined,
+    assign: (target: Witness, next: Record<string, unknown>) => void,
+  ): void => {
+    if (!bag) {
+      return;
+    }
+    const outcome = shrinkObject(bag, (next) => {
+      const trial = cloneWitness(working);
+      assign(trial, next);
+      return stillFails(trial);
+    });
+    if (!outcome.reduced) {
+      return;
+    }
+    assign(working, outcome.value);
+    shrunkAxes.push(axis);
+    totalSteps += outcome.steps;
+  };
+
+  reduceList('env-case', working.envCases, (target, next) => {
+    target.envCases = next;
+  });
+  reduceList('snapshot', working.snapshots, (target, next) => {
+    target.snapshots = next;
+  });
+  reduceList('subject', working.subjects, (target, next) => {
+    target.subjects = next;
+  });
+  reduceList('fact', working.facts, (target, next) => {
+    target.facts = next;
+  });
+
+  // Render-input axes (V1.1)
+  reduceRecord('prop', working.renderProps, (target, next) => {
+    target.renderProps = next;
+  });
+  reduceRecord('arg', working.renderArgs, (target, next) => {
+    target.renderArgs = next;
+  });
+  reduceRecord('query-param', working.queryParams, (target, next) => {
+    target.queryParams = next;
+  });
+  reduceRecord('fixture-input', working.fixtureInputs, (target, next) => {
+    target.fixtureInputs = next;
+  });
+
+  const reduced = shrunkAxes.length > 0;
+
+  // Oracle validation: a reduced witness that no longer reproduces the
+  // failure is discarded in favour of the original.
+  if (reduced && !stillFails(working)) {
+    return { reduced: false, witness: cloneWitness(witness), axes: [], steps: 0 };
+  }
+
+  return { reduced, witness: working, axes: shrunkAxes, steps: totalSteps };
+}
+
+/**
+ * Greedy single-pass reduction of an array.
+ *
+ * Walks the array from the last index down to the first. At each position
+ * it builds a copy without that element and keeps the copy if `predicate`
+ * accepts it. Removing index `i` leaves every index below `i` in place,
+ * so the backward walk needs no index adjustment after a removal.
+ *
+ * @param arr - Input array; it is copied, never modified.
+ * @param predicate - Returns true if the reduced array is still acceptable.
+ * @returns `reduced` (whether anything was removed), `value` (the surviving
+ *   elements in their original order), and `steps` (removals tried, one
+ *   per element of the input).
+ */
+function shrinkArray<T>(
+  arr: T[],
+  predicate: (reduced: T[]) => boolean,
+): { reduced: boolean; value: T[]; steps: number } {
+  let kept = arr.slice();
+  let changed = false;
+  let steps = 0;
+
+  for (let i = kept.length - 1; i >= 0; i--) {
+    const candidate = kept.slice(0, i).concat(kept.slice(i + 1));
+    steps++;
+    if (predicate(candidate)) {
+      kept = candidate;
+      changed = true;
+    }
+  }
+
+  return { reduced: changed, value: kept, steps };
+}
+
+/**
+ * Greedy single-pass reduction of an object's keys.
+ *
+ * The key list is captured once, then visited from last to first. For
+ * each key a copy without that key is offered to `predicate`; if it is
+ * accepted the copy becomes the new baseline. Keys not yet visited are
+ * always still present, so no index bookkeeping is needed after a removal.
+ *
+ * @param obj - Input object; it is shallow-copied, never modified.
+ * @param predicate - Returns true if the reduced object is still acceptable.
+ * @returns `reduced` (whether any key was dropped), `value` (the surviving
+ *   entries), and `steps` (removals tried, one per key of the input).
+ */
+function shrinkObject<T extends Record<string, unknown>>(
+  obj: T,
+  predicate: (reduced: T) => boolean,
+): { reduced: boolean; value: T; steps: number } {
+  let kept = { ...obj } as T;
+  let changed = false;
+  let steps = 0;
+
+  // Object.keys returns a fresh array, so reversing it in place is safe.
+  for (const key of Object.keys(kept).reverse()) {
+    const candidate = { ...kept } as T;
+    delete (candidate as Record<string, unknown>)[key];
+    steps++;
+    if (predicate(candidate)) {
+      kept = candidate;
+      changed = true;
+    }
+  }
+
+  return { reduced: changed, value: kept, steps };
+}
+
+/**
+ * Copy a witness one level deep.
+ *
+ * Each array and each render-input record is duplicated so the copy can
+ * be edited independently; the proof is shared by reference. Absent
+ * records come out as explicit `undefined` properties.
+ */
+function cloneWitness(w: Witness): Witness {
+  const copyRecord = (
+    bag: Record<string, unknown> | undefined,
+  ): Record<string, unknown> | undefined => {
+    if (!bag) {
+      return undefined;
+    }
+    return { ...bag };
+  };
+
+  const { proof, envCases, snapshots, subjects, facts } = w;
+
+  return {
+    proof,
+    envCases: envCases.slice(),
+    snapshots: snapshots.slice(),
+    subjects: subjects.slice(),
+    facts: facts.slice(),
+    renderProps: copyRecord(w.renderProps),
+    renderArgs: copyRecord(w.renderArgs),
+    queryParams: copyRecord(w.queryParams),
+    fixtureInputs: copyRecord(w.fixtureInputs),
+  };
+}
@@ -0,0 +1,120 @@
+/**
+ * Usage error suggestion engine.
+ *
+ * When a system-use error occurs (parse, validation, resolution,
+ * extraction), this module produces actionable suggestions based on
+ * the diagnostic code and category.
+ */
+
+import type { Diagnostic, DiagnosticCategory } from './diagnostics.js';
+import {
+  IMH_SELECTOR_ZERO_MATCHES,
+  IMH_FRAME_AMBIGUOUS,
+  IMH_VALID_INVALID_UNIT,
+  IMH_VALID_ILLEGAL_RELATION_OPTION,
+  IMH_EXTRACT_PARTIAL,
+  IMH_INDETERMINATE_MISSING_FACT,
+} from './codes.js';
+
+/**
+ * A suggestion carries a message and an optional example snippet.
+ */
+export interface Suggestion {
+  /** The advice itself; becomes the fix hint text. */
+  message: string;
+  /** Optional snippet; attachSuggestions appends it as ` Example: <example>` when non-empty. */
+  example?: string;
+}
+
+/**
+ * Lookup from a diagnostic code (and category) to its suggestions.
+ * Injected so tests can substitute or extend suggestions.
+ *
+ * The default registry matches on the exact code and ignores the
+ * category; an unknown code yields an empty list.
+ */
+export interface SuggestionRegistry {
+  lookup(code: string, category: DiagnosticCategory): Suggestion[];
+}
+
+/**
+ * Build the built-in suggestion knowledge base.
+ *
+ * Suggestions are keyed by exact diagnostic code. The returned registry
+ * ignores the category argument. A known code returns the stored list
+ * itself (not a copy); an unknown code returns a new empty array.
+ */
+export function createDefaultSuggestionRegistry(): SuggestionRegistry {
+  const knowledgeBase = new Map<string, Suggestion[]>([
+    // Resolution errors
+    [
+      IMH_SELECTOR_ZERO_MATCHES,
+      [
+        {
+          message: 'The selector matched no elements. Verify the selector is correct and the element is present in the DOM.',
+          example: "await expect('.buy-button').to.be.visible()",
+        },
+        {
+          message: 'If the element is rendered conditionally, add a wait or guard.',
+        },
+      ],
+    ],
+    [
+      IMH_FRAME_AMBIGUOUS,
+      [
+        {
+          message: 'Narrow the subject selector so it matches a single element.',
+        },
+        {
+          message: 'Use expectAll(...) if multiple subjects are intended.',
+        },
+      ],
+    ],
+
+    // Validation errors
+    [
+      IMH_VALID_INVALID_UNIT,
+      [
+        {
+          message: 'Use a supported unit: px, rem, em, %, vh, vw, or jnd.',
+          example: "{ minGap: 16, unit: 'px' }",
+        },
+      ],
+    ],
+    [
+      IMH_VALID_ILLEGAL_RELATION_OPTION,
+      [
+        {
+          message: 'Check the allowed options for this relation in the documentation.',
+        },
+      ],
+    ],
+
+    // Extraction errors
+    [
+      IMH_EXTRACT_PARTIAL,
+      [
+        {
+          message: 'Some facts were unavailable. Check that the page is fully loaded.',
+        },
+        {
+          message: 'If the fact is unsupported for this element type, simplify the assertion.',
+        },
+      ],
+    ],
+
+    // Indeterminate results
+    [
+      IMH_INDETERMINATE_MISSING_FACT,
+      [
+        {
+          message: 'A required fact was missing. Check extractor output for warnings.',
+        },
+      ],
+    ],
+  ]);
+
+  return {
+    lookup: (code, _category) => knowledgeBase.get(code) ?? [],
+  };
+}
+
+/**
+ * Extend a diagnostic's fix hints with the registry's suggestions for it.
+ *
+ * The registry is queried with the diagnostic's code and category. When
+ * it has nothing to offer, the very same diagnostic object is returned.
+ * Otherwise a shallow copy is returned whose `fixHints` are the existing
+ * hints followed by one hint per suggestion; a suggestion with a
+ * non-empty example is rendered as `<message> Example: <example>`.
+ */
+export function attachSuggestions(
+  diagnostic: Diagnostic,
+  registry: SuggestionRegistry,
+): Diagnostic {
+  const found = registry.lookup(diagnostic.code, diagnostic.category);
+  if (found.length === 0) {
+    return diagnostic;
+  }
+
+  const extraHints: string[] = [];
+  for (const { message, example } of found) {
+    if (example) {
+      extraHints.push(`${message} Example: ${example}`);
+    } else {
+      extraHints.push(message);
+    }
+  }
+
+  return {
+    ...diagnostic,
+    fixHints: diagnostic.fixHints.concat(extraHints),
+  };
+}
@@ -0,0 +1,115 @@
+/**
+ * Trace event model for Imhotep.
+ *
+ * Every evaluation produces a chain of trace events that link
+ * source spans → AST → IR → execution → proof → diagnostic.
+ *
+ * Trace events are cheap to keep in compact form and expand on demand.
+ */
+
+/**
+ * Well-known phases in the evaluation pipeline.
+ *
+ * NOTE(review): the members appear to be listed in pipeline order
+ * (parse → extraction → evaluation → diagnostic) — confirm before relying
+ * on the ordering. Only 'clause-evaluated' is given special treatment in
+ * this module (by traceChainForProof).
+ */
+export type TracePhase =
+  | 'parse-started'
+  | 'ast-created'
+  | 'ir-normalized'
+  | 'fact-requirements-computed'
+  | 'extraction-started'
+  | 'extraction-step-completed'
+  | 'world-normalized'
+  | 'clause-evaluated'
+  | 'proof-created'
+  | 'witness-shrunk'
+  | 'diagnostic-emitted';
+
+/**
+ * Cross-references that tie a trace event to other entities.
+ *
+ * Every field is optional; an event sets only the references it has.
+ */
+export interface TraceRefs {
+  /** Matched by findClauseTraces. */
+  clauseId?: string;
+  /** Matched by traceChainForProof. */
+  proofId?: string;
+  snapshotId?: string;
+  diagnosticId?: string;
+  astNodeId?: string;
+  envCaseId?: string;
+}
+
+/**
+ * A single trace event.
+ */
+export interface TraceEvent {
+  /** Identifier of this event; createTraceBuilder takes it from the injected idGen(). */
+  traceEventId: string;
+  /** Pipeline phase that produced the event. */
+  phase: TracePhase;
+  /** Timestamp; createTraceBuilder takes it from the injected now(). */
+  at: number; // epoch ms
+  /** Links to the clause, proof, snapshot, etc. the event concerns. */
+  refs: TraceRefs;
+  /** Optional free-form data attached by the emitting stage. */
+  payload?: Record<string, unknown>;
+}
+
+/**
+ * A builder that accumulates trace events during evaluation.
+ * Injected into each pipeline stage so stages stay pure.
+ */
+export interface TraceBuilder {
+  /**
+   * Record an event. The caller supplies phase, refs, and optional
+   * payload; the builder fills in `traceEventId` and `at` and returns the
+   * completed event.
+   */
+  emit(event: Omit<TraceEvent, 'traceEventId' | 'at'>): TraceEvent;
+  /**
+   * All events recorded so far, in emission order. The builder made by
+   * createTraceBuilder returns its live internal buffer, so later emits
+   * show up in a previously returned array.
+   */
+  events(): readonly TraceEvent[];
+}
+
+/**
+ * Create a TraceBuilder backed by an in-memory list.
+ *
+ * Id generation and the clock are injected. For every emitted event
+ * `deps.idGen()` is called first and `deps.now()` second. `events()`
+ * returns the backing list itself rather than a snapshot.
+ */
+export function createTraceBuilder(deps: {
+  idGen(): string;
+  now(): number;
+}): TraceBuilder {
+  const log: TraceEvent[] = [];
+
+  const emit = (event: Omit<TraceEvent, 'traceEventId' | 'at'>): TraceEvent => {
+    const traceEventId = deps.idGen();
+    const { phase } = event;
+    const at = deps.now();
+    // Only the known fields are copied; payload stays present even when undefined.
+    const recorded: TraceEvent = {
+      traceEventId,
+      phase,
+      at,
+      refs: event.refs,
+      payload: event.payload,
+    };
+    log.push(recorded);
+    return recorded;
+  };
+
+  const events = (): readonly TraceEvent[] => log;
+
+  return { emit, events };
+}
+
+/**
+ * Convenience: collect every trace event that references a given clause.
+ *
+ * @param events - Events to search, typically a builder's full log.
+ * @param clauseId - Clause identifier to match against `refs.clauseId`.
+ * @returns A new array with all matching events in their original order
+ *   (empty if none match).
+ */
+export function findClauseTraces(
+  events: readonly TraceEvent[],
+  clauseId: string,
+): TraceEvent[] {
+  const matches: TraceEvent[] = [];
+  for (const event of events) {
+    if (event.refs.clauseId === clauseId) {
+      matches.push(event);
+    }
+  }
+  return matches;
+}
+
+/**
+ * Convenience: reconstruct the evaluation chain for a proof.
+ *
+ * The chain contains, in original order:
+ *   - every event whose `refs.proofId` equals `proofId`, and
+ *   - every 'clause-evaluated' event that carries no proof id of its own
+ *     but shares a `clauseId` with one of those events.
+ *
+ * The second rule covers clause evaluations that do not reference the
+ * proof directly: they are linked through the clause that an event
+ * carrying the proof id names.
+ *
+ * @param events - Events to search, typically a builder's full log.
+ * @param proofId - Proof identifier to reconstruct the chain for.
+ * @returns A new array of the chain's events (empty if the proof is unknown).
+ */
+export function traceChainForProof(
+  events: readonly TraceEvent[],
+  proofId: string,
+): TraceEvent[] {
+  // Clauses that some event explicitly ties to this proof.
+  const linkedClauseIds = new Set<string>();
+  for (const event of events) {
+    const { clauseId } = event.refs;
+    if (event.refs.proofId === proofId && clauseId !== undefined) {
+      linkedClauseIds.add(clauseId);
+    }
+  }
+
+  return events.filter((e) => {
+    if (e.refs.proofId === proofId) {
+      return true;
+    }
+    // An evaluation that names a different proof belongs to that proof's
+    // chain, not this one, even when the clause is shared.
+    return (
+      e.phase === 'clause-evaluated' &&
+      e.refs.proofId === undefined &&
+      e.refs.clauseId !== undefined &&
+      linkedClauseIds.has(e.refs.clauseId)
+    );
+  });
+}