refactor: surface extraction cleanup failures as visible diagnostics
- Add IMH_EXTRACTION_CLEANUP_FAILED and IMH_EXTRACTION_RESIDUAL_ATTRIBUTES diagnostic codes with severity "warning" and category "extraction-error".
- Check for residual data-imhotep-runtime-id attributes before extraction (both fast-geometry and CDP paths); emit a diagnostic if a prior run leaked them.
- Surface cleanup failures (attribute removal, CDP session detach) as returned diagnostics instead of a silent console.warn.
- Wrap CDP sessionManager.detach() in try/catch to prevent unhandled throws from the finally block on closed or navigated pages.
- Count injected vs. cleaned runtime-id attributes; report a mismatch as IMH_EXTRACTION_CLEANUP_FAILED with metrics.
- Move the errors array declaration before the try block in the fast-geometry path so that the finally block can append cleanup diagnostics.
This commit is contained in:
@@ -88,6 +88,8 @@ export type DiagnosticCode =
|
||||
| 'IMH_STYLE_FAILED'
|
||||
| 'IMH_TOPOLOGY_PARTIAL'
|
||||
| 'IMH_TOPOLOGY_FAILED'
|
||||
| 'IMH_EXTRACTION_CLEANUP_FAILED'
|
||||
| 'IMH_EXTRACTION_RESIDUAL_ATTRIBUTES'
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Extractor planner errors (imhotep-extractor)
|
||||
@@ -420,7 +422,7 @@ export function getDefaultCategory(code: DiagnosticCode): DiagnosticCategory {
|
||||
if (code.startsWith('IMH_VALID_')) return 'validation-error'
|
||||
if (code.startsWith('IMH_SELECTOR_') || code.startsWith('IMH_FRAME_') || code.startsWith('IMH_STATE_MATERIALIZATION')) return 'resolution-error'
|
||||
if (code.startsWith('IMH_EXTRACTOR_')) return 'resolution-error'
|
||||
if (code.startsWith('IMH_EXTRACT_') || code.startsWith('IMH_CDP_') || code.startsWith('IMH_DOM_') || code.startsWith('IMH_BOX_MODEL') || code.startsWith('IMH_VISUAL_BOX') || code.startsWith('IMH_FRAGMENT') || code.startsWith('IMH_TRANSFORM') || code.startsWith('IMH_STYLE') || code.startsWith('IMH_TOPOLOGY_PARTIAL') || code.startsWith('IMH_TOPOLOGY_FAILED')) return 'extraction-error'
|
||||
if (code.startsWith('IMH_EXTRACT_') || code.startsWith('IMH_EXTRACTION_') || code.startsWith('IMH_CDP_') || code.startsWith('IMH_DOM_') || code.startsWith('IMH_BOX_MODEL') || code.startsWith('IMH_VISUAL_BOX') || code.startsWith('IMH_FRAGMENT') || code.startsWith('IMH_TRANSFORM') || code.startsWith('IMH_STYLE') || code.startsWith('IMH_TOPOLOGY_PARTIAL') || code.startsWith('IMH_TOPOLOGY_FAILED')) return 'extraction-error'
|
||||
if (code.startsWith('IMH_RELATION_') || code.startsWith('IMH_SIZE_') || code.startsWith('IMH_ALIGNMENT') || code.startsWith('IMH_TOPOLOGY_CLIPPED') || code.startsWith('IMH_TOPOLOGY_STACKING') || code.startsWith('IMH_VISIBILITY') || code.startsWith('IMH_PREDICATE') || code.startsWith('IMH_CARDINALITY')) return 'contract-failure'
|
||||
if (code.startsWith('IMH_FACT_OBSERVED_')) return 'contract-failure'
|
||||
if (code.startsWith('IMH_PROPERTY_') || code.startsWith('IMH_ENUMERATED_') || code === 'IMH_PROPERTY_RUN_FAILED') return 'contract-failure'
|
||||
@@ -453,6 +455,16 @@ export function getDefaultFixHints(code: DiagnosticCode): string[] {
|
||||
hints.push('The selector matches more than one element. Use a more specific selector or add a quantifier.')
|
||||
}
|
||||
|
||||
if (code === 'IMH_EXTRACTION_CLEANUP_FAILED') {
|
||||
hints.push('Verify the page is still interactive (not closed or navigated away).')
|
||||
hints.push('If using CDP mode, check that the browser connection is healthy.')
|
||||
}
|
||||
|
||||
if (code === 'IMH_EXTRACTION_RESIDUAL_ATTRIBUTES') {
|
||||
hints.push('Leftover data-imhotep-runtime-id attributes indicate a prior extraction did not clean up.')
|
||||
hints.push('A page reload or navigating away and back may clear residual attributes.')
|
||||
}
|
||||
|
||||
if (code === 'IMH_EXTRACT_PROTOCOL_ERROR' || code === 'IMH_CDP_SESSION_ATTACH_FAILED') {
|
||||
hints.push('Verify the page is fully loaded before running assertions.')
|
||||
hints.push('Check that selectors are valid CSS selectors or semantic references.')
|
||||
|
||||
Reference in New Issue
Block a user