refactor: surface extraction cleanup failures as visible diagnostics
- Add IMH_EXTRACTION_CLEANUP_FAILED and IMH_EXTRACTION_RESIDUAL_ATTRIBUTES diagnostic codes with severity warning and extraction-error category - Check for residual data-imhotep-runtime-id attributes before extraction (both fast-geometry and CDP paths), emit diagnostic if prior run leaked them - Surface cleanup failures (attribute removal, CDP session detach) as returned diagnostics instead of silent console.warn - Wrap CDP sessionManager.detach() in try/catch to prevent finally-block unhandled throws on closed/navigated pages - Count injected vs cleaned runtime-id attributes; report mismatch as IMH_EXTRACTION_CLEANUP_FAILED with metrics - Move errors array declaration before try block in fast-geometry path so finally can append cleanup diagnostics
This commit is contained in:
@@ -88,6 +88,8 @@ export type DiagnosticCode =
|
|||||||
| 'IMH_STYLE_FAILED'
|
| 'IMH_STYLE_FAILED'
|
||||||
| 'IMH_TOPOLOGY_PARTIAL'
|
| 'IMH_TOPOLOGY_PARTIAL'
|
||||||
| 'IMH_TOPOLOGY_FAILED'
|
| 'IMH_TOPOLOGY_FAILED'
|
||||||
|
| 'IMH_EXTRACTION_CLEANUP_FAILED'
|
||||||
|
| 'IMH_EXTRACTION_RESIDUAL_ATTRIBUTES'
|
||||||
|
|
||||||
// -------------------------------------------------------------------------
|
// -------------------------------------------------------------------------
|
||||||
// Extractor planner errors (imhotep-extractor)
|
// Extractor planner errors (imhotep-extractor)
|
||||||
@@ -420,7 +422,7 @@ export function getDefaultCategory(code: DiagnosticCode): DiagnosticCategory {
|
|||||||
if (code.startsWith('IMH_VALID_')) return 'validation-error'
|
if (code.startsWith('IMH_VALID_')) return 'validation-error'
|
||||||
if (code.startsWith('IMH_SELECTOR_') || code.startsWith('IMH_FRAME_') || code.startsWith('IMH_STATE_MATERIALIZATION')) return 'resolution-error'
|
if (code.startsWith('IMH_SELECTOR_') || code.startsWith('IMH_FRAME_') || code.startsWith('IMH_STATE_MATERIALIZATION')) return 'resolution-error'
|
||||||
if (code.startsWith('IMH_EXTRACTOR_')) return 'resolution-error'
|
if (code.startsWith('IMH_EXTRACTOR_')) return 'resolution-error'
|
||||||
if (code.startsWith('IMH_EXTRACT_') || code.startsWith('IMH_CDP_') || code.startsWith('IMH_DOM_') || code.startsWith('IMH_BOX_MODEL') || code.startsWith('IMH_VISUAL_BOX') || code.startsWith('IMH_FRAGMENT') || code.startsWith('IMH_TRANSFORM') || code.startsWith('IMH_STYLE') || code.startsWith('IMH_TOPOLOGY_PARTIAL') || code.startsWith('IMH_TOPOLOGY_FAILED')) return 'extraction-error'
|
if (code.startsWith('IMH_EXTRACT_') || code.startsWith('IMH_EXTRACTION_') || code.startsWith('IMH_CDP_') || code.startsWith('IMH_DOM_') || code.startsWith('IMH_BOX_MODEL') || code.startsWith('IMH_VISUAL_BOX') || code.startsWith('IMH_FRAGMENT') || code.startsWith('IMH_TRANSFORM') || code.startsWith('IMH_STYLE') || code.startsWith('IMH_TOPOLOGY_PARTIAL') || code.startsWith('IMH_TOPOLOGY_FAILED')) return 'extraction-error'
|
||||||
if (code.startsWith('IMH_RELATION_') || code.startsWith('IMH_SIZE_') || code.startsWith('IMH_ALIGNMENT') || code.startsWith('IMH_TOPOLOGY_CLIPPED') || code.startsWith('IMH_TOPOLOGY_STACKING') || code.startsWith('IMH_VISIBILITY') || code.startsWith('IMH_PREDICATE') || code.startsWith('IMH_CARDINALITY')) return 'contract-failure'
|
if (code.startsWith('IMH_RELATION_') || code.startsWith('IMH_SIZE_') || code.startsWith('IMH_ALIGNMENT') || code.startsWith('IMH_TOPOLOGY_CLIPPED') || code.startsWith('IMH_TOPOLOGY_STACKING') || code.startsWith('IMH_VISIBILITY') || code.startsWith('IMH_PREDICATE') || code.startsWith('IMH_CARDINALITY')) return 'contract-failure'
|
||||||
if (code.startsWith('IMH_FACT_OBSERVED_')) return 'contract-failure'
|
if (code.startsWith('IMH_FACT_OBSERVED_')) return 'contract-failure'
|
||||||
if (code.startsWith('IMH_PROPERTY_') || code.startsWith('IMH_ENUMERATED_') || code === 'IMH_PROPERTY_RUN_FAILED') return 'contract-failure'
|
if (code.startsWith('IMH_PROPERTY_') || code.startsWith('IMH_ENUMERATED_') || code === 'IMH_PROPERTY_RUN_FAILED') return 'contract-failure'
|
||||||
@@ -453,6 +455,16 @@ export function getDefaultFixHints(code: DiagnosticCode): string[] {
|
|||||||
hints.push('The selector matches more than one element. Use a more specific selector or add a quantifier.')
|
hints.push('The selector matches more than one element. Use a more specific selector or add a quantifier.')
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (code === 'IMH_EXTRACTION_CLEANUP_FAILED') {
|
||||||
|
hints.push('Verify the page is still interactive (not closed or navigated away).')
|
||||||
|
hints.push('If using CDP mode, check that the browser connection is healthy.')
|
||||||
|
}
|
||||||
|
|
||||||
|
if (code === 'IMH_EXTRACTION_RESIDUAL_ATTRIBUTES') {
|
||||||
|
hints.push('Leftover data-imhotep-runtime-id attributes indicate a prior extraction did not clean up.')
|
||||||
|
hints.push('A page reload or navigating away and back may clear residual attributes.')
|
||||||
|
}
|
||||||
|
|
||||||
if (code === 'IMH_EXTRACT_PROTOCOL_ERROR' || code === 'IMH_CDP_SESSION_ATTACH_FAILED') {
|
if (code === 'IMH_EXTRACT_PROTOCOL_ERROR' || code === 'IMH_CDP_SESSION_ATTACH_FAILED') {
|
||||||
hints.push('Verify the page is fully loaded before running assertions.')
|
hints.push('Verify the page is fully loaded before running assertions.')
|
||||||
hints.push('Check that selectors are valid CSS selectors or semantic references.')
|
hints.push('Check that selectors are valid CSS selectors or semantic references.')
|
||||||
|
|||||||
@@ -418,12 +418,35 @@ export async function extractWorldFastGeometry(
|
|||||||
selectorToIds: Array<[string, number[]]>
|
selectorToIds: Array<[string, number[]]>
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const errors: ImhotepDiagnostic[] = []
|
||||||
|
|
||||||
const selectorPlans: SelectorPlan[] = []
|
const selectorPlans: SelectorPlan[] = []
|
||||||
for (let i = 0; i < selectors.length; i++) {
|
for (let i = 0; i < selectors.length; i++) {
|
||||||
const queries = await materializeSemanticSelector(playwrightPage, selectors[i], i)
|
const queries = await materializeSemanticSelector(playwrightPage, selectors[i], i)
|
||||||
selectorPlans.push({ key: selectors[i], queries })
|
selectorPlans.push({ key: selectors[i], queries })
|
||||||
}
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const residualBefore = await playwrightPage.evaluate(() =>
|
||||||
|
document.querySelectorAll('[data-imhotep-runtime-id]').length,
|
||||||
|
)
|
||||||
|
if (residualBefore > 0) {
|
||||||
|
errors.push({
|
||||||
|
code: 'IMH_EXTRACTION_RESIDUAL_ATTRIBUTES',
|
||||||
|
severity: 'warning',
|
||||||
|
category: 'extraction-error',
|
||||||
|
message: `Found ${residualBefore} residual data-imhotep-runtime-id attribute(s) from a prior extraction that did not clean up.`,
|
||||||
|
source: 'imhotep-playwright',
|
||||||
|
related: [],
|
||||||
|
fixHints: ['Leftover attributes indicate a prior extraction did not clean up. A page reload or navigating away and back may clear residual attributes.'],
|
||||||
|
metrics: { residualCount: residualBefore },
|
||||||
|
sourceRef: {},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// Best-effort pre-check; proceed with extraction.
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const extracted = await playwrightPage.evaluate(({ plans, needs }: any) => {
|
const extracted = await playwrightPage.evaluate(({ plans, needs }: any) => {
|
||||||
const elements: FastExtractedElement[] = []
|
const elements: FastExtractedElement[] = []
|
||||||
@@ -701,7 +724,6 @@ export async function extractWorldFastGeometry(
|
|||||||
}
|
}
|
||||||
|
|
||||||
const selectorToIds = new Map<string, number[]>(extracted.selectorToIds)
|
const selectorToIds = new Map<string, number[]>(extracted.selectorToIds)
|
||||||
const errors: ImhotepDiagnostic[] = []
|
|
||||||
|
|
||||||
for (const [selector, ids] of selectorToIds) {
|
for (const [selector, ids] of selectorToIds) {
|
||||||
if (ids.length === 0 && !selector.startsWith('$')) {
|
if (ids.length === 0 && !selector.startsWith('$')) {
|
||||||
@@ -732,15 +754,45 @@ export async function extractWorldFastGeometry(
|
|||||||
}
|
}
|
||||||
return { world, selectorToIds, errors }
|
return { world, selectorToIds, errors }
|
||||||
} finally {
|
} finally {
|
||||||
await playwrightPage.evaluate(() => {
|
try {
|
||||||
const nodes = Array.from(document.querySelectorAll('[data-imhotep-runtime-id]'))
|
const result = await playwrightPage.evaluate(() => {
|
||||||
for (const el of nodes) {
|
const nodes = Array.from(document.querySelectorAll<HTMLElement>('[data-imhotep-runtime-id]'))
|
||||||
el.removeAttribute('data-imhotep-runtime-id')
|
const cleaned = nodes.length
|
||||||
|
for (const el of nodes) {
|
||||||
|
el.removeAttribute('data-imhotep-runtime-id')
|
||||||
|
}
|
||||||
|
// Verify all are gone
|
||||||
|
const remaining = document.querySelectorAll('[data-imhotep-runtime-id]').length
|
||||||
|
return { cleaned, remaining }
|
||||||
|
})
|
||||||
|
if (result.remaining > 0) {
|
||||||
|
errors.push({
|
||||||
|
code: 'IMH_EXTRACTION_CLEANUP_FAILED',
|
||||||
|
severity: 'warning',
|
||||||
|
category: 'extraction-error',
|
||||||
|
message: `Cleanup incomplete: ${result.remaining} data-imhotep-runtime-id attribute(s) could not be removed (injected ${result.cleaned}).`,
|
||||||
|
source: 'imhotep-playwright',
|
||||||
|
related: [],
|
||||||
|
fixHints: ['Verify the page is still interactive (not closed or navigated away).'],
|
||||||
|
metrics: { injected: result.cleaned, remaining: result.remaining },
|
||||||
|
sourceRef: {},
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}).catch((err) => {
|
} catch (err) {
|
||||||
|
errors.push({
|
||||||
|
code: 'IMH_EXTRACTION_CLEANUP_FAILED',
|
||||||
|
severity: 'warning',
|
||||||
|
category: 'extraction-error',
|
||||||
|
message: `Fast-geometry cleanup failed: ${err instanceof Error ? err.message : String(err)}`,
|
||||||
|
source: 'imhotep-playwright',
|
||||||
|
related: [],
|
||||||
|
fixHints: ['Verify the page is still interactive (not closed or navigated away).'],
|
||||||
|
metrics: {},
|
||||||
|
sourceRef: {},
|
||||||
|
})
|
||||||
// eslint-disable-next-line no-console
|
// eslint-disable-next-line no-console
|
||||||
console.warn('[imhotep-playwright] fast-geometry cleanup failed:', err instanceof Error ? err.message : err)
|
console.warn('[imhotep-playwright] fast-geometry cleanup failed:', err instanceof Error ? err.message : err)
|
||||||
})
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -818,6 +870,28 @@ export async function extractWorldCdp(
|
|||||||
}
|
}
|
||||||
|
|
||||||
const sessionManager = createSessionManager(playwrightPage)
|
const sessionManager = createSessionManager(playwrightPage)
|
||||||
|
|
||||||
|
try {
|
||||||
|
const residualBefore = await playwrightPage.evaluate(() =>
|
||||||
|
document.querySelectorAll('[data-imhotep-runtime-id]').length,
|
||||||
|
)
|
||||||
|
if (residualBefore > 0) {
|
||||||
|
errors.push({
|
||||||
|
code: 'IMH_EXTRACTION_RESIDUAL_ATTRIBUTES',
|
||||||
|
severity: 'warning',
|
||||||
|
category: 'extraction-error',
|
||||||
|
message: `Found ${residualBefore} residual data-imhotep-runtime-id attribute(s) from a prior extraction that did not clean up.`,
|
||||||
|
source: 'imhotep-cdp',
|
||||||
|
related: [],
|
||||||
|
fixHints: ['Leftover attributes indicate a prior extraction did not clean up. A page reload or navigating away and back may clear residual attributes.'],
|
||||||
|
metrics: { residualCount: residualBefore },
|
||||||
|
sourceRef: {},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// Best-effort pre-check; proceed with extraction.
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await sessionManager.enableDomain('DOM')
|
await sessionManager.enableDomain('DOM')
|
||||||
|
|
||||||
@@ -925,16 +999,78 @@ export async function extractWorldCdp(
|
|||||||
}
|
}
|
||||||
return { world, selectorToIds, errors }
|
return { world, selectorToIds, errors }
|
||||||
} finally {
|
} finally {
|
||||||
await playwrightPage.evaluate(() => {
|
try {
|
||||||
const nodes = Array.from(document.querySelectorAll('[data-imhotep-runtime-id]'))
|
const result = await playwrightPage.evaluate(() => {
|
||||||
for (const el of nodes) {
|
const nodes = Array.from(document.querySelectorAll<HTMLElement>('[data-imhotep-runtime-id]'))
|
||||||
el.removeAttribute('data-imhotep-runtime-id')
|
const cleaned = nodes.length
|
||||||
|
for (const el of nodes) {
|
||||||
|
el.removeAttribute('data-imhotep-runtime-id')
|
||||||
|
}
|
||||||
|
const remaining = document.querySelectorAll('[data-imhotep-runtime-id]').length
|
||||||
|
return { cleaned, remaining }
|
||||||
|
})
|
||||||
|
if (result.remaining > 0) {
|
||||||
|
errors.push({
|
||||||
|
code: 'IMH_EXTRACTION_CLEANUP_FAILED',
|
||||||
|
severity: 'warning',
|
||||||
|
category: 'extraction-error',
|
||||||
|
message: `CDP cleanup incomplete: ${result.remaining} data-imhotep-runtime-id attribute(s) could not be removed (injected ${result.cleaned}).`,
|
||||||
|
source: 'imhotep-cdp',
|
||||||
|
related: [],
|
||||||
|
fixHints: ['Verify the page is still interactive (not closed or navigated away).'],
|
||||||
|
metrics: { injected: result.cleaned, remaining: result.remaining },
|
||||||
|
sourceRef: {},
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}).catch((err) => {
|
} catch (err) {
|
||||||
|
errors.push({
|
||||||
|
code: 'IMH_EXTRACTION_CLEANUP_FAILED',
|
||||||
|
severity: 'warning',
|
||||||
|
category: 'extraction-error',
|
||||||
|
message: `CDP attribute cleanup failed: ${err instanceof Error ? err.message : String(err)}`,
|
||||||
|
source: 'imhotep-cdp',
|
||||||
|
related: [],
|
||||||
|
fixHints: ['Verify the page is still interactive (not closed or navigated away).'],
|
||||||
|
metrics: {},
|
||||||
|
sourceRef: {},
|
||||||
|
})
|
||||||
// eslint-disable-next-line no-console
|
// eslint-disable-next-line no-console
|
||||||
console.warn('[imhotep-playwright] CDP cleanup failed:', err instanceof Error ? err.message : err)
|
console.warn('[imhotep-playwright] CDP cleanup failed:', err instanceof Error ? err.message : err)
|
||||||
})
|
}
|
||||||
await sessionManager.detach()
|
try {
|
||||||
|
await sessionManager.detach()
|
||||||
|
} catch (err) {
|
||||||
|
errors.push({
|
||||||
|
code: 'IMH_EXTRACTION_CLEANUP_FAILED',
|
||||||
|
severity: 'warning',
|
||||||
|
category: 'extraction-error',
|
||||||
|
message: `CDP session detach failed: ${err instanceof Error ? err.message : String(err)}`,
|
||||||
|
source: 'imhotep-cdp',
|
||||||
|
related: [],
|
||||||
|
fixHints: ['The CDP session may already be closed. This is typically non-fatal.'],
|
||||||
|
metrics: {},
|
||||||
|
sourceRef: {},
|
||||||
|
})
|
||||||
|
// eslint-disable-next-line no-console
|
||||||
|
console.warn('[imhotep-playwright] CDP cleanup failed:', err instanceof Error ? err.message : err)
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
await sessionManager.detach()
|
||||||
|
} catch (err) {
|
||||||
|
errors.push({
|
||||||
|
code: 'IMH_EXTRACTION_CLEANUP_FAILED',
|
||||||
|
severity: 'warning',
|
||||||
|
category: 'extraction-error',
|
||||||
|
message: `CDP session detach failed: ${err instanceof Error ? err.message : String(err)}`,
|
||||||
|
source: 'imhotep-cdp',
|
||||||
|
related: [],
|
||||||
|
fixHints: ['The CDP session may already be closed. This is typically non-fatal.'],
|
||||||
|
metrics: {},
|
||||||
|
sourceRef: {},
|
||||||
|
})
|
||||||
|
// eslint-disable-next-line no-console
|
||||||
|
console.warn('[imhotep-playwright] CDP session detach failed:', err instanceof Error ? err.message : err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user