refactor: surface extraction cleanup failures as visible diagnostics

- Add IMH_EXTRACTION_CLEANUP_FAILED and IMH_EXTRACTION_RESIDUAL_ATTRIBUTES
  diagnostic codes with severity warning and extraction-error category
- Check for residual data-imhotep-runtime-id attributes before extraction
  (both fast-geometry and CDP paths), emit diagnostic if prior run leaked them
- Surface cleanup failures (attribute removal, CDP session detach) as
  returned diagnostics in addition to the existing console.warn output
- Wrap CDP sessionManager.detach() in try/catch so the finally block
  cannot throw an unhandled error on closed/navigated pages
- Count runtime-id attributes found at cleanup and those still present
  after removal; report any remainder as IMH_EXTRACTION_CLEANUP_FAILED
  with metrics
- Move errors array declaration before try block in fast-geometry path
  so finally can append cleanup diagnostics
This commit is contained in:
John Dvorak
2026-05-22 16:06:21 -07:00
parent 066ef9f677
commit ce04b2b3de
2 changed files with 163 additions and 15 deletions
+150 -14
View File
@@ -418,12 +418,35 @@ export async function extractWorldFastGeometry(
selectorToIds: Array<[string, number[]]>
}
const errors: ImhotepDiagnostic[] = []
const selectorPlans: SelectorPlan[] = []
for (let i = 0; i < selectors.length; i++) {
const queries = await materializeSemanticSelector(playwrightPage, selectors[i], i)
selectorPlans.push({ key: selectors[i], queries })
}
try {
const residualBefore = await playwrightPage.evaluate(() =>
document.querySelectorAll('[data-imhotep-runtime-id]').length,
)
if (residualBefore > 0) {
errors.push({
code: 'IMH_EXTRACTION_RESIDUAL_ATTRIBUTES',
severity: 'warning',
category: 'extraction-error',
message: `Found ${residualBefore} residual data-imhotep-runtime-id attribute(s) from a prior extraction that did not clean up.`,
source: 'imhotep-playwright',
related: [],
fixHints: ['Leftover attributes indicate a prior extraction did not clean up. A page reload or navigating away and back may clear residual attributes.'],
metrics: { residualCount: residualBefore },
sourceRef: {},
})
}
} catch {
// Best-effort pre-check; proceed with extraction.
}
try {
const extracted = await playwrightPage.evaluate(({ plans, needs }: any) => {
const elements: FastExtractedElement[] = []
@@ -701,7 +724,6 @@ export async function extractWorldFastGeometry(
}
const selectorToIds = new Map<string, number[]>(extracted.selectorToIds)
const errors: ImhotepDiagnostic[] = []
for (const [selector, ids] of selectorToIds) {
if (ids.length === 0 && !selector.startsWith('$')) {
@@ -732,15 +754,45 @@ export async function extractWorldFastGeometry(
}
return { world, selectorToIds, errors }
} finally {
await playwrightPage.evaluate(() => {
const nodes = Array.from(document.querySelectorAll('[data-imhotep-runtime-id]'))
for (const el of nodes) {
el.removeAttribute('data-imhotep-runtime-id')
try {
const result = await playwrightPage.evaluate(() => {
const nodes = Array.from(document.querySelectorAll<HTMLElement>('[data-imhotep-runtime-id]'))
const cleaned = nodes.length
for (const el of nodes) {
el.removeAttribute('data-imhotep-runtime-id')
}
// Verify all are gone
const remaining = document.querySelectorAll('[data-imhotep-runtime-id]').length
return { cleaned, remaining }
})
if (result.remaining > 0) {
errors.push({
code: 'IMH_EXTRACTION_CLEANUP_FAILED',
severity: 'warning',
category: 'extraction-error',
message: `Cleanup incomplete: ${result.remaining} data-imhotep-runtime-id attribute(s) could not be removed (injected ${result.cleaned}).`,
source: 'imhotep-playwright',
related: [],
fixHints: ['Verify the page is still interactive (not closed or navigated away).'],
metrics: { injected: result.cleaned, remaining: result.remaining },
sourceRef: {},
})
}
}).catch((err) => {
} catch (err) {
errors.push({
code: 'IMH_EXTRACTION_CLEANUP_FAILED',
severity: 'warning',
category: 'extraction-error',
message: `Fast-geometry cleanup failed: ${err instanceof Error ? err.message : String(err)}`,
source: 'imhotep-playwright',
related: [],
fixHints: ['Verify the page is still interactive (not closed or navigated away).'],
metrics: {},
sourceRef: {},
})
// eslint-disable-next-line no-console
console.warn('[imhotep-playwright] fast-geometry cleanup failed:', err instanceof Error ? err.message : err)
})
}
}
}
@@ -818,6 +870,28 @@ export async function extractWorldCdp(
}
const sessionManager = createSessionManager(playwrightPage)
try {
const residualBefore = await playwrightPage.evaluate(() =>
document.querySelectorAll('[data-imhotep-runtime-id]').length,
)
if (residualBefore > 0) {
errors.push({
code: 'IMH_EXTRACTION_RESIDUAL_ATTRIBUTES',
severity: 'warning',
category: 'extraction-error',
message: `Found ${residualBefore} residual data-imhotep-runtime-id attribute(s) from a prior extraction that did not clean up.`,
source: 'imhotep-cdp',
related: [],
fixHints: ['Leftover attributes indicate a prior extraction did not clean up. A page reload or navigating away and back may clear residual attributes.'],
metrics: { residualCount: residualBefore },
sourceRef: {},
})
}
} catch {
// Best-effort pre-check; proceed with extraction.
}
try {
await sessionManager.enableDomain('DOM')
@@ -925,16 +999,78 @@ export async function extractWorldCdp(
}
return { world, selectorToIds, errors }
} finally {
await playwrightPage.evaluate(() => {
const nodes = Array.from(document.querySelectorAll('[data-imhotep-runtime-id]'))
for (const el of nodes) {
el.removeAttribute('data-imhotep-runtime-id')
try {
const result = await playwrightPage.evaluate(() => {
const nodes = Array.from(document.querySelectorAll<HTMLElement>('[data-imhotep-runtime-id]'))
const cleaned = nodes.length
for (const el of nodes) {
el.removeAttribute('data-imhotep-runtime-id')
}
const remaining = document.querySelectorAll('[data-imhotep-runtime-id]').length
return { cleaned, remaining }
})
if (result.remaining > 0) {
errors.push({
code: 'IMH_EXTRACTION_CLEANUP_FAILED',
severity: 'warning',
category: 'extraction-error',
message: `CDP cleanup incomplete: ${result.remaining} data-imhotep-runtime-id attribute(s) could not be removed (injected ${result.cleaned}).`,
source: 'imhotep-cdp',
related: [],
fixHints: ['Verify the page is still interactive (not closed or navigated away).'],
metrics: { injected: result.cleaned, remaining: result.remaining },
sourceRef: {},
})
}
}).catch((err) => {
} catch (err) {
errors.push({
code: 'IMH_EXTRACTION_CLEANUP_FAILED',
severity: 'warning',
category: 'extraction-error',
message: `CDP attribute cleanup failed: ${err instanceof Error ? err.message : String(err)}`,
source: 'imhotep-cdp',
related: [],
fixHints: ['Verify the page is still interactive (not closed or navigated away).'],
metrics: {},
sourceRef: {},
})
// eslint-disable-next-line no-console
console.warn('[imhotep-playwright] CDP cleanup failed:', err instanceof Error ? err.message : err)
})
await sessionManager.detach()
}
try {
await sessionManager.detach()
} catch (err) {
errors.push({
code: 'IMH_EXTRACTION_CLEANUP_FAILED',
severity: 'warning',
category: 'extraction-error',
message: `CDP session detach failed: ${err instanceof Error ? err.message : String(err)}`,
source: 'imhotep-cdp',
related: [],
fixHints: ['The CDP session may already be closed. This is typically non-fatal.'],
metrics: {},
sourceRef: {},
})
// eslint-disable-next-line no-console
console.warn('[imhotep-playwright] CDP cleanup failed:', err instanceof Error ? err.message : err)
}
try {
await sessionManager.detach()
} catch (err) {
errors.push({
code: 'IMH_EXTRACTION_CLEANUP_FAILED',
severity: 'warning',
category: 'extraction-error',
message: `CDP session detach failed: ${err instanceof Error ? err.message : String(err)}`,
source: 'imhotep-cdp',
related: [],
fixHints: ['The CDP session may already be closed. This is typically non-fatal.'],
metrics: {},
sourceRef: {},
})
// eslint-disable-next-line no-console
console.warn('[imhotep-playwright] CDP session detach failed:', err instanceof Error ? err.message : err)
}
}
}