/**
 * Geometry snapshot caching for Imhotep.
 *
 * Serializes and deserializes GeometryWorld instances so extracted
 * browser geometry can be cached to disk and reused across assertion
 * batches without re-extraction.
 */

import { existsSync, mkdirSync, rmSync, readdirSync, statSync } from 'node:fs'
import { readFile, writeFile, unlink } from 'node:fs/promises'
import { join } from 'node:path'

import type { GeometryWorld, WorldEnvironment, WorldSource } from './world.js'
import type { ImhotepId } from './types.js'

// ---------------------------------------------------------------------------
// Serialization
// ---------------------------------------------------------------------------

/** Typed-array kinds that can appear as table columns. */
type TypedArray =
  | Uint32Array
  | Uint16Array
  | Uint8Array
  | Int32Array
  | Int16Array
  | Float64Array
  | Float32Array

/** Constructor names used as the `type` tag of a serialized typed array. */
type TypedArrayName =
  | 'Uint32Array'
  | 'Uint16Array'
  | 'Uint8Array'
  | 'Int32Array'
  | 'Int16Array'
  | 'Float64Array'
  | 'Float32Array'

/** JSON representation of a typed array: constructor name plus plain numbers. */
interface SerializedTypedArray {
  type: TypedArrayName
  data: number[]
}

/** Anything a serialized table column may hold. Scalars are stored as-is. */
type SerializedColumn = SerializedTypedArray | string[] | number[] | string | number | boolean

interface SerializedTable {
  [column: string]: SerializedColumn
}

/**
 * Names of the table-valued fields of a GeometryWorld, in the order they are
 * written to the serialized object.
 */
const TABLE_NAMES = [
  'subjects',
  'dom',
  'frames',
  'matrices',
  'rects',
  'boxes',
  'visualBoxes',
  'fragments',
  'transforms',
  'styles',
  'text',
  'topology',
  'scroll',
  'clipping',
  'paint',
  'visibility',
  'provenance',
  'confidence',
] as const

type TableName = (typeof TABLE_NAMES)[number]

/** Version tag written into (and required from) a serialized world. */
const SERIALIZED_WORLD_VERSION = 1

interface SerializedWorldHeader {
  version: number
  sceneId: ImhotepId
  snapshotId: ImhotepId
  env: WorldEnvironment
  source: WorldSource
  strings: { values: string[] }
}

type SerializedWorld = SerializedWorldHeader & Record<TableName, SerializedTable>

/**
 * Allow-list of typed-array constructors that may be instantiated while
 * deserializing. The `type` tag comes from a file on disk, so it is resolved
 * against this table instead of being looked up on `globalThis`.
 */
const TYPED_ARRAY_CONSTRUCTORS: Record<TypedArrayName, new (data: number[]) => TypedArray> = {
  Uint32Array,
  Uint16Array,
  Uint8Array,
  Int32Array,
  Int16Array,
  Float64Array,
  Float32Array,
}

const TYPED_ARRAY_NAMES = Object.keys(TYPED_ARRAY_CONSTRUCTORS) as TypedArrayName[]

/** Returns the allow-listed constructor name `value` is an instance of, if any. */
function typedArrayNameOf(value: unknown): TypedArrayName | undefined {
  return TYPED_ARRAY_NAMES.find((name) => value instanceof TYPED_ARRAY_CONSTRUCTORS[name])
}

/** Type guard for the typed-array kinds supported by the cache format. */
function isTypedArray(value: unknown): value is TypedArray {
  return typedArrayNameOf(value) !== undefined
}

/** Shape check used to recognise a serialized typed array inside a table. */
function isSerializedTypedArray(value: unknown): value is SerializedTypedArray {
  return typeof value === 'object' && value !== null && 'type' in value && 'data' in value
}

/**
 * Convert a typed array into its JSON-friendly form.
 *
 * @throws Error if `arr` is not one of the supported typed-array kinds.
 */
function serializeTypedArray(arr: TypedArray): SerializedTypedArray {
  const type = typedArrayNameOf(arr)
  if (type === undefined) {
    throw new Error('Cannot serialize unsupported typed array')
  }
  return { type, data: Array.from(arr) }
}

/**
 * Rebuild a typed array from its serialized form.
 *
 * @throws Error if the `type` tag is not allow-listed or `data` is not an array.
 */
function deserializeTypedArray(serialized: SerializedTypedArray): TypedArray {
  // Own-property check so inherited keys such as "constructor" never resolve.
  const known = Object.prototype.hasOwnProperty.call(TYPED_ARRAY_CONSTRUCTORS, serialized.type)
  if (!known) {
    throw new Error(`Unsupported typed array type in geometry cache: ${String(serialized.type)}`)
  }
  // A numeric `data` would be interpreted as a length by the constructor.
  if (!Array.isArray(serialized.data)) {
    throw new Error(`Malformed typed array data in geometry cache for type ${serialized.type}`)
  }
  return new TYPED_ARRAY_CONSTRUCTORS[serialized.type](serialized.data)
}

/**
 * Convert one world table into a JSON-friendly object.
 *
 * Typed-array columns are tagged with their constructor name; every other
 * column (plain arrays and scalars) is stored unchanged so that it reads back
 * exactly as it was written. A missing table serializes to `{}`.
 */
function serializeTable(table: Record<string, unknown>): SerializedTable {
  const result: SerializedTable = {}
  if (!table) return result
  for (const [key, value] of Object.entries(table)) {
    result[key] = isTypedArray(value) ? serializeTypedArray(value) : (value as SerializedColumn)
  }
  return result
}

/**
 * Inverse of serializeTable: tagged columns become typed arrays again, all
 * other columns are passed through.
 *
 * @param serialized table object read from JSON
 * @param name table name, used only in error messages
 * @throws Error if `serialized` is not a plain object.
 */
function deserializeTable(serialized: SerializedTable, name = 'table'): Record<string, unknown> {
  if (serialized === null || typeof serialized !== 'object' || Array.isArray(serialized)) {
    throw new Error(`Malformed geometry cache: ${name} is not an object`)
  }
  const result: Record<string, unknown> = {}
  for (const [key, value] of Object.entries(serialized)) {
    result[key] = isSerializedTypedArray(value) ? deserializeTypedArray(value) : value
  }
  return result
}

/**
 * Serialize a GeometryWorld to a JSON string.
 */
export function serializeGeometryWorld(world: GeometryWorld): string {
  const tables = {} as Record<TableName, SerializedTable>
  for (const name of TABLE_NAMES) {
    tables[name] = serializeTable(world[name] as unknown as Record<string, unknown>)
  }
  const serialized: SerializedWorld = {
    version: SERIALIZED_WORLD_VERSION,
    sceneId: world.sceneId,
    snapshotId: world.snapshotId,
    env: world.env,
    source: world.source,
    strings: world.strings,
    ...tables,
  }
  return JSON.stringify(serialized)
}

/**
 * Deserialize a JSON string back into a GeometryWorld.
 *
 * @throws SyntaxError if `json` is not valid JSON.
 * @throws Error if the payload is not an object, has an unsupported version,
 *   is missing a table, or contains an unknown typed-array tag.
 */
export function deserializeGeometryWorld(json: string): GeometryWorld {
  const parsed: unknown = JSON.parse(json)
  if (parsed === null || typeof parsed !== 'object') {
    throw new Error('Malformed geometry cache: expected a JSON object')
  }
  const serialized = parsed as SerializedWorld
  if (serialized.version !== SERIALIZED_WORLD_VERSION) {
    throw new Error(`Unsupported geometry cache version: ${serialized.version}`)
  }

  const tables = {} as Record<TableName, Record<string, unknown>>
  for (const name of TABLE_NAMES) {
    tables[name] = deserializeTable(serialized[name], name)
  }

  return {
    sceneId: serialized.sceneId,
    snapshotId: serialized.snapshotId,
    env: serialized.env,
    source: serialized.source,
    strings: serialized.strings,
    ...tables,
  } as unknown as GeometryWorld
}

// ---------------------------------------------------------------------------
// Cache key computation
// ---------------------------------------------------------------------------

/**
 * 32-bit djb2 string hash rendered as exactly eight lowercase hex digits.
 */
function djb2Hash(str: string): string {
  let hash = 5381
  for (let i = 0; i < str.length; i++) {
    hash = ((hash << 5) + hash + str.charCodeAt(i)) | 0
  }
  // `>>> 0` reinterprets the signed int32 as unsigned; without it a negative
  // hash would render with a leading "-" and break the fixed-width hex form.
  return (hash >>> 0).toString(16).padStart(8, '0')
}

/**
 * Schema version for cache invalidation. Increment when the world schema
 * changes in a way that makes previously cached extraction results incompatible.
 */
export const WORLD_CACHE_SCHEMA_VERSION = 2

/**
 * Compute a stable cache key for a geometry extraction.
 *
 * The key incorporates:
 * - schema version (automatic invalidation on schema changes)
 * - page URL
 * - sorted selectors (so order doesn't matter)
 * - environment (viewport dimensions, color scheme, etc.)
 *
 * @returns `<schemaVersion>-<urlHash>-<selectorsHash>-<envHash>`, where each
 *   hash is eight hex digits.
 */
export function computeGeometryCacheKey(
  pageUrl: string,
  selectors: string[],
  env: Partial<WorldEnvironment>,
): string {
  // Sort a copy so the caller's array is not mutated.
  const selectorsHash = djb2Hash([...selectors].sort().join('\x00'))
  const envHash = djb2Hash(
    JSON.stringify({
      viewportWidth: env.viewportWidth ?? 0,
      viewportHeight: env.viewportHeight ?? 0,
      deviceScaleFactor: env.deviceScaleFactor ?? 1,
      colorScheme: env.colorScheme ?? 'light',
      pointer: env.pointer ?? 'fine',
      hover: env.hover ?? false,
      reducedMotion: env.reducedMotion ?? 'no-preference',
      locale: env.locale ?? 'en',
      writingMode: env.writingMode ?? 'horizontal-tb',
    }),
  )
  const urlHash = djb2Hash(pageUrl)
  return `${WORLD_CACHE_SCHEMA_VERSION}-${urlHash}-${selectorsHash}-${envHash}`
}

// ---------------------------------------------------------------------------
// File-based cache storage
// ---------------------------------------------------------------------------

const DEFAULT_CACHE_DIR = join(process.cwd(), '.imhotep-cache')
const DEFAULT_MAX_CACHE_ENTRIES = 100

/** Path of the JSON file that stores the entry for `cacheKey`. */
function cacheFilePath(cacheDir: string, cacheKey: string): string {
  return join(cacheDir, `${cacheKey}.json`)
}

/** Render a caught value for a log message. */
function describeError(err: unknown): unknown {
  return err instanceof Error ? err.message : err
}

/**
 * Evict oldest cache entries when exceeding maxEntries.
 *
 * Entries are the `.json` files in `cacheDir`, ordered by modification time.
 * Files that cannot be stat'ed or removed are logged and skipped.
 */
function evictOldestEntries(cacheDir: string, maxEntries: number): void {
  if (!existsSync(cacheDir)) return

  const entries: Array<{ name: string; path: string; mtime: number }> = []
  for (const name of readdirSync(cacheDir)) {
    if (!name.endsWith('.json')) continue
    const fullPath = join(cacheDir, name)
    try {
      entries.push({ name, path: fullPath, mtime: statSync(fullPath).mtimeMs })
    } catch (err) {
      console.warn(`[imhotep-core] cache eviction: stat failed for ${name}: ${describeError(err)}`)
    }
  }
  entries.sort((a, b) => a.mtime - b.mtime)

  const excess = Math.max(0, entries.length - maxEntries)
  for (const entry of entries.slice(0, excess)) {
    try {
      rmSync(entry.path)
    } catch (err) {
      console.warn(`[imhotep-core] cache eviction: failed to remove ${entry.path}: ${describeError(err)}`)
    }
  }
}

/**
 * Write one cache file, then trim the directory to the entry limit.
 *
 * Eviction runs after the write so the directory never ends up holding more
 * than DEFAULT_MAX_CACHE_ENTRIES entries; the file just written has the
 * newest mtime and is therefore the last candidate for removal.
 */
async function writeCacheFile(cacheDir: string, cacheKey: string, contents: string): Promise<void> {
  mkdirSync(cacheDir, { recursive: true })
  await writeFile(cacheFilePath(cacheDir, cacheKey), contents)
  evictOldestEntries(cacheDir, DEFAULT_MAX_CACHE_ENTRIES)
}

/**
 * Read a cached GeometryWorld from disk.
 *
 * @returns the world, or `null` (after logging a warning) when the file is
 *   missing, unreadable, or fails to deserialize.
 */
export async function readCachedWorld(cacheDir: string, cacheKey: string): Promise<GeometryWorld | null> {
  const path = cacheFilePath(cacheDir, cacheKey)
  try {
    const json = await readFile(path, 'utf-8')
    return deserializeGeometryWorld(json)
  } catch (err) {
    console.warn(`[imhotep-core] readCachedWorld failed for ${cacheKey}: ${describeError(err)}. Cache miss.`)
    return null
  }
}

/**
 * Write a GeometryWorld to the cache on disk.
 *
 * The file write is asynchronous; directory creation and eviction of old
 * entries use synchronous fs calls.
 */
export async function writeCachedWorld(cacheDir: string, cacheKey: string, world: GeometryWorld): Promise<void> {
  await writeCacheFile(cacheDir, cacheKey, serializeGeometryWorld(world))
}

/**
 * Clear all cached geometry snapshots.
 *
 * Removes every `.json` file in `cacheDir`; failures are logged and skipped.
 */
export function clearGeometryCache(cacheDir: string = DEFAULT_CACHE_DIR): void {
  if (!existsSync(cacheDir)) {
    return
  }
  for (const file of readdirSync(cacheDir)) {
    if (!file.endsWith('.json')) continue
    try {
      rmSync(join(cacheDir, file))
    } catch (err) {
      console.warn(`[imhotep-core] cache clear: failed to remove ${file}: ${describeError(err)}`)
    }
  }
}

// ---------------------------------------------------------------------------
// Full extraction result caching (world + selector mapping + errors)
// ---------------------------------------------------------------------------

/** One extraction error as stored alongside a cached world. */
interface CachedExtractionError {
  code: string
  severity: string
  category: string
  message: string
  source: string
  related: string[]
  fixHints: string[]
  metrics: Record<string, unknown>
  sourceRef: Record<string, unknown>
  clauseLabel?: string
}

interface CachedExtractionResult {
  version: number
  /** The world, already serialized with serializeGeometryWorld. */
  world: string
  selectorToIds: [string, number[]][]
  errors: CachedExtractionError[]
}

/** Version tag written into (and required from) a cached extraction result. */
const EXTRACTION_RESULT_VERSION = 1

/**
 * Serialize a full extraction result (world + selector mappings + errors).
 */
export function serializeExtractionResult(
  world: GeometryWorld,
  selectorToIds: Map<string, number[]>,
  errors: CachedExtractionResult['errors'],
): string {
  const cached: CachedExtractionResult = {
    version: EXTRACTION_RESULT_VERSION,
    world: serializeGeometryWorld(world),
    selectorToIds: Array.from(selectorToIds.entries()),
    errors,
  }
  return JSON.stringify(cached)
}

/**
 * Deserialize a full extraction result from JSON.
 *
 * @throws SyntaxError if `json` is not valid JSON.
 * @throws Error if the payload is not an object, has an unsupported version,
 *   or does not carry a serialized world.
 */
export function deserializeExtractionResult(json: string): {
  world: GeometryWorld
  selectorToIds: Map<string, number[]>
  errors: CachedExtractionResult['errors']
} {
  const parsed: unknown = JSON.parse(json)
  if (parsed === null || typeof parsed !== 'object') {
    throw new Error('Malformed extraction cache: expected a JSON object')
  }
  const cached = parsed as CachedExtractionResult
  if (cached.version !== EXTRACTION_RESULT_VERSION) {
    throw new Error(`Unsupported extraction cache version: ${cached.version}`)
  }
  if (typeof cached.world !== 'string') {
    throw new Error('Malformed extraction cache: missing serialized world')
  }
  return {
    world: deserializeGeometryWorld(cached.world),
    selectorToIds: new Map(cached.selectorToIds),
    errors: cached.errors ?? [],
  }
}

/**
 * Read a cached extraction result from disk.
 *
 * @returns the result, or `null` (after logging a warning) when the file is
 *   missing, unreadable, or fails to deserialize.
 */
export async function readCachedExtractionResult(
  cacheDir: string,
  cacheKey: string,
): Promise<{
  world: GeometryWorld
  selectorToIds: Map<string, number[]>
  errors: CachedExtractionResult['errors']
} | null> {
  const path = cacheFilePath(cacheDir, cacheKey)
  try {
    const json = await readFile(path, 'utf-8')
    return deserializeExtractionResult(json)
  } catch (err) {
    console.warn(`[imhotep-core] readCachedExtractionResult failed for cacheKey=${cacheKey}: ${describeError(err)}. Cache miss.`)
    return null
  }
}

/**
 * Write an extraction result to the cache on disk.
 *
 * The file write is asynchronous; directory creation and eviction of old
 * entries use synchronous fs calls.
 */
export async function writeCachedExtractionResult(
  cacheDir: string,
  cacheKey: string,
  world: GeometryWorld,
  selectorToIds: Map<string, number[]>,
  errors: CachedExtractionResult['errors'],
): Promise<void> {
  await writeCacheFile(cacheDir, cacheKey, serializeExtractionResult(world, selectorToIds, errors))
}

/**
 * Return the default cache directory path.
 */
export function getDefaultCacheDir(): string {
  return DEFAULT_CACHE_DIR
}