Fix Puppeteer hanging by using domcontentloaded instead of networkidle2

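networkidle2 resolves only once there have been no more than two open
network connections for at least 500 ms, a condition that sites with
analytics, trackers, and websockets may never reach, so page.goto hangs
until it times out. domcontentloaded fires much earlier, and
waitForSelector then waits for the dynamically rendered content.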
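HG Spot now completes in ~2.5s instead of timing out. The navigation
timeout also drops from 20s to 15s, and the selector wait now uses a
15s SELECTOR_TIMEOUT in place of the 30s PAGE_TIMEOUT.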

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
mariosemes
2026-03-26 22:26:16 +01:00
parent b243e06175
commit 75b8759805

View File

@@ -4,8 +4,8 @@ import type { ScrapedItem } from './result-parser.js';
let browser: Browser | null = null; let browser: Browser | null = null;
const PAGE_TIMEOUT = 30_000; const SELECTOR_TIMEOUT = 15_000;
const NAVIGATION_TIMEOUT = 20_000; const NAVIGATION_TIMEOUT = 15_000;
async function getBrowser(): Promise<Browser> { async function getBrowser(): Promise<Browser> {
if (browser && browser.connected) return browser; if (browser && browser.connected) return browser;
@@ -52,7 +52,7 @@ export async function scrapeStoreWithBrowser(store: Store, searchUrl: string): P
// Navigate to the page // Navigate to the page
const response = await page.goto(searchUrl, { const response = await page.goto(searchUrl, {
waitUntil: 'networkidle2', waitUntil: 'domcontentloaded',
timeout: NAVIGATION_TIMEOUT, timeout: NAVIGATION_TIMEOUT,
}); });
@@ -60,7 +60,7 @@ export async function scrapeStoreWithBrowser(store: Store, searchUrl: string): P
// Wait for the product container to appear // Wait for the product container to appear
try { try {
await page.waitForSelector(store.sel_container, { timeout: PAGE_TIMEOUT }); await page.waitForSelector(store.sel_container, { timeout: SELECTOR_TIMEOUT });
} catch { } catch {
// Container might not exist if no results — return empty // Container might not exist if no results — return empty
const html = await page.content(); const html = await page.content();