Fix Puppeteer hanging by using domcontentloaded instead of networkidle2
networkidle2 waits until there have been no more than two open network connections for at least 500 ms. Sites with analytics, trackers, and websockets may never reach that state, so navigation hangs until it times out. domcontentloaded fires much earlier; waitForSelector then waits for the dynamically rendered content. HG Spot now completes in ~2.5s instead of timing out.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -4,8 +4,8 @@ import type { ScrapedItem } from './result-parser.js';
|
||||
|
||||
let browser: Browser | null = null;
|
||||
|
||||
const PAGE_TIMEOUT = 30_000;
|
||||
const NAVIGATION_TIMEOUT = 20_000;
|
||||
const SELECTOR_TIMEOUT = 15_000;
|
||||
const NAVIGATION_TIMEOUT = 15_000;
|
||||
|
||||
async function getBrowser(): Promise<Browser> {
|
||||
if (browser && browser.connected) return browser;
|
||||
@@ -52,7 +52,7 @@ export async function scrapeStoreWithBrowser(store: Store, searchUrl: string): P
|
||||
|
||||
// Navigate to the page
|
||||
const response = await page.goto(searchUrl, {
|
||||
waitUntil: 'networkidle2',
|
||||
waitUntil: 'domcontentloaded',
|
||||
timeout: NAVIGATION_TIMEOUT,
|
||||
});
|
||||
|
||||
@@ -60,7 +60,7 @@ export async function scrapeStoreWithBrowser(store: Store, searchUrl: string): P
|
||||
|
||||
// Wait for the product container to appear
|
||||
try {
|
||||
await page.waitForSelector(store.sel_container, { timeout: PAGE_TIMEOUT });
|
||||
await page.waitForSelector(store.sel_container, { timeout: SELECTOR_TIMEOUT });
|
||||
} catch {
|
||||
// Container might not exist if no results — return empty
|
||||
const html = await page.content();
|
||||
|
||||
Reference in New Issue
Block a user