diff --git a/src/server/scraper/browser-scraper.ts b/src/server/scraper/browser-scraper.ts index 67c9173..c581eb0 100644 --- a/src/server/scraper/browser-scraper.ts +++ b/src/server/scraper/browser-scraper.ts @@ -7,9 +7,14 @@ let browser: Browser | null = null; const SELECTOR_TIMEOUT = 15_000; const NAVIGATION_TIMEOUT = 15_000; +function log(msg: string) { + console.log(`[browser-scraper] ${msg}`); +} + async function getBrowser(): Promise { if (browser && browser.connected) return browser; + log('Launching Chromium...'); const puppeteer = await import('puppeteer'); browser = await puppeteer.default.launch({ headless: true, @@ -22,6 +27,7 @@ async function getBrowser(): Promise { '--no-first-run', ], }); + log('Chromium launched'); return browser; } @@ -33,44 +39,44 @@ export interface BrowserScrapeResult { } export async function scrapeStoreWithBrowser(store: Store, searchUrl: string): Promise { + log(`Scraping ${store.name}: ${searchUrl}`); const b = await getBrowser(); const page = await b.newPage(); try { - // Set user agent if configured if (store.user_agent) { await page.setUserAgent(store.user_agent); } - // Set extra headers if configured if (store.headers_json) { try { const headers = JSON.parse(store.headers_json); await page.setExtraHTTPHeaders(headers); - } catch { /* ignore invalid headers */ } + } catch { /* ignore */ } } - // Navigate to the page + log(`${store.name}: navigating...`); const response = await page.goto(searchUrl, { waitUntil: 'domcontentloaded', timeout: NAVIGATION_TIMEOUT, }); const statusCode = response?.status() ?? 200; + log(`${store.name}: DOM loaded (status ${statusCode})`); - // Wait for the product container to appear + log(`${store.name}: waiting for selector "${store.sel_container}"...`); try { await page.waitForSelector(store.sel_container, { timeout: SELECTOR_TIMEOUT }); } catch { - // Container might not exist if no results — return empty - const html = await page.content(); - return { items: [], html, statusCode }; + log(`${store.name}: selector not found, returning empty`); + return { items: [], html: '', statusCode }; } + log(`${store.name}: selector found`); - // Small extra wait for any remaining renders - await new Promise((r) => setTimeout(r, 500)); + // Brief wait for remaining renders + await new Promise((r) => setTimeout(r, 300)); - // Extract product data from the rendered DOM + log(`${store.name}: extracting products...`); const items = await page.evaluate((selectors) => { const containers = document.querySelectorAll(selectors.container); const results: Array<{ name: string; priceText: string; link: string; image: string | null }> = []; @@ -100,11 +106,13 @@ export async function scrapeStoreWithBrowser(store: Store, searchUrl: string): P image: store.sel_image || null, }); - const html = await page.content(); + log(`${store.name}: found ${items.length} products`); - return { items, html, statusCode }; + return { items, html: '', statusCode }; } finally { + log(`${store.name}: closing page`); await page.close(); + log(`${store.name}: done`); } }