import type { Browser } from 'puppeteer';
import type { Store } from '../models/store.js';
import type { ScrapedItem } from './result-parser.js';

/** Shared Chromium instance, launched on first use and reused across scrapes. */
let browser: Browser | null = null;

/**
 * In-flight launch, if any. Overlapping getBrowser() calls await this same
 * promise so that only one Chromium process is ever started at a time.
 */
let launching: Promise<Browser> | null = null;

/** Maximum time (ms) to wait for the store's result container to appear. */
const SELECTOR_TIMEOUT = 15_000;
/** Maximum time (ms) to wait for the initial navigation. */
const NAVIGATION_TIMEOUT = 15_000;
/** Short pause (ms) after the container appears, to let remaining renders finish. */
const RENDER_SETTLE_MS = 300;

/** Writes a message to stdout, prefixed with this module's tag. */
function log(msg: string): void {
  console.log(`[browser-scraper] ${msg}`);
}

/** Extracts a printable message from an arbitrary thrown value. */
function errorMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Parses a JSON string that is expected to hold a flat map of HTTP header
 * names to string values.
 *
 * @param raw - JSON text (e.g. a store's `headers_json` field).
 * @returns The header map, or `null` when the text is not valid JSON, is not a
 *   plain object, or contains any non-string value.
 */
function parseHeadersJson(raw: string): Record<string, string> | null {
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return null;
  }
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    return null;
  }
  const entries = Object.entries(parsed);
  if (!entries.every(([, value]) => typeof value === 'string')) {
    return null;
  }
  return Object.fromEntries(entries);
}

/**
 * Returns the shared browser, launching Chromium if there is no connected
 * instance yet.
 *
 * Concurrent callers share a single launch: the first call starts it and the
 * others await the same promise. If the launch fails the error propagates to
 * every waiter and the next call will try again.
 */
async function getBrowser(): Promise<Browser> {
  if (browser?.connected) return browser;
  if (launching) return launching;

  launching = (async () => {
    log('Launching Chromium...');
    // Loaded lazily so importing this module does not pull in puppeteer.
    const puppeteer = await import('puppeteer');
    const launched = await puppeteer.default.launch({
      headless: true,
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        '--disable-extensions',
        '--no-first-run',
      ],
    });
    log('Chromium launched');
    browser = launched;
    return launched;
  })();

  try {
    return await launching;
  } finally {
    // Cleared on success and on failure so a failed launch can be retried.
    launching = null;
  }
}

/** Outcome of a single browser-based scrape. */
export interface BrowserScrapeResult {
  /** Products extracted from the page; empty when the container never appeared. */
  items: ScrapedItem[];
  /** Always the empty string in this implementation. */
  html: string;
  /** HTTP status of the navigation response, or 200 when no response object was returned. */
  statusCode: number;
}

/**
 * Opens `searchUrl` in a new page of the shared browser and extracts products
 * using the store's CSS selectors.
 *
 * The store's user agent and extra headers are applied when present; invalid
 * or rejected headers are logged and skipped rather than failing the scrape.
 * If the container selector does not appear within SELECTOR_TIMEOUT the
 * function returns an empty item list instead of throwing.
 *
 * @param store - Store configuration (name, selectors, optional UA/headers).
 * @param searchUrl - Fully built search URL to load.
 * @returns The extracted items together with the navigation status code.
 * @throws Whatever browser launch, page creation, navigation or evaluation
 *   throws. Failures while closing the page are logged, not thrown.
 */
export async function scrapeStoreWithBrowser(
  store: Store,
  searchUrl: string,
): Promise<BrowserScrapeResult> {
  log(`Scraping ${store.name}: ${searchUrl}`);

  const b = await getBrowser();
  const page = await b.newPage();

  try {
    if (store.user_agent) {
      await page.setUserAgent(store.user_agent);
    }

    if (store.headers_json) {
      const headers = parseHeadersJson(store.headers_json);
      if (headers === null) {
        log(`${store.name}: headers_json is not a JSON object of string values, skipping extra headers`);
      } else {
        try {
          await page.setExtraHTTPHeaders(headers);
        } catch (err: unknown) {
          // Best effort: a rejected header set must not abort the scrape.
          log(`${store.name}: could not apply extra headers: ${errorMessage(err)}`);
        }
      }
    }

    log(`${store.name}: navigating...`);
    const response = await page.goto(searchUrl, {
      waitUntil: 'domcontentloaded',
      timeout: NAVIGATION_TIMEOUT,
    });

    const statusCode = response?.status() ?? 200;
    log(`${store.name}: DOM loaded (status ${statusCode})`);

    log(`${store.name}: waiting for selector "${store.sel_container}"...`);
    try {
      await page.waitForSelector(store.sel_container, { timeout: SELECTOR_TIMEOUT });
    } catch {
      // Any failure here (typically a timeout) is treated as "no results".
      log(`${store.name}: selector not found, returning empty`);
      return { items: [], html: '', statusCode };
    }

    log(`${store.name}: selector found`);

    // Brief wait for remaining renders
    await new Promise<void>((resolve) => setTimeout(resolve, RENDER_SETTLE_MS));

    log(`${store.name}: extracting products...`);

    // This callback is executed inside the page, so it must be self-contained:
    // it can only use its `selectors` argument and browser globals.
    const items = await page.evaluate((selectors) => {
      const containers = document.querySelectorAll(selectors.container);
      const results: Array<{ name: string; priceText: string; link: string; image: string | null }> = [];

      containers.forEach((el) => {
        const nameEl = el.querySelector(selectors.name);
        const priceEl = el.querySelector(selectors.price);
        const linkEl = el.querySelector(selectors.link);
        const imageEl = selectors.image ? el.querySelector(selectors.image) : null;

        const name = nameEl?.textContent?.trim() || '';
        const priceText = priceEl?.textContent?.trim() || '';
        const link = linkEl?.getAttribute('href') || '';
        // `||` (not `??`) so an empty `src` falls through to `data-src`.
        const image = imageEl?.getAttribute('src') || imageEl?.getAttribute('data-src') || null;

        // Entries without both a name and a price are dropped.
        if (name && priceText) {
          results.push({ name, priceText, link, image });
        }
      });

      return results;
    }, {
      container: store.sel_container,
      name: store.sel_name,
      price: store.sel_price,
      link: store.sel_link,
      image: store.sel_image || null,
    });

    log(`${store.name}: found ${items.length} products`);

    return { items, html: '', statusCode };
  } finally {
    log(`${store.name}: closing page`);
    try {
      await page.close();
      log(`${store.name}: done`);
    } catch (err: unknown) {
      // Throwing from `finally` would replace the scrape's result or its
      // original error, so a failed close is only reported.
      log(`${store.name}: failed to close page: ${errorMessage(err)}`);
    }
  }
}

/** Launches the shared browser ahead of time so the first scrape does not pay the startup cost. */
export async function warmupBrowser(): Promise<void> {
  log('Warming up browser...');
  await getBrowser();
  log('Browser ready');
}

/**
 * Closes the shared browser, if one exists.
 *
 * The module-level handle is cleared before closing so that a failing
 * `close()` does not leave a stale reference behind; the error itself still
 * propagates to the caller.
 */
export async function closeBrowser(): Promise<void> {
  const current = browser;
  if (!current) return;
  browser = null;
  await current.close();
}