import puppeteer, { type Browser } from 'puppeteer';
import type { Store } from '../models/store.js';
import type { ScrapedItem } from './result-parser.js';

/** Shared browser instance reused across scrapes; `null` until launched or after `closeBrowser()`. */
let browser: Browser | null = null;

/** In-flight launch, shared so that concurrent callers do not each start their own browser. */
let launching: Promise<Browser> | null = null;

/** Maximum time (ms) to wait for the product container selector to appear. */
const PAGE_TIMEOUT = 30_000;

/** Maximum time (ms) allowed for the initial navigation. */
const NAVIGATION_TIMEOUT = 20_000;

/** Extra pause (ms) after the container appears, to let remaining renders finish. */
const RENDER_SETTLE_MS = 500;

/**
 * Returns the shared browser, launching it when there is none or when the
 * existing one is no longer connected.
 *
 * Callers that arrive while a launch is already in progress await that same
 * launch rather than starting a second browser whose handle would be lost.
 */
async function getBrowser(): Promise<Browser> {
  if (browser?.connected) return browser;
  if (launching) return launching;

  launching = puppeteer.launch({
    headless: true,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-gpu',
      '--disable-extensions',
      '--no-first-run',
    ],
  });

  try {
    browser = await launching;
    return browser;
  } finally {
    // Clear on success and on failure so a failed launch can be retried.
    launching = null;
  }
}

/**
 * Parses a JSON string into a header map.
 *
 * Returns `null` when the text is not valid JSON, is not a plain object, or
 * contains any non-string value, so the caller can skip the headers entirely.
 */
function parseExtraHeaders(raw: string): Record<string, string> | null {
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return null;
  }

  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    return null;
  }

  const headers: Record<string, string> = {};
  for (const [key, value] of Object.entries(parsed as Record<string, unknown>)) {
    if (typeof value !== 'string') return null;
    headers[key] = value;
  }
  return headers;
}

/** Outcome of rendering a search page in the browser and extracting its products. */
export interface BrowserScrapeResult {
  /** Products extracted from the rendered DOM; empty when the container never appeared. */
  items: ScrapedItem[];
  /** Serialized page content at the time of extraction. */
  html: string;
  /** HTTP status of the navigation response, or 200 when no response object was returned. */
  statusCode: number;
}

/**
 * Loads `searchUrl` in a fresh page of the shared browser and extracts product
 * entries using the store's CSS selectors.
 *
 * Only containers that yield both a non-empty name and a non-empty price text
 * are returned. `link` is the raw `href` attribute (not resolved against the
 * page URL); `image` is the `src` attribute, falling back to `data-src`.
 *
 * @param store - Supplies the selectors and the optional user agent / extra headers.
 * @param searchUrl - URL to navigate to.
 * @returns Extracted items together with the page HTML and the response status.
 * @throws Propagates launch and navigation errors (including navigation timeout).
 *   A container wait that fails is not an error: it yields an empty item list.
 */
export async function scrapeStoreWithBrowser(
  store: Store,
  searchUrl: string,
): Promise<BrowserScrapeResult> {
  const b = await getBrowser();
  const page = await b.newPage();

  try {
    // Set user agent if configured
    if (store.user_agent) {
      await page.setUserAgent(store.user_agent);
    }

    // Set extra headers if configured; invalid headers are ignored (best effort)
    if (store.headers_json) {
      const headers = parseExtraHeaders(store.headers_json);
      if (headers) {
        try {
          await page.setExtraHTTPHeaders(headers);
        } catch {
          /* ignore invalid headers */
        }
      }
    }

    // Navigate to the page
    const response = await page.goto(searchUrl, {
      waitUntil: 'networkidle2',
      timeout: NAVIGATION_TIMEOUT,
    });
    const statusCode = response?.status() ?? 200;

    // Wait for the product container to appear
    try {
      await page.waitForSelector(store.sel_container, { timeout: PAGE_TIMEOUT });
    } catch {
      // Container might not exist if no results — return empty
      const html = await page.content();
      return { items: [], html, statusCode };
    }

    // Small extra wait for any remaining renders
    await new Promise<void>((resolve) => setTimeout(resolve, RENDER_SETTLE_MS));

    // Extract product data from the rendered DOM. The callback runs inside the
    // page, so everything it needs is passed in through the selectors argument.
    const items = await page.evaluate(
      (selectors) => {
        const results: Array<{
          name: string;
          priceText: string;
          link: string;
          image: string | null;
        }> = [];

        document.querySelectorAll(selectors.container).forEach((el) => {
          const nameEl = el.querySelector(selectors.name);
          const priceEl = el.querySelector(selectors.price);
          const linkEl = el.querySelector(selectors.link);
          const imageEl = selectors.image ? el.querySelector(selectors.image) : null;

          const name = nameEl?.textContent?.trim() || '';
          const priceText = priceEl?.textContent?.trim() || '';
          const link = linkEl?.getAttribute('href') || '';
          // `||` on purpose: an empty `src` falls through to `data-src`.
          const image =
            imageEl?.getAttribute('src') || imageEl?.getAttribute('data-src') || null;

          if (name && priceText) {
            results.push({ name, priceText, link, image });
          }
        });

        return results;
      },
      {
        container: store.sel_container,
        name: store.sel_name,
        price: store.sel_price,
        link: store.sel_link,
        image: store.sel_image || null,
      },
    );

    const html = await page.content();
    return { items, html, statusCode };
  } finally {
    await page.close();
  }
}

/**
 * Closes the shared browser, if one exists.
 *
 * The module-level reference is cleared before closing, so it is reset even
 * when `close()` rejects and later callers launch a fresh browser.
 */
export async function closeBrowser(): Promise<void> {
  const current = browser;
  if (!current) return;

  browser = null;
  await current.close();
}