Add Puppeteer browser scraping and HG Spot store config

- Add browser-scraper.ts using Puppeteer for JS-heavy stores
- Add render_js flag to store model, migration, YAML sync, and UI
- Scraper engine auto-selects cheerio vs Puppeteer based on the store's render_js flag
- Store forms include JS rendering toggle in Advanced section
- Create first store config: HG Spot (Croatian electronics retailer)
- Update Dockerfile with Chromium for production Puppeteer support

Tested: with headless browser rendering, HG Spot returns 15 products
per page with correct names, prices (EUR), links, and images.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
mariosemes
2026-03-26 21:36:20 +01:00
parent 97fb8d9663
commit 130ab30fcc
13 changed files with 1037 additions and 39 deletions

View File

@@ -3,6 +3,7 @@ import type { Store } from '../models/store.js';
import { getEnabledStores, getStoresByCategory, getStoresByGroup, getStoresByIds } from '../models/store.js';
import { logScrape } from '../models/scrape-log.js';
import { scrapeStore } from './http-scraper.js';
import { scrapeStoreWithBrowser } from './browser-scraper.js';
import { normalizeResult, type Product } from './result-parser.js';
import { getLimiter } from './rate-limiter.js';
@@ -66,7 +67,10 @@ export async function search(options: SearchOptions): Promise<SearchResult> {
const rateLimiter = getLimiter(store.id, 1, Math.floor(store.rate_window / store.rate_limit));
try {
const result = await rateLimiter.schedule(() => scrapeStore(store, searchUrl));
const scrapeFn = store.render_js
? () => scrapeStoreWithBrowser(store, searchUrl)
: () => scrapeStore(store, searchUrl);
const result = await rateLimiter.schedule(scrapeFn);
const duration = Date.now() - storeStart;
const products = result.items.map((item) =>