Add Puppeteer browser scraping and HG Spot store config

- Add browser-scraper.ts using Puppeteer for JS-heavy stores
- Add render_js flag to store model, migration, YAML sync, and UI
- Scraper engine auto-selects between cheerio and Puppeteer based on the render_js flag
- Store forms include a JS rendering toggle in the Advanced section
- Create first store config: HG Spot (Croatian electronics retailer)
- Update Dockerfile with Chromium for production Puppeteer support

Tested: HG Spot returns 15 products per page with correct names,
prices (EUR), links, and images using headless browser rendering.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
mariosemes
2026-03-26 21:36:20 +01:00
parent 97fb8d9663
commit 130ab30fcc
13 changed files with 1037 additions and 39 deletions

View File

@@ -8,6 +8,7 @@ export interface StoreFileConfig {
base_url: string;
search_url: string;
enabled?: boolean;
render_js?: boolean;
category?: string;
currency?: string;
selectors: {
@@ -98,7 +99,7 @@ export function syncFromFiles(storesDir: string): { created: number; updated: nu
if (existing) {
db.run(`
UPDATE stores SET
name = ?, base_url = ?, search_url = ?, enabled = ?,
name = ?, base_url = ?, search_url = ?, enabled = ?, render_js = ?,
sel_container = ?, sel_name = ?, sel_price = ?, sel_link = ?, sel_image = ?,
rate_limit = ?, rate_window = ?, proxy_url = ?, user_agent = ?, headers_json = ?,
currency = ?, category_id = ?, updated_at = datetime('now')
@@ -106,6 +107,7 @@ export function syncFromFiles(storesDir: string): { created: number; updated: nu
`, [
config.name, config.base_url, config.search_url,
config.enabled === false ? 0 : 1,
config.render_js ? 1 : 0,
config.selectors.container, config.selectors.name,
config.selectors.price, config.selectors.link,
config.selectors.image || null,
@@ -117,14 +119,15 @@ export function syncFromFiles(storesDir: string): { created: number; updated: nu
updated++;
} else {
db.run(`
INSERT INTO stores (name, slug, base_url, search_url, enabled,
INSERT INTO stores (name, slug, base_url, search_url, enabled, render_js,
sel_container, sel_name, sel_price, sel_link, sel_image,
rate_limit, rate_window, proxy_url, user_agent, headers_json,
currency, category_id)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`, [
config.name, slug, config.base_url, config.search_url,
config.enabled === false ? 0 : 1,
config.render_js ? 1 : 0,
config.selectors.container, config.selectors.name,
config.selectors.price, config.selectors.link,
config.selectors.image || null,
@@ -161,6 +164,7 @@ function storeToConfig(store: any, categoryName?: string): StoreFileConfig {
if (store.sel_image) config.selectors.image = store.sel_image;
if (store.enabled === 0) config.enabled = false;
if (store.render_js) config.render_js = true;
if (categoryName) config.category = categoryName;
if (store.currency && store.currency !== 'EUR') config.currency = store.currency;
if (store.rate_limit && store.rate_limit !== 2) config.rate_limit = store.rate_limit;