Add Puppeteer browser scraping and HG Spot store config

- Add browser-scraper.ts using Puppeteer for JS-heavy stores
- Add render_js flag to store model, migration, YAML sync, and UI
- Scraper engine auto-selects cheerio vs Puppeteer based on the render_js flag
- Store forms include JS rendering toggle in Advanced section
- Create first store config: HG Spot (Croatian electronics retailer)
- Update Dockerfile with Chromium for production Puppeteer support

Tested: HG Spot returns 15 products per page with correct names,
prices (EUR), links, and images using headless browser rendering.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
mariosemes
2026-03-26 21:36:20 +01:00
parent 97fb8d9663
commit 130ab30fcc
13 changed files with 1037 additions and 39 deletions

View File

@@ -7,6 +7,7 @@ export interface Store {
base_url: string;
search_url: string;
enabled: number;
render_js: number;
sel_container: string;
sel_name: string;
sel_price: string;
@@ -38,6 +39,7 @@ export interface CreateStoreInput {
sel_price: string;
sel_link: string;
sel_image?: string;
render_js?: boolean;
rate_limit?: number;
rate_window?: number;
proxy_url?: string;
@@ -118,11 +120,12 @@ export function createStore(input: CreateStoreInput): Store {
db.run(`
INSERT INTO stores (name, slug, base_url, search_url, sel_container, sel_name, sel_price, sel_link, sel_image,
rate_limit, rate_window, proxy_url, user_agent, headers_json, currency, category_id)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
render_js, rate_limit, rate_window, proxy_url, user_agent, headers_json, currency, category_id)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`, [
input.name, slug, input.base_url, input.search_url,
input.sel_container, input.sel_name, input.sel_price, input.sel_link, input.sel_image || null,
input.render_js ? 1 : 0,
input.rate_limit ?? 2, input.rate_window ?? 1000,
input.proxy_url || null, input.user_agent || null, input.headers_json || null,
input.currency || 'EUR', input.category_id || null,