Add Puppeteer browser scraping and HG Spot store config
- Add browser-scraper.ts using Puppeteer for JS-heavy stores
- Add render_js flag to store model, migration, YAML sync, and UI
- Scraper engine auto-selects cheerio vs Puppeteer based on flag
- Store forms include JS rendering toggle in Advanced section
- Create first store config: HG Spot (Croatian electronics retailer)
- Update Dockerfile with Chromium for production Puppeteer support

Tested: HG Spot returns 15 products per page with correct names, prices (EUR), links, and images using headless browser rendering.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -8,6 +8,7 @@ export interface StoreFileConfig {
|
||||
base_url: string;
|
||||
search_url: string;
|
||||
enabled?: boolean;
|
||||
render_js?: boolean;
|
||||
category?: string;
|
||||
currency?: string;
|
||||
selectors: {
|
||||
@@ -98,7 +99,7 @@ export function syncFromFiles(storesDir: string): { created: number; updated: nu
|
||||
if (existing) {
|
||||
db.run(`
|
||||
UPDATE stores SET
|
||||
name = ?, base_url = ?, search_url = ?, enabled = ?,
|
||||
name = ?, base_url = ?, search_url = ?, enabled = ?, render_js = ?,
|
||||
sel_container = ?, sel_name = ?, sel_price = ?, sel_link = ?, sel_image = ?,
|
||||
rate_limit = ?, rate_window = ?, proxy_url = ?, user_agent = ?, headers_json = ?,
|
||||
currency = ?, category_id = ?, updated_at = datetime('now')
|
||||
@@ -106,6 +107,7 @@ export function syncFromFiles(storesDir: string): { created: number; updated: nu
|
||||
`, [
|
||||
config.name, config.base_url, config.search_url,
|
||||
config.enabled === false ? 0 : 1,
|
||||
config.render_js ? 1 : 0,
|
||||
config.selectors.container, config.selectors.name,
|
||||
config.selectors.price, config.selectors.link,
|
||||
config.selectors.image || null,
|
||||
@@ -117,14 +119,15 @@ export function syncFromFiles(storesDir: string): { created: number; updated: nu
|
||||
updated++;
|
||||
} else {
|
||||
db.run(`
|
||||
INSERT INTO stores (name, slug, base_url, search_url, enabled,
|
||||
INSERT INTO stores (name, slug, base_url, search_url, enabled, render_js,
|
||||
sel_container, sel_name, sel_price, sel_link, sel_image,
|
||||
rate_limit, rate_window, proxy_url, user_agent, headers_json,
|
||||
currency, category_id)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
`, [
|
||||
config.name, slug, config.base_url, config.search_url,
|
||||
config.enabled === false ? 0 : 1,
|
||||
config.render_js ? 1 : 0,
|
||||
config.selectors.container, config.selectors.name,
|
||||
config.selectors.price, config.selectors.link,
|
||||
config.selectors.image || null,
|
||||
@@ -161,6 +164,7 @@ function storeToConfig(store: any, categoryName?: string): StoreFileConfig {
|
||||
|
||||
if (store.sel_image) config.selectors.image = store.sel_image;
|
||||
if (store.enabled === 0) config.enabled = false;
|
||||
if (store.render_js) config.render_js = true;
|
||||
if (categoryName) config.category = categoryName;
|
||||
if (store.currency && store.currency !== 'EUR') config.currency = store.currency;
|
||||
if (store.rate_limit && store.rate_limit !== 2) config.rate_limit = store.rate_limit;
|
||||
|
||||
Reference in New Issue
Block a user