Add Puppeteer browser scraping and HG Spot store config
- Add browser-scraper.ts using Puppeteer for JS-heavy stores
- Add render_js flag to store model, migration, YAML sync, and UI
- Scraper engine auto-selects cheerio vs Puppeteer based on flag
- Store forms include JS rendering toggle in Advanced section
- Create first store config: HG Spot (Croatian electronics retailer)
- Update Dockerfile with Chromium for production Puppeteer support

Tested: HG Spot returns 15 products per page with correct names, prices (EUR), links, and images using headless browser rendering.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -7,6 +7,7 @@ export interface Store {
|
||||
base_url: string;
|
||||
search_url: string;
|
||||
enabled: number;
|
||||
render_js: number;
|
||||
sel_container: string;
|
||||
sel_name: string;
|
||||
sel_price: string;
|
||||
@@ -38,6 +39,7 @@ export interface CreateStoreInput {
|
||||
sel_price: string;
|
||||
sel_link: string;
|
||||
sel_image?: string;
|
||||
render_js?: boolean;
|
||||
rate_limit?: number;
|
||||
rate_window?: number;
|
||||
proxy_url?: string;
|
||||
@@ -118,11 +120,12 @@ export function createStore(input: CreateStoreInput): Store {
|
||||
|
||||
db.run(`
|
||||
INSERT INTO stores (name, slug, base_url, search_url, sel_container, sel_name, sel_price, sel_link, sel_image,
|
||||
rate_limit, rate_window, proxy_url, user_agent, headers_json, currency, category_id)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
render_js, rate_limit, rate_window, proxy_url, user_agent, headers_json, currency, category_id)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
`, [
|
||||
input.name, slug, input.base_url, input.search_url,
|
||||
input.sel_container, input.sel_name, input.sel_price, input.sel_link, input.sel_image || null,
|
||||
input.render_js ? 1 : 0,
|
||||
input.rate_limit ?? 2, input.rate_window ?? 1000,
|
||||
input.proxy_url || null, input.user_agent || null, input.headers_json || null,
|
||||
input.currency || 'EUR', input.category_id || null,
|
||||
|
||||
Reference in New Issue
Block a user