Add Puppeteer browser scraping and HG Spot store config

- Add browser-scraper.ts using Puppeteer for JS-heavy stores
- Add render_js flag to store model, migration, YAML sync, and UI
- Scraper engine auto-selects cheerio vs Puppeteer based on flag
- Store forms include JS rendering toggle in Advanced section
- Create first store config: HG Spot (Croatian electronics retailer)
- Update Dockerfile with Chromium for production Puppeteer support

Tested: HG Spot returns 15 products per page with correct names,
prices (EUR), links, and images using headless browser rendering.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
mariosemes
2026-03-26 21:36:20 +01:00
parent 97fb8d9663
commit 130ab30fcc
13 changed files with 1037 additions and 39 deletions

View File

@@ -12,7 +12,7 @@
let form = $state({
name: '', base_url: '', search_url: '',
sel_container: '', sel_name: '', sel_price: '', sel_link: '', sel_image: '',
category_id: '', currency: 'EUR', rate_limit: 2,
category_id: '', currency: 'EUR', rate_limit: 2, render_js: false,
user_agent: '', proxy_url: '', headers_json: '',
});
@@ -24,8 +24,8 @@
sel_container: store.sel_container, sel_name: store.sel_name, sel_price: store.sel_price,
sel_link: store.sel_link, sel_image: store.sel_image || '',
category_id: store.category_id?.toString() || '', currency: store.currency,
rate_limit: store.rate_limit, user_agent: store.user_agent || '',
proxy_url: store.proxy_url || '', headers_json: store.headers_json || '',
rate_limit: store.rate_limit, render_js: !!store.render_js,
// Keep proxy_url in the loaded form state: omitting it leaves form.proxy_url
// undefined after load, so saving the form could wipe the store's proxy setting.
user_agent: store.user_agent || '', proxy_url: store.proxy_url || '',
headers_json: store.headers_json || '',
};
loading = false;
});
@@ -114,6 +114,17 @@
<section class="card p-5">
<h2 class="text-xs font-semibold text-text-primary uppercase tracking-wider mb-4">Advanced</h2>
<!-- JS-rendering toggle: clicking flips form.render_js. role="switch" + aria-checked
     expose the current on/off state to assistive technology; the colour and knob
     position alone are purely visual. -->
<div class="flex items-center gap-3 mb-4 px-1">
<button type="button" role="switch" aria-checked={form.render_js}
onclick={() => form.render_js = !form.render_js}
class="w-8 h-[18px] rounded-full transition-colors relative {form.render_js ? 'bg-accent' : 'bg-surface-hover border border-surface-border'}"
aria-label="Toggle JavaScript rendering">
<!-- Knob: sits on the right when enabled, on the left when disabled. -->
<span class="absolute top-[2px] w-[14px] h-[14px] bg-white rounded-full shadow transition-all {form.render_js ? 'right-[2px]' : 'left-[2px]'}"></span>
</button>
<div>
<span class="text-sm text-text-primary">JavaScript Rendering</span>
<p class="text-2xs text-text-tertiary">Use a headless browser for JS-heavy stores</p>
</div>
</div>
<div class="grid grid-cols-2 gap-4">
<div><label class="label">Rate Limit</label><input type="number" bind:value={form.rate_limit} min="1" max="10" class="input-field" /></div>
<div><label class="label">User Agent</label><input type="text" bind:value={form.user_agent} class="input-field" /></div>

View File

@@ -10,7 +10,7 @@
let form = $state({
name: '', base_url: '', search_url: '',
sel_container: '', sel_name: '', sel_price: '', sel_link: '', sel_image: '',
category_id: '', currency: 'EUR', rate_limit: 2,
category_id: '', currency: 'EUR', rate_limit: 2, render_js: false,
user_agent: '', proxy_url: '', headers_json: '',
});
@@ -111,6 +111,17 @@
<section class="card p-5">
<h2 class="text-xs font-semibold text-text-primary uppercase tracking-wider mb-4">Advanced</h2>
<!-- JS-rendering toggle: clicking flips form.render_js. role="switch" + aria-checked
     expose the current on/off state to assistive technology; the colour and knob
     position alone are purely visual. -->
<div class="flex items-center gap-3 mb-4 px-1">
<button type="button" role="switch" aria-checked={form.render_js}
onclick={() => form.render_js = !form.render_js}
class="w-8 h-[18px] rounded-full transition-colors relative {form.render_js ? 'bg-accent' : 'bg-surface-hover border border-surface-border'}"
aria-label="Toggle JavaScript rendering">
<!-- Knob: sits on the right when enabled, on the left when disabled. -->
<span class="absolute top-[2px] w-[14px] h-[14px] bg-white rounded-full shadow transition-all {form.render_js ? 'right-[2px]' : 'left-[2px]'}"></span>
</button>
<div>
<span class="text-sm text-text-primary">JavaScript Rendering</span>
<p class="text-2xs text-text-tertiary">Use a headless browser for JS-heavy stores (slower but handles dynamic content)</p>
</div>
</div>
<div class="grid grid-cols-2 gap-4">
<div>
<label class="label">Rate Limit (req/sec)</label>