Add Puppeteer browser scraping and HG Spot store config
- Add browser-scraper.ts using Puppeteer for JS-heavy stores
- Add render_js flag to store model, migration, YAML sync, and UI
- Scraper engine auto-selects cheerio vs Puppeteer based on flag
- Store forms include JS rendering toggle in Advanced section
- Create first store config: HG Spot (Croatian electronics retailer)
- Update Dockerfile with Chromium for production Puppeteer support

Tested: HG Spot returns 15 products per page with correct names, prices (EUR), links, and images using headless browser rendering.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -12,7 +12,7 @@
|
||||
let form = $state({
|
||||
name: '', base_url: '', search_url: '',
|
||||
sel_container: '', sel_name: '', sel_price: '', sel_link: '', sel_image: '',
|
||||
category_id: '', currency: 'EUR', rate_limit: 2,
|
||||
category_id: '', currency: 'EUR', rate_limit: 2, render_js: false,
|
||||
user_agent: '', proxy_url: '', headers_json: '',
|
||||
});
|
||||
|
||||
@@ -24,8 +24,8 @@
|
||||
sel_container: store.sel_container, sel_name: store.sel_name, sel_price: store.sel_price,
|
||||
sel_link: store.sel_link, sel_image: store.sel_image || '',
|
||||
category_id: store.category_id?.toString() || '', currency: store.currency,
|
||||
rate_limit: store.rate_limit, user_agent: store.user_agent || '',
|
||||
proxy_url: store.proxy_url || '', headers_json: store.headers_json || '',
|
||||
rate_limit: store.rate_limit, render_js: !!store.render_js,
|
||||
user_agent: store.user_agent || '', headers_json: store.headers_json || '',
|
||||
};
|
||||
loading = false;
|
||||
});
|
||||
@@ -114,6 +114,17 @@
|
||||
|
||||
<section class="card p-5">
|
||||
<h2 class="text-xs font-semibold text-text-primary uppercase tracking-wider mb-4">Advanced</h2>
|
||||
<div class="flex items-center gap-3 mb-4 px-1">
|
||||
<button type="button" onclick={() => form.render_js = !form.render_js}
|
||||
class="w-8 h-[18px] rounded-full transition-colors relative {form.render_js ? 'bg-accent' : 'bg-surface-hover border border-surface-border'}"
|
||||
aria-label="Toggle JavaScript rendering">
|
||||
<span class="absolute top-[2px] w-[14px] h-[14px] bg-white rounded-full shadow transition-all {form.render_js ? 'right-[2px]' : 'left-[2px]'}"></span>
|
||||
</button>
|
||||
<div>
|
||||
<span class="text-sm text-text-primary">JavaScript Rendering</span>
|
||||
<p class="text-2xs text-text-tertiary">Use a headless browser for JS-heavy stores</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="grid grid-cols-2 gap-4">
|
||||
<div><label class="label">Rate Limit</label><input type="number" bind:value={form.rate_limit} min="1" max="10" class="input-field" /></div>
|
||||
<div><label class="label">User Agent</label><input type="text" bind:value={form.user_agent} class="input-field" /></div>
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
let form = $state({
|
||||
name: '', base_url: '', search_url: '',
|
||||
sel_container: '', sel_name: '', sel_price: '', sel_link: '', sel_image: '',
|
||||
category_id: '', currency: 'EUR', rate_limit: 2,
|
||||
category_id: '', currency: 'EUR', rate_limit: 2, render_js: false,
|
||||
user_agent: '', proxy_url: '', headers_json: '',
|
||||
});
|
||||
|
||||
@@ -111,6 +111,17 @@
|
||||
|
||||
<section class="card p-5">
|
||||
<h2 class="text-xs font-semibold text-text-primary uppercase tracking-wider mb-4">Advanced</h2>
|
||||
<div class="flex items-center gap-3 mb-4 px-1">
|
||||
<button type="button" onclick={() => form.render_js = !form.render_js}
|
||||
class="w-8 h-[18px] rounded-full transition-colors relative {form.render_js ? 'bg-accent' : 'bg-surface-hover border border-surface-border'}"
|
||||
aria-label="Toggle JavaScript rendering">
|
||||
<span class="absolute top-[2px] w-[14px] h-[14px] bg-white rounded-full shadow transition-all {form.render_js ? 'right-[2px]' : 'left-[2px]'}"></span>
|
||||
</button>
|
||||
<div>
|
||||
<span class="text-sm text-text-primary">JavaScript Rendering</span>
|
||||
<p class="text-2xs text-text-tertiary">Use a headless browser for JS-heavy stores (slower but handles dynamic content)</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="grid grid-cols-2 gap-4">
|
||||
<div>
|
||||
<label class="label">Rate Limit (req/sec)</label>
|
||||
|
||||
Reference in New Issue
Block a user