Add Puppeteer browser scraping and HG Spot store config
- Add browser-scraper.ts using Puppeteer for JS-heavy stores
- Add render_js flag to store model, migration, YAML sync, and UI
- Scraper engine auto-selects cheerio vs Puppeteer based on flag
- Store forms include JS rendering toggle in Advanced section
- Create first store config: HG Spot (Croatian electronics retailer)
- Update Dockerfile with Chromium for production Puppeteer support

Tested: HG Spot returns 15 products per page with correct names, prices (EUR), links, and images using headless browser rendering.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -3,6 +3,7 @@ import type { Store } from '../models/store.js';
|
||||
import { getEnabledStores, getStoresByCategory, getStoresByGroup, getStoresByIds } from '../models/store.js';
|
||||
import { logScrape } from '../models/scrape-log.js';
|
||||
import { scrapeStore } from './http-scraper.js';
|
||||
import { scrapeStoreWithBrowser } from './browser-scraper.js';
|
||||
import { normalizeResult, type Product } from './result-parser.js';
|
||||
import { getLimiter } from './rate-limiter.js';
|
||||
|
||||
@@ -66,7 +67,10 @@ export async function search(options: SearchOptions): Promise<SearchResult> {
|
||||
const rateLimiter = getLimiter(store.id, 1, Math.floor(store.rate_window / store.rate_limit));
|
||||
|
||||
try {
|
||||
const result = await rateLimiter.schedule(() => scrapeStore(store, searchUrl));
|
||||
const scrapeFn = store.render_js
|
||||
? () => scrapeStoreWithBrowser(store, searchUrl)
|
||||
: () => scrapeStore(store, searchUrl);
|
||||
const result = await rateLimiter.schedule(scrapeFn);
|
||||
const duration = Date.now() - storeStart;
|
||||
|
||||
const products = result.items.map((item) =>
|
||||
|
||||
Reference in New Issue
Block a user