Use remote Chromium container instead of local Puppeteer launch

- Add browserless/chromium container to docker-compose
- Add docker-compose.dev.yml for local dev (Chromium on port 3001)
- Browser scraper connects via WebSocket (CHROMIUM_WS env var)
- Falls back to local launch if CHROMIUM_WS is not set
- Remove Chromium install from Dockerfile (smaller image)
- Auto-reconnect on browser disconnect (the cached handle is cleared; the next request reconnects)

Tested: remote Chromium connects in ~500ms, HG Spot scrapes in
~2.2s total. No longer blocks the Node.js event loop.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
mariosemes
2026-03-26 22:47:40 +01:00
parent 0e2e8d1766
commit a3ae3b248f
6 changed files with 58 additions and 17 deletions

View File

@@ -5,5 +5,6 @@ export const config = {
host: process.env.HOST || '0.0.0.0',
databasePath: process.env.DATABASE_PATH || './data/pricehunter.db',
storesDir: process.env.STORES_DIR || './stores',
chromiumWs: process.env.CHROMIUM_WS || '',
isProduction: process.env.NODE_ENV === 'production',
};

View File

@@ -1,6 +1,7 @@
import type { Browser } from 'puppeteer';
import type { Store } from '../models/store.js';
import type { ScrapedItem } from './result-parser.js';
import { config } from '../config.js';
let browser: Browser | null = null;
@@ -14,20 +15,37 @@ function log(msg: string) {
async function getBrowser(): Promise<Browser> {
if (browser && browser.connected) return browser;
log('Launching Chromium...');
const puppeteer = await import('puppeteer');
browser = await puppeteer.default.launch({
headless: true,
args: [
'--no-sandbox',
'--disable-setuid-sandbox',
'--disable-dev-shm-usage',
'--disable-gpu',
'--disable-extensions',
'--no-first-run',
],
if (config.chromiumWs) {
// Connect to remote Chromium (Docker container)
log(`Connecting to remote Chromium at ${config.chromiumWs}`);
browser = await puppeteer.default.connect({
browserWSEndpoint: config.chromiumWs,
});
log('Connected to remote Chromium');
} else {
// Fall back to local launch
log('Launching local Chromium...');
browser = await puppeteer.default.launch({
headless: true,
args: [
'--no-sandbox',
'--disable-setuid-sandbox',
'--disable-dev-shm-usage',
'--disable-gpu',
'--disable-extensions',
'--no-first-run',
],
});
log('Local Chromium launched');
}
// Clear the cached handle on disconnect so the next getBrowser() call reconnects
browser.on('disconnected', () => {
log('Browser disconnected');
browser = null;
});
log('Chromium launched');
return browser;
}
@@ -73,7 +91,6 @@ export async function scrapeStoreWithBrowser(store: Store, searchUrl: string): P
}
log(`${store.name}: selector found`);
// Brief wait for remaining renders
await new Promise((r) => setTimeout(r, 300));
log(`${store.name}: extracting products...`);
@@ -124,7 +141,11 @@ export async function warmupBrowser(): Promise<void> {
export async function closeBrowser(): Promise<void> {
if (browser) {
await browser.close();
if (config.chromiumWs) {
browser.disconnect();
} else {
await browser.close();
}
browser = null;
}
}