Use remote Chromium container instead of local Puppeteer launch

- Add browserless/chromium container to docker-compose
- Add docker-compose.dev.yml for local dev (Chromium on port 3001)
- Browser scraper connects via WebSocket (CHROMIUM_WS env var)
- Falls back to local launch if CHROMIUM_WS not set
- Remove Chromium install from Dockerfile (smaller image)
- Auto-reconnect on browser disconnect

Tested: remote Chromium connects in ~500ms, HG Spot scrapes in
~2.2s total. No longer blocks the Node.js event loop.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
mariosemes
2026-03-26 22:47:40 +01:00
parent 0e2e8d1766
commit a3ae3b248f
6 changed files with 58 additions and 17 deletions

View File

@@ -1,3 +1,4 @@
PORT=3000
DATABASE_PATH=./data/pricehunter.db
STORES_DIR=./stores
CHROMIUM_WS=ws://localhost:3001

View File

@@ -19,9 +19,7 @@ RUN npx tsc
FROM node:20-alpine
WORKDIR /app
# Install Chromium for Puppeteer
RUN apk add --no-cache chromium
ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium-browser
# Skip Chromium download — we use a remote Chromium container
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
COPY package*.json ./
@@ -35,6 +33,7 @@ ENV NODE_ENV=production
ENV PORT=3000
ENV DATABASE_PATH=/app/data/pricehunter.db
ENV STORES_DIR=/app/stores
ENV CHROMIUM_WS=ws://chromium:3000
EXPOSE 3000
VOLUME /app/data

9
docker-compose.dev.yml Normal file
View File

@@ -0,0 +1,9 @@
# Compose file for local development: defines only the Chromium service.
services:
chromium:
image: ghcr.io/browserless/chromium
ports:
# Publish container port 3000 on host port 3001; the example env file
# points CHROMIUM_WS at ws://localhost:3001 to match.
- "3001:3000"
environment:
# NOTE(review): presumably caps parallel browser sessions — confirm against the browserless docs.
- MAX_CONCURRENT_SESSIONS=5
# NOTE(review): presumably a timeout in milliseconds — confirm against the browserless docs.
- CONNECTION_TIMEOUT=30000
restart: unless-stopped

View File

@@ -10,5 +10,15 @@ services:
- NODE_ENV=production
- DATABASE_PATH=/app/data/pricehunter.db
- STORES_DIR=/app/stores
- CHROMIUM_WS=ws://chromium:3000
- PORT=3000
depends_on:
- chromium
restart: unless-stopped
chromium:
image: ghcr.io/browserless/chromium
environment:
- MAX_CONCURRENT_SESSIONS=5
- CONNECTION_TIMEOUT=30000
restart: unless-stopped

View File

@@ -5,5 +5,6 @@ export const config = {
host: process.env.HOST || '0.0.0.0',
databasePath: process.env.DATABASE_PATH || './data/pricehunter.db',
storesDir: process.env.STORES_DIR || './stores',
chromiumWs: process.env.CHROMIUM_WS || '',
isProduction: process.env.NODE_ENV === 'production',
};

View File

@@ -1,6 +1,7 @@
import type { Browser } from 'puppeteer';
import type { Store } from '../models/store.js';
import type { ScrapedItem } from './result-parser.js';
import { config } from '../config.js';
let browser: Browser | null = null;
@@ -14,8 +15,18 @@ function log(msg: string) {
async function getBrowser(): Promise<Browser> {
if (browser && browser.connected) return browser;
log('Launching Chromium...');
const puppeteer = await import('puppeteer');
if (config.chromiumWs) {
// Connect to remote Chromium (Docker container)
log(`Connecting to remote Chromium at ${config.chromiumWs}`);
browser = await puppeteer.default.connect({
browserWSEndpoint: config.chromiumWs,
});
log('Connected to remote Chromium');
} else {
// Fall back to local launch
log('Launching local Chromium...');
browser = await puppeteer.default.launch({
headless: true,
args: [
@@ -27,7 +38,14 @@ async function getBrowser(): Promise<Browser> {
'--no-first-run',
],
});
log('Chromium launched');
log('Local Chromium launched');
}
// Reconnect if browser disconnects
browser.on('disconnected', () => {
log('Browser disconnected');
browser = null;
});
return browser;
}
@@ -73,7 +91,6 @@ export async function scrapeStoreWithBrowser(store: Store, searchUrl: string): P
}
log(`${store.name}: selector found`);
// Brief wait for remaining renders
await new Promise((r) => setTimeout(r, 300));
log(`${store.name}: extracting products...`);
@@ -124,7 +141,11 @@ export async function warmupBrowser(): Promise<void> {
/**
 * Releases the shared browser handle, if one is currently held.
 *
 * When `config.chromiumWs` is set, the handle refers to a remote Chromium that
 * this process only connected to, so it is disconnected rather than closed.
 * Otherwise the browser was launched locally and is closed.
 *
 * @returns A promise that settles once the disconnect/close call has finished.
 */
export async function closeBrowser(): Promise<void> {
  if (!browser) return;

  // Take a local reference and clear the module-level handle first, so that a
  // failing disconnect/close cannot leave a stale handle behind for later calls.
  const current = browser;
  browser = null;

  if (config.chromiumWs) {
    // disconnect() returns a promise in current Puppeteer releases; awaiting it
    // surfaces failures here and is harmless if an older version returns void.
    await current.disconnect();
  } else {
    await current.close();
  }
}