Use remote Chromium container instead of local Puppeteer launch

- Add browserless/chromium container to docker-compose
- Add docker-compose.dev.yml for local dev (Chromium on port 3001)
- Browser scraper connects via WebSocket (CHROMIUM_WS env var)
- Falls back to local launch if CHROMIUM_WS is not set
- Remove Chromium install from Dockerfile (smaller image)
- Auto-reconnect after a browser disconnect (the cached handle is cleared, so the next getBrowser() call reconnects)

Tested: remote Chromium connects in ~500ms, HG Spot scrapes in
~2.2s total. No longer blocks the Node.js event loop.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
mariosemes
2026-03-26 22:47:40 +01:00
parent 0e2e8d1766
commit a3ae3b248f
6 changed files with 58 additions and 17 deletions

View File

@@ -1,3 +1,4 @@
PORT=3000 PORT=3000
DATABASE_PATH=./data/pricehunter.db DATABASE_PATH=./data/pricehunter.db
STORES_DIR=./stores STORES_DIR=./stores
CHROMIUM_WS=ws://localhost:3001

View File

@@ -19,9 +19,7 @@ RUN npx tsc
FROM node:20-alpine FROM node:20-alpine
WORKDIR /app WORKDIR /app
# Install Chromium for Puppeteer # Skip Chromium download — we use a remote Chromium container
RUN apk add --no-cache chromium
ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium-browser
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
COPY package*.json ./ COPY package*.json ./
@@ -35,6 +33,7 @@ ENV NODE_ENV=production
ENV PORT=3000 ENV PORT=3000
ENV DATABASE_PATH=/app/data/pricehunter.db ENV DATABASE_PATH=/app/data/pricehunter.db
ENV STORES_DIR=/app/stores ENV STORES_DIR=/app/stores
ENV CHROMIUM_WS=ws://chromium:3000
EXPOSE 3000 EXPOSE 3000
VOLUME /app/data VOLUME /app/data

9
docker-compose.dev.yml Normal file
View File

@@ -0,0 +1,9 @@
services:
chromium:
image: ghcr.io/browserless/chromium
ports:
- "3001:3000"
environment:
- MAX_CONCURRENT_SESSIONS=5
- CONNECTION_TIMEOUT=30000
restart: unless-stopped

View File

@@ -10,5 +10,15 @@ services:
- NODE_ENV=production - NODE_ENV=production
- DATABASE_PATH=/app/data/pricehunter.db - DATABASE_PATH=/app/data/pricehunter.db
- STORES_DIR=/app/stores - STORES_DIR=/app/stores
- CHROMIUM_WS=ws://chromium:3000
- PORT=3000 - PORT=3000
depends_on:
- chromium
restart: unless-stopped
chromium:
image: ghcr.io/browserless/chromium
environment:
- MAX_CONCURRENT_SESSIONS=5
- CONNECTION_TIMEOUT=30000
restart: unless-stopped restart: unless-stopped

View File

@@ -5,5 +5,6 @@ export const config = {
host: process.env.HOST || '0.0.0.0', host: process.env.HOST || '0.0.0.0',
databasePath: process.env.DATABASE_PATH || './data/pricehunter.db', databasePath: process.env.DATABASE_PATH || './data/pricehunter.db',
storesDir: process.env.STORES_DIR || './stores', storesDir: process.env.STORES_DIR || './stores',
chromiumWs: process.env.CHROMIUM_WS || '',
isProduction: process.env.NODE_ENV === 'production', isProduction: process.env.NODE_ENV === 'production',
}; };

View File

@@ -1,6 +1,7 @@
import type { Browser } from 'puppeteer'; import type { Browser } from 'puppeteer';
import type { Store } from '../models/store.js'; import type { Store } from '../models/store.js';
import type { ScrapedItem } from './result-parser.js'; import type { ScrapedItem } from './result-parser.js';
import { config } from '../config.js';
let browser: Browser | null = null; let browser: Browser | null = null;
@@ -14,20 +15,37 @@ function log(msg: string) {
async function getBrowser(): Promise<Browser> { async function getBrowser(): Promise<Browser> {
if (browser && browser.connected) return browser; if (browser && browser.connected) return browser;
log('Launching Chromium...');
const puppeteer = await import('puppeteer'); const puppeteer = await import('puppeteer');
browser = await puppeteer.default.launch({
headless: true, if (config.chromiumWs) {
args: [ // Connect to remote Chromium (Docker container)
'--no-sandbox', log(`Connecting to remote Chromium at ${config.chromiumWs}`);
'--disable-setuid-sandbox', browser = await puppeteer.default.connect({
'--disable-dev-shm-usage', browserWSEndpoint: config.chromiumWs,
'--disable-gpu', });
'--disable-extensions', log('Connected to remote Chromium');
'--no-first-run', } else {
], // Fall back to local launch
log('Launching local Chromium...');
browser = await puppeteer.default.launch({
headless: true,
args: [
'--no-sandbox',
'--disable-setuid-sandbox',
'--disable-dev-shm-usage',
'--disable-gpu',
'--disable-extensions',
'--no-first-run',
],
});
log('Local Chromium launched');
}
// Clear the cached handle on disconnect so the next getBrowser() call reconnects
browser.on('disconnected', () => {
log('Browser disconnected');
browser = null;
}); });
log('Chromium launched');
return browser; return browser;
} }
@@ -73,7 +91,6 @@ export async function scrapeStoreWithBrowser(store: Store, searchUrl: string): P
} }
log(`${store.name}: selector found`); log(`${store.name}: selector found`);
// Brief wait for remaining renders
await new Promise((r) => setTimeout(r, 300)); await new Promise((r) => setTimeout(r, 300));
log(`${store.name}: extracting products...`); log(`${store.name}: extracting products...`);
@@ -124,7 +141,11 @@ export async function warmupBrowser(): Promise<void> {
export async function closeBrowser(): Promise<void> { export async function closeBrowser(): Promise<void> {
if (browser) { if (browser) {
await browser.close(); if (config.chromiumWs) {
browser.disconnect();
} else {
await browser.close();
}
browser = null; browser = null;
} }
} }