Use remote Chromium container instead of local Puppeteer launch
- Add browserless/chromium container to docker-compose
- Add docker-compose.dev.yml for local dev (Chromium on port 3001)
- Browser scraper connects via WebSocket (CHROMIUM_WS env var)
- Falls back to local launch if CHROMIUM_WS not set
- Remove Chromium install from Dockerfile (smaller image)
- Auto-reconnect on browser disconnect

Tested: remote Chromium connects in ~500ms, HG Spot scrapes in ~2.2s total. No longer blocks the Node.js event loop.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
PORT=3000
|
||||
DATABASE_PATH=./data/pricehunter.db
|
||||
STORES_DIR=./stores
|
||||
CHROMIUM_WS=ws://localhost:3001
|
||||
|
||||
@@ -19,9 +19,7 @@ RUN npx tsc
|
||||
FROM node:20-alpine
|
||||
WORKDIR /app
|
||||
|
||||
# Install Chromium for Puppeteer
|
||||
RUN apk add --no-cache chromium
|
||||
ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium-browser
|
||||
# Skip Chromium download — we use a remote Chromium container
|
||||
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
|
||||
|
||||
COPY package*.json ./
|
||||
@@ -35,6 +33,7 @@ ENV NODE_ENV=production
|
||||
ENV PORT=3000
|
||||
ENV DATABASE_PATH=/app/data/pricehunter.db
|
||||
ENV STORES_DIR=/app/stores
|
||||
ENV CHROMIUM_WS=ws://chromium:3000
|
||||
|
||||
EXPOSE 3000
|
||||
VOLUME /app/data
|
||||
|
||||
docker-compose.dev.yml (new file, 9 lines)
@@ -0,0 +1,9 @@
|
||||
services:
|
||||
chromium:
|
||||
image: ghcr.io/browserless/chromium
|
||||
ports:
|
||||
- "3001:3000"
|
||||
environment:
|
||||
- MAX_CONCURRENT_SESSIONS=5
|
||||
- CONNECTION_TIMEOUT=30000
|
||||
restart: unless-stopped
|
||||
@@ -10,5 +10,15 @@ services:
|
||||
- NODE_ENV=production
|
||||
- DATABASE_PATH=/app/data/pricehunter.db
|
||||
- STORES_DIR=/app/stores
|
||||
- CHROMIUM_WS=ws://chromium:3000
|
||||
- PORT=3000
|
||||
depends_on:
|
||||
- chromium
|
||||
restart: unless-stopped
|
||||
|
||||
chromium:
|
||||
image: ghcr.io/browserless/chromium
|
||||
environment:
|
||||
- MAX_CONCURRENT_SESSIONS=5
|
||||
- CONNECTION_TIMEOUT=30000
|
||||
restart: unless-stopped
|
||||
|
||||
@@ -5,5 +5,6 @@ export const config = {
|
||||
host: process.env.HOST || '0.0.0.0',
|
||||
databasePath: process.env.DATABASE_PATH || './data/pricehunter.db',
|
||||
storesDir: process.env.STORES_DIR || './stores',
|
||||
chromiumWs: process.env.CHROMIUM_WS || '',
|
||||
isProduction: process.env.NODE_ENV === 'production',
|
||||
};
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import type { Browser } from 'puppeteer';
|
||||
import type { Store } from '../models/store.js';
|
||||
import type { ScrapedItem } from './result-parser.js';
|
||||
import { config } from '../config.js';
|
||||
|
||||
let browser: Browser | null = null;
|
||||
|
||||
@@ -14,20 +15,37 @@ function log(msg: string) {
|
||||
async function getBrowser(): Promise<Browser> {
|
||||
if (browser && browser.connected) return browser;
|
||||
|
||||
log('Launching Chromium...');
|
||||
const puppeteer = await import('puppeteer');
|
||||
browser = await puppeteer.default.launch({
|
||||
headless: true,
|
||||
args: [
|
||||
'--no-sandbox',
|
||||
'--disable-setuid-sandbox',
|
||||
'--disable-dev-shm-usage',
|
||||
'--disable-gpu',
|
||||
'--disable-extensions',
|
||||
'--no-first-run',
|
||||
],
|
||||
|
||||
if (config.chromiumWs) {
|
||||
// Connect to remote Chromium (Docker container)
|
||||
log(`Connecting to remote Chromium at ${config.chromiumWs}`);
|
||||
browser = await puppeteer.default.connect({
|
||||
browserWSEndpoint: config.chromiumWs,
|
||||
});
|
||||
log('Connected to remote Chromium');
|
||||
} else {
|
||||
// Fall back to local launch
|
||||
log('Launching local Chromium...');
|
||||
browser = await puppeteer.default.launch({
|
||||
headless: true,
|
||||
args: [
|
||||
'--no-sandbox',
|
||||
'--disable-setuid-sandbox',
|
||||
'--disable-dev-shm-usage',
|
||||
'--disable-gpu',
|
||||
'--disable-extensions',
|
||||
'--no-first-run',
|
||||
],
|
||||
});
|
||||
log('Local Chromium launched');
|
||||
}
|
||||
|
||||
// On disconnect, clear the cached handle so the next getBrowser() call reconnects
|
||||
browser.on('disconnected', () => {
|
||||
log('Browser disconnected');
|
||||
browser = null;
|
||||
});
|
||||
log('Chromium launched');
|
||||
|
||||
return browser;
|
||||
}
|
||||
@@ -73,7 +91,6 @@ export async function scrapeStoreWithBrowser(store: Store, searchUrl: string): P
|
||||
}
|
||||
log(`${store.name}: selector found`);
|
||||
|
||||
// Brief wait for remaining renders
|
||||
await new Promise((r) => setTimeout(r, 300));
|
||||
|
||||
log(`${store.name}: extracting products...`);
|
||||
@@ -124,7 +141,11 @@ export async function warmupBrowser(): Promise<void> {
|
||||
|
||||
export async function closeBrowser(): Promise<void> {
|
||||
if (browser) {
|
||||
await browser.close();
|
||||
if (config.chromiumWs) {
|
||||
browser.disconnect();
|
||||
} else {
|
||||
await browser.close();
|
||||
}
|
||||
browser = null;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user