Use remote Chromium container instead of local Puppeteer launch
- Add browserless/chromium container to docker-compose
- Add docker-compose.dev.yml for local dev (Chromium on port 3001)
- Browser scraper connects via WebSocket (CHROMIUM_WS env var)
- Falls back to local launch if CHROMIUM_WS is not set
- Remove Chromium install from Dockerfile (smaller image)
- Auto-reconnect on browser disconnect

Tested: remote Chromium connects in ~500ms; HG Spot scrapes in ~2.2s total. No longer blocks the Node.js event loop.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -5,5 +5,6 @@ export const config = {
|
||||
host: process.env.HOST || '0.0.0.0',
|
||||
databasePath: process.env.DATABASE_PATH || './data/pricehunter.db',
|
||||
storesDir: process.env.STORES_DIR || './stores',
|
||||
chromiumWs: process.env.CHROMIUM_WS || '',
|
||||
isProduction: process.env.NODE_ENV === 'production',
|
||||
};
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import type { Browser } from 'puppeteer';
|
||||
import type { Store } from '../models/store.js';
|
||||
import type { ScrapedItem } from './result-parser.js';
|
||||
import { config } from '../config.js';
|
||||
|
||||
let browser: Browser | null = null;
|
||||
|
||||
@@ -14,20 +15,37 @@ function log(msg: string) {
|
||||
/**
 * Returns the shared Puppeteer browser, creating it on first use or after
 * the previous instance has disconnected.
 *
 * When `config.chromiumWs` is non-empty the browser is obtained by connecting
 * to that WebSocket endpoint; otherwise a local headless Chromium is launched.
 * The resulting instance is cached in the module-level `browser` variable.
 *
 * @returns The connected browser instance.
 * @throws Propagates any rejection from `puppeteer.connect` or
 *   `puppeteer.launch`; the cached `browser` is left untouched in that case.
 */
async function getBrowser(): Promise<Browser> {
  if (browser && browser.connected) return browser;

  const puppeteer = await import('puppeteer');
  let instance: Browser;

  if (config.chromiumWs) {
    // Connect to remote Chromium (Docker container)
    log(`Connecting to remote Chromium at ${config.chromiumWs}`);
    instance = await puppeteer.default.connect({
      browserWSEndpoint: config.chromiumWs,
    });
    log('Connected to remote Chromium');
  } else {
    // Fall back to local launch
    log('Launching local Chromium...');
    instance = await puppeteer.default.launch({
      headless: true,
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        '--disable-extensions',
        '--no-first-run',
      ],
    });
    log('Local Chromium launched');
  }

  // Drop the cached handle on disconnect so the next call reconnects.
  // Only clear it if it still refers to this instance: a late event from an
  // older browser must not discard a newer, healthy connection.
  instance.on('disconnected', () => {
    log('Browser disconnected');
    if (browser === instance) {
      browser = null;
    }
  });

  browser = instance;
  return instance;
}
|
||||
@@ -73,7 +91,6 @@ export async function scrapeStoreWithBrowser(store: Store, searchUrl: string): P
|
||||
}
|
||||
log(`${store.name}: selector found`);
|
||||
|
||||
// Brief wait for remaining renders
|
||||
await new Promise((r) => setTimeout(r, 300));
|
||||
|
||||
log(`${store.name}: extracting products...`);
|
||||
@@ -124,7 +141,11 @@ export async function warmupBrowser(): Promise<void> {
|
||||
|
||||
/**
 * Releases the shared browser, if one is currently cached.
 *
 * For a remote Chromium (`config.chromiumWs` set) this only disconnects, so
 * the remote browser process is left running. For a locally launched
 * Chromium the browser is closed. Does nothing when no browser is cached.
 *
 * @returns A promise that settles once the disconnect/close has completed.
 * @throws Propagates any rejection from `disconnect()` / `close()`; the
 *   cached handle is already cleared by then.
 */
export async function closeBrowser(): Promise<void> {
  const instance = browser;
  if (!instance) return;

  // Clear the cached handle first so it is never left pointing at a browser
  // that is shutting down, even if the call below rejects.
  browser = null;

  if (config.chromiumWs) {
    // Awaiting is harmless if disconnect() is synchronous in the installed version.
    await instance.disconnect();
  } else {
    await instance.close();
  }
}
|
||||
|
||||
Reference in New Issue
Block a user