diff --git a/package-lock.json b/package-lock.json index d92fd21..813d61a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -15,7 +15,7 @@ "dotenv": "^16.4.7", "fastify": "^5.2.1", "p-limit": "^6.2.0", - "puppeteer": "^24.40.0", + "puppeteer-core": "^24.40.0", "puppeteer-extra": "^3.3.6", "puppeteer-extra-plugin-stealth": "^2.11.2", "sql.js": "^1.11.0", @@ -30,29 +30,6 @@ "vitest": "^2.1.8" } }, - "node_modules/@babel/code-frame": { - "version": "7.29.0", - "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.29.0.tgz", - "integrity": "sha512-9NhCeYjq9+3uxgdtp20LSiJXJvN0FeCtNGpJxuMFZ1Kv3cWUNb6DOhJwUvcVCzKGR66cw4njwM6hrJLqgOwbcw==", - "license": "MIT", - "dependencies": { - "@babel/helper-validator-identifier": "^7.28.5", - "js-tokens": "^4.0.0", - "picocolors": "^1.1.1" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-validator-identifier": { - "version": "7.28.5", - "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.28.5.tgz", - "integrity": "sha512-qSs4ifwzKJSV39ucNjsvc6WVHs6b7S03sOh2OcHF9UHfVPqWWALUsNUVzhSBiItjRZoLHx7nIarVjqKVusUZ1Q==", - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, "node_modules/@esbuild/aix-ppc64": { "version": "0.27.4", "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.4.tgz", @@ -1324,12 +1301,6 @@ "url": "https://github.com/chalk/ansi-styles?sponsor=1" } }, - "node_modules/argparse": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", - "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", - "license": "Python-2.0" - }, "node_modules/arr-union": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/arr-union/-/arr-union-3.1.0.tgz", @@ -1556,15 +1527,6 @@ "node": ">=8" } }, - "node_modules/callsites": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", - "integrity": "sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==", - "license": "MIT", - "engines": { - "node": ">=6" - } - }, "node_modules/chai": { "version": "5.3.3", "resolved": "https://registry.npmjs.org/chai/-/chai-5.3.3.tgz", @@ -1788,32 +1750,6 @@ "url": "https://opencollective.com/express" } }, - "node_modules/cosmiconfig": { - "version": "9.0.1", - "resolved": "https://registry.npmjs.org/cosmiconfig/-/cosmiconfig-9.0.1.tgz", - "integrity": "sha512-hr4ihw+DBqcvrsEDioRO31Z17x71pUYoNe/4h6Z0wB72p7MU7/9gH8Q3s12NFhHPfYBBOV3qyfUxmr/Yn3shnQ==", - "license": "MIT", - "dependencies": { - "env-paths": "^2.2.1", - "import-fresh": "^3.3.0", - "js-yaml": "^4.1.0", - "parse-json": "^5.2.0" - }, - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/d-fischer" - }, - "peerDependencies": { - "typescript": ">=4.9.5" - }, - "peerDependenciesMeta": { - "typescript": { - "optional": true - } - } - }, "node_modules/cross-spawn": { "version": "7.0.6", "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", @@ -1944,9 +1880,9 @@ } }, "node_modules/devtools-protocol": { - "version": "0.0.1581282", - "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1581282.tgz", - "integrity": "sha512-nv7iKtNZQshSW2hKzYNr46nM/Cfh5SEvE2oV0/SEGgc9XupIY5ggf84Cz8eJIkBce7S3bmTAauFD6aysMpnqsQ==", + "version": "0.0.1604597", + "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1604597.tgz", + "integrity": "sha512-7DH4+FDIwg5AxeW+kvFb5qxJuDLSNK2S9FurqLpggMrUxS3tlvN/J2kP6uOghn584shRnvKheKSSvS4bgnzWYA==", "license": "BSD-3-Clause", "peer": true }, @@ -2057,24 +1993,6 @@ "url": "https://github.com/fb55/entities?sponsor=1" } }, - "node_modules/env-paths": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/env-paths/-/env-paths-2.2.1.tgz", - "integrity": "sha512-+h1lkLKhZMTYjog1VEpJNG7NZJWcuc2DDk/qsqSTRRCOXiLjeQ1d1/udrUGhqMxUgAlwKNZ0cf2uqan5GLuS2A==", - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/error-ex": { - "version": "1.3.4", - "resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.4.tgz", - "integrity": "sha512-sqQamAnR14VgCr1A618A3sGrygcpK+HEbenA/HiEAkkUwcZIIB/tgWqHFxWgOyDh4nB4JCRimh79dR5Ywc9MDQ==", - "license": "MIT", - "dependencies": { - "is-arrayish": "^0.2.1" - } - }, "node_modules/es-module-lexer": { "version": "1.7.0", "resolved": "https://registry.npmjs.org/es-module-lexer/-/es-module-lexer-1.7.0.tgz", @@ -2661,22 +2579,6 @@ "node": ">=0.10.0" } }, - "node_modules/import-fresh": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.1.tgz", - "integrity": "sha512-TR3KfrTZTYLPB6jUjfx6MF9WcWrHL9su5TObK4ZkYgBdWKPOFoSoQIdEuTuR82pmtxH2spWG9h6etwfr1pLBqQ==", - "license": "MIT", - "dependencies": { - "parent-module": "^1.0.0", - "resolve-from": "^4.0.0" - }, - "engines": { - "node": ">=6" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/inflight": { "version": "1.0.6", "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", @@ -2712,12 +2614,6 @@ "node": ">= 10" } }, - "node_modules/is-arrayish": { - "version": "0.2.1", - "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.2.1.tgz", - "integrity": "sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==", - "license": "MIT" - }, "node_modules/is-buffer": { "version": "1.1.6", "resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-1.1.6.tgz", @@ -2794,30 +2690,6 @@ "node": ">=10" } }, - "node_modules/js-tokens": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", - "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==", - "license": "MIT" - }, - "node_modules/js-yaml": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.1.tgz", - "integrity": "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==", - "license": "MIT", - "dependencies": { - "argparse": "^2.0.1" - }, - "bin": { - "js-yaml": "bin/js-yaml.js" - } - }, - "node_modules/json-parse-even-better-errors": { - "version": "2.3.1", - "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", - "integrity": "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==", - "license": "MIT" - }, "node_modules/json-schema-ref-resolver": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/json-schema-ref-resolver/-/json-schema-ref-resolver-3.0.0.tgz", @@ -2913,12 +2785,6 @@ ], "license": "MIT" }, - "node_modules/lines-and-columns": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz", - "integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==", - "license": "MIT" - }, "node_modules/loupe": { "version": "3.2.1", "resolved": "https://registry.npmjs.org/loupe/-/loupe-3.2.1.tgz", @@ -3165,36 +3031,6 @@ "integrity": "sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==", "license": "BlueOak-1.0.0" }, - "node_modules/parent-module": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", - "integrity": "sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==", - "license": "MIT", - "dependencies": { - "callsites": "^3.0.0" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/parse-json": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-5.2.0.tgz", - "integrity": "sha512-ayCKvm/phCGxOkYRSCM82iDwct8/EonSEgCSxWxD7ve6jHggsFl4fZVQBPRNgQoKiuV/odhFrGzQXZwbifC8Rg==", - "license": "MIT", - "dependencies": { - "@babel/code-frame": "^7.0.0", - "error-ex": "^1.3.1", - "json-parse-even-better-errors": "^2.3.0", - "lines-and-columns": "^1.1.6" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/parse5": { "version": "7.3.0", "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.3.0.tgz", @@ -3305,6 +3141,7 @@ "version": "1.1.1", "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==", + "dev": true, "license": "ISC" }, "node_modules/pino": { @@ -3467,33 +3304,12 @@ "once": "^1.3.1" } }, - "node_modules/puppeteer": { - "version": "24.40.0", - "resolved": "https://registry.npmjs.org/puppeteer/-/puppeteer-24.40.0.tgz", - "integrity": "sha512-IxQbDq93XHVVLWHrAkFP7F7iHvb9o0mgfsSIMlhHb+JM+JjM1V4v4MNSQfcRWJopx9dsNOr9adYv0U5fm9BJBQ==", - "hasInstallScript": true, - "license": "Apache-2.0", - "peer": true, - "dependencies": { - "@puppeteer/browsers": "2.13.0", - "chromium-bidi": "14.0.0", - "cosmiconfig": "^9.0.0", - "devtools-protocol": "0.0.1581282", - "puppeteer-core": "24.40.0", - "typed-query-selector": "^2.12.1" - }, - "bin": { - "puppeteer": "lib/cjs/puppeteer/node/cli.js" - }, - "engines": { - "node": ">=18" - } - }, "node_modules/puppeteer-core": { "version": "24.40.0", "resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-24.40.0.tgz", "integrity": "sha512-MWL3XbUCfVgGR0gRsidzT6oKJT2QydPLhMITU6HoVWiiv4gkb6gJi3pcdAa8q4HwjBTbqISOWVP4aJiiyUJvag==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@puppeteer/browsers": "2.13.0", "chromium-bidi": "14.0.0", @@ -3507,6 +3323,12 @@ "node": ">=18" } }, + "node_modules/puppeteer-core/node_modules/devtools-protocol": { + "version": "0.0.1581282", + "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1581282.tgz", + "integrity": "sha512-nv7iKtNZQshSW2hKzYNr46nM/Cfh5SEvE2oV0/SEGgc9XupIY5ggf84Cz8eJIkBce7S3bmTAauFD6aysMpnqsQ==", + "license": "BSD-3-Clause" + }, "node_modules/puppeteer-extra": { "version": "3.3.6", "resolved": "https://registry.npmjs.org/puppeteer-extra/-/puppeteer-extra-3.3.6.tgz", @@ -3677,15 +3499,6 @@ "node": ">=0.10.0" } }, - "node_modules/resolve-from": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", - "integrity": "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==", - "license": "MIT", - "engines": { - "node": ">=4" - } - }, "node_modules/resolve-pkg-maps": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz", @@ -4362,9 +4175,8 @@ "version": "5.9.3", "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", - "devOptional": true, + "dev": true, "license": "Apache-2.0", - "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" diff --git a/package.json b/package.json index 9f8c87f..1f9e310 100644 --- a/package.json +++ b/package.json @@ -22,7 +22,7 @@ "dotenv": "^16.4.7", "fastify": "^5.2.1", "p-limit": "^6.2.0", - "puppeteer": "^24.40.0", + "puppeteer-core": "^24.40.0", "puppeteer-extra": "^3.3.6", "puppeteer-extra-plugin-stealth": "^2.11.2", "sql.js": "^1.11.0", diff --git a/src/server/scraper/browser-scraper.ts b/src/server/scraper/browser-scraper.ts index c31604b..ca5b30e 100644 --- a/src/server/scraper/browser-scraper.ts +++ b/src/server/scraper/browser-scraper.ts @@ -1,4 +1,4 @@ -import type { Browser } from 'puppeteer'; +import type { Browser } from 'puppeteer-core'; import type { Store } from '../models/store.js'; import type { ScrapedItem } from './result-parser.js'; import { config } from '../config.js'; @@ -15,31 +15,19 @@ function log(msg: string) { async function getBrowser(): Promise { if (browser && browser.connected) return browser; + if (!config.chromiumWs) { + throw new Error('CHROMIUM_WS environment variable is required. Run: docker compose -f docker-compose.dev.yml up -d'); + } + const puppeteerExtra = await import('puppeteer-extra'); const StealthPlugin = await import('puppeteer-extra-plugin-stealth'); puppeteerExtra.default.use(StealthPlugin.default()); - if (config.chromiumWs) { - log(`Connecting to remote Chromium at ${config.chromiumWs}`); - browser = await puppeteerExtra.default.connect({ - browserWSEndpoint: config.chromiumWs, - }) as unknown as Browser; - log('Connected to remote Chromium (stealth mode)'); - } else { - log('Launching local Chromium (stealth mode)...'); - browser = await puppeteerExtra.default.launch({ - headless: true, - args: [ - '--no-sandbox', - '--disable-setuid-sandbox', - '--disable-dev-shm-usage', - '--disable-gpu', - '--disable-extensions', - '--no-first-run', - ], - }) as unknown as Browser; - log('Local Chromium launched'); - } + log(`Connecting to Chromium at ${config.chromiumWs}`); + browser = await puppeteerExtra.default.connect({ + browserWSEndpoint: config.chromiumWs, + }) as unknown as Browser; + log('Connected (stealth mode)'); // Reconnect if browser disconnects browser.on('disconnected', () => { @@ -141,11 +129,7 @@ export async function warmupBrowser(): Promise { export async function closeBrowser(): Promise { if (browser) { - if (config.chromiumWs) { - browser.disconnect(); - } else { - await browser.close(); - } + browser.disconnect(); browser = null; } }