From 4a67539a07c6c4d386fd303e4649b9d93d504108 Mon Sep 17 00:00:00 2001 From: mariosemes Date: Thu, 26 Mar 2026 23:36:19 +0100 Subject: [PATCH] Add stealth mode to browser scraper and Ronis.hr store - Switch puppeteer to puppeteer-extra with stealth plugin to bypass Cloudflare bot detection - Add Ronis.hr store config (JS-rendered, 48 products per page) - Stealth mode patches navigator.webdriver, chrome runtime, and other fingerprints that Cloudflare checks Co-Authored-By: Claude Opus 4.6 (1M context) --- package-lock.json | 477 ++++++++++++++++++++++++++ package.json | 2 + src/server/scraper/browser-scraper.ts | 18 +- stores/ronis-hr.yaml | 14 + 4 files changed, 502 insertions(+), 9 deletions(-) create mode 100644 stores/ronis-hr.yaml diff --git a/package-lock.json b/package-lock.json index 91bfbd6..d92fd21 100644 --- a/package-lock.json +++ b/package-lock.json @@ -16,6 +16,8 @@ "fastify": "^5.2.1", "p-limit": "^6.2.0", "puppeteer": "^24.40.0", + "puppeteer-extra": "^3.3.6", + "puppeteer-extra-plugin-stealth": "^2.11.2", "sql.js": "^1.11.0", "yaml": "^2.8.3" }, @@ -1095,6 +1097,15 @@ "integrity": "sha512-C5Mc6rdnsaJDjO3UpGW/CQTHtCKaYlScZTly4JIu97Jxo/odCiH0ITnDXSJPTOrEKk/ycSZ0AOgTmkDtkOsvIA==", "license": "MIT" }, + "node_modules/@types/debug": { + "version": "4.1.13", + "resolved": "https://registry.npmjs.org/@types/debug/-/debug-4.1.13.tgz", + "integrity": "sha512-KSVgmQmzMwPlmtljOomayoR89W4FynCAi3E8PPs7vmDVPe84hT+vGPKkJfThkmXs0x0jAaa9U8uW8bbfyS2fWw==", + "license": "MIT", + "dependencies": { + "@types/ms": "*" + } + }, "node_modules/@types/estree": { "version": "1.0.8", "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz", @@ -1102,6 +1113,12 @@ "dev": true, "license": "MIT" }, + "node_modules/@types/ms": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/@types/ms/-/ms-2.1.0.tgz", + "integrity": "sha512-GsCCIZDE/p3i96vtEqx+7dBUGXrc7zeSK3wwPHIaRThS+9OhWIXRqzs4d6k1SVU8g91DrNRWxWUGhp5KXQb2VA==", + "license": "MIT" + }, "node_modules/@types/node": { "version": "22.19.15", "resolved": "https://registry.npmjs.org/@types/node/-/node-22.19.15.tgz", @@ -1313,6 +1330,15 @@ "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", "license": "Python-2.0" }, + "node_modules/arr-union": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/arr-union/-/arr-union-3.1.0.tgz", + "integrity": "sha512-sKpyeERZ02v1FeCZT8lrfJq5u6goHCtpTAzPwJYe7c8SPFOboNjNg1vz2L4VTn9T4PQxEx13TbXLmYUcS6Ug7Q==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/assertion-error": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/assertion-error/-/assertion-error-2.0.1.tgz", @@ -1665,6 +1691,22 @@ "node": ">=12" } }, + "node_modules/clone-deep": { + "version": "0.2.4", + "resolved": "https://registry.npmjs.org/clone-deep/-/clone-deep-0.2.4.tgz", + "integrity": "sha512-we+NuQo2DHhSl+DP6jlUiAhyAjBQrYnpOk15rN6c6JSPScjiCLh8IbSU+VTcph6YS3o7mASE8a0+gbZ7ChLpgg==", + "license": "MIT", + "dependencies": { + "for-own": "^0.1.3", + "is-plain-object": "^2.0.1", + "kind-of": "^3.0.2", + "lazy-cache": "^1.0.3", + "shallow-clone": "^0.1.2" + }, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/color-convert": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", @@ -1690,6 +1732,12 @@ "dev": true, "license": "MIT" }, + "node_modules/concat-map": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", + "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==", + "license": "MIT" + }, "node_modules/concurrently": { "version": "9.2.1", "resolved": "https://registry.npmjs.org/concurrently/-/concurrently-9.2.1.tgz", @@ -1854,6 +1902,15 @@ "node": ">=6" } }, + "node_modules/deepmerge": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz", + "integrity": "sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/degenerator": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/degenerator/-/degenerator-5.0.1.tgz", @@ -2345,6 +2402,27 @@ "node": ">=20" } }, + "node_modules/for-in": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/for-in/-/for-in-1.0.2.tgz", + "integrity": "sha512-7EwmXrOjyL+ChxMhmG5lnW9MPt1aIeZEwKhQzoBUdTV0N3zuwWDZYVJatDvZ2OyzPUvdIAZDsCetk3coyMfcnQ==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/for-own": { + "version": "0.1.5", + "resolved": "https://registry.npmjs.org/for-own/-/for-own-0.1.5.tgz", + "integrity": "sha512-SKmowqGTJoPzLO1T0BBJpkfp3EMacCMOuH40hOUbrbzElVktk4DioXVM99QkLCyKoiuOmyjgcWMpVz2xjE7LZw==", + "license": "MIT", + "dependencies": { + "for-in": "^1.0.1" + }, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/foreground-child": { "version": "3.3.1", "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.3.1.tgz", @@ -2361,6 +2439,26 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/fs-extra": { + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-10.1.0.tgz", + "integrity": "sha512-oRXApq54ETRj4eMiFzGnHWGy+zo5raudjuxN0b8H7s/RU2oW0Wvsx9O0ACRN/kRq9E8Vu/ReskGB5o3ji+FzHQ==", + "license": "MIT", + "dependencies": { + "graceful-fs": "^4.2.0", + "jsonfile": "^6.0.1", + "universalify": "^2.0.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/fs.realpath": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", + "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==", + "license": "ISC" + }, "node_modules/fsevents": { "version": "2.3.3", "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", @@ -2451,6 +2549,12 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/graceful-fs": { + "version": "4.2.11", + "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", + "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==", + "license": "ISC" + }, "node_modules/has-flag": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", @@ -2573,6 +2677,17 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/inflight": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", + "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==", + "deprecated": "This module is not supported, and leaks memory. Do not use it. Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful.", + "license": "ISC", + "dependencies": { + "once": "^1.3.0", + "wrappy": "1" + } + }, "node_modules/inherits": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", @@ -2603,6 +2718,21 @@ "integrity": "sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==", "license": "MIT" }, + "node_modules/is-buffer": { + "version": "1.1.6", + "resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-1.1.6.tgz", + "integrity": "sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w==", + "license": "MIT" + }, + "node_modules/is-extendable": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/is-extendable/-/is-extendable-0.1.1.tgz", + "integrity": "sha512-5BMULNob1vgFX6EjQw5izWDxrecWK9AM72rugNr0TFldMOi0fj6Jk+zeKIt0xGj4cEfQIJth4w3OKWOJ4f+AFw==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/is-fullwidth-code-point": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", @@ -2612,12 +2742,33 @@ "node": ">=8" } }, + "node_modules/is-plain-object": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/is-plain-object/-/is-plain-object-2.0.4.tgz", + "integrity": "sha512-h5PpgXkWitc38BBMYawTYMWJHFZJVnBquFE57xFpjB8pJFiF6gZ+bU+WyI/yqXiFR5mdLsgYNaPe8uao6Uv9Og==", + "license": "MIT", + "dependencies": { + "isobject": "^3.0.1" + }, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/isexe": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", "license": "ISC" }, + "node_modules/isobject": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", + "integrity": "sha512-WhB9zCku7EGTj/HQQRz5aUQEUeoQZH2bWcltRErOpymJ4boYE6wL9Tbr23krRPSZ+C5zqNSrSw+Cc7sZZ4b7vg==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/jackspeak": { "version": "4.2.3", "resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-4.2.3.tgz", @@ -2692,6 +2843,39 @@ "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", "license": "MIT" }, + "node_modules/jsonfile": { + "version": "6.2.0", + "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.2.0.tgz", + "integrity": "sha512-FGuPw30AdOIUTRMC2OMRtQV+jkVj2cfPqSeWXv1NEAJ1qZ5zb1X6z1mFhbfOB/iy3ssJCD+3KuZ8r8C3uVFlAg==", + "license": "MIT", + "dependencies": { + "universalify": "^2.0.0" + }, + "optionalDependencies": { + "graceful-fs": "^4.1.6" + } + }, + "node_modules/kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha512-NOW9QQXMoZGg/oqnVNoNTTIFEIid1627WCffUBJEdMxYApq7mNE7CpzucIPc+ZQg25Phej7IJSmX3hO+oblOtQ==", + "license": "MIT", + "dependencies": { + "is-buffer": "^1.1.5" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/lazy-cache": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/lazy-cache/-/lazy-cache-1.0.4.tgz", + "integrity": "sha512-RE2g0b5VGZsOCFOCgP7omTRYFqydmZkBwl5oNnQ1lDYC57uyO9KqNnNVxT7COSHTxrRCWVcAVOcbjk+tvh/rgQ==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/light-my-request": { "version": "6.6.0", "resolved": "https://registry.npmjs.org/light-my-request/-/light-my-request-6.6.0.tgz", @@ -2761,6 +2945,20 @@ "@jridgewell/sourcemap-codec": "^1.5.5" } }, + "node_modules/merge-deep": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/merge-deep/-/merge-deep-3.0.3.tgz", + "integrity": "sha512-qtmzAS6t6grwEkNrunqTBdn0qKwFgNWvlxUbAV8es9M7Ot1EbyApytCnvE0jALPa46ZpKDUo527kKiaWplmlFA==", + "license": "MIT", + "dependencies": { + "arr-union": "^3.1.0", + "clone-deep": "^0.2.4", + "kind-of": "^3.0.2" + }, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/mime": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/mime/-/mime-3.0.0.tgz", @@ -2813,6 +3011,28 @@ "integrity": "sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw==", "license": "MIT" }, + "node_modules/mixin-object": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/mixin-object/-/mixin-object-2.0.1.tgz", + "integrity": "sha512-ALGF1Jt9ouehcaXaHhn6t1yGWRqGaHkPFndtFVHfZXOvkIZ/yoGaSi0AHVTafb3ZBGg4dr/bDwnaEKqCXzchMA==", + "license": "MIT", + "dependencies": { + "for-in": "^0.1.3", + "is-extendable": "^0.1.1" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/mixin-object/node_modules/for-in": { + "version": "0.1.8", + "resolved": "https://registry.npmjs.org/for-in/-/for-in-0.1.8.tgz", + "integrity": "sha512-F0to7vbBSHP8E3l6dCjxNOLuSFAACIxFy3UehTUlG7svlXi37HHsDkyVcHo0Pq8QwrE+pXvWSVX3ZT1T9wAZ9g==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/mnemonist": { "version": "0.40.0", "resolved": "https://registry.npmjs.org/mnemonist/-/mnemonist-0.40.0.tgz", @@ -3024,6 +3244,15 @@ "url": "https://github.com/fb55/entities?sponsor=1" } }, + "node_modules/path-is-absolute": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", + "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/path-key": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", @@ -3244,6 +3473,7 @@ "integrity": "sha512-IxQbDq93XHVVLWHrAkFP7F7iHvb9o0mgfsSIMlhHb+JM+JjM1V4v4MNSQfcRWJopx9dsNOr9adYv0U5fm9BJBQ==", "hasInstallScript": true, "license": "Apache-2.0", + "peer": true, "dependencies": { "@puppeteer/browsers": "2.13.0", "chromium-bidi": "14.0.0", @@ -3277,6 +3507,143 @@ "node": ">=18" } }, + "node_modules/puppeteer-extra": { + "version": "3.3.6", + "resolved": "https://registry.npmjs.org/puppeteer-extra/-/puppeteer-extra-3.3.6.tgz", + "integrity": "sha512-rsLBE/6mMxAjlLd06LuGacrukP2bqbzKCLzV1vrhHFavqQE/taQ2UXv3H5P0Ls7nsrASa+6x3bDbXHpqMwq+7A==", + "license": "MIT", + "peer": true, + "dependencies": { + "@types/debug": "^4.1.0", + "debug": "^4.1.1", + "deepmerge": "^4.2.2" + }, + "engines": { + "node": ">=8" + }, + "peerDependencies": { + "@types/puppeteer": "*", + "puppeteer": "*", + "puppeteer-core": "*" + }, + "peerDependenciesMeta": { + "@types/puppeteer": { + "optional": true + }, + "puppeteer": { + "optional": true + }, + "puppeteer-core": { + "optional": true + } + } + }, + "node_modules/puppeteer-extra-plugin": { + "version": "3.2.3", + "resolved": "https://registry.npmjs.org/puppeteer-extra-plugin/-/puppeteer-extra-plugin-3.2.3.tgz", + "integrity": "sha512-6RNy0e6pH8vaS3akPIKGg28xcryKscczt4wIl0ePciZENGE2yoaQJNd17UiEbdmh5/6WW6dPcfRWT9lxBwCi2Q==", + "license": "MIT", + "dependencies": { + "@types/debug": "^4.1.0", + "debug": "^4.1.1", + "merge-deep": "^3.0.1" + }, + "engines": { + "node": ">=9.11.2" + }, + "peerDependencies": { + "playwright-extra": "*", + "puppeteer-extra": "*" + }, + "peerDependenciesMeta": { + "playwright-extra": { + "optional": true + }, + "puppeteer-extra": { + "optional": true + } + } + }, + "node_modules/puppeteer-extra-plugin-stealth": { + "version": "2.11.2", + "resolved": "https://registry.npmjs.org/puppeteer-extra-plugin-stealth/-/puppeteer-extra-plugin-stealth-2.11.2.tgz", + "integrity": "sha512-bUemM5XmTj9i2ZerBzsk2AN5is0wHMNE6K0hXBzBXOzP5m5G3Wl0RHhiqKeHToe/uIH8AoZiGhc1tCkLZQPKTQ==", + "license": "MIT", + "dependencies": { + "debug": "^4.1.1", + "puppeteer-extra-plugin": "^3.2.3", + "puppeteer-extra-plugin-user-preferences": "^2.4.1" + }, + "engines": { + "node": ">=8" + }, + "peerDependencies": { + "playwright-extra": "*", + "puppeteer-extra": "*" + }, + "peerDependenciesMeta": { + "playwright-extra": { + "optional": true + }, + "puppeteer-extra": { + "optional": true + } + } + }, + "node_modules/puppeteer-extra-plugin-user-data-dir": { + "version": "2.4.1", + "resolved": "https://registry.npmjs.org/puppeteer-extra-plugin-user-data-dir/-/puppeteer-extra-plugin-user-data-dir-2.4.1.tgz", + "integrity": "sha512-kH1GnCcqEDoBXO7epAse4TBPJh9tEpVEK/vkedKfjOVOhZAvLkHGc9swMs5ChrJbRnf8Hdpug6TJlEuimXNQ+g==", + "license": "MIT", + "dependencies": { + "debug": "^4.1.1", + "fs-extra": "^10.0.0", + "puppeteer-extra-plugin": "^3.2.3", + "rimraf": "^3.0.2" + }, + "engines": { + "node": ">=8" + }, + "peerDependencies": { + "playwright-extra": "*", + "puppeteer-extra": "*" + }, + "peerDependenciesMeta": { + "playwright-extra": { + "optional": true + }, + "puppeteer-extra": { + "optional": true + } + } + }, + "node_modules/puppeteer-extra-plugin-user-preferences": { + "version": "2.4.1", + "resolved": "https://registry.npmjs.org/puppeteer-extra-plugin-user-preferences/-/puppeteer-extra-plugin-user-preferences-2.4.1.tgz", + "integrity": "sha512-i1oAZxRbc1bk8MZufKCruCEC3CCafO9RKMkkodZltI4OqibLFXF3tj6HZ4LZ9C5vCXZjYcDWazgtY69mnmrQ9A==", + "license": "MIT", + "dependencies": { + "debug": "^4.1.1", + "deepmerge": "^4.2.2", + "puppeteer-extra-plugin": "^3.2.3", + "puppeteer-extra-plugin-user-data-dir": "^2.4.1" + }, + "engines": { + "node": ">=8" + }, + "peerDependencies": { + "playwright-extra": "*", + "puppeteer-extra": "*" + }, + "peerDependenciesMeta": { + "playwright-extra": { + "optional": true + }, + "puppeteer-extra": { + "optional": true + } + } + }, "node_modules/quick-format-unescaped": { "version": "4.0.4", "resolved": "https://registry.npmjs.org/quick-format-unescaped/-/quick-format-unescaped-4.0.4.tgz", @@ -3354,6 +3721,71 @@ "integrity": "sha512-q1b3N5QkRUWUl7iyylaaj3kOpIT0N2i9MqIEQXP73GVsN9cw3fdx8X63cEmWhJGi2PPCF23Ijp7ktmd39rawIA==", "license": "MIT" }, + "node_modules/rimraf": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz", + "integrity": "sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==", + "deprecated": "Rimraf versions prior to v4 are no longer supported", + "license": "ISC", + "dependencies": { + "glob": "^7.1.3" + }, + "bin": { + "rimraf": "bin.js" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/rimraf/node_modules/balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "license": "MIT" + }, + "node_modules/rimraf/node_modules/brace-expansion": { + "version": "1.1.12", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", + "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==", + "license": "MIT", + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "node_modules/rimraf/node_modules/glob": { + "version": "7.2.3", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", + "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==", + "deprecated": "Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me", + "license": "ISC", + "dependencies": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.1.1", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + }, + "engines": { + "node": "*" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/rimraf/node_modules/minimatch": { + "version": "3.1.5", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.5.tgz", + "integrity": "sha512-VgjWUsnnT6n+NUk6eZq77zeFdpW2LWDzP6zFGrCbHXiYNul5Dzqk2HHQ5uFH2DNW5Xbp8+jVzaeNt94ssEEl4w==", + "license": "ISC", + "dependencies": { + "brace-expansion": "^1.1.7" + }, + "engines": { + "node": "*" + } + }, "node_modules/rollup": { "version": "4.60.0", "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.60.0.tgz", @@ -3506,6 +3938,42 @@ "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==", "license": "ISC" }, + "node_modules/shallow-clone": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/shallow-clone/-/shallow-clone-0.1.2.tgz", + "integrity": "sha512-J1zdXCky5GmNnuauESROVu31MQSnLoYvlyEn6j2Ztk6Q5EHFIhxkMhYcv6vuDzl2XEzoRr856QwzMgWM/TmZgw==", + "license": "MIT", + "dependencies": { + "is-extendable": "^0.1.1", + "kind-of": "^2.0.1", + "lazy-cache": "^0.2.3", + "mixin-object": "^2.0.1" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/shallow-clone/node_modules/kind-of": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-2.0.1.tgz", + "integrity": "sha512-0u8i1NZ/mg0b+W3MGGw5I7+6Eib2nx72S/QvXa0hYjEkjTknYmEYQJwGu3mLC0BrhtJjtQafTkyRUQ75Kx0LVg==", + "license": "MIT", + "dependencies": { + "is-buffer": "^1.0.2" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/shallow-clone/node_modules/lazy-cache": { + "version": "0.2.7", + "resolved": "https://registry.npmjs.org/lazy-cache/-/lazy-cache-0.2.7.tgz", + "integrity": "sha512-gkX52wvU/R8DVMMt78ATVPFMJqfW8FPz1GZ1sVHBVQHmu/WvhIWE4cE1GBzhJNFicDeYhnwp6Rl35BcAIM3YOQ==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/shebang-command": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", @@ -3921,6 +4389,15 @@ "devOptional": true, "license": "MIT" }, + "node_modules/universalify": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.1.tgz", + "integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==", + "license": "MIT", + "engines": { + "node": ">= 10.0.0" + } + }, "node_modules/vite": { "version": "5.4.21", "resolved": "https://registry.npmjs.org/vite/-/vite-5.4.21.tgz", diff --git a/package.json b/package.json index 88c4dd5..9f8c87f 100644 --- a/package.json +++ b/package.json @@ -23,6 +23,8 @@ "fastify": "^5.2.1", "p-limit": "^6.2.0", "puppeteer": "^24.40.0", + "puppeteer-extra": "^3.3.6", + "puppeteer-extra-plugin-stealth": "^2.11.2", "sql.js": "^1.11.0", "yaml": "^2.8.3" }, diff --git a/src/server/scraper/browser-scraper.ts b/src/server/scraper/browser-scraper.ts index a978002..c31604b 100644 --- a/src/server/scraper/browser-scraper.ts +++ b/src/server/scraper/browser-scraper.ts @@ -15,19 +15,19 @@ function log(msg: string) { async function getBrowser(): Promise { if (browser && browser.connected) return browser; - const puppeteer = await import('puppeteer'); + const puppeteerExtra = await import('puppeteer-extra'); + const StealthPlugin = await import('puppeteer-extra-plugin-stealth'); + puppeteerExtra.default.use(StealthPlugin.default()); if (config.chromiumWs) { - // Connect to remote Chromium (Docker container) log(`Connecting to remote Chromium at ${config.chromiumWs}`); - browser = await puppeteer.default.connect({ + browser = await puppeteerExtra.default.connect({ browserWSEndpoint: config.chromiumWs, - }); - log('Connected to remote Chromium'); + }) as unknown as Browser; + log('Connected to remote Chromium (stealth mode)'); } else { - // Fall back to local launch - log('Launching local Chromium...'); - browser = await puppeteer.default.launch({ + log('Launching local Chromium (stealth mode)...'); + browser = await puppeteerExtra.default.launch({ headless: true, args: [ '--no-sandbox', @@ -37,7 +37,7 @@ async function getBrowser(): Promise { '--disable-extensions', '--no-first-run', ], - }); + }) as unknown as Browser; log('Local Chromium launched'); } diff --git a/stores/ronis-hr.yaml b/stores/ronis-hr.yaml new file mode 100644 index 0000000..0d667df --- /dev/null +++ b/stores/ronis-hr.yaml @@ -0,0 +1,14 @@ +name: Ronis.hr +base_url: https://www.ronis.hr +search_url: https://www.ronis.hr/hr/rezultati-pretrage?s=1&keyword={query} +category: Electronics +currency: EUR +render_js: true +test_query: logitech + +selectors: + container: ".product-display-grid.item" + name: ".product-info h3" + price: ".product-price .price" + link: "a.product-display-grid-image" + image: ".image-wrapper img"