Initial commit: Price Hunter — self-hosted price comparison engine
Complete application scaffolding with:
- Backend: Node.js + Fastify + sql.js (SQLite)
- Frontend: SvelteKit + Tailwind CSS
- Scraper engine with parallel fan-out, rate limiting, cheerio-based parsing
- Store management with CSS selector config and per-store test pages
- Docker setup for single-command deployment

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
8
src/server/config.ts
Normal file
8
src/server/config.ts
Normal file
@@ -0,0 +1,8 @@
|
||||
import 'dotenv/config';
|
||||
|
||||
/**
 * Parses a TCP port from an environment value.
 *
 * @param raw - Raw environment value (may be undefined or empty).
 * @param fallback - Port returned when `raw` is missing, not numeric, or outside 0-65535.
 * @returns The parsed port, or `fallback`.
 */
function parsePort(raw: string | undefined, fallback: number): number {
  const parsed = Number.parseInt(raw ?? '', 10);
  const valid = Number.isInteger(parsed) && parsed >= 0 && parsed <= 65535;
  return valid ? parsed : fallback;
}

/**
 * Runtime configuration resolved once from environment variables.
 *
 * - `port`: `PORT`, default 3000 (also used when `PORT` is not a valid port number).
 * - `host`: `HOST`, default `0.0.0.0`.
 * - `databasePath`: `DATABASE_PATH`, default `./data/pricehunter.db`.
 * - `isProduction`: true only when `NODE_ENV` is exactly `production`.
 */
export const config = {
  port: parsePort(process.env.PORT, 3000),
  host: process.env.HOST || '0.0.0.0',
  databasePath: process.env.DATABASE_PATH || './data/pricehunter.db',
  isProduction: process.env.NODE_ENV === 'production',
};
|
||||
60
src/server/db/connection.ts
Normal file
60
src/server/db/connection.ts
Normal file
@@ -0,0 +1,60 @@
|
||||
import initSqlJs, { type Database } from 'sql.js';
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { config } from '../config.js';
|
||||
|
||||
let db: Database;
|
||||
|
||||
/**
 * Opens the sql.js database and caches it in the module-level `db`.
 *
 * Creates the directory of `config.databasePath` when missing, loads the
 * existing file if there is one (otherwise starts with an empty database),
 * then applies the journal-mode and foreign-key pragmas.
 *
 * @returns The cached database; repeated calls return the same instance.
 */
export async function initDatabase(): Promise<Database> {
  if (db) return db;

  const SQL = await initSqlJs();

  const parentDir = path.dirname(config.databasePath);
  if (!fs.existsSync(parentDir)) {
    fs.mkdirSync(parentDir, { recursive: true });
  }

  if (fs.existsSync(config.databasePath)) {
    const fileContents = fs.readFileSync(config.databasePath);
    db = new SQL.Database(fileContents);
  } else {
    db = new SQL.Database();
  }

  for (const pragma of ['PRAGMA journal_mode = WAL', 'PRAGMA foreign_keys = ON']) {
    db.run(pragma);
  }

  return db;
}
|
||||
|
||||
/**
 * Returns the database opened by `initDatabase()`.
 *
 * @throws Error when the database has not been initialized yet.
 */
export function getDatabase(): Database {
  if (db) return db;
  throw new Error('Database not initialized. Call initDatabase() first.');
}
|
||||
|
||||
/**
 * Serializes the in-memory database and persists it to `config.databasePath`.
 *
 * Does nothing when the database has not been initialized.
 *
 * @throws Any error raised by the export or by the filesystem calls.
 */
export function saveDatabase(): void {
  if (!db) return;

  const snapshot = Buffer.from(db.export());

  // sql.js `export()` closes and re-opens the connection, which resets every
  // pragma to its default. Without this, foreign-key enforcement (and thus
  // ON DELETE CASCADE / SET NULL) is silently disabled after the first save.
  db.run('PRAGMA foreign_keys = ON');

  // Write to a sibling temp file and rename it into place so an interrupted
  // write cannot leave a truncated database file behind.
  const tempPath = `${config.databasePath}.tmp`;
  fs.writeFileSync(tempPath, snapshot);
  fs.renameSync(tempPath, config.databasePath);
}
|
||||
|
||||
// Auto-save periodically
|
||||
let saveInterval: ReturnType<typeof setInterval> | null = null;
|
||||
|
||||
/**
 * Starts a timer that calls `saveDatabase()` every `intervalMs` milliseconds.
 * Calling it again while a timer is already running is a no-op.
 *
 * @param intervalMs - Delay between saves in milliseconds (default 5000).
 */
export function startAutoSave(intervalMs = 5000): void {
  if (saveInterval) return;
  saveInterval = setInterval(() => {
    try {
      saveDatabase();
    } catch (err) {
      // Best-effort: keep the timer alive, but make the failure visible
      // instead of discarding it.
      console.error('Auto-save failed:', err);
    }
  }, intervalMs);
}
|
||||
|
||||
/**
 * Cancels the auto-save timer if one is running and clears its handle.
 */
export function stopAutoSave(): void {
  if (!saveInterval) return;
  clearInterval(saveInterval);
  saveInterval = null;
}
|
||||
51
src/server/db/migrate.ts
Normal file
51
src/server/db/migrate.ts
Normal file
@@ -0,0 +1,51 @@
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { getDatabase, saveDatabase } from './connection.js';
|
||||
|
||||
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
/**
 * Applies every pending `.sql` migration in file-name order and records it in
 * the `_migrations` table, then saves the database.
 *
 * Each migration runs inside its own transaction, so a failing file is rolled
 * back instead of leaving a half-applied schema. Migration files therefore
 * must not contain their own BEGIN/COMMIT statements.
 *
 * Logs a warning and returns when no migrations directory can be found.
 *
 * @throws The error raised by the failing migration (after rolling it back).
 */
export function runMigrations(): void {
  const db = getDatabase();

  db.run(`
    CREATE TABLE IF NOT EXISTS _migrations (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      name TEXT NOT NULL UNIQUE,
      applied_at TEXT DEFAULT (datetime('now'))
    )
  `);

  // Prefer a `migrations` directory next to this module; otherwise fall back
  // to the source tree three levels up (src/server/db/migrations).
  const candidateDirs = [
    path.join(__dirname, 'migrations'),
    path.join(__dirname, '..', '..', '..', 'src', 'server', 'db', 'migrations'),
  ];
  const dir = candidateDirs.find((candidate) => fs.existsSync(candidate));

  if (!dir) {
    console.warn('No migrations directory found');
    return;
  }

  const files = fs.readdirSync(dir)
    .filter((f) => f.endsWith('.sql'))
    .sort();

  const applied = new Set<string>();
  const appliedStmt = db.prepare('SELECT name FROM _migrations');
  try {
    while (appliedStmt.step()) {
      applied.add(appliedStmt.getAsObject().name as string);
    }
  } finally {
    // Release the statement even when stepping throws.
    appliedStmt.free();
  }

  for (const file of files) {
    if (applied.has(file)) continue;

    const sql = fs.readFileSync(path.join(dir, file), 'utf-8');

    db.run('BEGIN');
    try {
      db.run(sql);
      db.run('INSERT INTO _migrations (name) VALUES (?)', [file]);
      db.run('COMMIT');
    } catch (err) {
      try {
        db.run('ROLLBACK');
      } catch {
        // No transaction left to roll back; surface the original error.
      }
      throw err;
    }
    console.log(`Migration applied: ${file}`);
  }

  saveDatabase();
}
|
||||
57
src/server/db/migrations/001_initial.sql
Normal file
57
src/server/db/migrations/001_initial.sql
Normal file
@@ -0,0 +1,57 @@
|
||||
-- Categories used to classify stores.
CREATE TABLE categories (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  name TEXT NOT NULL UNIQUE,
  color TEXT DEFAULT '#6B7280',
  sort_order INTEGER DEFAULT 0
);

-- Scrape targets: URLs, CSS selectors (sel_*), rate limiting and request options.
CREATE TABLE stores (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  name TEXT NOT NULL,
  slug TEXT NOT NULL UNIQUE,
  base_url TEXT NOT NULL,
  search_url TEXT NOT NULL,
  enabled INTEGER NOT NULL DEFAULT 1,
  sel_container TEXT NOT NULL,
  sel_name TEXT NOT NULL,
  sel_price TEXT NOT NULL,
  sel_link TEXT NOT NULL,
  sel_image TEXT,
  rate_limit INTEGER DEFAULT 2,
  rate_window INTEGER DEFAULT 1000,
  proxy_url TEXT,
  user_agent TEXT,
  headers_json TEXT,
  currency TEXT DEFAULT 'EUR',
  category_id INTEGER REFERENCES categories(id) ON DELETE SET NULL,
  created_at TEXT DEFAULT (datetime('now')),
  updated_at TEXT DEFAULT (datetime('now'))
);

-- Named collections of stores.
CREATE TABLE store_groups (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  name TEXT NOT NULL UNIQUE,
  description TEXT
);

-- Group membership (many-to-many).
-- NOT NULL is explicit because SQLite permits NULL in PRIMARY KEY columns of
-- ordinary rowid tables, which would allow rows that belong to no group/store.
CREATE TABLE store_group_members (
  group_id INTEGER NOT NULL REFERENCES store_groups(id) ON DELETE CASCADE,
  store_id INTEGER NOT NULL REFERENCES stores(id) ON DELETE CASCADE,
  PRIMARY KEY (group_id, store_id)
);

-- One row per scrape attempt; removed together with its store.
CREATE TABLE scrape_logs (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  store_id INTEGER NOT NULL REFERENCES stores(id) ON DELETE CASCADE,
  query TEXT NOT NULL,
  success INTEGER NOT NULL,
  result_count INTEGER DEFAULT 0,
  duration_ms INTEGER,
  error_message TEXT,
  scraped_at TEXT DEFAULT (datetime('now'))
);

CREATE INDEX idx_scrape_logs_store_id ON scrape_logs(store_id);
CREATE INDEX idx_scrape_logs_scraped_at ON scrape_logs(scraped_at);
CREATE INDEX idx_stores_enabled ON stores(enabled);
CREATE INDEX idx_stores_category_id ON stores(category_id);
|
||||
66
src/server/index.ts
Normal file
66
src/server/index.ts
Normal file
@@ -0,0 +1,66 @@
|
||||
import Fastify from 'fastify';
|
||||
import cors from '@fastify/cors';
|
||||
import fastifyStatic from '@fastify/static';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { config } from './config.js';
|
||||
import { initDatabase, startAutoSave, saveDatabase } from './db/connection.js';
|
||||
import { runMigrations } from './db/migrate.js';
|
||||
import { storeRoutes } from './routes/stores.js';
|
||||
import { categoryRoutes } from './routes/categories.js';
|
||||
import { searchRoutes } from './routes/search.js';
|
||||
import { testRoutes } from './routes/test.js';
|
||||
import { healthRoutes } from './routes/health.js';
|
||||
|
||||
const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Pretty-printed debug logging in development, plain info-level logging in production.
const app = Fastify({
  logger: {
    level: config.isProduction ? 'info' : 'debug',
    transport: config.isProduction ? undefined : { target: 'pino-pretty' },
  },
});

await app.register(cors, { origin: true });

// API routes
await app.register(storeRoutes, { prefix: '/api' });
await app.register(categoryRoutes, { prefix: '/api' });
await app.register(searchRoutes, { prefix: '/api' });
await app.register(testRoutes, { prefix: '/api' });
await app.register(healthRoutes, { prefix: '/api' });

// Serve static frontend in production
if (config.isProduction) {
  const clientPath = path.join(__dirname, '..', 'client');
  await app.register(fastifyStatic, {
    root: clientPath,
    wildcard: false,
  });

  // SPA fallback: serve index.html for all non-API routes
  app.setNotFoundHandler((request, reply) => {
    // Compare the path only, and require '/api' to be a whole segment so that
    // client-side routes such as '/apiary' still receive the SPA shell.
    const pathname = request.url.split('?')[0];
    if (pathname === '/api' || pathname.startsWith('/api/')) {
      reply.code(404).send({ error: 'Not found' });
    } else {
      reply.sendFile('index.html');
    }
  });
}

// Initialize database and run migrations
await initDatabase();
runMigrations();
startAutoSave();

// Save database on shutdown. The process always exits, even when the final
// save fails; the failure is logged and reflected in the exit code.
const shutdown = (): void => {
  let exitCode = 0;
  try {
    saveDatabase();
  } catch (err) {
    app.log.error(err);
    exitCode = 1;
  }
  process.exit(exitCode);
};
process.on('SIGINT', shutdown);
process.on('SIGTERM', shutdown);

try {
  await app.listen({ port: config.port, host: config.host });
  app.log.info(`Price Hunter running at http://localhost:${config.port}`);
} catch (err) {
  app.log.error(err);
  process.exit(1);
}
|
||||
139
src/server/models/category.ts
Normal file
139
src/server/models/category.ts
Normal file
@@ -0,0 +1,139 @@
|
||||
import { getDatabase, saveDatabase } from '../db/connection.js';
|
||||
|
||||
/** A row of the `categories` table. */
export interface Category {
  id: number;
  name: string;
  /** Color string; `createCategory` stores '#6B7280' when none is given. */
  color: string;
  /** Primary sort key used when listing categories. */
  sort_order: number;
}
|
||||
|
||||
/** A row of the `store_groups` table. */
export interface StoreGroup {
  id: number;
  name: string;
  description: string | null;
}
|
||||
|
||||
/** A store group together with the ids of its member stores. */
export interface StoreGroupWithMembers extends StoreGroup {
  store_ids: number[];
}
|
||||
|
||||
/**
 * Runs a query and returns every result row as a plain object.
 *
 * @param sql - SQL text, optionally with `?` placeholders.
 * @param params - Values bound to the placeholders, in order.
 * @returns All rows; empty when nothing matches.
 */
function queryAll(sql: string, params: any[] = []): any[] {
  const stmt = getDatabase().prepare(sql);
  try {
    if (params.length) stmt.bind(params);
    const rows: any[] = [];
    while (stmt.step()) {
      rows.push(stmt.getAsObject());
    }
    return rows;
  } finally {
    // Free the statement even when bind/step throws, so it does not leak.
    stmt.free();
  }
}
|
||||
|
||||
/**
 * Runs a query through `queryAll` and returns its first row.
 *
 * @returns The first row, or `undefined` when there are no rows.
 */
function queryOne(sql: string, params: any[] = []): any | undefined {
  const [firstRow] = queryAll(sql, params);
  return firstRow;
}
|
||||
|
||||
// Categories
|
||||
|
||||
/** Lists all categories ordered by `sort_order`, then `name`. */
export function getAllCategories(): Category[] {
  const sql = 'SELECT * FROM categories ORDER BY sort_order, name';
  return queryAll(sql);
}
|
||||
|
||||
/** Looks up one category by id; `undefined` when no row matches. */
export function getCategoryById(id: number): Category | undefined {
  const sql = 'SELECT * FROM categories WHERE id = ?';
  return queryOne(sql, [id]);
}
|
||||
|
||||
/**
 * Inserts a category after the current last one (max `sort_order` + 1, or 0
 * for the first category), saves the database and returns the new row.
 *
 * @param name - Category name.
 * @param color - Optional color; '#6B7280' is stored when omitted or empty.
 */
export function createCategory(name: string, color?: string): Category {
  const db = getDatabase();

  const orderRow = queryOne('SELECT MAX(sort_order) as max_order FROM categories');
  const nextOrder = (orderRow?.max_order ?? -1) + 1;

  const insertParams = [name, color || '#6B7280', nextOrder];
  db.run('INSERT INTO categories (name, color, sort_order) VALUES (?, ?, ?)', insertParams);

  const insertedRow = queryOne('SELECT last_insert_rowid() as id');
  const newId = insertedRow?.id;

  saveDatabase();
  return getCategoryById(newId) as Category;
}
|
||||
|
||||
/**
 * Updates the provided fields of a category and saves the database.
 * When `data` has no defined fields, nothing is written.
 *
 * @returns The category as stored afterwards, or `undefined` if the id is unknown.
 */
export function updateCategory(id: number, data: { name?: string; color?: string; sort_order?: number }): Category | undefined {
  const db = getDatabase();

  const candidates: Array<[string, unknown]> = [
    ['name', data.name],
    ['color', data.color],
    ['sort_order', data.sort_order],
  ];

  const assignments: string[] = [];
  const params: any[] = [];
  for (const [column, value] of candidates) {
    if (value === undefined) continue;
    assignments.push(`${column} = ?`);
    params.push(value);
  }

  if (assignments.length > 0) {
    params.push(id);
    db.run(`UPDATE categories SET ${assignments.join(', ')} WHERE id = ?`, params);
    saveDatabase();
  }

  return getCategoryById(id);
}
|
||||
|
||||
/**
 * Deletes a category by id, saving the database only when a row was removed.
 *
 * @returns `true` if a row was deleted.
 */
export function deleteCategory(id: number): boolean {
  const db = getDatabase();
  db.run('DELETE FROM categories WHERE id = ?', [id]);

  const removed = db.getRowsModified() > 0;
  if (removed) {
    saveDatabase();
  }
  return removed;
}
|
||||
|
||||
// Groups
|
||||
|
||||
/** Lists all store groups by name, each with the ids of its member stores. */
export function getAllGroups(): StoreGroupWithMembers[] {
  const result: StoreGroupWithMembers[] = [];
  for (const group of queryAll('SELECT * FROM store_groups ORDER BY name')) {
    const memberRows = queryAll('SELECT store_id FROM store_group_members WHERE group_id = ?', [group.id]);
    const store_ids = memberRows.map((row: any) => row.store_id);
    result.push({ ...group, store_ids });
  }
  return result;
}
|
||||
|
||||
/**
 * Loads one store group with its member store ids.
 *
 * @returns The group, or `undefined` when the id is unknown.
 */
export function getGroupById(id: number): StoreGroupWithMembers | undefined {
  const group = queryOne('SELECT * FROM store_groups WHERE id = ?', [id]);
  if (group) {
    const memberRows = queryAll('SELECT store_id FROM store_group_members WHERE group_id = ?', [id]);
    const store_ids = memberRows.map((row: any) => row.store_id);
    return { ...group, store_ids };
  }
  return undefined;
}
|
||||
|
||||
/**
 * Inserts a store group, saves the database and returns the new group.
 *
 * @param name - Group name.
 * @param description - Optional description; stored as NULL when omitted or empty.
 */
export function createGroup(name: string, description?: string): StoreGroupWithMembers {
  const db = getDatabase();

  const insertParams = [name, description || null];
  db.run('INSERT INTO store_groups (name, description) VALUES (?, ?)', insertParams);

  const insertedRow = queryOne('SELECT last_insert_rowid() as id');
  const newId = insertedRow?.id;

  saveDatabase();
  return getGroupById(newId) as StoreGroupWithMembers;
}
|
||||
|
||||
/**
 * Updates the provided fields of a store group and saves the database.
 * When `data` has no defined fields, nothing is written.
 *
 * @returns The group as stored afterwards, or `undefined` if the id is unknown.
 */
export function updateGroup(id: number, data: { name?: string; description?: string }): StoreGroupWithMembers | undefined {
  const db = getDatabase();

  const candidates: Array<[string, unknown]> = [
    ['name', data.name],
    ['description', data.description],
  ];

  const assignments: string[] = [];
  const params: any[] = [];
  for (const [column, value] of candidates) {
    if (value === undefined) continue;
    assignments.push(`${column} = ?`);
    params.push(value);
  }

  if (assignments.length !== 0) {
    params.push(id);
    db.run(`UPDATE store_groups SET ${assignments.join(', ')} WHERE id = ?`, params);
    saveDatabase();
  }

  return getGroupById(id);
}
|
||||
|
||||
/**
 * Deletes a store group by id, saving the database only when a row was removed.
 *
 * @returns `true` if a row was deleted.
 */
export function deleteGroup(id: number): boolean {
  const db = getDatabase();
  db.run('DELETE FROM store_groups WHERE id = ?', [id]);

  const removed = db.getRowsModified() > 0;
  if (removed) {
    saveDatabase();
  }
  return removed;
}
|
||||
|
||||
/**
 * Replaces the membership of a group with exactly the given stores.
 *
 * The delete and inserts run in one transaction: if any insert fails, the
 * previous membership is restored instead of being left partially replaced.
 * Duplicate ids in `storeIds` are ignored.
 *
 * @param groupId - Group whose members are replaced.
 * @param storeIds - Store ids that should form the new membership.
 * @throws The database error that caused the replacement to fail.
 */
export function setGroupMembers(groupId: number, storeIds: number[]): void {
  const db = getDatabase();
  // A repeated id would violate the (group_id, store_id) primary key.
  const uniqueStoreIds = [...new Set(storeIds)];

  db.run('BEGIN');
  try {
    db.run('DELETE FROM store_group_members WHERE group_id = ?', [groupId]);
    for (const storeId of uniqueStoreIds) {
      db.run('INSERT INTO store_group_members (group_id, store_id) VALUES (?, ?)', [groupId, storeId]);
    }
    db.run('COMMIT');
  } catch (err) {
    try {
      db.run('ROLLBACK');
    } catch {
      // No transaction left to roll back; surface the original error.
    }
    throw err;
  }

  saveDatabase();
}
|
||||
85
src/server/models/scrape-log.ts
Normal file
85
src/server/models/scrape-log.ts
Normal file
@@ -0,0 +1,85 @@
|
||||
import { getDatabase, saveDatabase } from '../db/connection.js';
|
||||
|
||||
/** A row of the `scrape_logs` table. */
export interface ScrapeLog {
  id: number;
  store_id: number;
  query: string;
  /** 1 for a successful scrape, 0 for a failed one (as written by `logScrape`). */
  success: number;
  result_count: number;
  duration_ms: number;
  error_message: string | null;
  scraped_at: string;
}
|
||||
|
||||
/**
 * Runs a query and returns every result row as a plain object.
 *
 * @param sql - SQL text, optionally with `?` placeholders.
 * @param params - Values bound to the placeholders, in order.
 * @returns All rows; empty when nothing matches.
 */
function queryAll(sql: string, params: any[] = []): any[] {
  const stmt = getDatabase().prepare(sql);
  try {
    if (params.length) stmt.bind(params);
    const rows: any[] = [];
    while (stmt.step()) {
      rows.push(stmt.getAsObject());
    }
    return rows;
  } finally {
    // Free the statement even when bind/step throws, so it does not leak.
    stmt.free();
  }
}
|
||||
|
||||
/**
 * Runs a query through `queryAll` and returns its first row.
 *
 * @returns The first row, or `undefined` when there are no rows.
 */
function queryOne(sql: string, params: any[] = []): any | undefined {
  const [firstRow] = queryAll(sql, params);
  return firstRow;
}
|
||||
|
||||
/**
 * Records one scrape attempt, saves the database and returns the stored row.
 *
 * @param storeId - Store that was scraped.
 * @param query - Search query that was used.
 * @param success - Stored as 1 (true) or 0 (false).
 * @param resultCount - Number of results found.
 * @param durationMs - Duration of the scrape in milliseconds.
 * @param errorMessage - Optional error text; stored as NULL when omitted or empty.
 */
export function logScrape(
  storeId: number,
  query: string,
  success: boolean,
  resultCount: number,
  durationMs: number,
  errorMessage?: string
): ScrapeLog {
  const db = getDatabase();

  const successFlag = success ? 1 : 0;
  const insertParams = [storeId, query, successFlag, resultCount, durationMs, errorMessage || null];
  db.run(`
    INSERT INTO scrape_logs (store_id, query, success, result_count, duration_ms, error_message)
    VALUES (?, ?, ?, ?, ?, ?)
  `, insertParams);

  const insertedRow = queryOne('SELECT last_insert_rowid() as id');
  const newId = insertedRow?.id;

  saveDatabase();
  return queryOne('SELECT * FROM scrape_logs WHERE id = ?', [newId]) as ScrapeLog;
}
|
||||
|
||||
/**
 * Returns the most recent scrape logs of a store, newest first.
 *
 * `scraped_at` only has one-second resolution, so `id` is used as a
 * tie-breaker to keep the order deterministic for logs written in the same
 * second.
 *
 * @param storeId - Store whose logs are listed.
 * @param limit - Maximum number of rows (default 20).
 */
export function getLogsByStore(storeId: number, limit = 20): ScrapeLog[] {
  return queryAll(
    'SELECT * FROM scrape_logs WHERE store_id = ? ORDER BY scraped_at DESC, id DESC LIMIT ?',
    [storeId, limit]
  );
}
|
||||
|
||||
/**
 * Aggregates the scrape history of a store.
 *
 * @param storeId - Store whose logs are summarized.
 * @returns Counts of total/successful/failed scrapes, the rounded average
 *   duration, the timestamp of the latest success and the message of the
 *   latest failure. Counts are 0 and the last two fields `null` when there is
 *   no matching data.
 */
export function getStoreHealth(storeId: number): {
  total: number;
  successful: number;
  failed: number;
  avg_duration_ms: number;
  last_success: string | null;
  last_error: string | null;
} {
  const stats = queryOne(`
    SELECT
      COUNT(*) as total,
      SUM(CASE WHEN success = 1 THEN 1 ELSE 0 END) as successful,
      SUM(CASE WHEN success = 0 THEN 1 ELSE 0 END) as failed,
      AVG(duration_ms) as avg_duration_ms,
      MAX(CASE WHEN success = 1 THEN scraped_at END) as last_success
    FROM scrape_logs WHERE store_id = ?
  `, [storeId]);

  // `id DESC` breaks ties between failures logged within the same second, so
  // the most recently inserted error is the one reported.
  const lastError = queryOne(
    'SELECT error_message FROM scrape_logs WHERE store_id = ? AND success = 0 ORDER BY scraped_at DESC, id DESC LIMIT 1',
    [storeId]
  );

  return {
    // The SUM/AVG/MAX aggregates are NULL when no rows match; map those to 0 / null.
    total: stats?.total || 0,
    successful: stats?.successful || 0,
    failed: stats?.failed || 0,
    avg_duration_ms: Math.round(stats?.avg_duration_ms || 0),
    last_success: stats?.last_success || null,
    last_error: lastError?.error_message || null,
  };
}
|
||||
174
src/server/models/store.ts
Normal file
174
src/server/models/store.ts
Normal file
@@ -0,0 +1,174 @@
|
||||
import { getDatabase, saveDatabase } from '../db/connection.js';
|
||||
|
||||
/** A row of the `stores` table. */
export interface Store {
  id: number;
  name: string;
  slug: string;
  base_url: string;
  search_url: string;
  /** 1 when enabled, 0 when disabled (toggled by `toggleStoreEnabled`). */
  enabled: number;
  sel_container: string;
  sel_name: string;
  sel_price: string;
  sel_link: string;
  sel_image: string | null;
  rate_limit: number;
  rate_window: number;
  proxy_url: string | null;
  user_agent: string | null;
  headers_json: string | null;
  currency: string;
  category_id: number | null;
  created_at: string;
  updated_at: string;
}
|
||||
|
||||
/** A store joined with its category's name and color (both `null` without a category). */
export interface StoreWithCategory extends Store {
  category_name: string | null;
  category_color: string | null;
}
|
||||
|
||||
/** Fields accepted when creating a store; optional ones get defaults in `createStore`. */
export interface CreateStoreInput {
  name: string;
  /** Derived from `name` when omitted. */
  slug?: string;
  base_url: string;
  search_url: string;
  sel_container: string;
  sel_name: string;
  sel_price: string;
  sel_link: string;
  sel_image?: string;
  rate_limit?: number;
  rate_window?: number;
  proxy_url?: string;
  user_agent?: string;
  headers_json?: string;
  currency?: string;
  category_id?: number;
}
|
||||
|
||||
/**
 * Runs a query and returns every result row as a plain object.
 *
 * @param sql - SQL text, optionally with `?` placeholders.
 * @param params - Values bound to the placeholders, in order.
 * @returns All rows; empty when nothing matches.
 */
function queryAll(sql: string, params: any[] = []): any[] {
  const stmt = getDatabase().prepare(sql);
  try {
    if (params.length) stmt.bind(params);
    const rows: any[] = [];
    while (stmt.step()) {
      rows.push(stmt.getAsObject());
    }
    return rows;
  } finally {
    // Free the statement even when bind/step throws, so it does not leak.
    stmt.free();
  }
}
|
||||
|
||||
/**
 * Runs a query through `queryAll` and returns its first row.
 *
 * @returns The first row, or `undefined` when there are no rows.
 */
function queryOne(sql: string, params: any[] = []): any | undefined {
  const [firstRow] = queryAll(sql, params);
  return firstRow;
}
|
||||
|
||||
/**
 * Converts free text into a lowercase, dash-separated slug of `a-z0-9`.
 *
 * Accented letters are reduced to their base letter first ("Café" becomes
 * "cafe") instead of being replaced by a dash. Any other character outside
 * `a-z0-9` becomes a separator; leading and trailing dashes are removed.
 *
 * @param text - Text to convert.
 * @returns The slug; may be empty when `text` has no ASCII letters or digits.
 */
function slugify(text: string): string {
  return text
    // Decompose accented characters, then drop the combining marks.
    .normalize('NFKD')
    .replace(/[\u0300-\u036f]/g, '')
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+|-+$/g, '');
}
|
||||
|
||||
/** Lists every store (enabled or not) by name, with its category name and color. */
export function getAllStores(): StoreWithCategory[] {
  const sql = `
    SELECT s.*, c.name as category_name, c.color as category_color
    FROM stores s
    LEFT JOIN categories c ON s.category_id = c.id
    ORDER BY s.name
  `;
  return queryAll(sql);
}
|
||||
|
||||
/**
 * Loads one store with its category name and color.
 *
 * @returns The store, or `undefined` when the id is unknown.
 */
export function getStoreById(id: number): StoreWithCategory | undefined {
  const sql = `
    SELECT s.*, c.name as category_name, c.color as category_color
    FROM stores s
    LEFT JOIN categories c ON s.category_id = c.id
    WHERE s.id = ?
  `;
  return queryOne(sql, [id]);
}
|
||||
|
||||
/** Lists all enabled stores ordered by name. */
export function getEnabledStores(): Store[] {
  const sql = 'SELECT * FROM stores WHERE enabled = 1 ORDER BY name';
  return queryAll(sql);
}
|
||||
|
||||
/** Lists the enabled stores of one category, ordered by name. */
export function getStoresByCategory(categoryId: number): Store[] {
  const sql = 'SELECT * FROM stores WHERE enabled = 1 AND category_id = ? ORDER BY name';
  return queryAll(sql, [categoryId]);
}
|
||||
|
||||
/** Lists the enabled stores that are members of one group, ordered by name. */
export function getStoresByGroup(groupId: number): Store[] {
  const sql = `
    SELECT s.* FROM stores s
    JOIN store_group_members sgm ON s.id = sgm.store_id
    WHERE s.enabled = 1 AND sgm.group_id = ?
    ORDER BY s.name
  `;
  return queryAll(sql, [groupId]);
}
|
||||
|
||||
/**
 * Lists the enabled stores whose id is in `ids`, ordered by name.
 *
 * @returns An empty array when `ids` is empty (no query is run).
 */
export function getStoresByIds(ids: number[]): Store[] {
  if (ids.length === 0) {
    return [];
  }
  // One `?` per id so every value is bound rather than interpolated.
  const placeholders = Array.from(ids, () => '?').join(',');
  const sql = `SELECT * FROM stores WHERE enabled = 1 AND id IN (${placeholders}) ORDER BY name`;
  return queryAll(sql, ids);
}
|
||||
|
||||
/**
 * Inserts a store, saves the database and returns the new row.
 *
 * The slug is derived from the name when not supplied. Omitted optional
 * fields default to: rate limit 2, rate window 1000, currency 'EUR', and NULL
 * for image selector, proxy, user agent, headers and category.
 */
export function createStore(input: CreateStoreInput): Store {
  const db = getDatabase();
  const slug = input.slug || slugify(input.name);

  const insertParams = [
    input.name,
    slug,
    input.base_url,
    input.search_url,
    input.sel_container,
    input.sel_name,
    input.sel_price,
    input.sel_link,
    input.sel_image || null,
    input.rate_limit ?? 2,
    input.rate_window ?? 1000,
    input.proxy_url || null,
    input.user_agent || null,
    input.headers_json || null,
    input.currency || 'EUR',
    input.category_id || null,
  ];

  db.run(`
    INSERT INTO stores (name, slug, base_url, search_url, sel_container, sel_name, sel_price, sel_link, sel_image,
      rate_limit, rate_window, proxy_url, user_agent, headers_json, currency, category_id)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `, insertParams);

  const insertedRow = queryOne('SELECT last_insert_rowid() as id');
  const newId = insertedRow?.id;

  saveDatabase();
  return getStoreById(newId) as Store;
}
|
||||
|
||||
/**
 * Columns of `stores` that may be changed through `updateStore`.
 * `enabled` is included so existing callers that send it keep working.
 */
const UPDATABLE_STORE_COLUMNS: ReadonlySet<string> = new Set([
  'name', 'slug', 'base_url', 'search_url', 'enabled',
  'sel_container', 'sel_name', 'sel_price', 'sel_link', 'sel_image',
  'rate_limit', 'rate_window', 'proxy_url', 'user_agent', 'headers_json',
  'currency', 'category_id',
]);

/**
 * Updates the provided fields of a store, refreshes `updated_at` and saves
 * the database.
 *
 * Only keys listed in `UPDATABLE_STORE_COLUMNS` are written. Key names end up
 * in the SQL text, so anything else (for example extra properties of a
 * request body) is ignored rather than interpolated into the statement.
 *
 * @param id - Store to update.
 * @param input - Fields to change; `undefined` values are skipped.
 * @returns The updated store, the unchanged store when nothing applicable was
 *   provided, or `undefined` if the id is unknown.
 */
export function updateStore(id: number, input: Partial<CreateStoreInput>): Store | undefined {
  const existing = getStoreById(id);
  if (!existing) return undefined;

  const db = getDatabase();
  const fields: string[] = [];
  const values: any[] = [];

  for (const [key, value] of Object.entries(input ?? {})) {
    if (value === undefined || !UPDATABLE_STORE_COLUMNS.has(key)) continue;
    fields.push(`${key} = ?`);
    values.push(value);
  }

  if (fields.length === 0) return existing;

  fields.push("updated_at = datetime('now')");
  values.push(id);

  db.run(`UPDATE stores SET ${fields.join(', ')} WHERE id = ?`, values);
  saveDatabase();
  return getStoreById(id);
}
|
||||
|
||||
/**
 * Flips the `enabled` flag of a store (1 becomes 0, anything else becomes 1),
 * refreshes `updated_at` and saves the database.
 *
 * @returns The store after the change, or `undefined` if the id is unknown.
 */
export function toggleStoreEnabled(id: number): Store | undefined {
  const sql = "UPDATE stores SET enabled = CASE WHEN enabled = 1 THEN 0 ELSE 1 END, updated_at = datetime('now') WHERE id = ?";
  getDatabase().run(sql, [id]);
  saveDatabase();
  return getStoreById(id);
}
|
||||
|
||||
/**
 * Deletes a store by id, saving the database only when a row was removed.
 *
 * @returns `true` if a row was deleted.
 */
export function deleteStore(id: number): boolean {
  const db = getDatabase();
  db.run('DELETE FROM stores WHERE id = ?', [id]);

  const removed = db.getRowsModified() > 0;
  if (removed) {
    saveDatabase();
  }
  return removed;
}
|
||||
98
src/server/routes/categories.ts
Normal file
98
src/server/routes/categories.ts
Normal file
@@ -0,0 +1,98 @@
|
||||
import type { FastifyPluginAsync } from 'fastify';
|
||||
import {
|
||||
getAllCategories, createCategory, updateCategory, deleteCategory,
|
||||
getAllGroups, createGroup, updateGroup, deleteGroup, setGroupMembers,
|
||||
} from '../models/category.js';
|
||||
|
||||
/**
 * Routes for categories (`/categories`) and store groups (`/groups`).
 *
 * Name conflicts are reported as 409 on both create and update, unknown ids
 * as 404, and successful deletes as 204.
 */
export const categoryRoutes: FastifyPluginAsync = async (app) => {
  /** True when `err` carries a message containing `fragment`. */
  const messageIncludes = (err: unknown, fragment: string): boolean =>
    err instanceof Error && err.message.includes(fragment);
  const isUniqueViolation = (err: unknown): boolean => messageIncludes(err, 'UNIQUE constraint failed');
  const isForeignKeyViolation = (err: unknown): boolean => messageIncludes(err, 'FOREIGN KEY constraint failed');

  // Categories
  app.get('/categories', async () => getAllCategories());

  app.post<{ Body: { name: string; color?: string } }>('/categories', {
    schema: {
      body: {
        type: 'object',
        required: ['name'],
        properties: {
          name: { type: 'string', minLength: 1 },
          color: { type: 'string' },
        },
      },
    },
  }, async (request, reply) => {
    try {
      const category = createCategory(request.body.name, request.body.color);
      return reply.code(201).send(category);
    } catch (err: unknown) {
      if (isUniqueViolation(err)) {
        return reply.code(409).send({ error: 'Category already exists' });
      }
      throw err;
    }
  });

  app.put<{ Params: { id: string }; Body: { name?: string; color?: string; sort_order?: number } }>('/categories/:id', async (request, reply) => {
    try {
      // A request without a body is treated as "no changes" instead of crashing.
      const result = updateCategory(Number(request.params.id), request.body ?? {});
      if (!result) return reply.code(404).send({ error: 'Category not found' });
      return result;
    } catch (err: unknown) {
      // Renaming onto an existing name violates the UNIQUE constraint.
      if (isUniqueViolation(err)) {
        return reply.code(409).send({ error: 'Category already exists' });
      }
      throw err;
    }
  });

  app.delete<{ Params: { id: string } }>('/categories/:id', async (request, reply) => {
    const deleted = deleteCategory(Number(request.params.id));
    if (!deleted) return reply.code(404).send({ error: 'Category not found' });
    return reply.code(204).send();
  });

  // Groups
  app.get('/groups', async () => getAllGroups());

  app.post<{ Body: { name: string; description?: string } }>('/groups', {
    schema: {
      body: {
        type: 'object',
        required: ['name'],
        properties: {
          name: { type: 'string', minLength: 1 },
          description: { type: 'string' },
        },
      },
    },
  }, async (request, reply) => {
    try {
      const group = createGroup(request.body.name, request.body.description);
      return reply.code(201).send(group);
    } catch (err: unknown) {
      if (isUniqueViolation(err)) {
        return reply.code(409).send({ error: 'Group already exists' });
      }
      throw err;
    }
  });

  app.put<{ Params: { id: string }; Body: { name?: string; description?: string } }>('/groups/:id', async (request, reply) => {
    try {
      // A request without a body is treated as "no changes" instead of crashing.
      const result = updateGroup(Number(request.params.id), request.body ?? {});
      if (!result) return reply.code(404).send({ error: 'Group not found' });
      return result;
    } catch (err: unknown) {
      // Renaming onto an existing name violates the UNIQUE constraint.
      if (isUniqueViolation(err)) {
        return reply.code(409).send({ error: 'Group already exists' });
      }
      throw err;
    }
  });

  app.put<{ Params: { id: string }; Body: { store_ids: number[] } }>('/groups/:id/members', {
    schema: {
      body: {
        type: 'object',
        required: ['store_ids'],
        properties: {
          store_ids: { type: 'array', items: { type: 'number' } },
        },
      },
    },
  }, async (request, reply) => {
    try {
      setGroupMembers(Number(request.params.id), request.body.store_ids);
      return { success: true };
    } catch (err: unknown) {
      // Raised when the group or one of the stores does not exist.
      if (isForeignKeyViolation(err)) {
        return reply.code(404).send({ error: 'Group or store not found' });
      }
      throw err;
    }
  });

  app.delete<{ Params: { id: string } }>('/groups/:id', async (request, reply) => {
    const deleted = deleteGroup(Number(request.params.id));
    if (!deleted) return reply.code(404).send({ error: 'Group not found' });
    return reply.code(204).send();
  });
};
|
||||
37
src/server/routes/health.ts
Normal file
37
src/server/routes/health.ts
Normal file
@@ -0,0 +1,37 @@
|
||||
import type { FastifyPluginAsync } from 'fastify';
|
||||
import fs from 'node:fs';
|
||||
import { config } from '../config.js';
|
||||
import { getDatabase } from '../db/connection.js';
|
||||
|
||||
/**
 * Runs a query and returns its first row as a plain object.
 *
 * @param sql - SQL text, optionally with `?` placeholders.
 * @param params - Values bound to the placeholders, in order.
 * @returns The first row, or `undefined` when there are no rows.
 */
function queryOne(sql: string, params: any[] = []): any | undefined {
  const stmt = getDatabase().prepare(sql);
  try {
    if (params.length) stmt.bind(params);
    return stmt.step() ? stmt.getAsObject() : undefined;
  } finally {
    // Free the statement even when bind/step throws, so it does not leak.
    stmt.free();
  }
}
|
||||
|
||||
/**
 * Registers `GET /health`, which reports store counts (total and enabled) and
 * the size of the database file in bytes and in MB rounded to two decimals.
 */
export const healthRoutes: FastifyPluginAsync = async (app) => {
  app.get('/health', async () => {
    const total = queryOne('SELECT COUNT(*) as count FROM stores')?.count ?? 0;
    const enabled = queryOne('SELECT COUNT(*) as count FROM stores WHERE enabled = 1')?.count ?? 0;

    let sizeBytes = 0;
    try {
      sizeBytes = fs.statSync(config.databasePath).size;
    } catch {
      // DB file may not exist yet
    }
    const sizeMB = Math.round(sizeBytes / 1024 / 1024 * 100) / 100;

    return {
      status: 'ok',
      stores: { total, enabled },
      database: { sizeBytes, sizeMB },
    };
  });
};
|
||||
39
src/server/routes/search.ts
Normal file
39
src/server/routes/search.ts
Normal file
@@ -0,0 +1,39 @@
|
||||
import type { FastifyPluginAsync } from 'fastify';
|
||||
import { search } from '../scraper/engine.js';
|
||||
|
||||
/**
 * Registers `GET /search`.
 *
 * Query parameters:
 * - `q` (required): search text.
 * - `stores`: comma-separated store ids.
 * - `category`: category id.
 * - `group`: store group id.
 *
 * The parsed values are passed on to `search`.
 */
export const searchRoutes: FastifyPluginAsync = async (app) => {
  app.get<{
    Querystring: {
      q: string;
      stores?: string;
      category?: string;
      group?: string;
    };
  }>('/search', {
    schema: {
      querystring: {
        type: 'object',
        required: ['q'],
        properties: {
          q: { type: 'string', minLength: 1 },
          stores: { type: 'string' },
          category: { type: 'string' },
          group: { type: 'string' },
        },
      },
    },
  }, async (request) => {
    const { q, stores, category, group } = request.query;

    // Drop empty segments before converting: Number('') is 0, so "1,,2" would
    // otherwise yield a bogus id 0. Non-integer segments are discarded too.
    const storeIds = stores
      ? stores
          .split(',')
          .map((part) => part.trim())
          .filter((part) => part !== '')
          .map(Number)
          .filter((n) => Number.isInteger(n))
      : undefined;

    return search({
      query: q,
      storeIds,
      categoryId: category ? Number(category) : undefined,
      groupId: group ? Number(group) : undefined,
    });
  });
};
|
||||
73
src/server/routes/stores.ts
Normal file
73
src/server/routes/stores.ts
Normal file
@@ -0,0 +1,73 @@
|
||||
import type { FastifyPluginAsync } from 'fastify';
|
||||
import { getAllStores, getStoreById, createStore, updateStore, toggleStoreEnabled, deleteStore } from '../models/store.js';
|
||||
import { getLogsByStore, getStoreHealth } from '../models/scrape-log.js';
|
||||
|
||||
/**
 * CRUD routes for stores under `/stores`.
 *
 * `GET /stores/:id` additionally returns the store's health summary and its
 * ten most recent scrape logs. Slug conflicts are reported as 409 on both
 * create and update, unknown ids as 404.
 */
export const storeRoutes: FastifyPluginAsync = async (app) => {
  /** True when `err` is a UNIQUE constraint failure reported by the database. */
  const isUniqueViolation = (err: unknown): boolean =>
    err instanceof Error && err.message.includes('UNIQUE constraint failed');

  app.get('/stores', async () => {
    return getAllStores();
  });

  app.get<{ Params: { id: string } }>('/stores/:id', async (request, reply) => {
    const store = getStoreById(Number(request.params.id));
    if (!store) return reply.code(404).send({ error: 'Store not found' });

    const health = getStoreHealth(store.id);
    const recentLogs = getLogsByStore(store.id, 10);
    return { ...store, health, recentLogs };
  });

  app.post<{ Body: any }>('/stores', {
    schema: {
      body: {
        type: 'object',
        required: ['name', 'base_url', 'search_url', 'sel_container', 'sel_name', 'sel_price', 'sel_link'],
        properties: {
          name: { type: 'string', minLength: 1 },
          slug: { type: 'string' },
          base_url: { type: 'string', minLength: 1 },
          search_url: { type: 'string', minLength: 1 },
          sel_container: { type: 'string', minLength: 1 },
          sel_name: { type: 'string', minLength: 1 },
          sel_price: { type: 'string', minLength: 1 },
          sel_link: { type: 'string', minLength: 1 },
          sel_image: { type: 'string' },
          rate_limit: { type: 'number' },
          rate_window: { type: 'number' },
          proxy_url: { type: 'string' },
          user_agent: { type: 'string' },
          headers_json: { type: 'string' },
          currency: { type: 'string' },
          category_id: { type: 'number' },
        },
      },
    },
  }, async (request, reply) => {
    try {
      const store = createStore(request.body);
      return reply.code(201).send(store);
    } catch (err: unknown) {
      if (isUniqueViolation(err)) {
        return reply.code(409).send({ error: 'A store with this slug already exists' });
      }
      throw err;
    }
  });

  app.put<{ Params: { id: string }; Body: any }>('/stores/:id', {
    // Require a JSON object so a missing or non-object body is rejected with
    // a 400 instead of failing inside the model.
    schema: {
      body: { type: 'object' },
    },
  }, async (request, reply) => {
    try {
      const store = updateStore(Number(request.params.id), request.body);
      if (!store) return reply.code(404).send({ error: 'Store not found' });
      return store;
    } catch (err: unknown) {
      // Changing the slug to one that is already taken violates the UNIQUE constraint.
      if (isUniqueViolation(err)) {
        return reply.code(409).send({ error: 'A store with this slug already exists' });
      }
      throw err;
    }
  });

  app.patch<{ Params: { id: string } }>('/stores/:id/toggle', async (request, reply) => {
    const store = toggleStoreEnabled(Number(request.params.id));
    if (!store) return reply.code(404).send({ error: 'Store not found' });
    return store;
  });

  app.delete<{ Params: { id: string } }>('/stores/:id', async (request, reply) => {
    const deleted = deleteStore(Number(request.params.id));
    if (!deleted) return reply.code(404).send({ error: 'Store not found' });
    return reply.code(204).send();
  });
};
|
||||
66
src/server/routes/test.ts
Normal file
66
src/server/routes/test.ts
Normal file
@@ -0,0 +1,66 @@
|
||||
import type { FastifyPluginAsync } from 'fastify';
|
||||
import { getStoreById } from '../models/store.js';
|
||||
import { logScrape, getLogsByStore, getStoreHealth } from '../models/scrape-log.js';
|
||||
import { scrapeStore } from '../scraper/http-scraper.js';
|
||||
import { normalizeResult } from '../scraper/result-parser.js';
|
||||
|
||||
/**
 * Fastify plugin providing `POST /stores/:id/test`, a diagnostic endpoint that
 * runs a single scrape against one store and reports what was found.
 *
 * The response always contains `success`, `searchUrl`, `duration`, the store's
 * health summary and its 10 most recent scrape logs. A successful scrape adds
 * the HTTP status, raw HTML length and a 5000-character preview, the raw
 * scraped items and the normalized products. A failed scrape adds `error`.
 * Both outcomes are recorded through `logScrape()`.
 */
export const testRoutes: FastifyPluginAsync = async (app) => {
  const bodySchema = {
    type: 'object',
    required: ['query'],
    properties: {
      query: { type: 'string', minLength: 1 },
    },
  };

  app.post<{ Params: { id: string }; Body: { query: string } }>(
    '/stores/:id/test',
    { schema: { body: bodySchema } },
    async (request, reply) => {
      const store = getStoreById(Number(request.params.id));
      if (!store) {
        return reply.code(404).send({ error: 'Store not found' });
      }

      const { query } = request.body;
      const searchUrl = store.search_url.replace('{query}', encodeURIComponent(query));
      const startTime = Date.now();
      const elapsed = (): number => Date.now() - startTime;

      // Evaluated after logScrape() so the current attempt is reflected in the output.
      const diagnostics = () => ({
        health: getStoreHealth(store.id),
        recentLogs: getLogsByStore(store.id, 10),
      });

      try {
        const result = await scrapeStore(store, searchUrl);
        const duration = elapsed();

        const parsedProducts = result.items.map((item) =>
          normalizeResult(item, store.id, store.name, store.base_url, store.currency),
        );

        logScrape(store.id, query, true, parsedProducts.length, duration);

        return {
          success: true,
          searchUrl,
          statusCode: result.statusCode,
          duration,
          rawHtmlLength: result.html.length,
          rawHtmlPreview: result.html.substring(0, 5000),
          itemsFound: result.items.length,
          rawItems: result.items,
          parsedProducts,
          ...diagnostics(),
        };
      } catch (err) {
        const duration = elapsed();
        const error = err instanceof Error ? err.message : String(err);
        logScrape(store.id, query, false, 0, duration, error);

        return {
          success: false,
          searchUrl,
          duration,
          error,
          ...diagnostics(),
        };
      }
    },
  );
};
|
||||
120
src/server/scraper/engine.ts
Normal file
120
src/server/scraper/engine.ts
Normal file
@@ -0,0 +1,120 @@
|
||||
import pLimit from 'p-limit';
|
||||
import type { Store } from '../models/store.js';
|
||||
import { getEnabledStores, getStoresByCategory, getStoresByGroup, getStoresByIds } from '../models/store.js';
|
||||
import { logScrape } from '../models/scrape-log.js';
|
||||
import { scrapeStore } from './http-scraper.js';
|
||||
import { normalizeResult, type Product } from './result-parser.js';
|
||||
import { getLimiter } from './rate-limiter.js';
|
||||
|
||||
/** Maximum number of store scrapes allowed to run at the same time within one search. */
const MAX_CONCURRENCY = 5;

/** Overall time budget for a whole search, in milliseconds. */
const SEARCH_TIMEOUT = 60 * 1000;
|
||||
|
||||
/**
 * Input to `search()`.
 *
 * Store selection uses the first filter that is set, in this order:
 * `storeIds` (non-empty), `groupId`, `categoryId`. With no filter, all enabled
 * stores are searched.
 */
export interface SearchOptions {
  /** Search term; URL-encoded and substituted for `{query}` in each store's search URL. */
  query: string;
  /** Explicit list of store ids to search. */
  storeIds?: number[];
  /** Restrict the search to stores of this category. */
  categoryId?: number;
  /** Restrict the search to stores of this group. */
  groupId?: number;
}
|
||||
|
||||
/** Output of `search()`: the merged product list plus bookkeeping about the run. */
export interface SearchResult {
  /** Products from all scraped stores, ordered by ascending price with unpriced items last. */
  results: Product[];
  meta: {
    /** The query string the search was run with. */
    query: string;
    /** Wall-clock time of the whole search in milliseconds. */
    duration: number;
    /** Number of stores selected for this search. */
    storeCount: number;
    /** Length of `results`. */
    totalResults: number;
    /** One entry per failed store; an overall timeout is reported with `storeId: 0` and `storeName: 'System'`. */
    errors: { storeId: number; storeName: string; error: string }[];
  };
}
|
||||
|
||||
/**
 * Runs `options.query` against the selected stores in parallel and merges the
 * results into one list sorted by price.
 *
 * Store selection uses the first filter that is set: `storeIds`, then
 * `groupId`, then `categoryId`; otherwise all enabled stores are used.
 *
 * At most `MAX_CONCURRENCY` stores are scraped at once, and each store is also
 * throttled by its own rate limiter. A failing store never fails the search:
 * the failure is logged through `logScrape()` and reported in `meta.errors`.
 *
 * If the search exceeds `SEARCH_TIMEOUT`, the products gathered so far are
 * returned together with a `System` "Search timed out" error entry. Stores that
 * finish after that point are still logged but no longer change the result.
 *
 * @param options query and optional store filters
 * @returns merged, price-sorted products and metadata about the run
 */
export async function search(options: SearchOptions): Promise<SearchResult> {
  const startTime = Date.now();
  const { query } = options;

  // Determine which stores to scrape
  let stores: Store[];
  if (options.storeIds?.length) {
    stores = getStoresByIds(options.storeIds);
  } else if (options.groupId) {
    stores = getStoresByGroup(options.groupId);
  } else if (options.categoryId) {
    stores = getStoresByCategory(options.categoryId);
  } else {
    stores = getEnabledStores();
  }

  if (stores.length === 0) {
    return {
      results: [],
      meta: { query, duration: Date.now() - startTime, storeCount: 0, totalResults: 0, errors: [] },
    };
  }

  const limit = pLimit(MAX_CONCURRENCY);
  const errors: SearchResult['meta']['errors'] = [];
  const allProducts: Product[] = [];

  // Flipped to false once the search has settled, so that tasks still in flight
  // after a timeout cannot mutate arrays that were already handed to the caller.
  let collecting = true;

  const scrapePromises = stores.map((store) =>
    limit(async () => {
      const searchUrl = store.search_url.replace('{query}', encodeURIComponent(query));
      const storeStart = Date.now();

      // Spacing between requests to this store. A zero, negative or missing
      // rate configuration would yield Infinity/NaN, so fall back to no spacing.
      const spacing = Math.floor(store.rate_window / store.rate_limit);
      const minTime = Number.isFinite(spacing) && spacing > 0 ? spacing : 0;
      const rateLimiter = getLimiter(store.id, 1, minTime);

      try {
        const result = await rateLimiter.schedule(() => scrapeStore(store, searchUrl));
        const duration = Date.now() - storeStart;

        const products = result.items.map((item) =>
          normalizeResult(item, store.id, store.name, store.base_url, store.currency)
        );

        logScrape(store.id, query, true, products.length, duration);
        // Collect per store as it completes, so a later timeout keeps these results.
        if (collecting) allProducts.push(...products);
      } catch (err) {
        const duration = Date.now() - storeStart;
        const errorMessage = err instanceof Error ? err.message : String(err);
        logScrape(store.id, query, false, 0, duration, errorMessage);
        if (collecting) {
          errors.push({ storeId: store.id, storeName: store.name, error: errorMessage });
        }
      }
    })
  );

  // Overall timeout; the timer is cleared below so it does not outlive the search.
  let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
  const timedOut = new Promise<'timeout'>((resolve) => {
    timeoutHandle = setTimeout(() => resolve('timeout'), SEARCH_TIMEOUT);
  });

  try {
    const outcome = await Promise.race([
      Promise.all(scrapePromises).then(() => 'done' as const),
      timedOut,
    ]);

    if (outcome === 'timeout') {
      // Keep whatever the finished stores contributed
      errors.push({ storeId: 0, storeName: 'System', error: 'Search timed out' });
    }
  } catch (err) {
    // A task rejected outside its own try/catch (e.g. logging failed); report the real cause.
    const errorMessage = err instanceof Error ? err.message : String(err);
    errors.push({ storeId: 0, storeName: 'System', error: errorMessage });
  } finally {
    collecting = false;
    clearTimeout(timeoutHandle);
  }

  // Sort by price ascending, nulls last
  allProducts.sort((a, b) => {
    if (a.price === null && b.price === null) return 0;
    if (a.price === null) return 1;
    if (b.price === null) return -1;
    return a.price - b.price;
  });

  return {
    results: allProducts,
    meta: {
      query,
      duration: Date.now() - startTime,
      storeCount: stores.length,
      totalResults: allProducts.length,
      errors,
    },
  };
}
|
||||
64
src/server/scraper/http-scraper.ts
Normal file
64
src/server/scraper/http-scraper.ts
Normal file
@@ -0,0 +1,64 @@
|
||||
import * as cheerio from 'cheerio';
|
||||
import type { Store } from '../models/store.js';
|
||||
import type { ScrapedItem } from './result-parser.js';
|
||||
|
||||
/** User-Agent header sent when a store does not configure its own `user_agent`. */
const DEFAULT_USER_AGENT =
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';

/** Time in milliseconds after which an in-flight scrape request is aborted. */
const DEFAULT_TIMEOUT = 10 * 1000;
|
||||
|
||||
/** Outcome of fetching and parsing one store search page. */
export interface ScrapeResult {
  /** Entries extracted from the page; only those with both a name and a price text are included. */
  items: ScrapedItem[];
  /** Full response body as text. */
  html: string;
  /** HTTP status code of the response. */
  statusCode: number;
}
|
||||
|
||||
/**
 * Fetches a store's search page and extracts product entries using the store's
 * CSS selectors.
 *
 * Request headers start from browser-like defaults (the store's `user_agent`
 * overrides the default one) and are extended with the entries of
 * `store.headers_json` when it is a JSON object. Invalid or non-object JSON is
 * ignored on purpose, so a bad header config does not block scraping.
 *
 * The HTTP status is not checked here: the body of an error page is parsed like
 * any other, and the status is returned so callers can inspect it.
 *
 * @param store     store configuration (selectors, user agent, extra headers)
 * @param searchUrl fully built search URL to request
 * @returns extracted items, the raw HTML and the HTTP status code
 * @throws Error when the request fails or exceeds `DEFAULT_TIMEOUT`
 */
export async function scrapeStore(store: Store, searchUrl: string): Promise<ScrapeResult> {
  const headers: Record<string, string> = {
    'User-Agent': store.user_agent || DEFAULT_USER_AGENT,
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
  };

  if (store.headers_json) {
    try {
      const extra: unknown = JSON.parse(store.headers_json);
      // Only a plain JSON object is a header map. Arrays and strings would
      // otherwise be copied in as headers named "0", "1", ...
      if (extra !== null && typeof extra === 'object' && !Array.isArray(extra)) {
        for (const [key, value] of Object.entries(extra)) {
          if (typeof value === 'string') {
            headers[key] = value;
          } else if (typeof value === 'number' || typeof value === 'boolean') {
            headers[key] = String(value);
          }
          // null and nested values are not meaningful header values; skip them
        }
      }
    } catch {
      // Ignore invalid headers JSON
    }
  }

  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), DEFAULT_TIMEOUT);

  try {
    const response = await fetch(searchUrl, {
      headers,
      signal: controller.signal,
      redirect: 'follow',
    });

    // Reading the body is still covered by the abort timer
    const html = await response.text();
    const $ = cheerio.load(html);

    const items: ScrapedItem[] = [];
    const containers = $(store.sel_container);

    containers.each((_, el) => {
      const $el = $(el);
      const name = $el.find(store.sel_name).first().text().trim();
      const priceText = $el.find(store.sel_price).first().text().trim();
      const link = $el.find(store.sel_link).first().attr('href') || '';
      // Fall back to data-src when src is absent or empty
      const image = store.sel_image
        ? $el.find(store.sel_image).first().attr('src') || $el.find(store.sel_image).first().attr('data-src') || null
        : null;

      if (name && priceText) {
        items.push({ name, priceText, link, image });
      }
    });

    return { items, html, statusCode: response.status };
  } catch (err) {
    // The only abort source is our own timer, so report it as a timeout
    // rather than a generic "operation was aborted" message.
    if (controller.signal.aborted) {
      throw new Error(`Request timed out after ${DEFAULT_TIMEOUT}ms`);
    }
    throw err;
  } finally {
    clearTimeout(timeout);
  }
}
|
||||
22
src/server/scraper/rate-limiter.ts
Normal file
22
src/server/scraper/rate-limiter.ts
Normal file
@@ -0,0 +1,22 @@
|
||||
import Bottleneck from 'bottleneck';
|
||||
|
||||
/** Cache of rate limiters, keyed by store id. */
const limiters: Map<number, Bottleneck> = new Map();
|
||||
|
||||
/**
 * Returns the rate limiter for a store, creating and caching it on first use.
 *
 * When a limiter already exists, the supplied settings are applied to it, so a
 * changed store rate configuration takes effect without restarting the process.
 * Previously the cached limiter kept its first settings.
 *
 * @param storeId       id of the store the limiter belongs to
 * @param maxConcurrent maximum number of jobs the limiter runs at once
 * @param minTime       minimum time in milliseconds between job starts
 * @returns the limiter cached for `storeId`
 */
export function getLimiter(storeId: number, maxConcurrent: number, minTime: number): Bottleneck {
  const cached = limiters.get(storeId);
  if (cached) {
    // Keep the instance (and its queue) but refresh its settings.
    void cached.updateSettings({ maxConcurrent, minTime });
    return cached;
  }

  const created = new Bottleneck({ maxConcurrent, minTime });
  limiters.set(storeId, created);
  return created;
}
|
||||
|
||||
/** Calls `disconnect()` on every cached limiter, then empties the cache. */
export function clearLimiters(): void {
  limiters.forEach((limiter) => {
    limiter.disconnect();
  });
  limiters.clear();
}
|
||||
83
src/server/scraper/result-parser.ts
Normal file
83
src/server/scraper/result-parser.ts
Normal file
@@ -0,0 +1,83 @@
|
||||
/** One product entry as extracted from a store page, before any normalization. */
export interface ScrapedItem {
  /** Product name text. */
  name: string;
  /** Price exactly as displayed on the page, e.g. with currency symbol. */
  priceText: string;
  /** Link target as found in the markup; may be relative or empty. */
  link: string;
  /** Image location as found in the markup, or `null` when none was extracted. */
  image: string | null;
}
|
||||
|
||||
/** A scraped item after normalization, attributed to the store it came from. */
export interface Product {
  /** Trimmed product name. */
  name: string;
  /** Numeric price parsed from `priceText`, or `null` when it could not be parsed. */
  price: number | null;
  /** Trimmed price text as shown by the store. */
  priceText: string;
  /** Currency passed in for the store; it is not derived from `priceText`. */
  currency: string;
  /** Product link resolved against the store's base URL. */
  url: string;
  /** Image URL resolved against the store's base URL, or `null` when the item has none. */
  image: string | null;
  /** Name of the originating store. */
  storeName: string;
  /** Id of the originating store. */
  storeId: number;
}
|
||||
|
||||
/**
 * Parses a displayed price string into a number rounded to two decimals.
 *
 * Handling rules:
 * - `"free"` / `"gratis"` (any case) → `0`
 * - ranges such as `"$12 - $15"` → the lower bound
 * - only the first numeric token is used, so a stray abbreviation dot
 *   (`"Rs. 1,299"`) or later digits (`"12,99 per 2"`) do not corrupt the number
 * - both separators present → the one appearing last is the decimal separator
 *   (`"1.299,00"` and `"1,299.00"` are both 1299)
 * - a single comma followed by exactly three digits is a thousands separator
 *   (`"1,299"` → 1299); any other single comma is a decimal comma (`"12,99"`)
 * - a single period is a decimal point (`"12.99"`)
 * - the same separator repeated is a thousands separator (`"1.299.000"`)
 * - spaces between digits are treated as grouping (`"1 299,00"`)
 *
 * @param text raw price text scraped from a page
 * @returns the parsed price, or `null` when no number can be extracted
 */
export function parsePrice(text: string): number | null {
  if (!text) return null;

  const cleaned = text.trim().toLowerCase();
  if (cleaned === 'free' || cleaned === 'gratis') return 0;

  // Handle range prices like "$12 - $15" — take the lower bound
  const lowerBound = cleaned.split(/\s*[-–]\s*/)[0] ?? '';

  // First numeric token: optional leading separator (".99"), digits, separators,
  // and whitespace only when it sits between digits ("1 299").
  const token = lowerBound.match(/[.,]?\d(?:[\d.,]|\s(?=\d))*/)?.[0];
  if (!token) return null;

  // Drop grouping spaces and trailing punctuation ("12.99." at the end of a sentence)
  const digits = token.replace(/\s/g, '').replace(/[.,]+$/, '');

  const commaCount = digits.split(',').length - 1;
  const periodCount = digits.split('.').length - 1;

  let decimalSeparator: ',' | '.' | null;
  if (commaCount > 0 && periodCount > 0) {
    // Mixed separators: whichever comes last is the decimal separator
    decimalSeparator = digits.lastIndexOf(',') > digits.lastIndexOf('.') ? ',' : '.';
  } else if (commaCount === 1) {
    // "1,299" is a thousands group; "12,99" and "0,999" are decimals
    decimalSeparator = /^[1-9]\d{0,2},\d{3}$/.test(digits) ? null : ',';
  } else if (periodCount === 1) {
    decimalSeparator = '.';
  } else {
    // No separator at all, or one separator type repeated (thousands grouping)
    decimalSeparator = null;
  }

  let integerPart = digits;
  let fractionPart = '';
  if (decimalSeparator !== null) {
    const at = digits.lastIndexOf(decimalSeparator);
    integerPart = digits.slice(0, at);
    fractionPart = digits.slice(at + 1);
  }

  const normalized = `${integerPart.replace(/[.,]/g, '') || '0'}.${fractionPart || '0'}`;
  const parsed = parseFloat(normalized);
  return Number.isNaN(parsed) ? null : Math.round(parsed * 100) / 100;
}
|
||||
|
||||
/**
 * Turns a link scraped from a store page into an absolute URL.
 *
 * - empty `href` → `baseUrl`
 * - `href` already starting with `http://` or `https://` → returned unchanged
 * - otherwise `href` is resolved against `baseUrl` (relative paths, `//host/...`)
 * - hrefs resolving to a scheme other than `http:`, `https:` or `data:`
 *   (e.g. `javascript:void(0)`, `mailto:`) → `baseUrl`. Scraped markup is
 *   untrusted and such values are not usable product or image locations.
 *   `data:` is kept because inline images use it.
 * - anything that cannot be parsed → `baseUrl`
 *
 * @param href    link or image location taken from the page
 * @param baseUrl the store's base URL, used for resolution and as fallback
 * @returns an absolute URL, or `baseUrl` when `href` is unusable
 */
export function normalizeUrl(href: string, baseUrl: string): string {
  if (!href) return baseUrl;

  // Already absolute
  if (href.startsWith('http://') || href.startsWith('https://')) {
    return href;
  }

  try {
    const resolved = new URL(href, baseUrl);
    // The URL parser lower-cases the scheme, so "JavaScript:" is caught as well
    const allowed = ['http:', 'https:', 'data:'].includes(resolved.protocol);
    return allowed ? resolved.href : baseUrl;
  } catch {
    return baseUrl;
  }
}
|
||||
|
||||
/**
 * Converts a raw scraped item into a `Product` attributed to its store.
 *
 * Name and price text are trimmed, the price text is parsed with `parsePrice`,
 * and the link and image are resolved against `baseUrl` with `normalizeUrl`.
 * An item without an image keeps `image: null`.
 *
 * @param raw       item as extracted from the page
 * @param storeId   id of the store the item came from
 * @param storeName display name of that store
 * @param baseUrl   store base URL used to resolve relative links
 * @param currency  currency to attach to the product
 */
export function normalizeResult(raw: ScrapedItem, storeId: number, storeName: string, baseUrl: string, currency: string): Product {
  const { name, priceText, link, image } = raw;

  const price = parsePrice(priceText);
  const url = normalizeUrl(link, baseUrl);
  const resolvedImage = image ? normalizeUrl(image, baseUrl) : null;

  return {
    name: name.trim(),
    price,
    priceText: priceText.trim(),
    currency,
    url,
    image: resolvedImage,
    storeName,
    storeId,
  };
}
|
||||
Reference in New Issue
Block a user