Initial commit: Price Hunter — self-hosted price comparison engine

Complete application scaffolding with:
- Backend: Node.js + Fastify + sql.js (SQLite)
- Frontend: SvelteKit + Tailwind CSS
- Scraper engine with parallel fan-out, rate limiting, cheerio-based parsing
- Store management with CSS selector config and per-store test pages
- Docker setup for single-command deployment

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
mariosemes
2026-03-26 20:54:52 +01:00
commit e0f67d0835
47 changed files with 9181 additions and 0 deletions

8
src/server/config.ts Normal file
View File

@@ -0,0 +1,8 @@
import 'dotenv/config';
/**
 * Runtime configuration resolved once, at module load, from environment
 * variables. Each setting falls back to a built-in default when the variable
 * is unset or empty.
 */
export const config = (() => {
  const { PORT, HOST, DATABASE_PATH, NODE_ENV } = process.env;
  return {
    // TCP port, parsed as a base-10 integer.
    port: parseInt(PORT || '3000', 10),
    // Interface address to bind to.
    host: HOST || '0.0.0.0',
    // Location of the persisted database file.
    databasePath: DATABASE_PATH || './data/pricehunter.db',
    // True only when NODE_ENV is exactly "production".
    isProduction: NODE_ENV === 'production',
  };
})();

View File

@@ -0,0 +1,60 @@
import initSqlJs, { type Database } from 'sql.js';
import fs from 'node:fs';
import path from 'node:path';
import { config } from '../config.js';
/** Module-level database handle; assigned by initDatabase(). */
let db: Database;

/**
 * Creates the shared sql.js database on first call and returns it; later
 * calls return the same instance.
 *
 * The parent directory of `config.databasePath` is created when missing. If
 * the database file already exists its contents are loaded, otherwise an
 * empty database is created. Two PRAGMA statements are then issued.
 *
 * @returns The shared database instance.
 */
export async function initDatabase(): Promise<Database> {
  if (db) {
    return db;
  }

  const SQL = await initSqlJs();

  // Make sure the directory that will hold the database file exists.
  const parentDir = path.dirname(config.databasePath);
  if (!fs.existsSync(parentDir)) {
    fs.mkdirSync(parentDir, { recursive: true });
  }

  // Load the persisted file when there is one, otherwise start empty.
  db = fs.existsSync(config.databasePath)
    ? new SQL.Database(fs.readFileSync(config.databasePath))
    : new SQL.Database();

  for (const pragma of ['PRAGMA journal_mode = WAL', 'PRAGMA foreign_keys = ON']) {
    db.run(pragma);
  }

  return db;
}
/**
 * Returns the shared database handle.
 *
 * @throws Error when initDatabase() has not assigned the handle yet.
 */
export function getDatabase(): Database {
  if (db) {
    return db;
  }
  throw new Error('Database not initialized. Call initDatabase() first.');
}
/**
 * Serializes the in-memory database and persists it to `config.databasePath`.
 * Does nothing when the database has not been initialized.
 *
 * Two safeguards are applied:
 * - sql.js implements `export()` by closing and reopening the underlying
 *   connection, which drops per-connection settings. Foreign-key enforcement
 *   is therefore switched back on right after exporting, otherwise the
 *   ON DELETE CASCADE / SET NULL rules stop working after the first save.
 * - The snapshot is written to a sibling temp file and then renamed over the
 *   target, so an interrupted write cannot leave a truncated database file.
 *
 * @throws Any file-system error raised while writing or renaming.
 */
export function saveDatabase(): void {
  if (!db) return;

  const snapshot = Buffer.from(db.export());
  // Restore connection state before touching the disk, so it is restored even
  // if the write below fails.
  db.run('PRAGMA foreign_keys = ON');

  // Same directory as the target so the rename stays on one file system.
  const tempPath = `${config.databasePath}.tmp`;
  fs.writeFileSync(tempPath, snapshot);
  fs.renameSync(tempPath, config.databasePath);
}
// Auto-save periodically
/** Handle of the running auto-save timer, or null when auto-save is off. */
let saveInterval: ReturnType<typeof setInterval> | null = null;

/**
 * Starts a timer that calls saveDatabase() every `intervalMs` milliseconds.
 * Calling it again while a timer is already running is a no-op.
 *
 * A failing save does not stop the timer, but it is reported on stderr so
 * that persistent write problems (full disk, permissions) do not go unnoticed.
 *
 * @param intervalMs Delay between saves in milliseconds (default 5000).
 */
export function startAutoSave(intervalMs = 5000): void {
  if (saveInterval) return;
  saveInterval = setInterval(() => {
    try {
      saveDatabase();
    } catch (err) {
      // Best effort: keep the timer alive, but surface the failure.
      console.error('Auto-save failed:', err);
    }
  }, intervalMs);
}
/**
 * Cancels the auto-save timer started by startAutoSave(), if one is running,
 * and resets the stored handle so auto-save can be started again.
 */
export function stopAutoSave(): void {
  if (!saveInterval) {
    return;
  }
  clearInterval(saveInterval);
  saveInterval = null;
}

51
src/server/db/migrate.ts Normal file
View File

@@ -0,0 +1,51 @@
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { getDatabase, saveDatabase } from './connection.js';
const __dirname = path.dirname(fileURLToPath(import.meta.url));

/**
 * Applies every `.sql` file from the migrations directory that is not yet
 * recorded in the `_migrations` table, in sorted file-name order, then saves
 * the database.
 *
 * If no migrations directory is found a warning is logged and nothing else
 * happens (the `_migrations` table has still been created at that point).
 */
export function runMigrations(): void {
  const db = getDatabase();

  // Bookkeeping table: one row per migration file that has been applied.
  db.run(`
CREATE TABLE IF NOT EXISTS _migrations (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL UNIQUE,
applied_at TEXT DEFAULT (datetime('now'))
)
`);

  // First choice: a `migrations` folder next to this module. Fallback: the
  // source-tree location three levels up (presumably for compiled builds that
  // do not copy the .sql files alongside the JS; confirm against the build).
  const candidateDirs = [
    path.join(__dirname, 'migrations'),
    path.join(__dirname, '..', '..', '..', 'src', 'server', 'db', 'migrations'),
  ];
  const dir = candidateDirs.find((candidate) => fs.existsSync(candidate));
  if (!dir) {
    console.warn('No migrations directory found');
    return;
  }

  const sqlFiles = fs.readdirSync(dir).filter((entry) => entry.endsWith('.sql'));
  sqlFiles.sort();

  // Collect the names that were applied on previous runs.
  const alreadyApplied = new Set<string>();
  const lookup = db.prepare('SELECT name FROM _migrations');
  while (lookup.step()) {
    const row = lookup.getAsObject();
    alreadyApplied.add(row.name as string);
  }
  lookup.free();

  const pending = sqlFiles.filter((fileName) => !alreadyApplied.has(fileName));
  for (const file of pending) {
    const script = fs.readFileSync(path.join(dir, file), 'utf-8');
    db.run(script);
    db.run('INSERT INTO _migrations (name) VALUES (?)', [file]);
    console.log(`Migration applied: ${file}`);
  }

  saveDatabase();
}

View File

@@ -0,0 +1,57 @@
-- Initial schema: categories, stores, store groups and scrape logs.

-- Labels that stores can be assigned to; names are unique.
CREATE TABLE categories (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL UNIQUE,
color TEXT DEFAULT '#6B7280',
sort_order INTEGER DEFAULT 0
);
-- One row per store, including the URLs and sel_* selectors used for scraping.
CREATE TABLE stores (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
slug TEXT NOT NULL UNIQUE,
base_url TEXT NOT NULL,
search_url TEXT NOT NULL,
-- 1 = enabled, 0 = disabled
enabled INTEGER NOT NULL DEFAULT 1,
sel_container TEXT NOT NULL,
sel_name TEXT NOT NULL,
sel_price TEXT NOT NULL,
sel_link TEXT NOT NULL,
sel_image TEXT,
rate_limit INTEGER DEFAULT 2,
rate_window INTEGER DEFAULT 1000,
proxy_url TEXT,
user_agent TEXT,
headers_json TEXT,
currency TEXT DEFAULT 'EUR',
-- Deleting a category leaves its stores in place with category_id set to NULL.
category_id INTEGER REFERENCES categories(id) ON DELETE SET NULL,
created_at TEXT DEFAULT (datetime('now')),
updated_at TEXT DEFAULT (datetime('now'))
);
-- Named collections of stores; names are unique.
CREATE TABLE store_groups (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL UNIQUE,
description TEXT
);
-- Many-to-many link between groups and stores; a membership row is removed
-- when either side is deleted.
CREATE TABLE store_group_members (
group_id INTEGER REFERENCES store_groups(id) ON DELETE CASCADE,
store_id INTEGER REFERENCES stores(id) ON DELETE CASCADE,
PRIMARY KEY (group_id, store_id)
);
-- One row per scrape attempt; rows are removed together with their store.
CREATE TABLE scrape_logs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
store_id INTEGER REFERENCES stores(id) ON DELETE CASCADE,
query TEXT NOT NULL,
-- 1 = success, 0 = failure
success INTEGER NOT NULL,
result_count INTEGER DEFAULT 0,
duration_ms INTEGER,
error_message TEXT,
scraped_at TEXT DEFAULT (datetime('now'))
);
-- Indexes on the columns used for per-store log lookups and store filtering.
CREATE INDEX idx_scrape_logs_store_id ON scrape_logs(store_id);
CREATE INDEX idx_scrape_logs_scraped_at ON scrape_logs(scraped_at);
CREATE INDEX idx_stores_enabled ON stores(enabled);
CREATE INDEX idx_stores_category_id ON stores(category_id);

66
src/server/index.ts Normal file
View File

@@ -0,0 +1,66 @@
import Fastify from 'fastify';
import cors from '@fastify/cors';
import fastifyStatic from '@fastify/static';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { config } from './config.js';
import { initDatabase, startAutoSave, saveDatabase } from './db/connection.js';
import { runMigrations } from './db/migrate.js';
import { storeRoutes } from './routes/stores.js';
import { categoryRoutes } from './routes/categories.js';
import { searchRoutes } from './routes/search.js';
import { testRoutes } from './routes/test.js';
import { healthRoutes } from './routes/health.js';
// Server entry point: builds the Fastify app, mounts the API routes,
// initializes the database and starts listening.

// Directory of this module (ES modules have no built-in __dirname).
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Logger: 'info' level in production; otherwise 'debug' level routed through
// the pino-pretty transport.
const app = Fastify({
logger: {
level: config.isProduction ? 'info' : 'debug',
transport: config.isProduction ? undefined : { target: 'pino-pretty' },
},
});
await app.register(cors, { origin: true });
// API routes (every plugin is mounted under the /api prefix)
await app.register(storeRoutes, { prefix: '/api' });
await app.register(categoryRoutes, { prefix: '/api' });
await app.register(searchRoutes, { prefix: '/api' });
await app.register(testRoutes, { prefix: '/api' });
await app.register(healthRoutes, { prefix: '/api' });
// Serve static frontend in production
if (config.isProduction) {
// The built client is expected in ../client relative to this module.
const clientPath = path.join(__dirname, '..', 'client');
await app.register(fastifyStatic, {
root: clientPath,
wildcard: false,
});
// SPA fallback: serve index.html for all non-API routes;
// unknown /api URLs get a JSON 404 instead.
app.setNotFoundHandler((request, reply) => {
if (request.url.startsWith('/api')) {
reply.code(404).send({ error: 'Not found' });
} else {
reply.sendFile('index.html');
}
});
}
// Initialize database and run migrations, then start the periodic auto-save
// (default interval).
await initDatabase();
runMigrations();
startAutoSave();
// Save database on shutdown: persist once more, then exit with status 0.
process.on('SIGINT', () => { saveDatabase(); process.exit(0); });
process.on('SIGTERM', () => { saveDatabase(); process.exit(0); });
// Start listening; a startup failure is logged and ends the process with status 1.
try {
await app.listen({ port: config.port, host: config.host });
// NOTE(review): the message always prints "localhost", whatever config.host is.
app.log.info(`Price Hunter running at http://localhost:${config.port}`);
} catch (err) {
app.log.error(err);
process.exit(1);
}

View File

@@ -0,0 +1,139 @@
import { getDatabase, saveDatabase } from '../db/connection.js';
/** A row of the `categories` table as returned by the category queries. */
export interface Category {
id: number;
name: string;
// Display color; createCategory() stores '#6B7280' when none is supplied.
color: string;
// Primary sort key of getAllCategories() (ties are ordered by name).
sort_order: number;
}
/** A row of the `store_groups` table. */
export interface StoreGroup {
id: number;
name: string;
// Stored as NULL when no description was given at creation.
description: string | null;
}
/** A store group together with the ids of its member stores. */
export interface StoreGroupWithMembers extends StoreGroup {
// store_id values read from `store_group_members` for this group.
store_ids: number[];
}
/**
 * Runs a query and returns every result row as a plain object.
 *
 * @param sql    SQL text, optionally containing `?` placeholders.
 * @param params Values bound to the placeholders, in order.
 * @returns All rows produced by the statement (empty array when none).
 */
function queryAll(sql: string, params: any[] = []): any[] {
  const db = getDatabase();
  const stmt = db.prepare(sql);
  try {
    if (params.length) stmt.bind(params);
    const rows: any[] = [];
    while (stmt.step()) {
      rows.push(stmt.getAsObject());
    }
    return rows;
  } finally {
    // Release the prepared statement even when binding or stepping throws.
    stmt.free();
  }
}
/**
 * Runs a query via queryAll() and returns its first row.
 *
 * @returns The first row, or undefined when the query produced none.
 */
function queryOne(sql: string, params: any[] = []): any | undefined {
  const [firstRow] = queryAll(sql, params);
  return firstRow;
}
// Categories

/** Lists every category, ordered by sort_order and then by name. */
export function getAllCategories(): Category[] {
  const rows: Category[] = queryAll('SELECT * FROM categories ORDER BY sort_order, name');
  return rows;
}
/**
 * Looks up a single category.
 *
 * @returns The matching category, or undefined when the id is unknown.
 */
export function getCategoryById(id: number): Category | undefined {
  const match: Category | undefined = queryOne('SELECT * FROM categories WHERE id = ?', [id]);
  return match;
}
/**
 * Inserts a new category at the end of the current sort order and saves the
 * database.
 *
 * @param name  Category name.
 * @param color Display color; '#6B7280' is stored when omitted or empty.
 * @returns The freshly inserted row.
 */
export function createCategory(name: string, color?: string): Category {
  const db = getDatabase();

  // Next position: one past the current maximum, or 0 for the first category.
  const orderRow = queryOne('SELECT MAX(sort_order) as max_order FROM categories');
  const nextOrder = (orderRow?.max_order ?? -1) + 1;

  const insertParams = [name, color || '#6B7280', nextOrder];
  db.run('INSERT INTO categories (name, color, sort_order) VALUES (?, ?, ?)', insertParams);

  // The new row id is read before the database is saved, as in the original order.
  const idRow = queryOne('SELECT last_insert_rowid() as id');
  const newId = idRow?.id;
  saveDatabase();

  return getCategoryById(newId) as Category;
}
/**
 * Updates the supplied fields of a category and saves the database.
 * Properties that are undefined are left untouched; when nothing is supplied
 * the current row is returned without writing.
 *
 * @returns The row as stored after the update, or undefined when no category
 *          has this id.
 */
export function updateCategory(id: number, data: { name?: string; color?: string; sort_order?: number }): Category | undefined {
  const db = getDatabase();

  // Candidate assignments paired with their values; undefined means "not supplied".
  const candidates: Array<[string, unknown]> = [
    ['name = ?', data.name],
    ['color = ?', data.color],
    ['sort_order = ?', data.sort_order],
  ];
  const supplied = candidates.filter(([, value]) => value !== undefined);
  if (supplied.length === 0) {
    return getCategoryById(id);
  }

  const assignments = supplied.map(([clause]) => clause).join(', ');
  const values: any[] = [...supplied.map(([, value]) => value), id];
  db.run(`UPDATE categories SET ${assignments} WHERE id = ?`, values);
  saveDatabase();

  return getCategoryById(id);
}
/**
 * Deletes a category; the database is saved only when a row was removed.
 *
 * @returns true when a row was deleted, false when the id did not exist.
 */
export function deleteCategory(id: number): boolean {
  const db = getDatabase();
  db.run('DELETE FROM categories WHERE id = ?', [id]);

  const removed = db.getRowsModified() > 0;
  if (removed) {
    saveDatabase();
  }
  return removed;
}
// Groups

/**
 * Lists every store group ordered by name, each with the ids of its member
 * stores (one membership query per group).
 */
export function getAllGroups(): StoreGroupWithMembers[] {
  const result: StoreGroupWithMembers[] = [];
  for (const group of queryAll('SELECT * FROM store_groups ORDER BY name')) {
    const memberRows = queryAll('SELECT store_id FROM store_group_members WHERE group_id = ?', [group.id]);
    const store_ids: number[] = [];
    for (const row of memberRows) {
      store_ids.push(row.store_id);
    }
    result.push({ ...group, store_ids });
  }
  return result;
}
/**
 * Loads one store group together with the ids of its member stores.
 *
 * @returns The group, or undefined when no group has this id.
 */
export function getGroupById(id: number): StoreGroupWithMembers | undefined {
  const groupRow = queryOne('SELECT * FROM store_groups WHERE id = ?', [id]);
  if (groupRow) {
    const memberRows = queryAll('SELECT store_id FROM store_group_members WHERE group_id = ?', [id]);
    const store_ids = memberRows.map((row: any) => row.store_id);
    return { ...groupRow, store_ids };
  }
  return undefined;
}
/**
 * Inserts a new store group and saves the database.
 *
 * @param name        Group name.
 * @param description Optional text; stored as NULL when omitted or empty.
 * @returns The new group (with an empty member list).
 */
export function createGroup(name: string, description?: string): StoreGroupWithMembers {
  const db = getDatabase();

  const insertParams = [name, description || null];
  db.run('INSERT INTO store_groups (name, description) VALUES (?, ?)', insertParams);

  // The new row id is read before the database is saved, as in the original order.
  const idRow = queryOne('SELECT last_insert_rowid() as id');
  const newId = idRow?.id;
  saveDatabase();

  return getGroupById(newId) as StoreGroupWithMembers;
}
/**
 * Updates the supplied fields of a store group. Properties that are undefined
 * are left untouched; the database is written and saved only when at least
 * one field was supplied.
 *
 * @returns The group as stored afterwards, or undefined when no group has
 *          this id.
 */
export function updateGroup(id: number, data: { name?: string; description?: string }): StoreGroupWithMembers | undefined {
  const db = getDatabase();

  const assignments: string[] = [];
  const values: any[] = [];
  if (data.name !== undefined) {
    assignments.push('name = ?');
    values.push(data.name);
  }
  if (data.description !== undefined) {
    assignments.push('description = ?');
    values.push(data.description);
  }

  // Nothing supplied: skip the write and just return the current state.
  if (assignments.length === 0) {
    return getGroupById(id);
  }

  values.push(id);
  db.run(`UPDATE store_groups SET ${assignments.join(', ')} WHERE id = ?`, values);
  saveDatabase();
  return getGroupById(id);
}
/**
 * Deletes a store group; the database is saved only when a row was removed.
 *
 * @returns true when a row was deleted, false when the id did not exist.
 */
export function deleteGroup(id: number): boolean {
  const db = getDatabase();
  db.run('DELETE FROM store_groups WHERE id = ?', [id]);

  const removed = db.getRowsModified() > 0;
  if (removed) {
    saveDatabase();
  }
  return removed;
}
/**
 * Replaces the member list of a store group with `storeIds` and saves the
 * database.
 *
 * The delete and the inserts run inside one transaction, so a failing insert
 * (for example a constraint violation) rolls back and leaves the previous
 * membership intact instead of an emptied group. Duplicate ids in `storeIds`
 * are collapsed, since (group_id, store_id) is the primary key.
 *
 * @param groupId  Group whose members are replaced.
 * @param storeIds Ids of the stores that should belong to the group.
 * @throws The underlying database error when the replacement fails.
 */
export function setGroupMembers(groupId: number, storeIds: number[]): void {
  const db = getDatabase();
  const uniqueStoreIds = [...new Set(storeIds)];

  db.run('BEGIN');
  try {
    db.run('DELETE FROM store_group_members WHERE group_id = ?', [groupId]);
    for (const storeId of uniqueStoreIds) {
      db.run('INSERT INTO store_group_members (group_id, store_id) VALUES (?, ?)', [groupId, storeId]);
    }
    db.run('COMMIT');
  } catch (err) {
    try {
      db.run('ROLLBACK');
    } catch {
      // The transaction may already be closed; the original error matters more.
    }
    throw err;
  }

  saveDatabase();
}

View File

@@ -0,0 +1,85 @@
import { getDatabase, saveDatabase } from '../db/connection.js';
/** A row of the `scrape_logs` table: the outcome of one scrape attempt. */
export interface ScrapeLog {
id: number;
store_id: number;
// Search text the scrape was run for.
query: string;
// 1 for a successful scrape, 0 for a failed one (see logScrape()).
success: number;
result_count: number;
duration_ms: number;
// NULL when no error message was recorded.
error_message: string | null;
scraped_at: string;
}
/**
 * Runs a query and returns every result row as a plain object.
 *
 * @param sql    SQL text, optionally containing `?` placeholders.
 * @param params Values bound to the placeholders, in order.
 * @returns All rows produced by the statement (empty array when none).
 */
function queryAll(sql: string, params: any[] = []): any[] {
  const db = getDatabase();
  const stmt = db.prepare(sql);
  try {
    if (params.length) stmt.bind(params);
    const rows: any[] = [];
    while (stmt.step()) {
      rows.push(stmt.getAsObject());
    }
    return rows;
  } finally {
    // Release the prepared statement even when binding or stepping throws.
    stmt.free();
  }
}
/**
 * Runs a query via queryAll() and returns its first row.
 *
 * @returns The first row, or undefined when the query produced none.
 */
function queryOne(sql: string, params: any[] = []): any | undefined {
  const [firstRow] = queryAll(sql, params);
  return firstRow;
}
/**
 * Records the outcome of one scrape attempt and saves the database.
 *
 * @param storeId      Store that was scraped.
 * @param query        Search text that was used.
 * @param success      Whether the scrape succeeded (stored as 1 or 0).
 * @param resultCount  Number of results found.
 * @param durationMs   Elapsed time in milliseconds.
 * @param errorMessage Failure description; stored as NULL when omitted or empty.
 * @returns The inserted log row.
 */
export function logScrape(
  storeId: number,
  query: string,
  success: boolean,
  resultCount: number,
  durationMs: number,
  errorMessage?: string
): ScrapeLog {
  const db = getDatabase();

  const successFlag = success ? 1 : 0;
  const insertParams = [storeId, query, successFlag, resultCount, durationMs, errorMessage || null];
  db.run(`
INSERT INTO scrape_logs (store_id, query, success, result_count, duration_ms, error_message)
VALUES (?, ?, ?, ?, ?, ?)
`, insertParams);

  // The new row id is read before the database is saved, as in the original order.
  const idRow = queryOne('SELECT last_insert_rowid() as id');
  const newId = idRow?.id;
  saveDatabase();

  return queryOne('SELECT * FROM scrape_logs WHERE id = ?', [newId]) as ScrapeLog;
}
/**
 * Returns the most recent scrape logs of a store, newest first.
 *
 * @param storeId Store whose logs are requested.
 * @param limit   Maximum number of rows (default 20).
 */
export function getLogsByStore(storeId: number, limit = 20): ScrapeLog[] {
  const sql = 'SELECT * FROM scrape_logs WHERE store_id = ? ORDER BY scraped_at DESC LIMIT ?';
  const logs: ScrapeLog[] = queryAll(sql, [storeId, limit]);
  return logs;
}
/**
 * Summarizes the scrape history of a store.
 *
 * @returns Counts of all, successful and failed scrapes, the rounded average
 *          duration, the timestamp of the latest success and the message of
 *          the latest failure. Counters are 0 and the two "last" fields null
 *          when there is nothing to report.
 */
export function getStoreHealth(storeId: number): {
  total: number;
  successful: number;
  failed: number;
  avg_duration_ms: number;
  last_success: string | null;
  last_error: string | null;
} {
  // Aggregate figures over every log row of this store.
  const totals = queryOne(`
SELECT
COUNT(*) as total,
SUM(CASE WHEN success = 1 THEN 1 ELSE 0 END) as successful,
SUM(CASE WHEN success = 0 THEN 1 ELSE 0 END) as failed,
AVG(duration_ms) as avg_duration_ms,
MAX(CASE WHEN success = 1 THEN scraped_at END) as last_success
FROM scrape_logs WHERE store_id = ?
`, [storeId]);

  // Message of the most recent failed scrape, if there is one.
  const latestFailure = queryOne(
    'SELECT error_message FROM scrape_logs WHERE store_id = ? AND success = 0 ORDER BY scraped_at DESC LIMIT 1',
    [storeId]
  );

  const total = totals?.total || 0;
  const successful = totals?.successful || 0;
  const failed = totals?.failed || 0;
  const avg_duration_ms = Math.round(totals?.avg_duration_ms || 0);
  const last_success = totals?.last_success || null;
  const last_error = latestFailure?.error_message || null;

  return { total, successful, failed, avg_duration_ms, last_success, last_error };
}

174
src/server/models/store.ts Normal file
View File

@@ -0,0 +1,174 @@
import { getDatabase, saveDatabase } from '../db/connection.js';
/** A row of the `stores` table: one store and its scraping configuration. */
export interface Store {
id: number;
name: string;
// Unique identifier; derived from the name by createStore() when not supplied.
slug: string;
base_url: string;
// Search URL template; the scraper replaces '{query}' with the URL-encoded query.
search_url: string;
// 1 = enabled, 0 = disabled (the store queries filter on enabled = 1).
enabled: number;
// Selectors applied to the fetched page: the container of each result, then
// name, price, link and (optionally) image within a container.
sel_container: string;
sel_name: string;
sel_price: string;
sel_link: string;
sel_image: string | null;
// The search engine derives the delay between requests from rate_window / rate_limit.
rate_limit: number;
rate_window: number;
// NOTE(review): not read anywhere in the code visible here; confirm usage.
proxy_url: string | null;
// Overrides the scraper's default User-Agent header when set.
user_agent: string | null;
// JSON text whose parsed value is merged into the request headers; invalid JSON is ignored.
headers_json: string | null;
currency: string;
category_id: number | null;
created_at: string;
updated_at: string;
}
/**
 * A store row plus the name and color of its category, taken from a LEFT JOIN
 * on `categories`; both are null when the store has no category.
 */
export interface StoreWithCategory extends Store {
category_name: string | null;
category_color: string | null;
}
/**
 * Fields accepted when creating a store. Optional fields fall back to the
 * defaults applied in createStore().
 */
export interface CreateStoreInput {
name: string;
// When omitted or empty, createStore() derives the slug from `name`.
slug?: string;
base_url: string;
search_url: string;
sel_container: string;
sel_name: string;
sel_price: string;
sel_link: string;
sel_image?: string;
// createStore() stores 2 when omitted.
rate_limit?: number;
// createStore() stores 1000 when omitted.
rate_window?: number;
proxy_url?: string;
user_agent?: string;
headers_json?: string;
// createStore() stores 'EUR' when omitted or empty.
currency?: string;
category_id?: number;
}
/**
 * Runs a query and returns every result row as a plain object.
 *
 * @param sql    SQL text, optionally containing `?` placeholders.
 * @param params Values bound to the placeholders, in order.
 * @returns All rows produced by the statement (empty array when none).
 */
function queryAll(sql: string, params: any[] = []): any[] {
  const db = getDatabase();
  const stmt = db.prepare(sql);
  try {
    if (params.length) stmt.bind(params);
    const rows: any[] = [];
    while (stmt.step()) {
      rows.push(stmt.getAsObject());
    }
    return rows;
  } finally {
    // Release the prepared statement even when binding or stepping throws.
    stmt.free();
  }
}
/**
 * Runs a query via queryAll() and returns its first row.
 *
 * @returns The first row, or undefined when the query produced none.
 */
function queryOne(sql: string, params: any[] = []): any | undefined {
  const [firstRow] = queryAll(sql, params);
  return firstRow;
}
/**
 * Turns arbitrary text into a slug: lower-cased, with every run of characters
 * other than a-z and 0-9 collapsed into a single dash, and without a leading
 * or trailing dash.
 *
 * @example slugify('Hello World!') // 'hello-world'
 */
function slugify(text: string): string {
  const lowered = text.toLowerCase();
  const dashed = lowered.replace(/[^a-z0-9]+/g, '-');
  const trimmed = dashed.replace(/^-|-$/g, '');
  return trimmed;
}
/**
 * Lists every store (enabled or not) ordered by name, each with the name and
 * color of its category.
 */
export function getAllStores(): StoreWithCategory[] {
  const sql = `
SELECT s.*, c.name as category_name, c.color as category_color
FROM stores s
LEFT JOIN categories c ON s.category_id = c.id
ORDER BY s.name
`;
  return queryAll(sql);
}
/**
 * Loads one store together with the name and color of its category.
 *
 * @returns The store, or undefined when no store has this id.
 */
export function getStoreById(id: number): StoreWithCategory | undefined {
  const sql = `
SELECT s.*, c.name as category_name, c.color as category_color
FROM stores s
LEFT JOIN categories c ON s.category_id = c.id
WHERE s.id = ?
`;
  return queryOne(sql, [id]);
}
/** Lists all enabled stores ordered by name. */
export function getEnabledStores(): Store[] {
  const enabledStores: Store[] = queryAll('SELECT * FROM stores WHERE enabled = 1 ORDER BY name');
  return enabledStores;
}
/** Lists the enabled stores assigned to a category, ordered by name. */
export function getStoresByCategory(categoryId: number): Store[] {
  const sql = 'SELECT * FROM stores WHERE enabled = 1 AND category_id = ? ORDER BY name';
  const matches: Store[] = queryAll(sql, [categoryId]);
  return matches;
}
/** Lists the enabled stores that are members of a store group, ordered by name. */
export function getStoresByGroup(groupId: number): Store[] {
  const sql = `
SELECT s.* FROM stores s
JOIN store_group_members sgm ON s.id = sgm.store_id
WHERE s.enabled = 1 AND sgm.group_id = ?
ORDER BY s.name
`;
  return queryAll(sql, [groupId]);
}
/**
 * Lists the enabled stores whose id is in `ids`, ordered by name.
 * An empty id list returns an empty result without querying.
 */
export function getStoresByIds(ids: number[]): Store[] {
  if (ids.length === 0) {
    return [];
  }
  // One "?" placeholder per id; the ids themselves are bound as parameters.
  const marks = ids.map(() => '?');
  const placeholders = marks.join(',');
  const sql = `SELECT * FROM stores WHERE enabled = 1 AND id IN (${placeholders}) ORDER BY name`;
  return queryAll(sql, ids);
}
/**
 * Inserts a new store and saves the database.
 *
 * Defaults applied here: the slug is derived from the name when not given,
 * rate_limit is 2, rate_window is 1000, currency is 'EUR', and the remaining
 * optional fields are stored as NULL.
 *
 * @returns The inserted store row.
 */
export function createStore(input: CreateStoreInput): Store {
  const db = getDatabase();
  const slug = input.slug || slugify(input.name);

  // Values in the same order as the column list of the INSERT below.
  const row = [
    input.name,
    slug,
    input.base_url,
    input.search_url,
    input.sel_container,
    input.sel_name,
    input.sel_price,
    input.sel_link,
    input.sel_image || null,
    input.rate_limit ?? 2,
    input.rate_window ?? 1000,
    input.proxy_url || null,
    input.user_agent || null,
    input.headers_json || null,
    input.currency || 'EUR',
    input.category_id || null,
  ];
  db.run(`
INSERT INTO stores (name, slug, base_url, search_url, sel_container, sel_name, sel_price, sel_link, sel_image,
rate_limit, rate_window, proxy_url, user_agent, headers_json, currency, category_id)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`, row);

  // The new row id is read before the database is saved, as in the original order.
  const idRow = queryOne('SELECT last_insert_rowid() as id');
  const newId = idRow?.id;
  saveDatabase();

  return getStoreById(newId) as Store;
}
/**
 * Columns of `stores` that updateStore() is allowed to write. Keys of the
 * input object are interpolated into the SQL text as column names, so they
 * must never be taken from the caller unchecked.
 */
const UPDATABLE_STORE_COLUMNS: ReadonlySet<string> = new Set([
  'name', 'slug', 'base_url', 'search_url', 'enabled',
  'sel_container', 'sel_name', 'sel_price', 'sel_link', 'sel_image',
  'rate_limit', 'rate_window', 'proxy_url', 'user_agent', 'headers_json',
  'currency', 'category_id',
]);

/**
 * Updates the supplied fields of a store, refreshes `updated_at` and saves
 * the database.
 *
 * Only keys naming an updatable column are applied; unknown keys and
 * undefined values are ignored. This keeps request-supplied property names
 * out of the generated SQL. A missing input is treated as "nothing to change".
 *
 * @param id    Id of the store to update.
 * @param input Fields to change.
 * @returns The store as stored afterwards, the unchanged store when nothing
 *          applicable was supplied, or undefined when no store has this id.
 */
export function updateStore(id: number, input: Partial<CreateStoreInput>): Store | undefined {
  const existing = getStoreById(id);
  if (!existing) return undefined;

  const db = getDatabase();
  const fields: string[] = [];
  const values: any[] = [];
  for (const [key, value] of Object.entries(input ?? {})) {
    if (value === undefined || !UPDATABLE_STORE_COLUMNS.has(key)) continue;
    fields.push(`${key} = ?`);
    values.push(value);
  }
  if (fields.length === 0) return existing;

  fields.push("updated_at = datetime('now')");
  values.push(id);
  db.run(`UPDATE stores SET ${fields.join(', ')} WHERE id = ?`, values);
  saveDatabase();
  return getStoreById(id);
}
/**
 * Flips the enabled flag of a store (1 becomes 0, anything else becomes 1),
 * refreshes `updated_at` and saves the database.
 *
 * @returns The store after the change, or undefined when no store has this id.
 */
export function toggleStoreEnabled(id: number): Store | undefined {
  const db = getDatabase();
  const toggleSql =
    "UPDATE stores SET enabled = CASE WHEN enabled = 1 THEN 0 ELSE 1 END, updated_at = datetime('now') WHERE id = ?";
  db.run(toggleSql, [id]);
  saveDatabase();
  return getStoreById(id);
}
/**
 * Deletes a store; the database is saved only when a row was removed.
 *
 * @returns true when a row was deleted, false when the id did not exist.
 */
export function deleteStore(id: number): boolean {
  const db = getDatabase();
  db.run('DELETE FROM stores WHERE id = ?', [id]);

  const removed = db.getRowsModified() > 0;
  if (removed) {
    saveDatabase();
  }
  return removed;
}

View File

@@ -0,0 +1,98 @@
import type { FastifyPluginAsync } from 'fastify';
import {
getAllCategories, createCategory, updateCategory, deleteCategory,
getAllGroups, createGroup, updateGroup, deleteGroup, setGroupMembers,
} from '../models/category.js';
/**
 * Fastify plugin with the CRUD endpoints for categories and store groups.
 *
 * Routes (relative to the prefix the plugin is registered with):
 * - GET/POST `/categories`, PUT/DELETE `/categories/:id`
 * - GET/POST `/groups`, PUT/DELETE `/groups/:id`, PUT `/groups/:id/members`
 *
 * Name collisions are answered with 409 on both create and update, and
 * unknown ids with 404.
 */
export const categoryRoutes: FastifyPluginAsync = async (app) => {
  // True when the error reports a violated UNIQUE constraint.
  const isUniqueViolation = (err: unknown): boolean => {
    const message = (err as { message?: unknown } | null | undefined)?.message;
    return typeof message === 'string' && message.includes('UNIQUE constraint failed');
  };

  // Categories
  app.get('/categories', async () => getAllCategories());

  app.post<{ Body: { name: string; color?: string } }>('/categories', {
    schema: {
      body: {
        type: 'object',
        required: ['name'],
        properties: {
          name: { type: 'string', minLength: 1 },
          color: { type: 'string' },
        },
      },
    },
  }, async (request, reply) => {
    try {
      const category = createCategory(request.body.name, request.body.color);
      return reply.code(201).send(category);
    } catch (err: unknown) {
      if (isUniqueViolation(err)) {
        return reply.code(409).send({ error: 'Category already exists' });
      }
      throw err;
    }
  });

  app.put<{ Params: { id: string }; Body: { name?: string; color?: string; sort_order?: number } }>('/categories/:id', async (request, reply) => {
    try {
      // No body schema here, so tolerate a missing body instead of crashing.
      const result = updateCategory(Number(request.params.id), request.body ?? {});
      if (!result) return reply.code(404).send({ error: 'Category not found' });
      return result;
    } catch (err: unknown) {
      // Renaming onto an existing name is a conflict, as on create.
      if (isUniqueViolation(err)) {
        return reply.code(409).send({ error: 'Category already exists' });
      }
      throw err;
    }
  });

  app.delete<{ Params: { id: string } }>('/categories/:id', async (request, reply) => {
    const deleted = deleteCategory(Number(request.params.id));
    if (!deleted) return reply.code(404).send({ error: 'Category not found' });
    return reply.code(204).send();
  });

  // Groups
  app.get('/groups', async () => getAllGroups());

  app.post<{ Body: { name: string; description?: string } }>('/groups', {
    schema: {
      body: {
        type: 'object',
        required: ['name'],
        properties: {
          name: { type: 'string', minLength: 1 },
          description: { type: 'string' },
        },
      },
    },
  }, async (request, reply) => {
    try {
      const group = createGroup(request.body.name, request.body.description);
      return reply.code(201).send(group);
    } catch (err: unknown) {
      if (isUniqueViolation(err)) {
        return reply.code(409).send({ error: 'Group already exists' });
      }
      throw err;
    }
  });

  app.put<{ Params: { id: string }; Body: { name?: string; description?: string } }>('/groups/:id', async (request, reply) => {
    try {
      // No body schema here, so tolerate a missing body instead of crashing.
      const result = updateGroup(Number(request.params.id), request.body ?? {});
      if (!result) return reply.code(404).send({ error: 'Group not found' });
      return result;
    } catch (err: unknown) {
      // Renaming onto an existing name is a conflict, as on create.
      if (isUniqueViolation(err)) {
        return reply.code(409).send({ error: 'Group already exists' });
      }
      throw err;
    }
  });

  // Replaces the complete member list of a group.
  app.put<{ Params: { id: string }; Body: { store_ids: number[] } }>('/groups/:id/members', {
    schema: {
      body: {
        type: 'object',
        required: ['store_ids'],
        properties: {
          store_ids: { type: 'array', items: { type: 'number' } },
        },
      },
    },
  }, async (request, reply) => {
    setGroupMembers(Number(request.params.id), request.body.store_ids);
    return { success: true };
  });

  app.delete<{ Params: { id: string } }>('/groups/:id', async (request, reply) => {
    const deleted = deleteGroup(Number(request.params.id));
    if (!deleted) return reply.code(404).send({ error: 'Group not found' });
    return reply.code(204).send();
  });
};

View File

@@ -0,0 +1,37 @@
import type { FastifyPluginAsync } from 'fastify';
import fs from 'node:fs';
import { config } from '../config.js';
import { getDatabase } from '../db/connection.js';
/**
 * Runs a query and returns its first row as a plain object.
 *
 * @param sql    SQL text, optionally containing `?` placeholders.
 * @param params Values bound to the placeholders, in order.
 * @returns The first row, or undefined when the query produced none.
 */
function queryOne(sql: string, params: any[] = []): any | undefined {
  const db = getDatabase();
  const stmt = db.prepare(sql);
  try {
    if (params.length) stmt.bind(params);
    return stmt.step() ? stmt.getAsObject() : undefined;
  } finally {
    // Release the prepared statement even when binding or stepping throws.
    stmt.free();
  }
}
/**
 * Fastify plugin exposing GET `/health`: a fixed 'ok' status, the number of
 * stores (total and enabled) and the size of the database file on disk.
 */
export const healthRoutes: FastifyPluginAsync = async (app) => {
  // Reads the `count` column of a single-row COUNT(*) query; 0 when absent.
  const readCount = (sql: string): number => queryOne(sql)?.count ?? 0;

  app.get('/health', async () => {
    const total = readCount('SELECT COUNT(*) as count FROM stores');
    const enabled = readCount('SELECT COUNT(*) as count FROM stores WHERE enabled = 1');

    let sizeBytes = 0;
    try {
      sizeBytes = fs.statSync(config.databasePath).size;
    } catch {
      // DB file may not exist yet
    }
    // Megabytes, rounded to two decimals.
    const sizeMB = Math.round(sizeBytes / 1024 / 1024 * 100) / 100;

    return {
      status: 'ok',
      stores: { total, enabled },
      database: { sizeBytes, sizeMB },
    };
  });
};

View File

@@ -0,0 +1,39 @@
import type { FastifyPluginAsync } from 'fastify';
import { search } from '../scraper/engine.js';
/**
 * Fastify plugin exposing GET `/search`.
 *
 * Query parameters: `q` (required, non-empty search text), `stores`
 * (comma-separated store ids), `category` and `group` (numeric ids). The
 * parsed values are handed to the scraper engine's search().
 */
export const searchRoutes: FastifyPluginAsync = async (app) => {
  const querystring = {
    type: 'object',
    required: ['q'],
    properties: {
      q: { type: 'string', minLength: 1 },
      stores: { type: 'string' },
      category: { type: 'string' },
      group: { type: 'string' },
    },
  };

  app.get<{
    Querystring: {
      q: string;
      stores?: string;
      category?: string;
      group?: string;
    };
  }>('/search', { schema: { querystring } }, async (request) => {
    const params = request.query;

    // Comma-separated list -> numbers; entries that are not numeric are dropped.
    let storeIds: number[] | undefined;
    if (params.stores) {
      storeIds = [];
      for (const part of params.stores.split(',')) {
        const id = Number(part);
        if (!isNaN(id)) {
          storeIds.push(id);
        }
      }
    }

    let categoryId: number | undefined;
    if (params.category) {
      categoryId = Number(params.category);
    }

    let groupId: number | undefined;
    if (params.group) {
      groupId = Number(params.group);
    }

    return search({ query: params.q, storeIds, categoryId, groupId });
  });
};

View File

@@ -0,0 +1,73 @@
import type { FastifyPluginAsync } from 'fastify';
import { getAllStores, getStoreById, createStore, updateStore, toggleStoreEnabled, deleteStore } from '../models/store.js';
import { getLogsByStore, getStoreHealth } from '../models/scrape-log.js';
/**
 * Fastify plugin with the CRUD endpoints for stores.
 *
 * Routes (relative to the prefix the plugin is registered with):
 * - GET `/stores`, GET `/stores/:id` (store plus health summary and the ten
 *   most recent scrape logs)
 * - POST `/stores`, PUT `/stores/:id`, PATCH `/stores/:id/toggle`,
 *   DELETE `/stores/:id`
 *
 * Slug collisions are answered with 409 on both create and update, and
 * unknown ids with 404.
 */
export const storeRoutes: FastifyPluginAsync = async (app) => {
  // True when the error reports a violated UNIQUE constraint.
  const isUniqueViolation = (err: unknown): boolean => {
    const message = (err as { message?: unknown } | null | undefined)?.message;
    return typeof message === 'string' && message.includes('UNIQUE constraint failed');
  };

  app.get('/stores', async () => {
    return getAllStores();
  });

  app.get<{ Params: { id: string } }>('/stores/:id', async (request, reply) => {
    const store = getStoreById(Number(request.params.id));
    if (!store) return reply.code(404).send({ error: 'Store not found' });
    const health = getStoreHealth(store.id);
    const recentLogs = getLogsByStore(store.id, 10);
    return { ...store, health, recentLogs };
  });

  app.post<{ Body: any }>('/stores', {
    schema: {
      body: {
        type: 'object',
        required: ['name', 'base_url', 'search_url', 'sel_container', 'sel_name', 'sel_price', 'sel_link'],
        properties: {
          name: { type: 'string', minLength: 1 },
          slug: { type: 'string' },
          base_url: { type: 'string', minLength: 1 },
          search_url: { type: 'string', minLength: 1 },
          sel_container: { type: 'string', minLength: 1 },
          sel_name: { type: 'string', minLength: 1 },
          sel_price: { type: 'string', minLength: 1 },
          sel_link: { type: 'string', minLength: 1 },
          sel_image: { type: 'string' },
          rate_limit: { type: 'number' },
          rate_window: { type: 'number' },
          proxy_url: { type: 'string' },
          user_agent: { type: 'string' },
          headers_json: { type: 'string' },
          currency: { type: 'string' },
          category_id: { type: 'number' },
        },
      },
    },
  }, async (request, reply) => {
    try {
      const store = createStore(request.body);
      return reply.code(201).send(store);
    } catch (err: unknown) {
      if (isUniqueViolation(err)) {
        return reply.code(409).send({ error: 'A store with this slug already exists' });
      }
      throw err;
    }
  });

  app.put<{ Params: { id: string }; Body: any }>('/stores/:id', async (request, reply) => {
    try {
      // No body schema here, so tolerate a missing body instead of crashing.
      const store = updateStore(Number(request.params.id), request.body ?? {});
      if (!store) return reply.code(404).send({ error: 'Store not found' });
      return store;
    } catch (err: unknown) {
      // Changing the slug to one that is already taken is a conflict, as on create.
      if (isUniqueViolation(err)) {
        return reply.code(409).send({ error: 'A store with this slug already exists' });
      }
      throw err;
    }
  });

  app.patch<{ Params: { id: string } }>('/stores/:id/toggle', async (request, reply) => {
    const store = toggleStoreEnabled(Number(request.params.id));
    if (!store) return reply.code(404).send({ error: 'Store not found' });
    return store;
  });

  app.delete<{ Params: { id: string } }>('/stores/:id', async (request, reply) => {
    const deleted = deleteStore(Number(request.params.id));
    if (!deleted) return reply.code(404).send({ error: 'Store not found' });
    return reply.code(204).send();
  });
};

66
src/server/routes/test.ts Normal file
View File

@@ -0,0 +1,66 @@
import type { FastifyPluginAsync } from 'fastify';
import { getStoreById } from '../models/store.js';
import { logScrape, getLogsByStore, getStoreHealth } from '../models/scrape-log.js';
import { scrapeStore } from '../scraper/http-scraper.js';
import { normalizeResult } from '../scraper/result-parser.js';
/**
 * Fastify plugin exposing POST `/stores/:id/test`, which runs a single scrape
 * against one store and returns diagnostic details.
 *
 * The outcome is written to the scrape log either way. A scrape failure is
 * reported in the response body (`success: false`) rather than as an HTTP
 * error; only an unknown store id yields 404.
 */
export const testRoutes: FastifyPluginAsync = async (app) => {
  const bodySchema = {
    type: 'object',
    required: ['query'],
    properties: {
      query: { type: 'string', minLength: 1 },
    },
  };

  app.post<{
    Params: { id: string };
    Body: { query: string };
  }>('/stores/:id/test', { schema: { body: bodySchema } }, async (request, reply) => {
    const store = getStoreById(Number(request.params.id));
    if (!store) {
      return reply.code(404).send({ error: 'Store not found' });
    }

    const { query } = request.body;
    const searchUrl = store.search_url.replace('{query}', encodeURIComponent(query));

    // Health summary and recent logs, read after the current attempt was logged.
    const diagnostics = () => ({
      health: getStoreHealth(store.id),
      recentLogs: getLogsByStore(store.id, 10),
    });

    const startedAt = Date.now();
    try {
      const scraped = await scrapeStore(store, searchUrl);
      const duration = Date.now() - startedAt;

      const parsedProducts = scraped.items.map((item) =>
        normalizeResult(item, store.id, store.name, store.base_url, store.currency)
      );
      logScrape(store.id, query, true, parsedProducts.length, duration);

      return {
        success: true,
        searchUrl,
        statusCode: scraped.statusCode,
        duration,
        rawHtmlLength: scraped.html.length,
        // Only the first 5000 characters of the page are echoed back.
        rawHtmlPreview: scraped.html.substring(0, 5000),
        itemsFound: scraped.items.length,
        rawItems: scraped.items,
        parsedProducts,
        ...diagnostics(),
      };
    } catch (err) {
      const duration = Date.now() - startedAt;
      const error = err instanceof Error ? err.message : String(err);
      logScrape(store.id, query, false, 0, duration, error);

      return {
        success: false,
        searchUrl,
        duration,
        error,
        ...diagnostics(),
      };
    }
  });
};

View File

@@ -0,0 +1,120 @@
import pLimit from 'p-limit';
import type { Store } from '../models/store.js';
import { getEnabledStores, getStoresByCategory, getStoresByGroup, getStoresByIds } from '../models/store.js';
import { logScrape } from '../models/scrape-log.js';
import { scrapeStore } from './http-scraper.js';
import { normalizeResult, type Product } from './result-parser.js';
import { getLimiter } from './rate-limiter.js';
// Maximum number of stores scraped at the same time within one search.
const MAX_CONCURRENCY = 5;
// Overall time budget for one search, in milliseconds.
const SEARCH_TIMEOUT = 60_000;
/**
 * Input of search(). The store selection is resolved in this order: a
 * non-empty `storeIds`, then `groupId`, then `categoryId`; with none of them
 * set, all enabled stores are searched.
 */
export interface SearchOptions {
// Search text; substituted (URL-encoded) into each store's search URL.
query: string;
storeIds?: number[];
categoryId?: number;
groupId?: number;
}
/** Output of search(): the merged product list plus information about the run. */
export interface SearchResult {
// Products from all stores, sorted by ascending price with null prices last.
results: Product[];
meta: {
query: string;
// Elapsed time of the whole search in milliseconds.
duration: number;
// Number of stores that were selected for the search.
storeCount: number;
totalResults: number;
// One entry per failure; search-wide failures use storeId 0 and storeName 'System'.
errors: Array<{ storeId: number; storeName: string; error: string }>;
};
}
/**
 * Searches the selected stores in parallel and merges their results.
 *
 * Store selection follows `options`: explicit ids first, then group, then
 * category, otherwise all enabled stores. At most MAX_CONCURRENCY stores are
 * scraped at once, each through its own rate limiter, and every attempt is
 * written to the scrape log.
 *
 * When SEARCH_TIMEOUT elapses first, the results of the stores that finished
 * in time are still returned and a 'System' entry is added to `meta.errors`.
 * Stores finishing after that point no longer change the returned data, and
 * stores still waiting in the queue are skipped.
 *
 * @param options Query text and optional store selection.
 * @returns Products sorted by ascending price (null prices last), with
 *          timing, counts and per-store errors in `meta`.
 */
export async function search(options: SearchOptions): Promise<SearchResult> {
  const startTime = Date.now();
  const { query } = options;

  // Determine which stores to scrape
  let stores: Store[];
  if (options.storeIds?.length) {
    stores = getStoresByIds(options.storeIds);
  } else if (options.groupId) {
    stores = getStoresByGroup(options.groupId);
  } else if (options.categoryId) {
    stores = getStoresByCategory(options.categoryId);
  } else {
    stores = getEnabledStores();
  }

  if (stores.length === 0) {
    return {
      results: [],
      meta: { query, duration: Date.now() - startTime, storeCount: 0, totalResults: 0, errors: [] },
    };
  }

  const limit = pLimit(MAX_CONCURRENCY);
  const errors: SearchResult['meta']['errors'] = [];
  // One slot per store, filled as each scrape completes. Keeping store order
  // here preserves the order of equally priced products in the final sort.
  const perStoreProducts: Product[][] = stores.map(() => []);
  // Set once the response is being assembled; late scrapes must not touch it.
  let finished = false;

  // Create an overall timeout
  const timeoutError = new Error('Search timeout');
  let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
  const timeoutPromise = new Promise<never>((_, reject) => {
    timeoutHandle = setTimeout(() => reject(timeoutError), SEARCH_TIMEOUT);
  });

  const scrapePromises = stores.map((store, index) =>
    limit(async () => {
      // The search already returned: do not start further requests.
      if (finished) return;

      const searchUrl = store.search_url.replace('{query}', encodeURIComponent(query));
      const storeStart = Date.now();
      const rateLimiter = getLimiter(store.id, 1, Math.floor(store.rate_window / store.rate_limit));
      try {
        const result = await rateLimiter.schedule(() => scrapeStore(store, searchUrl));
        const duration = Date.now() - storeStart;
        const products = result.items.map((item) =>
          normalizeResult(item, store.id, store.name, store.base_url, store.currency)
        );
        logScrape(store.id, query, true, products.length, duration);
        if (!finished) perStoreProducts[index] = products;
      } catch (err) {
        const duration = Date.now() - storeStart;
        const errorMessage = err instanceof Error ? err.message : String(err);
        logScrape(store.id, query, false, 0, duration, errorMessage);
        if (!finished) errors.push({ storeId: store.id, storeName: store.name, error: errorMessage });
      }
    })
  );

  try {
    await Promise.race([Promise.all(scrapePromises), timeoutPromise]);
  } catch (err) {
    // Either the overall timeout fired or a scrape task failed outside its own
    // error handling; report which, and keep whatever was collected so far.
    const error = err === timeoutError
      ? 'Search timed out'
      : (err instanceof Error ? err.message : String(err));
    errors.push({ storeId: 0, storeName: 'System', error });
  } finally {
    finished = true;
    // Do not leave the timer pending after the search has completed.
    clearTimeout(timeoutHandle);
  }

  const allProducts: Product[] = [];
  for (const products of perStoreProducts) {
    allProducts.push(...products);
  }

  // Sort by price ascending, nulls last
  allProducts.sort((a, b) => {
    if (a.price === null && b.price === null) return 0;
    if (a.price === null) return 1;
    if (b.price === null) return -1;
    return a.price - b.price;
  });

  return {
    results: allProducts,
    meta: {
      query,
      duration: Date.now() - startTime,
      storeCount: stores.length,
      totalResults: allProducts.length,
      errors,
    },
  };
}

View File

@@ -0,0 +1,64 @@
import * as cheerio from 'cheerio';
import type { Store } from '../models/store.js';
import type { ScrapedItem } from './result-parser.js';
// User-Agent header sent when a store does not configure its own.
const DEFAULT_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
// Milliseconds after which a scrape request is aborted.
const DEFAULT_TIMEOUT = 10_000;
/** Outcome of fetching and parsing a single store search page. */
export interface ScrapeResult {
// Items extracted from the page; entries missing a name or a price text are left out.
items: ScrapedItem[];
// Response body text that was parsed.
html: string;
// HTTP status code of the response.
statusCode: number;
}
/**
 * Builds the request headers for a store: browser-like defaults overlaid with
 * the store's optional `headers_json` overrides.
 *
 * `headers_json` is only honoured when it parses to a plain JSON object.
 * Anything else (invalid JSON, a string, an array, ...) is ignored, because
 * merging it blindly would inject index keys such as "0" or "1" as header
 * names. Within the object, only string, number and boolean values are copied.
 */
function buildRequestHeaders(store: Store): Record<string, string> {
  const headers: Record<string, string> = {
    'User-Agent': store.user_agent || DEFAULT_USER_AGENT,
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
  };

  if (!store.headers_json) {
    return headers;
  }

  let extra: unknown;
  try {
    extra = JSON.parse(store.headers_json);
  } catch {
    // Ignore invalid headers JSON
    return headers;
  }

  if (typeof extra !== 'object' || extra === null || Array.isArray(extra)) {
    return headers;
  }

  for (const [headerName, headerValue] of Object.entries(extra)) {
    if (typeof headerValue === 'string') {
      headers[headerName] = headerValue;
    } else if (typeof headerValue === 'number' || typeof headerValue === 'boolean') {
      headers[headerName] = String(headerValue);
    }
    // Nested objects and null values are not meaningful header values; skip them.
  }

  return headers;
}

/**
 * Fetches a store's search page and extracts product candidates using the
 * store's configured CSS selectors.
 *
 * The HTTP status is not checked here: a non-2xx response is still parsed and
 * handed back with its `statusCode`, so callers decide how to treat it.
 *
 * @param store - Store configuration (selectors, optional user agent and extra headers).
 * @param searchUrl - Fully built search URL to request.
 * @returns The extracted items together with the raw HTML and HTTP status code.
 * @throws If the request fails or is aborted after `DEFAULT_TIMEOUT` milliseconds.
 */
export async function scrapeStore(store: Store, searchUrl: string): Promise<ScrapeResult> {
  const headers = buildRequestHeaders(store);

  const controller = new AbortController();
  // The timer stays armed until the body has been read, so it bounds both
  // the connection and the download of the response text.
  const timeout = setTimeout(() => controller.abort(), DEFAULT_TIMEOUT);

  try {
    const response = await fetch(searchUrl, {
      headers,
      signal: controller.signal,
      redirect: 'follow',
    });
    const html = await response.text();
    const $ = cheerio.load(html);
    const items: ScrapedItem[] = [];

    $(store.sel_container).each((_, el) => {
      const $el = $(el);
      const name = $el.find(store.sel_name).first().text().trim();
      const priceText = $el.find(store.sel_price).first().text().trim();
      const link = $el.find(store.sel_link).first().attr('href') || '';

      let image: string | null = null;
      if (store.sel_image) {
        const $image = $el.find(store.sel_image).first();
        // Fall back to `data-src` when `src` is missing or empty.
        image = $image.attr('src') || $image.attr('data-src') || null;
      }

      // A result without both a name and a price is not usable.
      if (name && priceText) {
        items.push({ name, priceText, link, image });
      }
    });

    return { items, html, statusCode: response.status };
  } finally {
    clearTimeout(timeout);
  }
}

View File

@@ -0,0 +1,22 @@
import Bottleneck from 'bottleneck';
// Limiters keyed by store id; filled lazily by getLimiter and emptied by clearLimiters.
const limiters = new Map<number, Bottleneck>();
/**
 * Returns the limiter registered for a store, creating and caching one on
 * first use.
 *
 * The settings only take effect when the limiter is created: later calls for
 * the same store id return the cached instance and ignore the arguments.
 *
 * @param storeId - Store the limiter belongs to.
 * @param maxConcurrent - Forwarded to Bottleneck's `maxConcurrent` option.
 * @param minTime - Forwarded to Bottleneck's `minTime` option.
 */
export function getLimiter(storeId: number, maxConcurrent: number, minTime: number): Bottleneck {
  const cached = limiters.get(storeId);
  if (cached) {
    return cached;
  }

  const created = new Bottleneck({ maxConcurrent, minTime });
  limiters.set(storeId, created);
  return created;
}
/** Disconnects every cached limiter and empties the cache. */
export function clearLimiters(): void {
  limiters.forEach((limiter) => {
    limiter.disconnect();
  });
  limiters.clear();
}

View File

@@ -0,0 +1,83 @@
/** Raw fields extracted from one result container, before normalization. */
export interface ScrapedItem {
// Product name text.
name: string;
// Price as displayed on the page (unparsed; see parsePrice).
priceText: string;
// Product link href; resolved against the store base URL by normalizeResult.
link: string;
// Image URL, or null when none was found.
image: string | null;
}
/** Normalized search result, as built by normalizeResult. */
export interface Product {
// Trimmed product name.
name: string;
// Numeric price parsed from priceText, or null when it cannot be parsed.
price: number | null;
// Trimmed price text as scraped.
priceText: string;
// Currency supplied by the caller of normalizeResult.
currency: string;
// Product URL resolved against the store base URL.
url: string;
// Image URL resolved against the store base URL, or null when there is no image.
image: string | null;
// Name of the store this result came from.
storeName: string;
// Id of the store this result came from.
storeId: number;
}
/**
 * Rewrites a string made of digits, commas and periods into a form that
 * `parseFloat` understands (no grouping marks, period as decimal mark).
 *
 * Rules, in order:
 *  - both marks present: the right-most one is the decimal mark, the other is grouping
 *    ("1.299,00" -> "1299.00", "1,299.00" -> "1299.00");
 *  - one mark repeated: it is a grouping mark ("1,299,000" -> "1299000");
 *  - one mark, once, splitting 1-3 leading digits (not starting with 0) from exactly
 *    three trailing digits: grouping mark ("1,299" -> "1299");
 *  - otherwise the mark is the decimal mark ("12,99" -> "12.99").
 */
function toPlainDecimal(numeric: string): string {
  const lastComma = numeric.lastIndexOf(',');
  const lastPeriod = numeric.lastIndexOf('.');

  if (lastComma === -1 && lastPeriod === -1) {
    return numeric;
  }

  if (lastComma !== -1 && lastPeriod !== -1) {
    const decimalMark = lastComma > lastPeriod ? ',' : '.';
    const groupMark = decimalMark === ',' ? '.' : ',';
    return numeric.split(groupMark).join('').replace(decimalMark, '.');
  }

  const mark = lastComma !== -1 ? ',' : '.';
  const isRepeated = numeric.indexOf(mark) !== numeric.lastIndexOf(mark);
  // Prices with exactly three decimals are far rarer than "1,299"-style thousands,
  // and the result is rounded to two decimals anyway.
  const isSingleThousandsGroup = /^[1-9]\d{0,2}[.,]\d{3}$/.test(numeric);
  const groups = numeric.split(mark);

  return isRepeated || isSingleThousandsGroup ? groups.join('') : groups.join('.');
}

/**
 * Parses a displayed price into a number rounded to two decimals.
 *
 * Handles currency symbols and surrounding text, US ("1,299.00") and European
 * ("1.299,00", "1 299,00") formats, thousands-only values ("1,299"), the
 * "1299,-" notation, and ranges ("$12 - $15"), for which the lower bound is
 * returned. "free" and "gratis" (case-insensitive) parse to 0.
 *
 * @param text - Price text as scraped from the page.
 * @returns The parsed price, or null when no number can be extracted.
 */
export function parsePrice(text: string): number | null {
  if (!text) return null;

  const cleaned = text.trim().toLowerCase();
  if (cleaned === 'free' || cleaned === 'gratis') return 0;

  // Ranges: keep the lower bound. Hyphen, en dash, em dash and minus sign all count.
  const [lowerBound = ''] = cleaned.split(/\s*[-\u2013\u2014\u2212]\s*/);

  // Drop currency symbols, letters and whitespace, then any dangling separator
  // left behind by notations such as "12,99." or "1299,-".
  const numeric = lowerBound.replace(/[^\d.,]/g, '').replace(/[.,]+$/, '');
  if (!numeric) return null;

  const parsed = parseFloat(toPlainDecimal(numeric));
  return isNaN(parsed) ? null : Math.round(parsed * 100) / 100;
}
/**
 * Resolves a scraped href against a store's base URL.
 *
 * @param href - Link as found in the page; may be absolute, relative or empty.
 * @param baseUrl - URL that relative links are resolved against.
 * @returns `href` unchanged when it already starts with http:// or https://,
 *   the resolved absolute URL otherwise, or `baseUrl` when `href` is empty or
 *   cannot be resolved.
 */
export function normalizeUrl(href: string, baseUrl: string): string {
  if (!href) {
    return baseUrl;
  }

  let resolved = baseUrl;
  try {
    const alreadyAbsolute = ['http://', 'https://'].some((scheme) => href.startsWith(scheme));
    resolved = alreadyAbsolute ? href : new URL(href, baseUrl).href;
  } catch {
    // Unresolvable input: keep the base URL fallback.
    resolved = baseUrl;
  }
  return resolved;
}
/**
 * Turns a raw scraped item into a Product tagged with its store.
 *
 * @param raw - Item as extracted from the page.
 * @param storeId - Id of the originating store.
 * @param storeName - Display name of the originating store.
 * @param baseUrl - Store base URL used to resolve the item's link and image.
 * @param currency - Currency to attach to the product.
 * @returns The product with trimmed text, a parsed price (null when unparseable)
 *   and resolved URLs.
 */
export function normalizeResult(raw: ScrapedItem, storeId: number, storeName: string, baseUrl: string, currency: string): Product {
  const { name, priceText, link, image } = raw;

  const product: Product = {
    name: name.trim(),
    price: parsePrice(priceText),
    priceText: priceText.trim(),
    currency,
    url: normalizeUrl(link, baseUrl),
    image: image ? normalizeUrl(image, baseUrl) : null,
    storeName,
    storeId,
  };

  return product;
}