Initial commit: Price Hunter — self-hosted price comparison engine

Complete application scaffolding with:
- Backend: Node.js + Fastify + sql.js (SQLite)
- Frontend: SvelteKit + Tailwind CSS
- Scraper engine with parallel fan-out, rate limiting, cheerio-based parsing
- Store management with CSS selector config and per-store test pages
- Docker setup for single-command deployment

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
mariosemes
2026-03-26 20:54:52 +01:00
commit e0f67d0835
47 changed files with 9181 additions and 0 deletions

View File

@@ -0,0 +1,120 @@
import pLimit from 'p-limit';
import type { Store } from '../models/store.js';
import { getEnabledStores, getStoresByCategory, getStoresByGroup, getStoresByIds } from '../models/store.js';
import { logScrape } from '../models/scrape-log.js';
import { scrapeStore } from './http-scraper.js';
import { normalizeResult, type Product } from './result-parser.js';
import { getLimiter } from './rate-limiter.js';
// Upper bound on stores scraped in parallel (passed to p-limit).
const MAX_CONCURRENCY = 5;
// Overall wall-clock budget for one search across all stores, in ms.
const SEARCH_TIMEOUT = 60_000;
/**
 * Input to {@link search}. Filters are applied by precedence:
 * explicit storeIds, then groupId, then categoryId; when none are
 * given, all enabled stores are searched.
 */
export interface SearchOptions {
  /** Free-text search query; substituted into each store's search URL. */
  query: string;
  /** Restrict the search to these store ids (takes precedence over group/category). */
  storeIds?: number[];
  /** Restrict to stores in this category. */
  categoryId?: number;
  /** Restrict to stores in this group. */
  groupId?: number;
}
/**
 * Aggregated outcome of a multi-store search: merged product list plus
 * per-search metadata, including any per-store scrape failures.
 */
export interface SearchResult {
  /** Normalized products from every store that responded, sorted by price ascending. */
  results: Product[];
  meta: {
    /** The query that was searched. */
    query: string;
    /** Total wall-clock time of the search, in ms. */
    duration: number;
    /** Number of stores selected for scraping. */
    storeCount: number;
    /** Total number of products in `results`. */
    totalResults: number;
    /** Per-store failures; storeId 0 / 'System' denotes an overall timeout. */
    errors: Array<{ storeId: number; storeName: string; error: string }>;
  };
}
/**
 * Fan a product search out across the selected stores, scrape each one
 * (per-store rate-limited, at most MAX_CONCURRENCY in flight), and return
 * the merged results sorted by ascending price.
 *
 * Store selection precedence: explicit `storeIds` > `groupId` > `categoryId`
 * > all enabled stores.
 *
 * Per-store failures never reject the overall search; they are recorded in
 * `meta.errors`. If the SEARCH_TIMEOUT budget elapses, the results gathered
 * from stores that finished in time are returned, plus a 'System' error entry.
 *
 * @param options - query text and optional store/category/group filters
 * @returns merged products plus search metadata
 */
export async function search(options: SearchOptions): Promise<SearchResult> {
  const startTime = Date.now();
  const { query } = options;

  // Determine which stores to scrape (most specific filter wins).
  let stores: Store[];
  if (options.storeIds?.length) {
    stores = getStoresByIds(options.storeIds);
  } else if (options.groupId) {
    stores = getStoresByGroup(options.groupId);
  } else if (options.categoryId) {
    stores = getStoresByCategory(options.categoryId);
  } else {
    stores = getEnabledStores();
  }

  if (stores.length === 0) {
    return {
      results: [],
      meta: { query, duration: Date.now() - startTime, storeCount: 0, totalResults: 0, errors: [] },
    };
  }

  const limit = pLimit(MAX_CONCURRENCY);
  const errors: SearchResult['meta']['errors'] = [];
  const allProducts: Product[] = [];

  const scrapePromises = stores.map((store) =>
    limit(async () => {
      const searchUrl = store.search_url.replace('{query}', encodeURIComponent(query));
      const storeStart = Date.now();
      // Guard against rate_limit <= 0, which would otherwise produce a
      // non-finite (or negative) minimum interval between requests.
      const minInterval =
        store.rate_limit > 0 ? Math.floor(store.rate_window / store.rate_limit) : 0;
      const rateLimiter = getLimiter(store.id, 1, minInterval);
      try {
        const result = await rateLimiter.schedule(() => scrapeStore(store, searchUrl));
        const duration = Date.now() - storeStart;
        const products = result.items.map((item) =>
          normalizeResult(item, store.id, store.name, store.base_url, store.currency)
        );
        logScrape(store.id, query, true, products.length, duration);
        // Push eagerly (not after Promise.all) so that an overall timeout
        // still returns partial results from the stores that finished.
        allProducts.push(...products);
      } catch (err) {
        const duration = Date.now() - storeStart;
        const errorMessage = err instanceof Error ? err.message : String(err);
        logScrape(store.id, query, false, 0, duration, errorMessage);
        errors.push({ storeId: store.id, storeName: store.name, error: errorMessage });
      }
    })
  );

  // Overall timeout. The timer handle is cleared afterwards so a fast
  // search does not keep the event loop alive for the full SEARCH_TIMEOUT.
  let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
  const timeoutPromise = new Promise<never>((_, reject) => {
    timeoutHandle = setTimeout(() => reject(new Error('Search timeout')), SEARCH_TIMEOUT);
  });

  try {
    await Promise.race([Promise.all(scrapePromises), timeoutPromise]);
  } catch {
    // Timed out — allProducts already holds everything collected so far.
    errors.push({ storeId: 0, storeName: 'System', error: 'Search timed out' });
  } finally {
    clearTimeout(timeoutHandle);
  }

  // Sort by price ascending; products with no parsed price sort last.
  allProducts.sort((a, b) => {
    if (a.price === null && b.price === null) return 0;
    if (a.price === null) return 1;
    if (b.price === null) return -1;
    return a.price - b.price;
  });

  return {
    results: allProducts,
    meta: {
      query,
      duration: Date.now() - startTime,
      storeCount: stores.length,
      totalResults: allProducts.length,
      errors,
    },
  };
}