UX Improvements

This commit is contained in:
Brennan Wilkes (Text Groove) 2026-02-10 16:45:22 -08:00
parent e9f8f805c5
commit 7a33d51c90
73 changed files with 13094 additions and 13094 deletions

View file

@ -4,7 +4,7 @@
// CLI entry point: run the tracker and report any failure on stderr.
const { main } = require("../src/main");

main().catch((e) => {
  // Prefer the stack trace when present; fall back to string coercion for non-Error rejections.
  const msg = e && e.stack ? e.stack : String(e);
  console.error(msg);
  // Record a failing exit status without forcing an immediate exit.
  process.exitCode = 1;
});

View file

@ -7,327 +7,327 @@ const { setTimeout: setTimeoutCb, clearTimeout } = require("timers");
/* ---------------- Errors ---------------- */ /* ---------------- Errors ---------------- */
/** Error type used to mark a failed request as safe to retry. */
class RetryableError extends Error {
  /** @param {string} msg - Human-readable failure description. */
  constructor(msg) {
    super(msg);
    this.name = "RetryableError";
  }
}
/**
 * Decide whether a failure is worth retrying.
 *
 * @param {*} e - The thrown value (Error, string, or anything else).
 * @returns {boolean} true for aborts, RetryableError instances, and errors
 *   whose message matches a known transient network failure.
 */
function isRetryable(e) {
  if (!e) return false;
  if (e.name === "AbortError") return true;
  if (e instanceof RetryableError) return true;
  // Fall back to message sniffing for low-level network errors.
  const msg = String(e.message || e);
  return /ECONNRESET|ENOTFOUND|EAI_AGAIN|ETIMEDOUT|socket hang up|fetch failed/i.test(msg);
}
/* ---------------- Backoff ---------------- */ /* ---------------- Backoff ---------------- */
/**
 * Exponential backoff with jitter.
 *
 * @param {number} attempt - Zero-based attempt index.
 * @returns {number} Delay in ms: 500 * 2^attempt capped at 12000, plus 0-399 ms of random jitter.
 */
function backoffMs(attempt) {
  const base = Math.min(12000, 500 * Math.pow(2, attempt));
  const jitter = Math.floor(Math.random() * 400);
  return base + jitter;
}
/**
 * Read the Retry-After header of a response as a delay in milliseconds.
 *
 * Accepts both header forms: a number of seconds, or a date string.
 *
 * @param {{headers?: {get?: Function}}} res - Response-like object.
 * @returns {number} Non-negative delay in ms; 0 when the header is absent or unparseable.
 */
function retryAfterMs(res) {
  const ra = res?.headers?.get ? res.headers.get("retry-after") : null;
  if (!ra) return 0;

  // Numeric form: seconds to wait.
  const secs = Number(String(ra).trim());
  if (Number.isFinite(secs)) return Math.max(0, secs * 1000);

  // Date form: wait until that instant (never negative).
  const dt = Date.parse(String(ra));
  if (Number.isFinite(dt)) return Math.max(0, dt - Date.now());

  return 0;
}
/* ---------------- Utils ---------------- */ /* ---------------- Utils ---------------- */
/**
 * Read a response body as text without ever throwing.
 *
 * @param {{text: Function}} res - Response-like object.
 * @returns {Promise<string>} The body text, or "" if reading fails.
 */
async function safeText(res) {
  try {
    return await res.text();
  } catch {
    // Best effort only: callers use this for diagnostics.
    return "";
  }
}
/**
 * Extract the host (including any port) from a URL string.
 *
 * @param {string} u - Absolute URL.
 * @returns {string} The host, or "" when the URL cannot be parsed.
 */
function hostFromUrl(u) {
  try {
    return new URL(u).host || "";
  } catch {
    return "";
  }
}
/* ---------------- Cookies (simple jar) ---------------- */ /* ---------------- Cookies (simple jar) ---------------- */
// host -> Map(cookieName -> "name=value") // host -> Map(cookieName -> "name=value")
/**
 * Create a minimal in-memory cookie jar keyed by host.
 *
 * Only the leading "name=value" pair of each Set-Cookie line is kept;
 * attributes after the first ";" are dropped.
 *
 * @returns {{storeFromResponse: Function, cookieHeaderFor: Function}}
 */
function createCookieJar() {
  // host -> Map(cookieName -> "name=value")
  const jar = new Map();

  // Parse one Set-Cookie line into { name, pair }, or null when it has no usable name.
  function parseSetCookieLine(line) {
    const s = String(line || "").trim();
    if (!s) return null;

    const first = s.split(";")[0] || "";
    const eq = first.indexOf("=");
    if (eq <= 0) return null;

    const name = first.slice(0, eq).trim();
    const value = first.slice(eq + 1).trim();
    if (!name) return null;

    return { name, pair: `${name}=${value}` };
  }

  // Return all Set-Cookie lines from a Headers-like object as an array.
  function getSetCookieArray(headers) {
    if (headers && typeof headers.getSetCookie === "function") {
      try {
        const arr = headers.getSetCookie();
        return Array.isArray(arr) ? arr : [];
      } catch {}
    }

    // Fallback: a single combined header. Split only on commas that are
    // followed by a "name=" token, so commas inside date values are kept.
    const one = headers?.get ? headers.get("set-cookie") : null;
    if (!one) return [];
    return String(one)
      .split(/,(?=[^;,]*=)/g)
      .map((x) => x.trim())
      .filter(Boolean);
  }

  // Record cookies from a response under the host of res.url (or of the request url).
  function storeFromResponse(url, res) {
    const host = hostFromUrl(res?.url || url);
    if (!host) return;

    const lines = getSetCookieArray(res?.headers);
    if (!lines.length) return;

    let m = jar.get(host);
    if (!m) {
      m = new Map();
      jar.set(host, m);
    }
    for (const line of lines) {
      const c = parseSetCookieLine(line);
      if (c) m.set(c.name, c.pair);
    }
  }

  // Build the Cookie request-header value for a URL ("" when nothing is stored).
  function cookieHeaderFor(url) {
    const host = hostFromUrl(url);
    if (!host) return "";
    const m = jar.get(host);
    if (!m || m.size === 0) return "";
    return [...m.values()].join("; ");
  }

  return { storeFromResponse, cookieHeaderFor };
}
/* ---------------- HTTP client ---------------- */ /* ---------------- HTTP client ---------------- */
/**
 * Build an HTTP client with retries, per-host pacing, a per-host in-flight
 * clamp, and a simple cookie jar.
 *
 * @param {object} opts
 * @param {number} opts.maxRetries - Retries after the first attempt (total attempts = maxRetries + 1).
 * @param {number} opts.timeoutMs - Per-attempt timeout; covers both the fetch and the body read.
 * @param {string} opts.defaultUa - User-Agent used when a call does not pass one.
 * @param {object} [opts.logger] - Optional logger; only `dbg` and `warn` are used.
 * @returns {{fetchTextWithRetry: Function, fetchJsonWithRetry: Function, inflightStr: Function}}
 */
function createHttpClient({ maxRetries, timeoutMs, defaultUa, logger }) {
  let inflight = 0;
  let reqSeq = 0;
  const cookieJar = createCookieJar();

  // host -> epoch ms when next request is allowed
  const hostNextOkAt = new Map();
  // Conservative pacing defaults (slow > blocked)
  const minHostIntervalMs = 2500;
  // Per-host inflight clamp (prevents bursts when global concurrency is high)
  const hostInflight = new Map();
  const maxHostInflight = 1;

  function inflightStr() {
    return `inflight=${inflight}`;
  }

  // Wait until the host has a free in-flight slot, take it, and return a release function.
  async function acquireHost(url) {
    const host = hostFromUrl(url);
    if (!host) return () => {};

    while (true) {
      const cur = hostInflight.get(host) || 0;
      if (cur < maxHostInflight) {
        hostInflight.set(host, cur + 1);
        return () => {
          const n = (hostInflight.get(host) || 1) - 1;
          if (n <= 0) hostInflight.delete(host);
          else hostInflight.set(host, n);
        };
      }
      await sleep(50);
    }
  }

  // Pre-pacing reservation: reserve the next slot BEFORE the fetch is sent
  async function throttleHost(url) {
    const host = hostFromUrl(url);
    if (!host) return;

    while (true) {
      const now = Date.now();
      const next = hostNextOkAt.get(host) || 0;
      const wait = next - now;
      if (wait > 0) {
        logger?.dbg?.(`THROTTLE host=${host} wait=${wait}ms`);
        await sleep(wait);
        continue;
      }
      // Reserve immediately to prevent concurrent pass-through
      hostNextOkAt.set(host, now + minHostIntervalMs);
      return;
    }
  }

  // Push the host's next-allowed time out by the base interval plus any extra delay.
  function noteHost(url, extraDelayMs = 0) {
    const host = hostFromUrl(url);
    if (!host) return;

    const now = Date.now();
    const current = hostNextOkAt.get(host) || 0;
    // Extend (never shorten) any existing cooldown
    const target = now + minHostIntervalMs + Math.max(0, extraDelayMs);
    hostNextOkAt.set(host, Math.max(current, target));
    logger?.dbg?.(`HOST-PACE host=${host} nextOkIn=${Math.max(0, (hostNextOkAt.get(host) || 0) - Date.now())}ms`);
  }

  /**
   * Fetch a URL, retrying retryable failures with backoff.
   *
   * Resolves to { text | json, ms, bytes, status, finalUrl }. Throws the last
   * error when it is not retryable or when retries are exhausted.
   */
  async function fetchWithRetry(
    url,
    tag,
    ua,
    { mode = "text", method = "GET", headers = {}, body = null, cookies = true } = {},
  ) {
    for (let attempt = 0; attempt <= maxRetries; attempt++) {
      const reqId = ++reqSeq;
      const start = Date.now();
      inflight++;
      logger?.dbg?.(
        `REQ#${reqId} START ${tag} attempt=${attempt + 1}/${maxRetries + 1} ${url} (${inflightStr()})`,
      );

      const releaseHost = await acquireHost(url);
      try {
        await throttleHost(url);

        const ctrl = new AbortController();
        const t = setTimeoutCb(() => ctrl.abort(), timeoutMs);
        try {
          // Caller-supplied Cookie headers win over the jar.
          const cookieHdr =
            cookies && !("Cookie" in headers) && !("cookie" in headers) ? cookieJar.cookieHeaderFor(url) : "";

          const res = await fetch(url, {
            method,
            redirect: "follow",
            headers: {
              "user-agent": ua || defaultUa,
              "accept-language": "en-US,en;q=0.9",
              ...(mode === "text"
                ? { accept: "text/html,application/xhtml+xml", "cache-control": "no-cache" }
                : { accept: "application/json, text/plain, */*" }),
              ...(cookieHdr ? { cookie: cookieHdr } : {}),
              ...headers,
            },
            body,
            signal: ctrl.signal,
          });

          const status = res.status;
          const finalUrl = res.url || url;
          const elapsed = Date.now() - start;

          // Always pace the host a bit after any response
          noteHost(finalUrl);
          if (cookies) cookieJar.storeFromResponse(url, res);
          logger?.dbg?.(`REQ#${reqId} HTTP ${status} ${tag} ms=${elapsed} finalUrl=${finalUrl}`);

          if (status === 429) {
            let raMs = retryAfterMs(res);
            // If no Retry-After header, enforce a real cooldown (Shopify often omits it)
            if (raMs <= 0) raMs = 15000 + Math.floor(Math.random() * 5000);
            noteHost(finalUrl, raMs);
            logger?.dbg?.(`REQ#${reqId} 429 retryAfterMs=${raMs} host=${hostFromUrl(finalUrl)}`);
            throw new RetryableError("HTTP 429");
          }

          if (status === 408 || (status >= 500 && status <= 599)) {
            throw new RetryableError(`HTTP ${status}`);
          }

          if (status >= 400) {
            const bodyTxt = await safeText(res);
            throw new Error(`HTTP ${status} bodyHead=${String(bodyTxt).slice(0, 160).replace(/\s+/g, " ")}`);
          }

          if (mode === "json") {
            const txt = await res.text();
            let json;
            try {
              json = JSON.parse(txt);
            } catch (e) {
              throw new RetryableError(`Bad JSON: ${e?.message || e}`);
            }
            return { json, ms: elapsed, bytes: txt.length, status, finalUrl };
          }

          const text = await res.text();
          // Suspiciously short HTML is treated as a transient failure.
          if (!text || text.length < 200) {
            throw new RetryableError(`Short HTML bytes=${text.length}`);
          }
          return { text, ms: elapsed, bytes: text.length, status, finalUrl };
        } finally {
          // Clear the abort timer only after the body has been read (or the
          // attempt failed), so a stalled body download is also cut off by
          // timeoutMs instead of hanging while holding the host slot.
          clearTimeout(t);
        }
      } catch (e) {
        const retryable = isRetryable(e);
        const host = hostFromUrl(url);
        const nextOk = hostNextOkAt.get(host) || 0;
        logger?.dbg?.(
          `REQ#${reqId} FAIL ${tag} retryable=${retryable} err=${e?.message || e} host=${host} nextOkIn=${Math.max(
            0,
            nextOk - Date.now(),
          )}ms`,
        );

        if (!retryable || attempt === maxRetries) throw e;

        // Wait at least as long as the host cooldown requires.
        let delay = backoffMs(attempt);
        if (nextOk > Date.now()) delay = Math.max(delay, nextOk - Date.now());
        logger?.warn?.(`Request failed, retrying in ${delay}ms (${attempt + 1}/${maxRetries})`);
        await sleep(delay);
      } finally {
        releaseHost();
        inflight--;
        logger?.dbg?.(`REQ#${reqId} END ${tag} (${inflightStr()})`);
      }
    }
    throw new Error("unreachable");
  }

  function fetchTextWithRetry(url, tag, ua, opts) {
    return fetchWithRetry(url, tag, ua, { mode: "text", ...(opts || {}) });
  }

  function fetchJsonWithRetry(url, tag, ua, opts) {
    return fetchWithRetry(url, tag, ua, { mode: "json", ...(opts || {}) });
  }

  return { fetchTextWithRetry, fetchJsonWithRetry, inflightStr };
}
// Public API of the HTTP module.
module.exports = { createHttpClient, RetryableError };

View file

@ -4,55 +4,55 @@ const { C, color } = require("../utils/ansi");
const { ts } = require("../utils/time"); const { ts } = require("../utils/time");
/**
 * Create a console logger with optional ANSI colour.
 *
 * Colour is enabled only when requested AND stdout is a TTY.
 *
 * @param {object} [opts]
 * @param {boolean} [opts.debug=false] - When false, `info` and `dbg` print nothing.
 * @param {boolean} [opts.colorize=true] - Request coloured prefixes.
 * @returns {object} Logger with ok/warn/err/info/dbg, the dim/bold/color
 *   helpers, and the resolved `debug`, `isTTY`, `colorize` flags plus `C`.
 */
function createLogger({ debug = false, colorize: wantColor = true } = {}) {
  const isTTY = Boolean(process.stdout && process.stdout.isTTY);
  const enabled = Boolean(wantColor && isTTY);

  function ok(msg) {
    console.log(color(`[OK ${ts()}] `, C.green, enabled) + String(msg));
  }

  function warn(msg) {
    console.log(color(`[WARN ${ts()}] `, C.yellow, enabled) + String(msg));
  }

  // Errors are the only level written to stderr.
  function err(msg) {
    console.error(color(`[ERR ${ts()}] `, C.red, enabled) + String(msg));
  }

  function info(msg) {
    if (debug) console.log(color(`[INFO ${ts()}] `, C.cyan, enabled) + String(msg));
  }

  function dbg(msg) {
    if (debug) console.log(color(`[DEBUG ${ts()}] `, C.gray, enabled) + String(msg));
  }

  function dim(s) {
    return color(s, C.dim, enabled);
  }

  function bold(s) {
    return color(s, C.bold, enabled);
  }

  function paint(s, code) {
    return color(s, code, enabled);
  }

  return {
    debug,
    isTTY,
    colorize: enabled,
    C,
    ok,
    warn,
    err,
    info,
    dbg,
    dim,
    bold,
    color: paint,
  };
}
// Public API of the logger module.
module.exports = { createLogger };

View file

@ -16,189 +16,169 @@ const { runAllStores } = require("./tracker/run_all");
const { renderFinalReport } = require("./tracker/report"); const { renderFinalReport } = require("./tracker/report");
const { ensureDir } = require("./tracker/db"); const { ensureDir } = require("./tracker/db");
// User-Agent string used when a store does not supply its own.
const DEFAULT_UA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0 Safari/537.36";
/**
 * Resolve a directory option to an absolute path.
 *
 * @param {string|undefined|null} p - User-supplied path (may be empty).
 * @param {string} fallback - Returned as-is when `p` is blank.
 * @returns {string} `p` if absolute, otherwise `p` joined onto the current working directory.
 */
function resolveDir(p, fallback) {
  const v = String(p || "").trim();
  if (!v) return fallback;
  return path.isAbsolute(v) ? v : path.join(process.cwd(), v);
}
/**
 * Read the value of a command-line flag.
 *
 * Supports:
 *   --stores a,b
 *   --stores=a,b
 * The space-separated form is checked first.
 *
 * @param {string[]} argv - Arguments (without node/script).
 * @param {string} flag - Flag name including dashes, e.g. "--stores".
 * @returns {string} The value, or "" when the flag is absent or has no value.
 */
function getFlagValue(argv, flag) {
  const idx = argv.indexOf(flag);
  if (idx >= 0) return argv[idx + 1] || "";

  const pref = `${flag}=`;
  for (const a of argv) {
    if (a.startsWith(pref)) return a.slice(pref.length);
  }
  return "";
}
/**
 * Normalize a token for loose comparison: lowercase it and strip everything
 * except ASCII letters and digits.
 *
 * @param {*} s - Any value; nullish/falsy becomes "".
 * @returns {string}
 */
function normToken(s) {
  return String(s || "")
    .toLowerCase()
    .trim()
    .replace(/[^a-z0-9]+/g, "");
}
/**
 * Split a comma-separated stores filter into trimmed, non-empty tokens.
 *
 * @param {*} raw - Raw filter string (nullish allowed).
 * @returns {string[]} Tokens in input order; [] when the filter is blank.
 */
function parseStoresFilter(raw) {
  const v = String(raw || "").trim();
  if (!v) return [];
  return v
    .split(",")
    .map((x) => x.trim())
    .filter(Boolean);
}
/**
 * Restrict `stores` to those named in a comma-separated filter.
 *
 * Each requested token is compared, after normalization, against every
 * store's key, name, and host. Tokens that normalize to an empty string are
 * ignored.
 *
 * @param {Array<{key: string, name?: string, host?: string}>} stores - All known stores.
 * @param {string} wantedListRaw - Raw filter, e.g. "sierra, bcl".
 * @returns {Array} `stores` itself when the filter is blank; otherwise the
 *   matched stores in request order, de-duplicated by key.
 * @throws {Error} When any requested token matches no store.
 */
function filterStoresOrThrow(stores, wantedListRaw) {
  const wanted = parseStoresFilter(wantedListRaw);
  if (!wanted.length) return stores;

  const matched = [];
  const missing = [];
  for (const w of wanted) {
    // Normalize per token so the raw token reported in the error message
    // always corresponds to the normalized value that failed to match.
    const wn = normToken(w);
    if (!wn) continue;

    // match against key/name/host (normalized)
    const hit = stores.find((s) => {
      const candidates = [s.key, s.name, s.host].map(normToken).filter(Boolean);
      return candidates.includes(wn);
    });
    if (hit) matched.push(hit);
    else missing.push(w);
  }

  if (missing.length) {
    const avail = stores.map((s) => `${s.key}${s.name ? ` (${s.name})` : ""}`).join(", ");
    throw new Error(`Unknown store(s) in --stores: ${missing.join(", ")}\nAvailable: ${avail}`);
  }

  // de-dupe by key (in case name+key both matched)
  const uniq = [];
  const seen = new Set();
  for (const s of matched) {
    if (seen.has(s.key)) continue;
    seen.add(s.key);
    uniq.push(s);
  }
  return uniq;
}
/**
 * Program entry: build config from CLI args and environment, run all selected
 * stores, print the final report, and save a plain-text copy when something
 * changed.
 *
 * Sets process.exitCode to 3 when the run produced no meaningful changes.
 *
 * @returns {Promise<void>}
 * @throws {Error} When global fetch() is unavailable or the --stores filter names an unknown store.
 */
async function main() {
  if (typeof fetch !== "function") {
    throw new Error("Global fetch() not found. Please use Node.js 18+ (or newer). ");
  }

  const argv = process.argv.slice(2);
  const args = parseArgs(argv);
  const logger = createLogger({ debug: args.debug, colorize: true });

  // CLI args win over environment variables; env values are clamped to sane ranges.
  const config = {
    debug: args.debug,
    maxPages: args.maxPages,
    concurrency: args.concurrency ?? clampInt(process.env.CONCURRENCY, 6, 1, 64),
    staggerMs: args.staggerMs ?? clampInt(process.env.STAGGER_MS, 150, 0, 5000),
    maxRetries: clampInt(process.env.MAX_RETRIES, 6, 0, 20),
    timeoutMs: clampInt(process.env.TIMEOUT_MS, 25000, 1000, 120000),
    discoveryGuess: args.guess ?? clampInt(process.env.DISCOVERY_GUESS, 20, 1, 5000),
    discoveryStep: args.step ?? clampInt(process.env.DISCOVERY_STEP, 5, 1, 500),
    categoryConcurrency: clampInt(process.env.CATEGORY_CONCURRENCY, 5, 1, 64),
    defaultUa: DEFAULT_UA,
    defaultParseProducts: parseProductsSierra,
    dbDir: resolveDir(args.dataDir ?? process.env.DATA_DIR, path.join(process.cwd(), "data", "db")),
    reportDir: resolveDir(args.reportDir ?? process.env.REPORT_DIR, path.join(process.cwd(), "reports")),
  };

  ensureDir(config.dbDir);
  ensureDir(config.reportDir);

  const http = createHttpClient({
    maxRetries: config.maxRetries,
    timeoutMs: config.timeoutMs,
    defaultUa: config.defaultUa,
    logger,
  });

  const stores = createStores({ defaultUa: config.defaultUa });

  // --stores on the command line takes precedence over the STORES env var.
  const storesFilterRaw = getFlagValue(argv, "--stores") || String(process.env.STORES || "").trim();
  const storesToRun = filterStoresOrThrow(stores, storesFilterRaw);
  if (storesFilterRaw) {
    logger.info(`Stores filter: ${storesToRun.map((s) => s.key).join(", ")}`);
  }

  const report = await runAllStores(storesToRun, { config, logger, http });

  // Any non-zero change counter makes the run worth persisting.
  const meaningful =
    (report?.totals?.newCount || 0) +
      (report?.totals?.updatedCount || 0) +
      (report?.totals?.removedCount || 0) +
      (report?.totals?.restoredCount || 0) +
      (report?.totals?.metaChangedCount || 0) >
    0;

  const reportTextColor = renderFinalReport(report, {
    dbDir: config.dbDir,
    colorize: logger.colorize,
  });
  process.stdout.write(reportTextColor);

  if (!meaningful) {
    logger.ok("No meaningful changes; skipping report write.");
    process.exitCode = 3; // special "no-op" code
    return;
  }

  // The saved copy is rendered again without colour.
  const reportTextPlain = renderFinalReport(report, {
    dbDir: config.dbDir,
    colorize: false,
  });
  const file = path.join(config.reportDir, `${isoTimestampFileSafe(new Date())}.txt`);
  try {
    fs.writeFileSync(file, reportTextPlain, "utf8");
    logger.ok(`Report saved: ${logger.dim(file)}`);
  } catch (e) {
    // A failed save is reported but does not fail the run.
    logger.warn(`Report save failed: ${e?.message || e}`);
  }
}
module.exports = { main };

// Run immediately when this file is executed directly rather than required.
if (require.main === module) {
  main().catch((e) => {
    const msg = e && e.stack ? e.stack : String(e);
    // no logger here; keep simple
    console.error(msg);
    process.exitCode = 1;
  });
}

View file

@ -11,368 +11,374 @@ const { buildDbObject, writeJsonAtomic } = require("../tracker/db");
const { addCategoryResultToReport } = require("../tracker/report"); const { addCategoryResultToReport } = require("../tracker/report");
/**
 * Format a byte count for column output.
 *
 * @param {number} bytes - Byte count; falsy values are shown as 0.
 * @returns {string} humanBytes() output left-padded with spaces to at least 8 characters.
 */
function kbStr(bytes) {
  return humanBytes(bytes || 0).padStart(8, " ");
}
/**
 * Format a duration in milliseconds as seconds for column output.
 *
 * Shows one decimal below 10 s and whole seconds from 10 s up.
 *
 * @param {number} ms - Duration in milliseconds; non-finite values are shown as 0.
 * @returns {string} Text such as "1.2s" or "12s", left-padded to at least 7 characters.
 */
function secStr(ms) {
  const s = Number.isFinite(ms) ? ms / 1000 : 0;
  const tenths = Math.round(s * 10) / 10;
  const out = tenths < 10 ? `${tenths.toFixed(1)}s` : `${Math.round(s)}s`;
  return out.padStart(7, " ");
}
/**
 * Format "i/total", padding `i` to the width of `total`.
 *
 * @param {number} i - Current page number.
 * @param {number} total - Total page count.
 * @returns {string}
 */
function pageStr(i, total) {
  const leftW = String(total).length;
  return `${padLeft(i, leftW)}/${total}`;
}
/**
 * Format done/total as a whole-number percentage padded to width 3.
 *
 * @param {number} done - Completed count.
 * @param {number} total - Total count; a falsy total yields 0%.
 * @returns {string} The floored percentage followed by "%".
 */
function pctStr(done, total) {
  const pct = total ? Math.floor((done / total) * 100) : 0;
  return `${padLeft(pct, 3)}%`;
}
/**
 * Parse a loosely formatted number (e.g. "$1,299.50").
 *
 * Every character other than digits and "." is removed before parsing, so
 * signs and currency symbols are dropped.
 *
 * @param {*} v - Value to parse.
 * @returns {number} The parsed number, or NaN when blank or unparseable.
 */
function toNum(v) {
  const s = String(v ?? "").trim();
  if (!s) return NaN;
  const n = Number(s.replace(/[^0-9.]/g, ""));
  return Number.isFinite(n) ? n : NaN;
}
/**
 * Format a value as a dollar amount with two decimals.
 *
 * @param {*} v - Value parsed with toNum().
 * @returns {string} e.g. "$12.00", or "" when the value is not a positive number.
 */
function money(v) {
  const n = toNum(v);
  if (!Number.isFinite(n) || n <= 0) return "";
  return `$${n.toFixed(2)}`;
}
/**
 * Choose the price to display for a product.
 *
 * Prefer sale when it looks real (is_sale OR sale < regular), otherwise net,
 * otherwise regular.
 *
 * @param {object} p - Product with optional regular_price, sale_price, net_price, is_sale.
 * @returns {string} Formatted price, or "" when no positive price is available.
 */
function pickBestPrice(p) {
  const reg = toNum(p?.regular_price);
  const sale = toNum(p?.sale_price);
  const net = toNum(p?.net_price);

  if (Number.isFinite(sale) && sale > 0) {
    if (p?.is_sale === true) return money(sale);
    if (Number.isFinite(reg) && reg > 0 && sale < reg) return money(sale);
    // Some feeds put the current price in sale_price even without flags:
    if (!Number.isFinite(net) || net <= 0 || sale <= net) return money(sale);
  }

  if (Number.isFinite(net) && net > 0) return money(net);
  if (Number.isFinite(reg) && reg > 0) return money(reg);
  return "";
}
/**
 * Turn a possibly relative URL into an absolute one.
 *
 * @param {*} raw - URL as found in the feed.
 * @param {string} base - Base URL used to resolve relative paths.
 * @returns {string} Absolute URL; "" when `raw` is blank; the trimmed input
 *   unchanged when it cannot be resolved against `base`.
 */
function normAbsUrl(raw, base) {
  const s = String(raw || "").trim();
  if (!s) return "";
  // Protocol-relative URLs are forced to https.
  if (s.startsWith("//")) return `https:${s}`;
  if (/^https?:\/\//i.test(s)) return s;
  try {
    // Leading slashes are stripped, so the path resolves relative to `base`.
    return new URL(s.replace(/^\/+/, ""), base).toString();
  } catch {
    return s;
  }
}
/**
 * Decide whether a product counts as in stock.
 *
 * Keep this strict: user asked "only show in stock items".
 * available_for_sale is the strongest signal; on_hand is a good secondary signal.
 *
 * @param {object} p - Product with optional available_for_sale and on_hand.
 * @returns {boolean}
 */
function isInStock(p) {
  if (p && p.available_for_sale === false) return false;

  // Number(null) and Number("") are 0, so check explicitly for a missing
  // on_hand; otherwise an absent quantity would be read as "zero in stock".
  const raw = p?.on_hand;
  const hasOnHand = raw != null && String(raw).trim() !== "";
  const onHand = hasOnHand ? Number(raw) : NaN;
  if (Number.isFinite(onHand)) return onHand > 0;

  // If on_hand is missing, fall back to available_for_sale truthiness.
  return Boolean(p?.available_for_sale);
}
/**
 * Map an ARC image reference onto the public Barnet CDN.
 *
 * Accepts already-public CDN URLs, absolute or protocol-relative URLs on any
 * host (the origin is dropped and the path is re-rooted on the CDN), and
 * site-relative paths such as "custom/all/BC398280.png" or
 * "bc_lrs/000046/0000466854.jpg".
 *
 * @param {*} raw - Image value from the product record.
 * @returns {string} Absolute CDN URL, or "" when there is nothing usable
 *   (empty input, bare origin, or a data: URI).
 */
function arcNormalizeImg(raw) {
  const cdnRoot = "https://s.barnetnetwork.com/img/m/";
  const s = String(raw || "").trim();
  if (!s) return "";

  // Already public.
  if (/^https?:\/\/s\.barnetnetwork\.com\/img\/m\//i.test(s)) return s;
  // Protocol-relative form of the public CDN.
  if (/^\/\/s\.barnetnetwork\.com\/img\/m\//i.test(s)) return `https:${s}`;

  // Drop any origin (with or without scheme), then leading slashes.
  const rel = s.replace(/^(?:https?:)?\/\/[^/]+/i, "").replace(/^\/+/, "");
  if (!rel || /^data:/i.test(rel)) return "";

  // Any remaining path is tried against the CDN.
  return `${cdnRoot}${rel}`;
}
/**
 * Convert one ARC API product record into the tracker's item shape.
 *
 * @param {object|null|undefined} p - Raw product from the API.
 * @param {object} ctx - Scan context; reads `ctx.store.host` and `ctx.store.name`.
 * @returns {{name: string, price: string, url: string, sku: string, img: string}|null}
 *   The tracked item, or null when the product is missing, out of stock,
 *   or has no usable URL or name.
 */
function arcItemToTracked(p, ctx) {
  if (!p) return null;
  if (!isInStock(p)) return null;

  const url = normAbsUrl(p.url, `https://${ctx.store.host}/`);
  if (!url) return null;

  const name = cleanText(p.description || p.name || "");
  if (!name) return null;

  const price = pickBestPrice(p);

  // Prefer the CSPC id when it is a plain digit string; otherwise the numeric id.
  const rawCspcId = String(p.cspcid ?? "").trim();
  const hasCspcId = /^\d{1,11}$/.test(rawCspcId);

  // Number(null) / Number("") are 0, so require a real value before trusting
  // the coercion; otherwise every id-less product would share the key "id:0".
  const rawId = p.id;
  const id = Number(rawId);
  const hasId = rawId != null && String(rawId).trim() !== "" && Number.isFinite(id);

  let rawSku = "";
  if (hasCspcId) rawSku = `id:${rawCspcId}`;
  else if (hasId) rawSku = `id:${id}`;

  const sku = normalizeSkuKey(rawSku, { storeLabel: ctx?.store?.name, url }) || rawSku || "";
  const img = arcNormalizeImg(p.image || p.image_url || p.img || "");

  return { name, price, url, sku, img };
}
/**
 * Read the `category` and `sub_category` query parameters from a category start URL.
 *
 * @param {string} startUrl - Absolute category page URL.
 * @returns {{category: string, sub: string}} Both fields are "" when the
 *   parameter is absent or the URL cannot be parsed.
 */
function parseCategoryParamsFromStartUrl(startUrl) {
  try {
    const { searchParams } = new URL(startUrl);
    return {
      category: searchParams.get("category") || "",
      sub: searchParams.get("sub_category") || "",
    };
  } catch {
    // Not a parseable absolute URL.
    return { category: "", sub: "" };
  }
}
/**
 * Guard against a partial scan wiping most of the DB.
 *
 * When this run discovered fewer than 60% of the previously known URLs, the
 * prior non-removed items that were not seen are copied into `discovered`
 * (mutating it) so the subsequent merge does not mark them removed.
 *
 * @param {{byUrl: Map<string, object>}|null|undefined} prevDb - Previous DB snapshot.
 * @param {Map<string, object>} discovered - Items found this run, keyed by URL; mutated.
 * @param {object} ctx - Reads `ctx.logger.warn` (optional) and `ctx.catPrefixOut`.
 * @param {string} reason - Free-text context appended to the warning.
 * @returns {boolean} true when prior items were preserved, false when no
 *   action was taken (either side empty, or the ratio is at least 0.6).
 */
function avoidMassRemoval(prevDb, discovered, ctx, reason) {
  const prevSize = prevDb?.byUrl?.size || 0;
  const discSize = discovered?.size || 0;
  // Nothing to compare against, or nothing discovered at all: leave as is.
  if (prevSize <= 0 || discSize <= 0) return false;

  const ratio = discSize / Math.max(1, prevSize);
  if (ratio >= 0.6) return false;

  ctx.logger.warn?.(
    `${ctx.catPrefixOut} | ARC partial scan (${discSize}/${prevSize}); preserving DB to avoid removals (${reason}).`,
  );

  // Preserve prior active items not seen this run.
  for (const [url, item] of prevDb.byUrl.entries()) {
    if (!item || item.removed) continue;
    if (!discovered.has(url)) discovered.set(url, item);
  }
  return true;
}
/**
 * Build the per-page progress line for the ARC API scan.
 *
 * @param {object} ctx - Reads `ctx.catPrefixOut` and `ctx.http.inflightStr()`.
 * @param {object} r - Fetch result; reads `status`, `bytes`, `ms`.
 * @param {number} donePages - Pages completed so far (total is unknown up front).
 * @param {number} rawCount - Items returned by the API for this page.
 * @param {number} kept - Items kept after filtering.
 * @returns {string}
 */
function arcPageLogLine(ctx, r, donePages, rawCount, kept) {
  const status = (r?.status || "").toString().padEnd(3);
  return (
    `${ctx.catPrefixOut} | API Page ${pageStr(donePages, donePages)} | ${status}` +
    ` | raw=${padLeft(rawCount, 3)} kept=${padLeft(kept, 3)} | bytes=${kbStr(r?.bytes)}` +
    ` | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r?.ms)}`
  );
}

/**
 * Scan one ARC category through the shop's JSON products API, merge the
 * result into the previous DB, write the DB file and record the outcome in
 * `report`.
 *
 * Pagination stops on: a fetch error (scan marked aborted), an empty page,
 * a page whose id/url fingerprint was already seen, a page shorter than the
 * first non-empty page, or two consecutive pages that add no new URLs.
 *
 * @param {object} ctx - Scan context (store, cat, config, http, logger, dbFile, catPrefixOut).
 * @param {object} prevDb - Previous DB snapshot for this category.
 * @param {object} report - Run report; `categories` and `totals` are updated in place.
 * @returns {Promise<void>}
 */
async function scanCategoryArcApi(ctx, prevDb, report) {
  const t0 = Date.now();

  // Warm cookies / session (Barnet-based shops sometimes need this).
  try {
    await ctx.http.fetchTextWithRetry(ctx.cat.startUrl, `arc:warm:${ctx.cat.key}`, ctx.store.ua);
  } catch (_) {
    // Best effort only: the API calls below surface any real connectivity problem.
  }

  const { category: urlCat, sub: urlSub } = parseCategoryParamsFromStartUrl(ctx.cat.startUrl);
  const category = String(ctx.cat.arcCategory || urlCat || "Spirits").trim();
  const subCategory = String(ctx.cat.arcSubCategory || urlSub || "").trim();

  if (!subCategory) {
    ctx.logger.warn(`${ctx.catPrefixOut} | ARC missing sub_category; skipping scan.`);
    return;
  }

  const apiBase = new URL(`https://${ctx.store.host}/api/shop/${ctx.store.shopId}/products`);
  const discovered = new Map();

  // `== null` so an unset (undefined) maxPages means "no cap" too; with a
  // strict null check it would turn the cap into NaN and scan zero pages.
  const maxPagesCap = ctx.config.maxPages == null ? 5000 : ctx.config.maxPages;
  const hardCap = Math.min(5000, Math.max(1, maxPagesCap));

  let donePages = 0;
  let aborted = false;

  // Pagination safety
  let pageSize = 0; // inferred from first non-empty page
  const seenPageFingerprints = new Set();
  let stagnantPages = 0;

  for (let page = 1; page <= hardCap; page++) {
    const u = new URL(apiBase.toString());
    u.searchParams.set("p", String(page));
    u.searchParams.set("show_on_web", "true");
    u.searchParams.set("sort_by", String(ctx.cat.sortBy || "price_desc"));
    u.searchParams.set("category", category);
    u.searchParams.set("sub_category", subCategory);
    u.searchParams.set("varital_name", "");
    u.searchParams.set("no_item_found", "No item found.");
    u.searchParams.set("avail_for_sale", "false");
    u.searchParams.set("_dc", String(Date.now())); // cache buster

    let r;
    try {
      r = await ctx.http.fetchJsonWithRetry(u.toString(), `arc:api:${ctx.cat.key}:p${page}`, ctx.store.ua, {
        method: "GET",
        headers: {
          Accept: "application/json, */*",
          "X-Requested-With": "XMLHttpRequest",
          Referer: ctx.cat.startUrl,
        },
      });
    } catch (e) {
      ctx.logger.warn(`${ctx.catPrefixOut} | ARC API page ${page} failed: ${e?.message || e}`);
      aborted = true;
      break;
    }

    const arr = Array.isArray(r?.json?.items) ? r.json.items : [];
    donePages++;

    const rawCount = arr.length;

    // Log early (even for empty pages); kept is not known yet.
    ctx.logger.ok(arcPageLogLine(ctx, r, donePages, rawCount, 0));

    if (!rawCount) break;

    // Infer page size from first non-empty page.
    if (!pageSize) pageSize = rawCount;

    // Detect wrap/repeat: fingerprint by ids+urls (stable enough).
    const fp = arr
      .map((p) => `${p?.id || ""}:${p?.url || ""}`)
      .sort()
      .join("|");
    if (fp && seenPageFingerprints.has(fp)) {
      ctx.logger.warn(`${ctx.catPrefixOut} | ARC pagination repeated at p=${page}; stopping.`);
      break;
    }
    if (fp) seenPageFingerprints.add(fp);

    const before = discovered.size;

    let kept = 0;
    for (const p of arr) {
      const it = arcItemToTracked(p, ctx);
      if (!it) continue;
      discovered.set(it.url, it);
      kept++;
    }

    // Re-log with kept filled in (overwriting the earlier line is not possible).
    ctx.logger.ok(arcPageLogLine(ctx, r, donePages, rawCount, kept));

    // Stop condition #1: last page (short page).
    if (pageSize && rawCount < pageSize) break;

    // Stop condition #2: no new uniques for 2 pages (safety).
    if (discovered.size === before) stagnantPages++;
    else stagnantPages = 0;

    if (stagnantPages >= 2) {
      ctx.logger.warn(`${ctx.catPrefixOut} | ARC pagination stalled (no new items); stopping.`);
      break;
    }
  }

  if (aborted) {
    // NOTE(review): avoidMassRemoval does nothing when zero items were
    // discovered, so a failure on the very first page still merges an empty
    // set. Confirm that is the intended behavior.
    avoidMassRemoval(prevDb, discovered, ctx, `api pages=${donePages} sub=${subCategory}`);
  }

  ctx.logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}`);

  const { merged, newItems, updatedItems, removedItems, restoredItems, metaChangedItems } = mergeDiscoveredIntoDb(
    prevDb,
    discovered,
    { storeLabel: ctx.store.name },
  );

  const dbObj = buildDbObject(ctx, merged);
  writeJsonAtomic(ctx.dbFile, dbObj);

  ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`);

  const elapsedMs = Date.now() - t0;
  ctx.logger.ok(
    `${ctx.catPrefixOut} | Done in ${secStr(elapsedMs)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Meta=${metaChangedItems.length} Total(DB)=${merged.size}`,
  );

  report.categories.push({
    store: ctx.store.name,
    label: ctx.cat.label,
    key: ctx.cat.key,
    dbFile: ctx.dbFile,
    scannedPages: Math.max(1, donePages),
    discoveredUnique: discovered.size,
    newCount: newItems.length,
    updatedCount: updatedItems.length,
    removedCount: removedItems.length,
    restoredCount: restoredItems.length,
    metaChangedCount: metaChangedItems.length,
    elapsedMs,
  });

  report.totals.newCount += newItems.length;
  report.totals.updatedCount += updatedItems.length;
  report.totals.removedCount += removedItems.length;
  report.totals.restoredCount += restoredItems.length;
  report.totals.metaChangedCount += metaChangedItems.length;

  addCategoryResultToReport(
    report,
    ctx.store.name,
    ctx.cat.label,
    newItems,
    updatedItems,
    removedItems,
    restoredItems,
  );
}
/**
 * Build the ARC Liquor store definition used by the tracker.
 *
 * Each category is a Spirits sub-category scanned through the shop's JSON
 * API by `scanCategoryArcApi`.
 *
 * @param {string} defaultUa - User-Agent string to send with requests.
 * @returns {object} Store descriptor (key, name, host, shopId, ua, scanCategory, categories).
 */
function createStore(defaultUa) {
  const host = "kelownaharveyave.armstrong.coop";
  const arcCategory = "Spirits";

  // One entry per tracked sub-category; key/label/startUrl are derived from it.
  const categories = ["Rum", "Scotch", "Whiskey"].map((sub) => ({
    key: `spirits-${sub.toLowerCase()}`,
    label: `${arcCategory} - ${sub}`,
    startUrl: `https://${host}/products?category=${arcCategory}&sub_category=${sub}`,
    arcCategory,
    arcSubCategory: sub,
    sortBy: "price_desc",
  }));

  return {
    key: "arc",
    name: "ARC Liquor",
    host,
    shopId: "644-290",
    ua: defaultUa,
    scanCategory: scanCategoryArcApi,
    categories,
  };
}
module.exports = { createStore }; module.exports = { createStore };

View file

@ -9,332 +9,360 @@ const { buildDbObject, writeJsonAtomic } = require("../tracker/db");
const { addCategoryResultToReport } = require("../tracker/report"); const { addCategoryResultToReport } = require("../tracker/report");
/**
 * Format a byte count for log columns: `humanBytes` output, right-aligned to 8 characters.
 *
 * @param {number} bytes
 * @returns {string}
 */
function kbStr(bytes) {
  return humanBytes(bytes).padStart(8, " ");
}
/**
 * Format a duration in milliseconds as seconds, right-aligned to 7 characters.
 *
 * Below 10s one decimal is shown ("1.2s"); from 10s up, whole seconds ("13s").
 * Non-finite input is treated as 0.
 *
 * @param {number} ms
 * @returns {string}
 */
function secStr(ms) {
  const seconds = Number.isFinite(ms) ? ms / 1000 : 0;
  const tenths = Math.round(seconds * 10) / 10;
  const text = tenths < 10 ? `${tenths.toFixed(1)}s` : `${Math.round(seconds)}s`;
  return text.padStart(7, " ");
}
/**
 * Format "i/total" with `i` left-padded to the width of `total`.
 *
 * @param {number} i - Current page (1-based).
 * @param {number} total - Total pages.
 * @returns {string}
 */
function pageStr(i, total) {
  const width = String(total).length;
  return `${padLeft(i, width)}/${total}`;
}
/**
 * Format completion as a whole-number percentage, padded to 3 digits plus "%".
 *
 * @param {number} done
 * @param {number} total - When 0 (or falsy) the result is 0%.
 * @returns {string}
 */
function pctStr(done, total) {
  const pct = total ? Math.floor((done / total) * 100) : 0;
  return `${padLeft(pct, 3)}%`;
}
/**
 * Format a numeric amount as a dollar string with two decimals ("$19.50").
 *
 * @param {*} n - Number or numeric string.
 * @returns {string} "" when there is no value or it is not a finite number.
 */
function cad(n) {
  // Number(null) and Number("") are 0; treat "no value" as no price, not "$0.00".
  if (n == null || (typeof n === "string" && n.trim() === "")) return "";
  const amount = Number(n);
  if (!Number.isFinite(amount)) return "";
  return `$${amount.toFixed(2)}`;
}
/**
 * Parse a price-like value into a number.
 *
 * Numbers pass through unchanged. Strings have everything except digits and
 * "." stripped first (so "$1,234.50" becomes 1234.5).
 *
 * @param {*} n
 * @returns {number} The parsed value, or NaN when nothing numeric is present.
 */
function asNumber(n) {
  if (n == null) return NaN;
  if (typeof n === "number") return n;
  const digits = String(n).replace(/[^0-9.]/g, "");
  // Number("") is 0, so text with no digits at all ("N/A") must not parse as a price of 0.
  if (!digits) return NaN;
  return Number(digits);
}
/**
 * Extract the total hit count from a BCL browse response.
 *
 * Supports both `hits.total` as a number and the ES-style `{ value: number }` object.
 *
 * @param {object|null|undefined} json - Parsed response body.
 * @returns {number} The total, or 0 when it is absent or not numeric.
 */
function bclTotalHits(json) {
  const total = json?.hits?.total;
  if (typeof total === "number") return total;
  if (total && typeof total.value === "number") return total.value; // ES-style
  return 0;
}
/**
 * Decide whether a BCL product source record is in stock.
 *
 * The availability/status text fields are checked in order; the first one
 * that says "out of stock", "in stock" or "available" decides. If none does,
 * a numeric `availableUnits` decides. With no signal at all the product is
 * assumed to be in stock.
 *
 * @param {object|null|undefined} src - `_source` of a search hit.
 * @returns {boolean}
 */
function bclIsInStock(src) {
  const statusTexts = [
    src?.availability_override,
    src?.availability,
    src?.availabilityText,
    src?.availabilityStatus,
    src?.availability_status,
    src?.stockStatus,
    src?.stock_status,
    src?.status,
    src?.statusText,
  ]
    .map((v) => (v == null ? "" : String(v)))
    .filter(Boolean);

  for (const text of statusTexts) {
    if (/out of stock/i.test(text)) return false;
    if (/\bin stock\b/i.test(text)) return true;
    if (/\bavailable\b/i.test(text)) return true; // e.g. "Available Feb 07"
  }

  const units = Number(src?.availableUnits);
  if (Number.isFinite(units)) return units > 0;

  return true;
}
/**
 * Make a BCL URL absolute, resolving relative paths against www.bcliquorstores.com.
 *
 * @param {*} raw - URL-ish value (may be empty, relative, or protocol-relative).
 * @returns {string} "" for empty input, the absolute URL when it can be built,
 *   otherwise the trimmed input unchanged.
 */
function bclNormalizeAbsUrl(raw) {
  const s = String(raw || "").trim();
  if (!s) return "";
  // Protocol-relative ("//host/path"): pin to https.
  if (s.startsWith("//")) return `https:${s}`;
  if (/^https?:\/\//i.test(s)) return s;
  try {
    return new URL(s, "https://www.bcliquorstores.com/").toString();
  } catch {
    // Unparseable: hand back the input rather than throwing.
    return s;
  }
}
/**
 * Pick the first usable image URL from a BCL source record.
 *
 * Single-value fields are tried first, then the first element of each
 * list-valued field (a string, or an object with `src` / `url`).
 *
 * @param {object|null|undefined} src - `_source` of a search hit.
 * @returns {string} Absolute image URL, or "" when none is found.
 */
function bclPickImage(src) {
  const singles = [
    src?.imageUrl,
    src?.imageURL,
    src?.image,
    src?.thumbnail,
    src?.thumbnailUrl,
    src?.thumbnailURL,
    src?.primaryImage,
    src?.primaryImageUrl,
  ];
  for (const candidate of singles) {
    if (typeof candidate === "string" && candidate.trim()) return bclNormalizeAbsUrl(candidate);
  }

  const lists = [src?.images, src?.imageUrls, src?.image_urls];
  for (const list of lists) {
    if (!Array.isArray(list) || !list.length) continue;
    // Only the first entry of each list is considered.
    const first = list[0];
    if (typeof first === "string" && first.trim()) return bclNormalizeAbsUrl(first);
    if (first && typeof first === "object") {
      const nested = String(first.src || first.url || "").trim();
      if (nested) return bclNormalizeAbsUrl(nested);
    }
  }

  return "";
}
/**
 * Convert one BCL search hit into the tracker's item shape.
 *
 * @param {object|null|undefined} hit - Search hit; reads `hit._source`.
 * @returns {{name: string, price: string, url: string, sku: string, img: string}|null}
 *   The tracked item, or null when the hit has no source, no sku, no name,
 *   or is out of stock.
 */
function bclHitToItem(hit) {
  const src = hit?._source || null;
  if (!src) return null;

  const skuRaw = src.sku != null ? String(src.sku).trim() : "";
  if (!skuRaw) return null;

  const name = String(src.name || "").trim();
  if (!name) return null;

  if (!bclIsInStock(src)) return null;

  // Product page URL is keyed by SKU.
  const url = `https://www.bcliquorstores.com/product/${encodeURIComponent(skuRaw)}`;

  // Sale support: use currentPrice when present; otherwise regularPrice.
  const current = asNumber(src.currentPrice);
  const regular = asNumber(src.regularPrice);
  const price = cad(Number.isFinite(current) ? current : regular);

  // SKU key: keep whatever normalizeCspc accepts; anything else (short
  // numeric product ids like 141, or non-numeric skus) becomes the explicit
  // soft key "id:<sku>".
  const sku = normalizeCspc(skuRaw) || `id:${skuRaw}`;

  // BCL appears to serve .jpg (not .jpeg) for these imagecache URLs; use https.
  const img = `https://www.bcliquorstores.com/sites/default/files/imagecache/height400px/${encodeURIComponent(
    skuRaw,
  )}.jpg`;

  return { name, price, url, sku, img };
}
/**
 * Fetch one page of the BCL spirits browse listing as JSON.
 *
 * @param {object} ctx - Reads `ctx.cat.bclType`, `ctx.cat.key`, `ctx.store.ua`
 *   and calls `ctx.http.fetchJsonWithRetry`.
 * @param {number} page1 - 1-based page number.
 * @param {number} size - Page size.
 * @returns {Promise<object>} Whatever `ctx.http.fetchJsonWithRetry` resolves to.
 */
async function bclFetchBrowsePage(ctx, page1, size) {
  const type = ctx.cat.bclType; // e.g. "rum" or "whisky / whiskey"
  const category = "spirits";
  const sort = "featuredProducts:desc";

  const u = new URL("https://www.bcliquorstores.com/ajax/browse");
  u.searchParams.set("category", category);
  u.searchParams.set("type", type);
  u.searchParams.set("sort", sort);
  u.searchParams.set("size", String(size));
  u.searchParams.set("page", String(page1));

  // Referer mirrors the catalogue page for the same listing.
  const refererQuery = [
    `category=${encodeURIComponent(category)}`,
    `type=${encodeURIComponent(type)}`,
    `sort=${encodeURIComponent(sort)}`,
    `page=${encodeURIComponent(String(page1))}`,
  ].join("&");
  const referer = `https://www.bcliquorstores.com/product-catalogue?${refererQuery}`;

  return await ctx.http.fetchJsonWithRetry(u.toString(), `bcl:${ctx.cat.key}:p${page1}`, ctx.store.ua, {
    method: "GET",
    headers: {
      Accept: "application/json, text/plain, */*",
      Referer: referer,
      Origin: "https://www.bcliquorstores.com",
    },
  });
}
async function scanCategoryBCLAjax(ctx, prevDb, report) { async function scanCategoryBCLAjax(ctx, prevDb, report) {
const t0 = Date.now(); const t0 = Date.now();
const size = 24; const size = 24;
let first; let first;
try { try {
first = await bclFetchBrowsePage(ctx, 1, size); first = await bclFetchBrowsePage(ctx, 1, size);
} catch (e) { } catch (e) {
ctx.logger.warn(`${ctx.catPrefixOut} | BCL browse fetch failed: ${e?.message || e}`); ctx.logger.warn(`${ctx.catPrefixOut} | BCL browse fetch failed: ${e?.message || e}`);
const discovered = new Map(); const discovered = new Map();
const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel: ctx.store.name }); const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(
const dbObj = buildDbObject(ctx, merged); prevDb,
writeJsonAtomic(ctx.dbFile, dbObj); discovered,
{ storeLabel: ctx.store.name },
);
const dbObj = buildDbObject(ctx, merged);
writeJsonAtomic(ctx.dbFile, dbObj);
const elapsed = Date.now() - t0; const elapsed = Date.now() - t0;
report.categories.push({ report.categories.push({
store: ctx.store.name, store: ctx.store.name,
label: ctx.cat.label, label: ctx.cat.label,
key: ctx.cat.key, key: ctx.cat.key,
dbFile: ctx.dbFile, dbFile: ctx.dbFile,
scannedPages: 1, scannedPages: 1,
discoveredUnique: 0, discoveredUnique: 0,
newCount: newItems.length, newCount: newItems.length,
updatedCount: updatedItems.length, updatedCount: updatedItems.length,
removedCount: removedItems.length, removedCount: removedItems.length,
restoredCount: restoredItems.length, restoredCount: restoredItems.length,
elapsedMs: elapsed, elapsedMs: elapsed,
}); });
report.totals.newCount += newItems.length; report.totals.newCount += newItems.length;
report.totals.updatedCount += updatedItems.length; report.totals.updatedCount += updatedItems.length;
report.totals.removedCount += removedItems.length; report.totals.removedCount += removedItems.length;
report.totals.restoredCount += restoredItems.length; report.totals.restoredCount += restoredItems.length;
addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems); addCategoryResultToReport(
return; report,
} ctx.store.name,
ctx.cat.label,
newItems,
updatedItems,
removedItems,
restoredItems,
);
return;
}
const total = bclTotalHits(first?.json); const total = bclTotalHits(first?.json);
const totalPages = Math.max(1, Math.ceil(total / size)); const totalPages = Math.max(1, Math.ceil(total / size));
const scanPages = ctx.config.maxPages === null ? totalPages : Math.min(ctx.config.maxPages, totalPages); const scanPages = ctx.config.maxPages === null ? totalPages : Math.min(ctx.config.maxPages, totalPages);
ctx.logger.ok(`${ctx.catPrefixOut} | Total=${total} Size=${size} Pages: ${scanPages}${scanPages !== totalPages ? ` (cap from ${totalPages})` : ""}`); ctx.logger.ok(
`${ctx.catPrefixOut} | Total=${total} Size=${size} Pages: ${scanPages}${scanPages !== totalPages ? ` (cap from ${totalPages})` : ""}`,
);
const pageNums = []; const pageNums = [];
for (let p = 1; p <= scanPages; p++) pageNums.push(p); for (let p = 1; p <= scanPages; p++) pageNums.push(p);
let donePages = 0; let donePages = 0;
const perPageItems = await require("../utils/async").parallelMapStaggered( const perPageItems = await require("../utils/async").parallelMapStaggered(
pageNums, pageNums,
ctx.config.concurrency, ctx.config.concurrency,
ctx.config.staggerMs, ctx.config.staggerMs,
async (page1, idx) => { async (page1, idx) => {
const r = page1 === 1 ? first : await bclFetchBrowsePage(ctx, page1, size); const r = page1 === 1 ? first : await bclFetchBrowsePage(ctx, page1, size);
const hits = Array.isArray(r?.json?.hits?.hits) ? r.json.hits.hits : []; const hits = Array.isArray(r?.json?.hits?.hits) ? r.json.hits.hits : [];
const items = []; const items = [];
for (const h of hits) { for (const h of hits) {
const it = bclHitToItem(h); const it = bclHitToItem(h);
if (it) items.push(it); if (it) items.push(it);
} }
donePages++; donePages++;
ctx.logger.ok( ctx.logger.ok(
`${ctx.catPrefixOut} | Page ${pageStr(idx + 1, pageNums.length)} | ${String(r.status || "").padEnd(3)} | ${pctStr(donePages, pageNums.length)} | items=${padLeft( `${ctx.catPrefixOut} | Page ${pageStr(idx + 1, pageNums.length)} | ${String(r.status || "").padEnd(3)} | ${pctStr(donePages, pageNums.length)} | items=${padLeft(
items.length, items.length,
3 3,
)} | bytes=${kbStr(r.bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}` )} | bytes=${kbStr(r.bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}`,
); );
return items; return items;
} },
); );
const discovered = new Map(); const discovered = new Map();
let dups = 0; let dups = 0;
for (const arr of perPageItems) { for (const arr of perPageItems) {
for (const it of arr) { for (const it of arr) {
if (discovered.has(it.url)) dups++; if (discovered.has(it.url)) dups++;
discovered.set(it.url, it); discovered.set(it.url, it);
} }
} }
ctx.logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}${dups ? ` (${dups} dups)` : ""}`); ctx.logger.ok(
`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}${dups ? ` (${dups} dups)` : ""}`,
);
const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel: ctx.store.name }); const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, {
storeLabel: ctx.store.name,
});
const dbObj = buildDbObject(ctx, merged); const dbObj = buildDbObject(ctx, merged);
writeJsonAtomic(ctx.dbFile, dbObj); writeJsonAtomic(ctx.dbFile, dbObj);
ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`); ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`);
const elapsed = Date.now() - t0; const elapsed = Date.now() - t0;
ctx.logger.ok( ctx.logger.ok(
`${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}` `${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`,
); );
report.categories.push({ report.categories.push({
store: ctx.store.name, store: ctx.store.name,
label: ctx.cat.label, label: ctx.cat.label,
key: ctx.cat.key, key: ctx.cat.key,
dbFile: ctx.dbFile, dbFile: ctx.dbFile,
scannedPages: scanPages, scannedPages: scanPages,
discoveredUnique: discovered.size, discoveredUnique: discovered.size,
newCount: newItems.length, newCount: newItems.length,
updatedCount: updatedItems.length, updatedCount: updatedItems.length,
removedCount: removedItems.length, removedCount: removedItems.length,
restoredCount: restoredItems.length, restoredCount: restoredItems.length,
elapsedMs: elapsed, elapsedMs: elapsed,
}); });
report.totals.newCount += newItems.length; report.totals.newCount += newItems.length;
report.totals.updatedCount += updatedItems.length; report.totals.updatedCount += updatedItems.length;
report.totals.updatedCount += updatedItems.length; report.totals.updatedCount += updatedItems.length;
report.totals.removedCount += removedItems.length; report.totals.removedCount += removedItems.length;
report.totals.restoredCount += restoredItems.length; report.totals.restoredCount += restoredItems.length;
addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems); addCategoryResultToReport(
report,
ctx.store.name,
ctx.cat.label,
newItems,
updatedItems,
removedItems,
restoredItems,
);
} }
/**
 * Build the store descriptor for BCL (www.bcliquorstores.com).
 *
 * @param {string} defaultUa - User-Agent value stored on the descriptor as `ua`.
 * @returns {object} Descriptor with `key`, `name`, `host`, `ua`, the category
 *   scanner (`scanCategory`) and the list of categories to scan.
 */
function createStore(defaultUa) {
    return {
        key: "bcl",
        name: "BCL",
        host: "www.bcliquorstores.com",
        ua: defaultUa,
        scanCategory: scanCategoryBCLAjax, // JSON-driven (async browse)
        categories: [
            {
                key: "whisky",
                label: "Whisky / Whiskey",
                // informational only; scan uses ajax/browse
                startUrl:
                    "https://www.bcliquorstores.com/product-catalogue?category=spirits&type=whisky%20/%20whiskey&sort=featuredProducts:desc&page=1",
                bclType: "whisky / whiskey",
            },
            {
                key: "rum",
                label: "Rum",
                startUrl:
                    "https://www.bcliquorstores.com/product-catalogue?category=spirits&type=rum&sort=featuredProducts:desc&page=1",
                bclType: "rum",
            },
        ],
    };
}
// Public surface of this store module: only the descriptor factory is exported.
module.exports = { createStore };

View file

@ -14,380 +14,416 @@ const BSW_ALGOLIA_API_KEY = "1aa0c19fe6a0931340570bd358c2c9d2";
// Algolia multi-index query endpoint; the host name is built from the lowercased application id.
const BSW_ALGOLIA_URL = `https://${BSW_ALGOLIA_APP_ID.toLowerCase()}-dsn.algolia.net/1/indexes/*/queries`;
/**
 * Format a number as a dollar string with en-US grouping and exactly two decimals.
 *
 * @param {number} n - Amount in dollars.
 * @returns {string} e.g. "$1,234.50"; "" when `n` is not a finite number.
 */
function usd(n) {
    if (!Number.isFinite(n)) return "";
    return `$${n.toLocaleString("en-US", { minimumFractionDigits: 2, maximumFractionDigits: 2 })}`;
}
/**
 * Find a collection id embedded in a collection page's HTML.
 *
 * The patterns are tried in order and the first one that matches wins; every
 * pattern requires an id of at least six digits.
 *
 * @param {string} html - Raw page HTML (nullish is treated as "").
 * @returns {number|null} The parsed id, or null when no pattern matches.
 */
function bswExtractCollectionIdFromHtml(html) {
    const s = String(html || "");
    const patterns = [
        /collection_ids%3A(\d{6,})/i,
        /collection_ids\s*:\s*(\d{6,})/i,
        /"collection_ids"\s*:\s*(\d{6,})/i,
        /"collection_id"\s*:\s*(\d{6,})/i,
        /collection_id\s*=\s*(\d{6,})/i,
        /collectionId["']?\s*[:=]\s*["']?(\d{6,})/i,
        /data-collection-id=["'](\d{6,})["']/i,
    ];
    for (const re of patterns) {
        const m = s.match(re);
        if (m && m[1]) return Number.parseInt(m[1], 10);
    }
    return null;
}
/**
 * Turn a raw price value from a search hit into a display string.
 *
 * Strings that already contain "$" are returned with whitespace removed.
 * Other strings are stripped to digits and dots and formatted with `usd`;
 * if that does not yield a number, the trimmed text is returned unchanged.
 * Numbers are divided by 100 when `hintCents` is truthy, or when they are
 * integers >= 100000 (heuristic for values that look like cents).
 *
 * @param {string|number|null|undefined} value - Raw price.
 * @param {boolean} [hintCents] - True when `value` is known to be in cents.
 * @returns {string} Formatted price, the original text, or "".
 */
function bswFormatPrice(value, hintCents) {
    if (value === null || value === undefined) return "";

    if (typeof value === "string") {
        const t = value.trim();
        if (!t) return "";
        if (t.includes("$")) return t.replace(/\s+/g, "");

        const numeric = t.replace(/[^0-9.]/g, "");
        // Number("") is 0, so text without any digit (e.g. "N/A") must be
        // rejected here or it would be reported as "$0.00".
        if (!/\d/.test(numeric)) return t;

        const n = Number(numeric);
        if (!Number.isFinite(n)) return t;
        return usd(n);
    }

    if (typeof value === "number") {
        let n = value;
        if (hintCents) n = n / 100;
        else if (Number.isInteger(n) && n >= 100000) n = n / 100;
        return usd(n);
    }

    return "";
}
/**
 * Choose the price field to use from a search hit.
 *
 * Top-level fields are checked first, in priority order; if none is set, the
 * first entry of `hit.variants` is consulted. A field counts as set when it is
 * neither null nor undefined (so 0 is accepted).
 *
 * @param {object|null|undefined} hit - Search hit.
 * @returns {{val: *, cents: boolean}} The raw value and whether it is expressed
 *   in cents; `{ val: null, cents: false }` when nothing is found.
 */
function bswPickPrice(hit) {
    const pick = (val, cents) => ({ val, cents });

    // [field name, value is in cents] in priority order
    const hitFields = [
        ["price_cents", true],
        ["compare_at_price_cents", true],
        ["price", false],
        ["price_min", false],
        ["priceMin", false],
        ["min_price", false],
        ["variants_min_price", false],
    ];
    const variantFields = [
        ["price_cents", true],
        ["compare_at_price_cents", true],
        ["price", false],
    ];

    const firstSet = (obj, fields) => {
        for (const [key, cents] of fields) {
            if (obj[key] != null) return pick(obj[key], cents);
        }
        return null;
    };

    if (hit) {
        const fromHit = firstSet(hit, hitFields);
        if (fromHit) return fromHit;

        const variant = Array.isArray(hit.variants) ? hit.variants[0] : null;
        if (variant) {
            const fromVariant = firstSet(variant, variantFields);
            if (fromVariant) return fromVariant;
        }
    }

    return pick(null, false);
}
/**
 * Convert one search hit into the item record used by the scanner.
 *
 * The URL is taken from the hit when present, otherwise built from the
 * product handle. Hits without a name or URL are dropped.
 *
 * @param {object|null|undefined} hit - Search hit.
 * @returns {{name: string, price: string, url: string, sku: *, img: string}|null}
 *   The item, or null when the hit has no usable name or URL.
 */
function bswHitToItem(hit) {
    const name = cleanText(hit && (hit.title || hit.name || hit.product_title || hit.product_name || ""));

    const handle = hit && (hit.handle || hit.product_handle || hit.slug || "");
    const url =
        (hit && (hit.url || hit.product_url)) ||
        (handle ? `https://www.bswliquor.com/products/${String(handle).replace(/^\/+/, "")}` : "");

    const { val: priceVal, cents: hintCents } = bswPickPrice(hit);
    const price = bswFormatPrice(priceVal, hintCents);

    const sku = normalizeCspc(hit?.sku || hit?.SKU || hit?.cspc || hit?.CSPC || "");
    const img = bswPickImage(hit);

    if (!name || !url) return null;
    return { name, price, url, sku, img };
}
/**
 * Fetch one page of in-stock products for a collection from the Algolia index.
 *
 * @param {object} ctx - Scan context; uses `ctx.http.fetchJsonWithRetry`,
 *   `ctx.cat.key` (for the request label) and `ctx.store.ua`.
 * @param {number} collectionId - Collection id used in the `filters` expression.
 * @param {string} ruleContext - Optional rule context; omitted when falsy.
 * @param {number} page0 - Zero-based page index.
 * @param {number} hitsPerPage - Page size.
 * @returns {Promise<*>} Whatever `ctx.http.fetchJsonWithRetry` resolves to.
 */
async function bswFetchAlgoliaPage(ctx, collectionId, ruleContext, page0, hitsPerPage) {
    const filtersExpr = `collection_ids:${collectionId} AND (inventory_available:"true")`;

    // The query parameters travel as one URL-encoded string inside the JSON body.
    const params =
        `facets=%5B%22price%22%2C%22*%22%5D` +
        `&filters=${encodeURIComponent(filtersExpr)}` +
        `&hitsPerPage=${encodeURIComponent(String(hitsPerPage))}` +
        `&page=${encodeURIComponent(String(page0))}` +
        `&query=` +
        `&clickAnalytics=true` +
        `&maxValuesPerFacet=100` +
        (ruleContext ? `&ruleContexts=${encodeURIComponent(String(ruleContext))}` : "");

    const bodyObj = { requests: [{ indexName: "shopify_products", params }] };

    return await ctx.http.fetchJsonWithRetry(BSW_ALGOLIA_URL, `algolia:${ctx.cat.key}:p${page0}`, ctx.store.ua, {
        method: "POST",
        headers: {
            Accept: "*/*",
            "content-type": "application/x-www-form-urlencoded",
            Origin: "https://www.bswliquor.com",
            Referer: "https://www.bswliquor.com/",
            "x-algolia-api-key": BSW_ALGOLIA_API_KEY,
            "x-algolia-application-id": BSW_ALGOLIA_APP_ID,
        },
        body: JSON.stringify(bodyObj),
    });
}
/**
 * Format a byte count for the progress log, right-aligned to 8 characters.
 *
 * @param {number} bytes - Byte count passed to `humanBytes`.
 * @returns {string} The `humanBytes` text left-padded with spaces to width 8.
 */
function kbStr(bytes) {
    return humanBytes(bytes).padStart(8, " ");
}
/**
 * Format a duration in milliseconds as seconds, right-aligned to 7 characters.
 *
 * Durations that round to less than 10 s show one decimal ("1.2s"); longer
 * ones show whole seconds ("12s"). Non-finite input is treated as 0.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Padded duration string.
 */
function secStr(ms) {
    const s = Number.isFinite(ms) ? ms / 1000 : 0;
    const tenths = Math.round(s * 10) / 10;
    const out = tenths < 10 ? `${tenths.toFixed(1)}s` : `${Math.round(s)}s`;
    return out.padStart(7, " ");
}
/**
 * Format a "current/total" page counter for the progress log.
 *
 * @param {number} i - Current page number.
 * @param {number} total - Total number of pages.
 * @returns {string} `i` padded via `padLeft` to the digit width of `total`, then "/total".
 */
function pageStr(i, total) {
    const leftW = String(total).length;
    return `${padLeft(i, leftW)}/${total}`;
}
/**
 * Format progress as a whole-number percentage for the progress log.
 *
 * @param {number} done - Completed units.
 * @param {number} total - Total units; a falsy total yields 0%.
 * @returns {string} Percentage (rounded down) padded via `padLeft` to width 3, plus "%".
 */
function pctStr(done, total) {
    const pct = total ? Math.floor((done / total) * 100) : 0;
    return `${padLeft(pct, 3)}%`;
}
/**
 * Resolve a possibly relative or protocol-relative URL against www.bswliquor.com.
 *
 * @param {*} raw - URL text; nullish or blank values yield "".
 * @returns {string} "https:"-prefixed URL for "//host/…" input, the input
 *   itself when it already starts with http(s)://, otherwise the input
 *   resolved against the site root (or returned as-is if it cannot be parsed).
 */
function bswNormalizeAbsUrl(raw) {
    const s = String(raw || "").trim();
    if (!s) return "";
    if (s.startsWith("//")) return `https:${s}`;
    if (/^https?:\/\//i.test(s)) return s;
    try {
        return new URL(s, "https://www.bswliquor.com/").toString();
    } catch {
        return s;
    }
}
/**
 * Extract an absolute image URL from a value that may be a string or an image object.
 *
 * For objects, the first non-blank string among a fixed list of well-known
 * properties (src, url, originalSrc, …) is used.
 *
 * @param {*} v - Image candidate.
 * @returns {string} Normalized URL, or "" when no usable URL is found.
 */
function bswNormalizeImg(v) {
    if (!v) return "";
    if (typeof v === "string") return bswNormalizeAbsUrl(v);
    if (typeof v === "object") {
        const cands = [
            v.src,
            v.url,
            v.originalSrc,
            v.original_src,
            v.original,
            v.secure_url,
            v.large,
            v.medium,
            v.small,
        ];
        for (const c of cands) {
            if (typeof c === "string" && c.trim()) return bswNormalizeAbsUrl(c);
        }
    }
    return "";
}
/**
 * Pick the first usable image URL from a search hit.
 *
 * Candidates are tried in order: the well-known single-image fields, then the
 * entries of `hit.images`, then the entries of `hit.media`.
 *
 * @param {object|null|undefined} hit - Search hit.
 * @returns {string} Normalized image URL, or "" when none is found.
 */
function bswPickImage(hit) {
    const direct = [
        hit?.image,
        hit?.image_url,
        hit?.imageUrl,
        hit?.imageURL,
        hit?.featured_image,
        hit?.featured_image_url,
        hit?.featuredImage,
        hit?.featuredImageUrl,
        hit?.product_image,
        hit?.product_image_url,
        hit?.productImage,
        hit?.productImageUrl,
        hit?.thumbnail,
        hit?.thumbnail_url,
        hit?.thumbnailUrl,
    ];
    const images = Array.isArray(hit?.images) ? hit.images : [];
    const media = Array.isArray(hit?.media) ? hit.media : [];

    for (const cand of [...direct, ...images, ...media]) {
        const s = bswNormalizeImg(cand);
        if (s) return s;
    }
    return "";
}
/**
 * Merge the discovered items into the previous DB, write the DB file and
 * record the category outcome on the run report.
 *
 * Shared by both exit paths of scanCategoryBSWAlgolia.
 *
 * @param {object} ctx - Scan context (store, cat, dbFile, logger, catPrefixOut).
 * @param {*} prevDb - Previous DB state, passed through to `mergeDiscoveredIntoDb`.
 * @param {object} report - Run report; `categories` and `totals` are updated in place.
 * @param {Map<string, object>} discovered - Items found in this run, keyed by URL.
 * @param {number} scannedPages - Page count to record for the category.
 * @param {number} t0 - Scan start time from `Date.now()`.
 * @param {boolean} logSummary - Whether to log the "Done in …" summary line.
 */
function bswSaveAndReport(ctx, prevDb, report, discovered, scannedPages, t0, logSummary) {
    const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, {
        storeLabel: ctx.store.name,
    });

    const dbObj = buildDbObject(ctx, merged);
    writeJsonAtomic(ctx.dbFile, dbObj);

    ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`);

    const elapsed = Date.now() - t0;
    if (logSummary) {
        ctx.logger.ok(
            `${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`,
        );
    }

    report.categories.push({
        store: ctx.store.name,
        label: ctx.cat.label,
        key: ctx.cat.key,
        dbFile: ctx.dbFile,
        scannedPages,
        discoveredUnique: discovered.size,
        newCount: newItems.length,
        updatedCount: updatedItems.length,
        removedCount: removedItems.length,
        restoredCount: restoredItems.length,
        elapsedMs: elapsed,
    });
    report.totals.newCount += newItems.length;
    report.totals.updatedCount += updatedItems.length;
    report.totals.removedCount += removedItems.length;
    report.totals.restoredCount += restoredItems.length;

    addCategoryResultToReport(
        report,
        ctx.store.name,
        ctx.cat.label,
        newItems,
        updatedItems,
        removedItems,
        restoredItems,
    );
}

/**
 * Scan one BSW category through the Algolia search API and update its DB file.
 *
 * Steps: resolve the collection id (from `ctx.cat.bswCollectionId` or by
 * scraping `ctx.cat.startUrl`), fetch the first page to learn the page count,
 * fetch the remaining pages with staggered concurrency, de-duplicate items by
 * URL, then merge, save and report.
 *
 * When no collection id can be resolved the category is processed as one page
 * with zero discovered items.
 *
 * @param {object} ctx - Scan context (config, http, logger, store, cat, dbFile, …).
 * @param {*} prevDb - Previous DB state for this category.
 * @param {object} report - Run report, updated in place.
 * @returns {Promise<void>}
 */
async function scanCategoryBSWAlgolia(ctx, prevDb, report) {
    const t0 = Date.now();

    let collectionId = Number.isFinite(ctx.cat.bswCollectionId) ? ctx.cat.bswCollectionId : null;
    if (!collectionId) {
        // Best effort: a failed discovery is logged and handled by the fallback below.
        try {
            const { text: html } = await ctx.http.fetchTextWithRetry(
                ctx.cat.startUrl,
                `bsw:html:${ctx.cat.key}`,
                ctx.store.ua,
            );
            collectionId = bswExtractCollectionIdFromHtml(html);
            if (collectionId) ctx.logger.ok(`${ctx.catPrefixOut} | BSW discovered collectionId=${collectionId}`);
            else ctx.logger.warn(`${ctx.catPrefixOut} | BSW could not discover collectionId from HTML.`);
        } catch (e) {
            ctx.logger.warn(
                `${ctx.catPrefixOut} | BSW HTML fetch failed for collectionId discovery: ${e?.message || e}`,
            );
        }
    }

    if (!collectionId) {
        ctx.logger.warn(`${ctx.catPrefixOut} | BSW missing collectionId; defaulting to 1 page with 0 items.`);
        bswSaveAndReport(ctx, prevDb, report, new Map(), 1, t0, false);
        return;
    }

    const ruleContext = ctx.cat.bswRuleContext || "";
    const hitsPerPage = 50;

    const first = await bswFetchAlgoliaPage(ctx, collectionId, ruleContext, 0, hitsPerPage);
    const result0 = first?.json?.results?.[0] || null;
    const nbPages = result0 && Number.isFinite(result0.nbPages) ? result0.nbPages : 1;

    const totalPages = Math.max(1, nbPages);
    // `== null` also covers an unset (undefined) maxPages; Math.min(undefined, n)
    // would be NaN and silently scan zero pages.
    const scanPages = ctx.config.maxPages == null ? totalPages : Math.min(ctx.config.maxPages, totalPages);
    ctx.logger.ok(
        `${ctx.catPrefixOut} | Pages: ${scanPages}${scanPages !== totalPages ? ` (cap from ${totalPages})` : ""}`,
    );

    const pageIdxs = [];
    for (let p = 0; p < scanPages; p++) pageIdxs.push(p);

    let donePages = 0;

    const perPageItems = await require("../utils/async").parallelMapStaggered(
        pageIdxs,
        ctx.config.concurrency,
        ctx.config.staggerMs,
        async (page0, idx) => {
            const pnum = idx + 1;
            // Page 0 was already fetched above to learn the page count.
            const r =
                page0 === 0 ? first : await bswFetchAlgoliaPage(ctx, collectionId, ruleContext, page0, hitsPerPage);

            const res0 = r?.json?.results?.[0] || null;
            const hits = res0 && Array.isArray(res0.hits) ? res0.hits : [];

            const items = [];
            for (const h of hits) {
                const it = bswHitToItem(h);
                if (it) items.push(it);
            }

            donePages++;
            ctx.logger.ok(
                `${ctx.catPrefixOut} | Page ${pageStr(pnum, pageIdxs.length)} | ${String(r.status || "").padEnd(3)} | ${pctStr(donePages, pageIdxs.length)} | items=${padLeft(
                    items.length,
                    3,
                )} | bytes=${kbStr(r.bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}`,
            );

            return items;
        },
    );

    // De-duplicate by URL; a later occurrence replaces an earlier one.
    const discovered = new Map();
    let dups = 0;
    for (const arr of perPageItems) {
        for (const it of arr) {
            if (discovered.has(it.url)) dups++;
            discovered.set(it.url, it);
        }
    }

    ctx.logger.ok(
        `${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}${dups ? ` (${dups} dups)` : ""}`,
    );

    bswSaveAndReport(ctx, prevDb, report, discovered, scanPages, t0, true);
}
/**
 * Build the store descriptor for BSW (www.bswliquor.com).
 *
 * @param {string} defaultUa - User-Agent value stored on the descriptor as `ua`.
 * @returns {object} Descriptor with `key`, `name`, `host`, `ua`, the category
 *   scanner (`scanCategory`) and the list of categories to scan.
 */
function createStore(defaultUa) {
    return {
        key: "bsw",
        name: "BSW",
        host: "www.bswliquor.com",
        ua: defaultUa,
        scanCategory: scanCategoryBSWAlgolia,
        categories: [
            {
                key: "scotch-whisky",
                label: "Scotch Whisky",
                startUrl: "https://www.bswliquor.com/collections/scotch-whisky?page=1",
                bswRuleContext: "scotch-whisky",
            },
            {
                key: "rum",
                label: "Rum",
                startUrl: "https://www.bswliquor.com/collections/rum?page=1",
                bswRuleContext: "rum",
            },
            {
                key: "whisky",
                label: "Whisky",
                startUrl: "https://www.bswliquor.com/collections/whisky?page=1",
                bswRuleContext: "whisky",
            },
        ],
    };
}
// Public surface of this store module: only the descriptor factory is exported.
module.exports = { createStore };

View file

@ -11,20 +11,20 @@ const { addCategoryResultToReport } = require("../tracker/report");
/* ---------------- formatting ---------------- */
/**
 * Format a byte count for the progress log, right-aligned to 8 characters.
 *
 * @param {number} bytes - Byte count passed to `humanBytes`.
 * @returns {string} The `humanBytes` text left-padded with spaces to width 8.
 */
function kbStr(bytes) {
    return humanBytes(bytes).padStart(8, " ");
}
/**
 * Format a duration in milliseconds as seconds, right-aligned to 7 characters.
 *
 * Durations that round to less than 10 s show one decimal; longer ones show
 * whole seconds. Non-finite input is treated as 0.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Padded duration string such as "   1.2s" or "    12s".
 */
function secStr(ms) {
    const s = Number.isFinite(ms) ? ms / 1000 : 0;
    const t = Math.round(s * 10) / 10;
    return (t < 10 ? `${t.toFixed(1)}s` : `${Math.round(s)}s`).padStart(7, " ");
}
/**
 * Format a "current/total" page counter for the progress log.
 *
 * @param {number} i - Current page number.
 * @param {number} total - Total number of pages.
 * @returns {string} `i` padded via `padLeft` to the digit width of `total`, then "/total".
 */
function pageStr(i, total) {
    const w = String(total).length;
    return `${padLeft(i, w)}/${total}`;
}
/**
 * Format progress as a whole-number percentage for the progress log.
 *
 * @param {number} done - Completed units.
 * @param {number} total - Total units; a falsy total yields 0%.
 * @returns {string} Percentage (rounded down) padded via `padLeft` to width 3, plus "%".
 */
function pctStr(done, total) {
    const pct = total ? Math.floor((done / total) * 100) : 0;
    return `${padLeft(pct, 3)}%`;
}
/* ---------------- co-op specifics ---------------- */
@ -33,327 +33,352 @@ const BASE = "https://shoponlinewhisky-wine.coopwinespiritsbeer.com";
// Landing page of the whisky shop; used as Referer, as the bootstrap page, and as the base for product links.
const REFERER = `${BASE}/worldofwhisky`;
/**
 * Build the request headers for a co-op API call.
 *
 * @param {object} ctx - Scan context; reads session values from `ctx.store.coop`.
 * @param {string} sourcepage - Value for the `sourcepage` header.
 * @returns {object} Header map. `SESSION_ID` falls back to the string "null"
 *   while no session exists; `TIMESTAMP` is the current `Date.now()` as a string.
 */
function coopHeaders(ctx, sourcepage) {
    const coop = ctx.store.coop;
    return {
        Accept: "application/json, text/javascript, */*; q=0.01",
        "Content-Type": "application/json",
        Origin: BASE,
        Referer: REFERER,

        // these 4 are required on their API calls (matches browser)
        SessionKey: coop.sessionKey,
        chainID: coop.chainId,
        storeID: coop.storeId,
        appVersion: coop.appVersion,

        AUTH_TOKEN: "null",
        CONNECTION_ID: "null",
        SESSION_ID: coop.sessionId || "null",
        TIMESTAMP: String(Date.now()),
        sourcepage,
    };
}
/**
 * GET a URL as text through the shared HTTP client, using the store's User-Agent.
 *
 * @param {object} ctx - Scan context; uses `ctx.http.fetchTextWithRetry` and `ctx.store.ua`.
 * @param {string} url - URL to fetch.
 * @param {string} label - Request label passed to the HTTP client.
 * @param {{headers?: object}} [options] - Optional extra request headers.
 * @returns {Promise<*>} Whatever `ctx.http.fetchTextWithRetry` resolves to.
 */
async function coopFetchText(ctx, url, label, { headers } = {}) {
    return await ctx.http.fetchTextWithRetry(url, label, ctx.store.ua, {
        method: "GET",
        headers: headers || {},
    });
}
/**
 * Return the first capture group of `re` matched against `html`, trimmed.
 *
 * @param {string} html - Text to search (nullish is treated as "").
 * @param {RegExp} re - Pattern whose first capture group holds the value.
 * @returns {string} The trimmed capture, or "" when there is no match.
 */
function extractVar(html, re) {
    const m = String(html || "").match(re);
    return m ? String(m[1] || "").trim() : "";
}
/**
 * Make sure the four bootstrap values the co-op API needs are present on
 * `ctx.store.coop` (sessionKey, chainId, storeId, appVersion).
 *
 * Does nothing when all four are already set. Otherwise fetches the landing
 * page and extracts them from inline script variables.
 *
 * @param {object} ctx - Scan context; `ctx.store.coop` is updated in place.
 * @returns {Promise<void>}
 * @throws {Error} When the page fetch does not return status 200 with a body,
 *   or when any of the four values cannot be extracted.
 */
async function ensureCoopBootstrap(ctx) {
    const coop = ctx.store.coop;
    if (coop.sessionKey && coop.chainId && coop.storeId && coop.appVersion) return;

    const r = await coopFetchText(ctx, REFERER, "coop:bootstrap", {
        headers: {
            Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            Referer: REFERER,
        },
    });

    const html = r?.text || "";
    if (r?.status !== 200 || !html) {
        // `r` may be nullish here, so the status is read with optional chaining
        // to keep this descriptive error instead of raising a TypeError.
        throw new Error(`coop bootstrap failed: GET ${REFERER} => ${r?.status}`);
    }

    // Values are in <script> var SESSIONKEY = "..."; etc.
    coop.sessionKey = extractVar(html, /var\s+SESSIONKEY\s*=\s*"([^"]+)"/i);
    coop.chainId = extractVar(html, /var\s+chainID\s*=\s*"([^"]+)"/i);
    coop.storeId = extractVar(html, /var\s+store_unique_id\s*=\s*"([^"]+)"/i);
    coop.appVersion = extractVar(html, /var\s+CLIENTVERSION\s*=\s*"([^"]+)"/i);

    if (!coop.sessionKey || !coop.chainId || !coop.storeId || !coop.appVersion) {
        throw new Error(
            `coop bootstrap missing values: sessionKey=${!!coop.sessionKey} chainId=${!!coop.chainId} storeId=${!!coop.storeId} appVersion=${!!coop.appVersion}`,
        );
    }
}
/**
 * Make sure a co-op API session exists, creating one when needed.
 *
 * Does nothing when `ctx.store.coop.sessionId` is already set. Otherwise runs
 * the bootstrap step, POSTs to the create-session endpoint and stores the
 * returned session id (and `AnonymousUserID`, or null) on `ctx.store.coop`.
 *
 * @param {object} ctx - Scan context; `ctx.store.coop` is updated in place.
 * @returns {Promise<void>}
 * @throws {Error} When the response carries no session id.
 */
async function ensureCoopSession(ctx) {
    const coop = ctx.store.coop;
    if (coop.sessionId) return;

    await ensureCoopBootstrap(ctx);

    const r = await ctx.http.fetchJsonWithRetry(
        `${BASE}/api/account/createsession`,
        `coop:createsession`,
        ctx.store.ua,
        {
            method: "POST",
            headers: coopHeaders(ctx, "/worldofwhisky"),
            // browser sends Content-Length: 0; easiest equivalent:
            body: "",
        },
    );

    // The id is accepted under any of these key spellings.
    const sid = r?.json?.SessionID || r?.json?.sessionID || r?.json?.sessionId || r?.json?.SessionId || "";

    if (!sid) {
        throw new Error(`createSession: missing SessionID (status=${r?.status})`);
    }

    coop.sessionId = sid;
    coop.anonymousUserId = r?.json?.AnonymousUserID ?? null;
}
/**
 * Turn a possibly relative or protocol-relative URL into an absolute one.
 *
 * @param {*} raw candidate URL; falsy values are treated as empty.
 * @returns {string} "" for empty input, `https:`-prefixed for `//host/...`,
 *   the trimmed input unchanged for http(s) URLs, otherwise the input resolved
 *   against `BASE` (or the trimmed input itself if resolution throws).
 */
function normalizeAbsUrl(raw) {
  const text = String(raw || "").trim();

  if (text === "") return "";
  if (text.startsWith("//")) return `https:${text}`;

  const alreadyAbsolute = /^https?:\/\//i.test(text);
  if (alreadyAbsolute) return text;

  let resolved;
  try {
    resolved = new URL(text, `${BASE}/`).toString();
  } catch {
    resolved = text;
  }
  return resolved;
}
/**
 * Build the hash-route product URL for a product id.
 *
 * @param {*} productId id to embed; stringified and URI-component encoded.
 * @returns {string} `REFERER` followed by `#/product/<encoded id>`.
 */
function productUrlFromId(productId) {
  const prefix = `${REFERER}#/product/`;
  const encodedId = encodeURIComponent(String(productId));
  return prefix + encodedId;
}
/**
 * Convert one product record from the category API into a tracker item.
 *
 * @param {object|null|undefined} p raw API product.
 * @returns {object|null} `{ name, price, url, sku, upc, productId, productStoreId, img }`,
 *   or null when the record is missing, explicitly inactive (`IsActive === false`),
 *   has no name, or has no `ProductID`.
 */
function productFromApi(p) {
  if (!p) return null;
  if (p.IsActive === false) return null;

  const name = String(p.Name || "").trim();
  const productId = p.ProductID;
  if (!name || !productId) return null;

  const url = productUrlFromId(productId);

  // Prefer the display text; fall back to formatting a finite numeric Price.
  let price = p?.CountDetails?.PriceText;
  if (!price) {
    price = Number.isFinite(p?.Price) ? `$${Number(p.Price).toFixed(2)}` : "";
  }

  const upc = String(p.UPC || "").trim();

  // Key preference: UPC, then ProductStoreID, then ProductID.
  const rawKey = upc
    ? `upc:${upc}`
    : p.ProductStoreID
      ? `id:${String(p.ProductStoreID).trim()}`
      : p.ProductID
        ? `id:${String(p.ProductID).trim()}`
        : "";

  const sku = normalizeSkuKey(rawKey, { storeLabel: "Co-op World of Whisky", url });
  const img = normalizeAbsUrl(p.ImageURL);
  const productStoreId = p.ProductStoreID || null;

  return { name, price, url, sku, upc, productId, productStoreId, img };
}
/* ---------------- scanner ---------------- */ /* ---------------- scanner ---------------- */
/**
 * Fetch one page of a category's product listing.
 *
 * Ensures a session exists, POSTs the page request and, if the response body
 * reports `type === "invalid_session"`, clears the cached session id, obtains
 * a new one and repeats the request exactly once.
 *
 * @param {object} ctx scan context (`ctx.http`, `ctx.store`, `ctx.cat`).
 * @param {*} categoryId id placed in the request path.
 * @param {number} page page number sent in the request body.
 * @returns {Promise<object>} whatever `ctx.http.fetchJsonWithRetry` resolves to.
 */
async function fetchCategoryPage(ctx, categoryId, page) {
  await ensureCoopSession(ctx);

  // Headers are rebuilt on every call so a retry runs after the session refresh.
  async function postPage() {
    return ctx.http.fetchJsonWithRetry(
      `${BASE}/api/v2/products/category/${categoryId}`,
      `coop:${ctx.cat.key}:p${page}`,
      ctx.store.ua,
      {
        method: "POST",
        headers: coopHeaders(ctx, `/category/${ctx.cat.coopSlug}`),
        body: JSON.stringify({
          page,
          Filters: {
            Filters: [],
            LastSelectedFilter: null,
            SearchWithinTerm: null,
          },
          orderby: null,
        }),
      },
    );
  }

  const first = await postPage();
  if (first?.json?.type !== "invalid_session") return first;

  // one fast retry on invalid_session: refresh SessionID and repeat
  ctx.store.coop.sessionId = "";
  await ensureCoopSession(ctx);
  return postPage();
}
/**
 * Guard against a partial scan wiping most of the stored catalogue.
 *
 * When this run found fewer than 60% of the previously known entries, every
 * previous entry that was not rediscovered is copied into `discovered`
 * (mutating it) and a warning is logged. Nothing happens when either side is
 * empty or no previous map is available.
 *
 * @param {Map|{byUrl: Map}|null|undefined} prevDb previous snapshot. A bare Map
 *   is still accepted; a wrapper exposing the Map as `byUrl` is now accepted
 *   too. Previously only `.size`/`.entries()` on `prevDb` itself were read, so
 *   with a wrapper object the guard silently never fired.
 * @param {Map} discovered url -> item map for this run; mutated in place.
 * @param {object} ctx reads `ctx.logger.warn` and `ctx.catPrefixOut`.
 * @returns {void}
 */
function avoidMassRemoval(prevDb, discovered, ctx) {
  // NOTE(review): assumes the wrapper shape is `{ byUrl: Map }`, as read by
  // other scanners in this codebase - confirm against the DB loader.
  let prevMap = null;
  if (prevDb instanceof Map) prevMap = prevDb;
  else if (prevDb?.byUrl instanceof Map) prevMap = prevDb.byUrl;
  if (!prevMap) return;

  const prev = prevMap.size;
  const curr = discovered.size;
  if (!prev || !curr) return;
  if (curr / prev >= 0.6) return;

  ctx.logger.warn(`${ctx.catPrefixOut} | Partial scan (${curr}/${prev}); preserving DB`);

  // NOTE(review): if previous entries can be tombstoned (e.g. flagged removed),
  // they are counted and carried over here as well - verify that is intended.
  for (const [k, v] of prevMap.entries()) {
    if (!discovered.has(k)) discovered.set(k, v);
  }
}
/**
 * Scan one Co-op category page by page, merge the result into the previous DB,
 * write the DB file and append the outcome to `report`.
 *
 * Paging stops at the first page that fails to fetch (logged as a warning),
 * at the first page with no products, or after at most 500 pages.
 *
 * @param {object} ctx scan context (`config`, `cat`, `store`, `http`, `logger`, `dbFile`, `catPrefixOut`).
 * @param {*} prevDb previous snapshot handed to `avoidMassRemoval` and `mergeDiscoveredIntoDb`.
 * @param {object} report accumulator; `categories` and `totals` are mutated.
 * @returns {Promise<void>}
 */
async function scanCategoryCoop(ctx, prevDb, report) {
  const startedAt = Date.now();
  const discovered = new Map();

  const pageCap = 500;
  const maxPages = ctx.config.maxPages === null ? pageCap : Math.min(ctx.config.maxPages, pageCap);

  let done = 0;
  let page = 1;
  while (page <= maxPages) {
    let r;
    try {
      r = await fetchCategoryPage(ctx, ctx.cat.coopCategoryId, page);
    } catch (e) {
      ctx.logger.warn(`${ctx.catPrefixOut} | page ${page} failed: ${e?.message || e}`);
      break;
    }

    const result = r?.json?.Products?.Result;
    const arr = Array.isArray(result) ? result : [];
    done += 1;

    let kept = 0;
    for (const raw of arr) {
      const item = productFromApi(raw);
      if (item) {
        discovered.set(item.url, item);
        kept += 1;
      }
    }

    const columns = [
      `${ctx.catPrefixOut}`,
      `Page ${padLeft(page, 3)}`,
      `${String(r.status || "").padEnd(3)}`,
      `items=${padLeft(kept, 3)}`,
      `bytes=${kbStr(r.bytes)}`,
      `${padRight(ctx.http.inflightStr(), 11)}`,
      `${secStr(r.ms)}`,
    ];
    ctx.logger.ok(columns.join(" | "));

    // An empty page marks the end of the listing.
    if (arr.length === 0) break;
    page += 1;
  }

  if (prevDb) avoidMassRemoval(prevDb, discovered, ctx);

  ctx.logger.ok(`${ctx.catPrefixOut} | Unique products: ${discovered.size}`);

  const diff = mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel: ctx.store.name });
  const { newItems, updatedItems, removedItems, restoredItems } = diff;

  writeJsonAtomic(ctx.dbFile, buildDbObject(ctx, diff.merged));

  report.categories.push({
    store: ctx.store.name,
    label: ctx.cat.label,
    key: ctx.cat.key,
    dbFile: ctx.dbFile,
    scannedPages: done,
    discoveredUnique: discovered.size,
    newCount: newItems.length,
    updatedCount: updatedItems.length,
    removedCount: removedItems.length,
    restoredCount: restoredItems.length,
    elapsedMs: Date.now() - startedAt,
  });

  const totals = report.totals;
  totals.newCount += newItems.length;
  totals.updatedCount += updatedItems.length;
  totals.removedCount += removedItems.length;
  totals.restoredCount += restoredItems.length;

  addCategoryResultToReport(
    report,
    ctx.store.name,
    ctx.cat.label,
    newItems,
    updatedItems,
    removedItems,
    restoredItems,
  );
}
/* ---------------- store ---------------- */ /* ---------------- store ---------------- */
/**
 * Build the store descriptor for Co-op World of Whisky.
 *
 * @param {string} defaultUa user agent stored as `ua` on the descriptor.
 * @returns {object} descriptor with `key`, `name`, `host`, `ua`, `scanCategory`,
 *   blank `coop` session state and the list of categories to scan.
 */
function createStore(defaultUa) {
  // Each category's start URL is the referer page plus a hash route on its slug.
  const category = (key, label, coopSlug, coopCategoryId) => ({
    key,
    label,
    coopSlug,
    coopCategoryId,
    startUrl: `${REFERER}#/category/${coopSlug}`,
  });

  const categories = [
    category("canadian-whisky", "Canadian Whisky", "canadian_whisky", 4),
    category("bourbon-whiskey", "Bourbon Whiskey", "bourbon_whiskey", 9),
    category("scottish-single-malts", "Scottish Single Malts", "scottish_single_malts", 6),
    category("scottish-blends", "Scottish Whisky Blends", "scottish_whisky_blends", 5),
    category("american-whiskey", "American Whiskey", "american_whiskey", 8),
    category("world-whisky", "World Whisky", "world_international", 10),
    category("rum", "Rum", "spirits_rum", 24),
  ];

  // put your captured values here (or pull from env)
  const coop = {
    sessionKey: "",
    chainId: "",
    storeId: "",
    appVersion: "",
    sessionId: "", // set by ensureCoopSession()
    anonymousUserId: null,
  };

  return {
    key: "coop",
    name: "Co-op World of Whisky",
    host: "shoponlinewhisky-wine.coopwinespiritsbeer.com",
    ua: defaultUa,
    scanCategory: scanCategoryCoop,
    coop,
    categories,
  };
}
module.exports = { createStore }; module.exports = { createStore };

View file

@ -13,142 +13,123 @@ const { buildDbObject, writeJsonAtomic } = require("../tracker/db");
const { addCategoryResultToReport } = require("../tracker/report"); const { addCategoryResultToReport } = require("../tracker/report");
/**
 * Tell whether a listing page's HTML carries one of the "empty collection" markers.
 *
 * @param {*} html page markup; falsy values are treated as empty.
 * @returns {boolean} true when the markup contains `collection--empty` or
 *   the text "No products found" (both case-insensitive).
 */
function craftCellarsIsEmptyListingPage(html) {
  const page = String(html || "");
  const emptyMarkers = [/collection--empty\b/i, /No products found/i];
  return emptyMarkers.some((marker) => marker.test(page));
}
/**
 * Strip the query string and fragment from an absolute product URL.
 *
 * @param {*} raw URL to canonicalize.
 * @returns {string} the URL without search/hash; when `raw` does not parse as
 *   an absolute URL, `String(raw || "")` is returned instead.
 */
function canonicalizeCraftProductUrl(raw) {
  let parsed;
  try {
    parsed = new URL(String(raw));
  } catch {
    return String(raw || "");
  }
  parsed.search = "";
  parsed.hash = "";
  return parsed.toString();
}
/**
 * Pull a display price out of a product card's markup.
 *
 * The text following the first "sale price" label is searched first, then the
 * text following the first "regular price" label, then the whole card. The
 * first dollar amount found wins, with internal whitespace removed.
 *
 * @param {*} block card markup; falsy values are treated as empty.
 * @returns {string} e.g. "$39.99", or "" when no dollar amount is present.
 */
function extractShopifyCardPrice(block) {
  const card = String(block || "");

  const firstAmount = (text) => {
    const hit = /\$\s*[\d,]+(?:\.\d{2})?/.exec(String(text));
    return hit ? hit[0].replace(/\s+/g, "") : "";
  };

  // Only the segment between the first and second occurrence of a label is used.
  const afterLabel = (label) => card.split(label)[1] || "";

  return firstAmount(afterLabel(/sale price/i)) || firstAmount(afterLabel(/regular price/i)) || firstAmount(card);
}
/**
 * Parse product cards from a listing page, narrowing to the product grid when
 * one can be located.
 *
 * The longer of the two candidate grid matches is used, but only if it
 * contains a `/products/` link; otherwise the whole page is parsed.
 *
 * @param {*} html listing page markup.
 * @param {object} ctx forwarded unchanged to `parseProductsCraftCellarsInner`.
 * @returns {Array<object>} whatever `parseProductsCraftCellarsInner` returns.
 */
function parseProductsCraftCellars(html, ctx) {
  const page = String(html || "");

  const [container, productGrid] = [
    /<div\b[^>]*id=["']ProductGridContainer["'][^>]*>[\s\S]*?<\/div>/i,
    /<div\b[^>]*id=["']product-grid["'][^>]*>[\s\S]*?<\/div>/i,
  ].map((pattern) => page.match(pattern)?.[0] || "");

  const candidate = container.length > productGrid.length ? container : productGrid;
  const hasProductLinks = /\/products\//i.test(candidate);

  return parseProductsCraftCellarsInner(hasProductLinks ? candidate : page, ctx);
}
/**
 * Extract `{ name, price, url, img }` items from listing markup.
 *
 * Cards are taken from `<li>` elements; when fewer than five are found, `<div>`
 * elements whose class list contains `card` are used instead. Cards without a
 * `/products/` link, with an unparsable URL, or with no usable name are
 * skipped. Items are de-duplicated by canonical URL, the last occurrence winning.
 *
 * @param {*} html markup to scan.
 * @param {object} [ctx] `ctx.store.host` selects the base for relative links
 *   (defaults to craftcellars.ca).
 * @returns {Array<{name: string, price: string, url: string, img: string}>}
 */
function parseProductsCraftCellarsInner(html, ctx) {
  const markup = String(html || "");
  const allMatches = (pattern) => Array.from(markup.matchAll(pattern), (m) => m[0]);

  let cards = allMatches(/<li\b[^>]*>[\s\S]*?<\/li>/gi);
  if (cards.length < 5) {
    cards = allMatches(/<div\b[^>]*class=["'][^"']*\bcard\b[^"']*["'][^>]*>[\s\S]*?<\/div>/gi);
  }

  const host = (ctx && ctx.store && ctx.store.host) || "craftcellars.ca";
  const base = `https://${host}/`;

  // Keyed by canonical URL: a later duplicate replaces the earlier entry in place.
  const byUrl = new Map();

  for (const card of cards) {
    const href =
      card.match(/<a\b[^>]*href=["']([^"']*\/products\/[^"']+)["']/i)?.[1] ||
      card.match(/href=["']([^"']*\/products\/[^"']+)["']/i)?.[1];
    if (!href) continue;

    let absolute;
    try {
      absolute = new URL(decodeHtml(href), base).toString();
    } catch {
      continue;
    }
    const url = canonicalizeCraftProductUrl(absolute);

    // Name candidates, most specific first.
    const nameHtml =
      card.match(/<a\b[^>]*href=["'][^"']*\/products\/[^"']+["'][^>]*>\s*<[^>]*>\s*([^<]{2,200}?)\s*</i)?.[1] ||
      card.match(
        /<h[23]\b[^>]*>[\s\S]*?<a\b[^>]*\/products\/[^"']+[^>]*>([\s\S]*?)<\/a>[\s\S]*?<\/h[23]>/i,
      )?.[1] ||
      card.match(/<a\b[^>]*href=["'][^"']*\/products\/[^"']+["'][^>]*>([\s\S]*?)<\/a>/i)?.[1];

    const name = sanitizeName(stripTags(decodeHtml(nameHtml || "")));
    if (!name) continue;

    const price = extractShopifyCardPrice(card);
    const img = extractFirstImgUrl(card, base);

    byUrl.set(url, { name, price, url, img });
  }

  return Array.from(byUrl.values());
}
/**
 * Format a Shopify price string (e.g. "1234.5") as a US-dollar display price.
 *
 * @param {*} s raw price; everything except digits and "." is discarded.
 * @returns {string} e.g. "$1,234.50", or "" when the input holds no parsable
 *   number. Input with no digits at all returns "" - previously it was
 *   reported as "$0.00" because `Number("")` is 0 and passed the finite check.
 */
function usdFromShopifyPriceStr(s) {
  const numeric = String(s ?? "").replace(/[^0-9.]/g, "");
  // Guard explicitly: Number("") === 0 would otherwise look like a real price.
  if (numeric === "") return "";

  const n = Number(numeric);
  if (!Number.isFinite(n)) return "";

  return `$${n.toLocaleString("en-US", {
    minimumFractionDigits: 2,
    maximumFractionDigits: 2,
  })}`;
}
/**
 * Return `v` when it is a finite number, otherwise `fallback`.
 * Non-number values (including numeric strings) always yield the fallback.
 *
 * @param {*} v candidate value.
 * @param {*} fallback value used when `v` is not a finite number.
 * @returns {*}
 */
function cfgNum(v, fallback) {
  if (Number.isFinite(v)) {
    return v;
  }
  return fallback;
}
/* ---------- NEW: product page SKU extractor ---------- */ /* ---------- NEW: product page SKU extractor ---------- */
/**
 * Find the SKU printed on a product page and pass it through `normalizeCspc`.
 *
 * Tries the `<strong>SKU:</strong> ... <span>value</span>` layout first, then
 * a plain "SKU: value" fallback. When neither matches, `normalizeCspc("")` is
 * returned.
 *
 * @param {*} html product page markup.
 * @returns {*} result of `normalizeCspc` on the cleaned capture.
 */
function extractCraftSkuFromProductPageHtml(html) {
  const page = String(html || "");

  // allow any content between </strong> and <span> (including newlines, other tags)
  const labelled = page.match(/<strong>\s*SKU:\s*<\/strong>[\s\S]{0,200}?<span>\s*([^<]{1,80}?)\s*<\/span>/i);
  const hit = labelled || page.match(/\bSKU:\s*([A-Za-z0-9][A-Za-z0-9\-_/ ]{0,40})/i);

  const captured = hit?.[1];
  const raw = captured ? stripTags(decodeHtml(captured)) : "";
  return normalizeCspc(raw);
}
/** /**
@ -158,283 +139,233 @@ function extractCraftSkuFromProductPageHtml(html) {
* - product page HTML is final SKU fallback * - product page HTML is final SKU fallback
*/ */
/**
 * Scan one Craft Cellars category and record the outcome.
 *
 * Steps: (1) walk the HTML listing pages, (2) enrich the products found there
 * from the collection's products.json, (3) fetch individual product pages for
 * items whose SKU still needs detail, (4) merge into the previous DB, write the
 * DB file and update `report`.
 *
 * @param {object} ctx scan context (`config`, `cat`, `store`, `http`, `logger`, `dbFile`, `catPrefixOut`).
 * @param {object|null} prevDb previous snapshot; `prevDb.byUrl` is consulted for cached sku/img.
 * @param {object} report accumulator; `categories` and `totals` are mutated.
 * @returns {Promise<void>}
 * @throws {Error} when JSON enrichment runs but `ctx.cat.startUrl` has no `/collections/<handle>` path.
 */
async function scanCategoryCraftCellars(ctx, prevDb, report) {
  const t0 = Date.now();

  // Pacing: each delay falls back to the next more general setting.
  const perPageDelayMs = Math.max(0, cfgNum(ctx?.cat?.pageStaggerMs, cfgNum(ctx?.cat?.discoveryDelayMs, 0))) || 0;
  const perJsonPageDelayMs = Math.max(0, cfgNum(ctx?.cat?.jsonPageDelayMs, perPageDelayMs));
  const perProductSkuDelayMs = Math.max(
    0,
    cfgNum(ctx?.cat?.skuPageDelayMs, cfgNum(ctx?.cat?.jsonPageDelayMs, perPageDelayMs)),
  );

  const { htmlMap, htmlPagesFetched } = await craftCollectHtmlListing(ctx, perPageDelayMs);

  let jsonMap = new Map();
  if (htmlMap.size) {
    const collected = await craftCollectCollectionJson(ctx, htmlMap, perJsonPageDelayMs);
    jsonMap = collected.jsonMap;
    ctx.logger.ok(`${ctx.catPrefixOut} | HTML pages=${htmlPagesFetched} JSON pages=${collected.jsonPagesFetched}`);
  } else {
    ctx.logger.warn(`${ctx.catPrefixOut} | HTML listing returned 0 items; refusing JSON-only discovery`);
  }

  // The HTML listing decides membership; JSON and the previous DB only enrich.
  const discovered = new Map();
  for (const [url, it] of htmlMap.entries()) {
    const j = jsonMap.get(url);
    const prev = prevDb?.byUrl?.get(url) || null;

    discovered.set(url, {
      name: it.name,
      price: j?.price || it.price || "",
      url,
      // reuse cached SKU unless we found something better this run
      sku: pickBetterSku(j?.sku || "", prev?.sku || ""),
      // reuse cached image if we didn't find one
      img: j?.img || it.img || prev?.img || "",
    });
  }

  const skuPagesFetched = await craftFillMissingSkus(ctx, discovered, perProductSkuDelayMs);

  ctx.logger.ok(`${ctx.catPrefixOut} | SKU fallback pages=${skuPagesFetched}`);
  ctx.logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}`);

  const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, {
    storeLabel: ctx.store.name,
  });

  const dbObj = buildDbObject(ctx, merged);
  writeJsonAtomic(ctx.dbFile, dbObj);

  const elapsed = Date.now() - t0;

  report.categories.push({
    store: ctx.store.name,
    label: ctx.cat.label,
    key: ctx.cat.key,
    dbFile: ctx.dbFile,
    scannedPages: htmlPagesFetched,
    discoveredUnique: discovered.size,
    newCount: newItems.length,
    updatedCount: updatedItems.length,
    removedCount: removedItems.length,
    restoredCount: restoredItems.length,
    elapsedMs: elapsed,
  });

  report.totals.newCount += newItems.length;
  report.totals.updatedCount += updatedItems.length;
  report.totals.removedCount += removedItems.length;
  report.totals.restoredCount += restoredItems.length;

  addCategoryResultToReport(
    report,
    ctx.store.name,
    ctx.cat.label,
    newItems,
    updatedItems,
    removedItems,
    restoredItems,
  );
}

/**
 * Walk the HTML listing pages (at most 200) and collect products by canonical URL.
 * Stops on an empty-collection page or after two consecutive pages that parse to no items.
 */
async function craftCollectHtmlListing(ctx, perPageDelayMs) {
  const htmlMap = new Map();

  const maxPages = ctx.config.maxPages === null ? 200 : Math.min(ctx.config.maxPages, 200);

  let htmlPagesFetched = 0;
  let emptyStreak = 0;

  for (let p = 1; p <= maxPages; p++) {
    if (p > 1 && perPageDelayMs > 0) await sleep(perPageDelayMs);

    const pageUrl = makePageUrlShopifyQueryPage(ctx.cat.startUrl, p);
    const { text: html } = await ctx.http.fetchTextWithRetry(pageUrl, `craft:html:${ctx.cat.key}:p${p}`, ctx.store.ua);
    htmlPagesFetched++;

    if (craftCellarsIsEmptyListingPage(html)) break;

    const items = parseProductsCraftCellars(html, ctx);
    if (!items.length) {
      emptyStreak++;
      if (emptyStreak >= 2) break;
      continue;
    }
    emptyStreak = 0;

    for (const it of items) {
      const url = canonicalizeCraftProductUrl(it.url);
      if (!url) continue;
      htmlMap.set(url, {
        name: it.name || "",
        price: it.price || "",
        url,
        img: it.img || "",
      });
    }
  }

  return { htmlMap, htmlPagesFetched };
}

/**
 * Page through the collection's products.json (250 per page, at most 200 pages)
 * and collect `{ sku, price, img }` for products already present in `htmlMap`.
 */
async function craftCollectCollectionJson(ctx, htmlMap, perJsonPageDelayMs) {
  const jsonMap = new Map();

  const start = new URL(ctx.cat.startUrl);
  const m = start.pathname.match(/^\/collections\/([^/]+)/i);
  if (!m) throw new Error(`CraftCellars: couldn't extract collection handle from ${ctx.cat.startUrl}`);
  const collectionHandle = m[1];

  const limit = 250;
  let jsonPagesFetched = 0;

  for (let jsonPage = 1; jsonPage <= 200; jsonPage++) {
    if (jsonPage > 1 && perJsonPageDelayMs > 0) await sleep(perJsonPageDelayMs);

    const url = `https://${ctx.store.host}/collections/${collectionHandle}/products.json?limit=${limit}&page=${jsonPage}`;
    const r = await ctx.http.fetchJsonWithRetry(url, `craft:coljson:${ctx.cat.key}:p${jsonPage}`, ctx.store.ua);

    const products = Array.isArray(r?.json?.products) ? r.json.products : [];
    jsonPagesFetched++;

    if (!products.length) break;

    for (const p of products) {
      const handle = String(p?.handle || "");
      if (!handle) continue;

      const prodUrl = canonicalizeCraftProductUrl(`https://${ctx.store.host}/products/${handle}`);
      // Products absent from the HTML listing are ignored.
      if (!htmlMap.has(prodUrl)) continue;

      // Prefer an available variant; otherwise fall back to the first one.
      const variants = Array.isArray(p?.variants) ? p.variants : [];
      const v = variants.find((x) => x && x.available === true) || variants[0] || null;

      const sku = normalizeCspc(v?.sku || "");
      const price = v?.price ? usdFromShopifyPriceStr(v.price) : "";
      const img = craftImageFromProductJson(p);

      jsonMap.set(prodUrl, { sku, price, img });
    }

    // A short page is the last page.
    if (products.length < limit) break;
  }

  return { jsonMap, jsonPagesFetched };
}

/** Pick the first image URL from a products.json record, upgrading `//host` URLs to https. */
function craftImageFromProductJson(p) {
  let img = "";
  const images = Array.isArray(p?.images) ? p.images : [];
  if (images[0]) {
    img = typeof images[0] === "string" ? images[0] : String(images[0]?.src || images[0]?.url || "");
  }
  if (!img && p?.image) img = String(p.image?.src || p.image?.url || p.image || "");
  img = String(img || "").trim();
  if (img.startsWith("//")) img = `https:${img}`;
  return img;
}

/**
 * Fetch product pages for items whose SKU still needs detail and update
 * `it.sku` in place. Best effort: a failed page is logged and skipped.
 * Returns the number of product pages fetched successfully.
 */
async function craftFillMissingSkus(ctx, discovered, perProductSkuDelayMs) {
  let skuPagesFetched = 0;

  for (const it of discovered.values()) {
    // only hit product pages when missing/synthetic
    if (!needsSkuDetail(it.sku)) continue;

    if (perProductSkuDelayMs > 0) await sleep(perProductSkuDelayMs);

    try {
      const { text } = await ctx.http.fetchTextWithRetry(
        it.url,
        `craft:prodpage:${ctx.cat.key}:${Buffer.from(it.url).toString("base64").slice(0, 24)}`,
        ctx.store.ua,
      );
      skuPagesFetched++;

      const sku = extractCraftSkuFromProductPageHtml(text);
      if (sku) it.sku = sku;
    } catch (e) {
      // Still best effort, but no longer silent: surface the failure in the log.
      ctx.logger.warn(`${ctx.catPrefixOut} | SKU fallback failed for ${it.url}: ${e?.message || e}`);
    }
  }

  return skuPagesFetched;
}
/**
 * Build the store descriptor for Craft Cellars.
 *
 * @param {string} defaultUa user agent stored as `ua` on the descriptor.
 * @returns {object} descriptor with identity fields, the scanner/parser hooks
 *   and the categories to scan (each carrying its own pacing settings).
 */
function createStore(defaultUa) {
  // Both categories use the same pacing values.
  const pacing = {
    pageConcurrency: 1,
    pageStaggerMs: 10000,
    discoveryDelayMs: 10000,
    skuPageDelayMs: 12000,
  };

  const whisky = {
    key: "whisky",
    label: "Whisky",
    startUrl: "https://craftcellars.ca/collections/whisky?filter.v.availability=1",
    ...pacing,
  };

  const rum = {
    key: "rum",
    label: "Rum",
    startUrl: "https://craftcellars.ca/collections/rum?filter.v.availability=1",
    ...pacing,
  };

  return {
    key: "craftcellars",
    name: "Craft Cellars",
    host: "craftcellars.ca",
    ua: defaultUa,
    scanCategory: scanCategoryCraftCellars,
    parseProducts: parseProductsCraftCellars,
    makePageUrl: makePageUrlShopifyQueryPage,
    isEmptyListingPage: craftCellarsIsEmptyListingPage,
    categories: [whisky, rum],
  };
}
module.exports = { createStore }; module.exports = { createStore };

View file

@ -6,12 +6,12 @@ const { normalizeCspc, pickBetterSku, needsSkuDetail } = require("../utils/sku")
const { makePageUrl } = require("../utils/url"); const { makePageUrl } = require("../utils/url");
/**
 * Decide from a product tile's markup whether the product is in stock.
 *
 * An `outofstock` token always wins. Otherwise the tile counts as in stock
 * when it contains an `in-stock` or `instock` token, or an element whose text
 * is "<number> in stock". All checks are case-insensitive.
 *
 * @param {*} block tile markup; falsy values are treated as empty.
 * @returns {boolean}
 */
function looksInStock(block) {
  const s = String(block || "");
  if (/\boutofstock\b/i.test(s)) return false;
  // The former trailing re-test of /\bin-stock\b/ could only ever be false
  // (the same check had already returned true), so the chain is one expression.
  return /\bin-stock\b/i.test(s) || /\binstock\b/i.test(s) || />\s*\d+\s+in\s+stock\s*</i.test(s);
}
// Gull product tiles commonly contain two amounts: // Gull product tiles commonly contain two amounts:
@ -19,116 +19,111 @@ function looksInStock(block) {
// - deposit (e.g. 0.10) inside the "price suffix" // - deposit (e.g. 0.10) inside the "price suffix"
// We extract all amounts and pick the last one >= 1.00 (sale price if present). // We extract all amounts and pick the last one >= 1.00 (sale price if present).
/**
 * Picks the display price out of one Gull product tile.
 *
 * Every `<span class="…woocommerce-Price-amount…"><bdi>…</bdi>` amount is
 * collected, amounts below 1.00 are dropped, and the last remaining amount
 * is returned.
 *
 * @param {string} block - HTML of one product tile.
 * @returns {string} Price formatted as "$12.34", or "" when no amount >= 1.00 is found.
 */
function extractGullPriceFromBlock(block) {
  const s = String(block || "");

  // Capture the <bdi> contents of each price-amount span.
  const amountRe =
    /<span\b[^>]*class=["'][^"']*\bwoocommerce-Price-amount\b[^"']*["'][^>]*>\s*<bdi\b[^>]*>([\s\S]*?)<\/bdi>/gi;

  const amounts = [];
  for (const m of s.matchAll(amountRe)) {
    const raw = cleanText(decodeHtml(m[1] || "")); // e.g. "$24.05"
    const n = parseFloat(String(raw).replace(/[^0-9.]/g, ""));
    if (Number.isFinite(n)) amounts.push(n);
  }

  // Drop bottle deposits / tiny fees (usually 0.10, 0.20, etc.).
  const prices = amounts.filter((n) => n >= 1.0);
  if (!prices.length) return "";

  // When a sale is shown, the old price usually comes first and the current
  // price last, so the last amount is taken.
  return `$${prices[prices.length - 1].toFixed(2)}`;
}
/**
 * Normalizes a raw Gull SKU value into the tracker's SKU key form.
 *
 * Gull SKUs are often NOT 6 digits (e.g. 67424). A 6-digit run goes through
 * normalizeCspc; any other 3-10 digit run becomes `id:<digits>` so that
 * normalizeCspc does not turn it into a generated `u:` key.
 *
 * @param {string} raw - SKU text from a tile or product page, or the product
 *   URL when no SKU was found.
 * @returns {string} Already-prefixed value unchanged, `id:<digits>`, or
 *   whatever normalizeCspc returns.
 */
function normalizeGullSku(raw) {
  const s = cleanText(decodeHtml(String(raw || ""))).trim();

  // Already in a stable prefixed form.
  if (/^(id:|u:)/i.test(s)) return s;

  // The tile parser falls back to the product URL when no SKU is present.
  // Digits inside a URL belong to the slug (e.g. ".../1792-bourbon/"), not to
  // a SKU, so skip digit extraction and let normalizeCspc handle the URL.
  if (/^https?:\/\//i.test(s)) return normalizeCspc(s);

  // Digits-only SKU (from page / tile).
  const digits = s.match(/\b(\d{3,10})\b/)?.[1] || "";
  if (digits) {
    return digits.length === 6 ? normalizeCspc(digits) : `id:${digits}`;
  }

  // Fall back to the shared normalizer (may yield u:...).
  return normalizeCspc(s);
}
/**
 * Tells whether a SKU is a generated `u:<hex>` key.
 *
 * Accepts 8 to 128 hex characters after the `u:` prefix, case-insensitively.
 *
 * @param {string} sku - SKU key to test (nullish is treated as "").
 * @returns {boolean} true for `u:` followed by 8-128 hex characters.
 */
function isGeneratedUrlSku(sku) {
  return /^u:[0-9a-f]{8,128}$/i.test(String(sku || ""));
}
/**
 * Extracts the SKU from a Gull product page.
 *
 * Tries `<span class="sku">67424</span>` first, then a plain "SKU: 67424"
 * text match. The digits found are passed through normalizeGullSku.
 *
 * @param {string} html - Product page HTML.
 * @returns {string} Normalized SKU, or "" when neither pattern matches.
 */
function extractGullSkuFromProductPage(html) {
  const s = String(html || "");

  // Ordered from most to least reliable.
  const patterns = [
    /<span\b[^>]*class=["'][^"']*\bsku\b[^"']*["'][^>]*>\s*([0-9]{3,10})\s*<\/span>/i,
    /\bSKU:\s*([0-9]{3,10})\b/i,
  ];

  for (const re of patterns) {
    const digits = s.match(re)?.[1];
    if (digits) return normalizeGullSku(digits);
  }
  return "";
}
/**
 * Creates a serial limiter: tasks run one after another, and each task
 * starts at least `minIntervalMs` after the previous task started.
 *
 * @param {number} minIntervalMs - Minimum gap between task starts, in ms.
 * @returns {(fn: () => any) => Promise<any>} `schedule(fn)` queues `fn` and
 *   resolves or rejects with its outcome.
 */
function createMinIntervalLimiter(minIntervalMs) {
  let lastStart = 0;
  let chain = Promise.resolve();

  return async function schedule(fn) {
    const run = chain.then(async () => {
      const waitMs = Math.max(0, lastStart + minIntervalMs - Date.now());
      if (waitMs > 0) await new Promise((resolve) => setTimeout(resolve, waitMs));
      lastStart = Date.now();
      return fn();
    });

    // The queue tail must never be a rejected promise; otherwise one failing
    // task would make every later schedule() call reject without running.
    // The caller still sees the failure through `run`.
    chain = run.then(
      () => undefined,
      () => undefined,
    );

    return run;
  };
}
/**
 * Fetches `url` as text, retrying when the server answers HTTP 429.
 *
 * The wait before a retry comes from the `Retry-After` header when it is
 * usable (delta-seconds or an HTTP-date); otherwise it grows by 15 s per
 * attempt (15 s, 30 s, ...).
 *
 * @param {string} url - URL to fetch.
 * @param {object} opts
 * @param {Function} opts.fetchFn - Called as `fetchFn(url, { headers })`; must
 *   resolve to an object with `status`, `ok`, `text()` and optionally `headers.get()`.
 * @param {object} [opts.headers] - Request headers passed through to `fetchFn`.
 * @param {number} [opts.maxRetries=2] - Number of retries allowed after a 429.
 * @returns {Promise<string>} Response body text.
 * @throws {Error} `HTTP <status> fetching <url>` on a non-OK, non-429 status,
 *   or `HTTP 429 fetching <url>` once retries are exhausted.
 */
async function fetchWith429Backoff(url, { fetchFn, headers, maxRetries = 2 }) {
  for (let attempt = 0; ; attempt++) {
    const res = await fetchFn(url, { headers });

    if (res.status !== 429) {
      if (!res.ok) throw new Error(`HTTP ${res.status} fetching ${url}`);
      return await res.text();
    }

    if (attempt >= maxRetries) throw new Error(`HTTP 429 fetching ${url}`);

    const rawRa = typeof res.headers?.get === "function" ? res.headers.get("retry-after") : null;
    const ra = String(rawRa ?? "").trim();

    // Default: progressive backoff.
    let waitMs = 15000 * (attempt + 1);
    if (/^\d+$/.test(ra)) {
      // Retry-After as delta-seconds.
      waitMs = parseInt(ra, 10) * 1000;
    } else if (/[a-z]/i.test(ra)) {
      // Retry-After as an HTTP-date; a date in the past means "retry now".
      const at = Date.parse(ra);
      if (Number.isFinite(at)) waitMs = Math.max(0, at - Date.now());
    }

    await new Promise((resolve) => setTimeout(resolve, waitMs));
  }
}
/** /**
@ -137,142 +132,133 @@ async function fetchWith429Backoff(url, { fetchFn, headers, maxRetries = 2 }) {
* *
* NEW: accepts prevDb so we can skip fetch if URL already has a good SKU cached. * NEW: accepts prevDb so we can skip fetch if URL already has a good SKU cached.
*/ */
async function hydrateGullSkus(items, { fetchFn, ua, minIntervalMs = 12000, maxRetries = 2, prevDb } = {}) {
  // Replaces generated u:... SKUs with the SKU found on the product page.
  // Mutates the entries of `items` in place and returns the same array.
  // A failed product-page fetch rejects and stops the loop.
  if (!fetchFn) throw new Error("hydrateGullSkus requires opts.fetchFn");

  // Product-page requests start at least minIntervalMs apart.
  const schedule = createMinIntervalLimiter(minIntervalMs);
  const headers = {
    "user-agent": ua || "Mozilla/5.0",
    accept: "text/html,application/xhtml+xml",
  };

  for (const it of items || []) {
    if (!it || !it.url) continue;

    // If the previous DB already has a good SKU for this URL, reuse it and skip the fetch.
    const prev = prevDb?.byUrl?.get(it.url) || null;
    if (prev?.sku && !needsSkuDetail(prev.sku)) {
      it.sku = pickBetterSku(it.sku, prev.sku);
      continue;
    }

    // Only items still carrying a generated u:... key need a product-page fetch.
    if (!isGeneratedUrlSku(it.sku)) continue;

    const html = await schedule(() => fetchWith429Backoff(it.url, { fetchFn, headers, maxRetries }));
    const realSku = extractGullSkuFromProductPage(html);
    if (realSku) it.sku = pickBetterSku(realSku, it.sku);
  }

  return items;
}
/**
 * Parses a Gull listing page into product items.
 *
 * The page is split on `<li class="…product…">` opening tags. A tile is kept
 * only if it looks in stock, has a `woocommerce-LoopProduct-link` href that
 * resolves to a URL, and has a non-empty title. Items are de-duplicated by
 * URL; the last occurrence wins.
 *
 * @param {string} html - Listing page HTML.
 * @param {object} [ctx] - Scan context; `ctx.store.host` overrides the default host.
 * @returns {Array<{name: string, price: string, url: string, sku: string, img: string}>}
 */
function parseProductsGull(html, ctx) {
  const s = String(html || "");

  // Split on <li class="product ...">; parts[0] is everything before the first tile.
  const parts = s.split(/<li\b[^>]*class=["'][^"']*\bproduct\b[^"']*["'][^>]*>/i);
  if (parts.length <= 1) return [];

  const base = `https://${ctx?.store?.host || "gullliquorstore.com"}/`;
  const byUrl = new Map();

  for (const part of parts.slice(1)) {
    // NOTE(review): split() drops the real <li ...> opening tag, so classes on
    // the <li> itself are not visible to looksInStock — confirm this is intended.
    const block = '<li class="product"' + part;
    if (!looksInStock(block)) continue;

    const hrefM = block.match(
      /<a\b[^>]*href=["']([^"']+)["'][^>]*class=["'][^"']*\bwoocommerce-LoopProduct-link\b/i,
    );
    if (!hrefM || !hrefM[1]) continue;

    let url;
    try {
      url = new URL(decodeHtml(hrefM[1]), base).toString();
    } catch {
      continue; // unusable href: skip the tile
    }

    const titleM = block.match(
      /<h2\b[^>]*class=["'][^"']*\bwoocommerce-loop-product__title\b[^"']*["'][^>]*>([\s\S]*?)<\/h2>/i,
    );
    const name = cleanText(decodeHtml(titleM ? titleM[1] : ""));
    if (!name) continue;

    const price = extractGullPriceFromBlock(block);

    // The URL is the last-resort SKU source; hydrateGullSkus only re-fetches
    // when the normalized value is a generated u:... key.
    const skuRaw =
      block.match(/\bdata-product_sku=["']([^"']+)["']/i)?.[1] ||
      block.match(/\bSKU\b[^0-9]{0,30}(\d{3,10})\b/i)?.[1] ||
      url;
    const sku = normalizeGullSku(skuRaw);

    const img = extractFirstImgUrl(block, base);
    byUrl.set(url, { name, price, url, sku, img });
  }

  return [...byUrl.values()];
}
/**
 * Builds the store definition for Gull Liquor (gullliquorstore.com).
 *
 * @param {string} defaultUa - User-agent string stored on the definition as `ua`.
 * @returns {object} Store definition: identity fields, parse/hydrate hooks,
 *   paging helper and the list of categories to crawl.
 */
function createStore(defaultUa) {
  // Both categories use the same discovery and pacing values.
  const paging = {
    discoveryStartPage: 3,
    discoveryStep: 2,
    pageConcurrency: 1,
    pageStaggerMs: 10000,
    discoveryDelayMs: 10000,
  };

  return {
    key: "gull",
    name: "Gull Liquor",
    host: "gullliquorstore.com",
    ua: defaultUa,
    parseProducts: parseProductsGull,
    // Optional hook callers can use to post-process items:
    // only hits product pages when sku is u:...
    hydrateSkus: hydrateGullSkus,
    productPageMinIntervalMs: 12000, // slow by default; Gull is strict
    makePageUrl, // enables /page/N/ paging
    categories: [
      {
        key: "whisky",
        label: "Whisky",
        startUrl: "https://gullliquorstore.com/product-category/spirits/?spirit_type=whisky",
        ...paging,
      },
      {
        key: "rum",
        label: "Rum",
        startUrl: "https://gullliquorstore.com/product-category/spirits/?spirit_type=rum",
        ...paging,
      },
    ],
  };
}
module.exports = { module.exports = {
createStore, createStore,
parseProductsGull, parseProductsGull,
hydrateGullSkus, hydrateGullSkus,
extractGullSkuFromProductPage, extractGullSkuFromProductPage,
isGeneratedUrlSku, isGeneratedUrlSku,
normalizeGullSku, normalizeGullSku,
}; };

View file

@ -18,24 +18,24 @@ const { createStore: createWillowPark } = require("./willowpark");
const { createStore: createArc } = require("./arc"); const { createStore: createArc } = require("./arc");
/**
 * Builds every store definition, each with the same default user agent.
 *
 * @param {object} [opts]
 * @param {string} [opts.defaultUa] - User-agent string passed to each store factory.
 * @returns {object[]} Store definitions in the fixed order listed below.
 */
function createStores({ defaultUa } = {}) {
  const factories = [
    createKWM,
    createCraftCellars,
    createSierra,
    createGull,
    createCoop,
    createStrath,
    createBCL,
    createBSW,
    createWillowPark,
    createVessel,
    createMaltsAndGrains,
    createKegNCork,
    createTudor,
    createVintage,
    createLegacy,
    createArc,
  ];
  return factories.map((create) => create(defaultUa));
}
// Public surface of the stores index.
module.exports = { createStores, parseProductsSierra };

View file

@ -4,75 +4,74 @@ const { decodeHtml, cleanText, stripTags, extractFirstImgUrl } = require("../uti
const { makePageUrlQueryParam } = require("../utils/url"); const { makePageUrlQueryParam } = require("../utils/url");
/**
 * Builds the URL of listing page `pageNum` by delegating to
 * makePageUrlQueryParam with the `page` query parameter.
 *
 * @param {string} baseUrl - Category listing URL.
 * @param {number} pageNum - Page number to address.
 * @returns {string} Whatever makePageUrlQueryParam returns for that page.
 */
function makePageUrlKegNCork(baseUrl, pageNum) {
  return makePageUrlQueryParam(baseUrl, "page", pageNum);
}
/**
 * Parses a Keg N Cork listing page into product items.
 *
 * The page is split on `<li class="…product…">` opening tags. A tile is kept
 * when its `h4.card-title` link has an absolute http(s) href and a non-empty
 * name. Items are de-duplicated by URL; the last occurrence wins.
 *
 * @param {string} html - Listing page HTML.
 * @param {object} [ctx] - Scan context; `ctx.store.host` overrides the default
 *   host and `ctx.logger.dbg` receives a debug line when present.
 * @returns {Array<{name: string, price: string, url: string, img: string}>}
 */
function parseProductsKegNCork(html, ctx) {
  const s = String(html || "");
  const base = `https://${ctx?.store?.host || "kegncork.com"}/`;

  const blocks = s.split(/<li\b[^>]*class=["'][^"']*\bproduct\b[^"']*["'][^>]*>/i);
  // ctx is optional (see `base` above), so guard it here as well.
  ctx?.logger?.dbg?.(`parseProductsKegNCork: li.product blocks=${Math.max(0, blocks.length - 1)} bytes=${s.length}`);

  const byUrl = new Map();
  for (const part of blocks.slice(1)) {
    const block = "<li" + part;

    const mTitle = block.match(
      /<h4\b[^>]*class=["'][^"']*\bcard-title\b[^"']*["'][^>]*>[\s\S]*?<a\b[^>]*href=["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>/i,
    );
    if (!mTitle) continue;

    const url = decodeHtml(mTitle[1]).trim();
    const name = cleanText(decodeHtml(mTitle[2]));
    if (!url || !/^https?:\/\//i.test(url) || !name) continue;

    let price = "";
    const mPrice = block.match(/data-product-price-without-tax[^>]*>\s*([^<]+)\s*</i);
    if (mPrice && mPrice[1]) {
      const p = cleanText(decodeHtml(mPrice[1])).replace(/\s+/g, "");
      if (p) price = p.startsWith("$") ? p : `$${p}`;
    } else {
      // No price attribute: take the first "$<amount>" in the price section's text.
      const priceSection = block.match(/data-test-info-type=["']price["'][\s\S]*?<\/div>\s*<\/div>/i)?.[0] || "";
      const mDollar = cleanText(decodeHtml(stripTags(priceSection))).match(/\$\s*\d+(?:\.\d{2})?/);
      if (mDollar) price = mDollar[0].replace(/\s+/g, "");
    }

    const img = extractFirstImgUrl(block, base);
    byUrl.set(url, { name, price, url, img });
  }

  return [...byUrl.values()];
}
/**
 * Builds the store definition for Keg N Cork (kegncork.com).
 *
 * @param {string} defaultUa - User-agent string stored on the definition as `ua`.
 * @returns {object} Store definition: identity fields, parse/paging hooks and
 *   the list of categories to crawl.
 */
function createStore(defaultUa) {
  const categories = [
    {
      key: "whisky",
      label: "Whisky",
      startUrl: "https://kegncork.com/whisky/?page=1",
      discoveryStartPage: 5,
    },
    {
      key: "rum",
      label: "Rum",
      startUrl: "https://kegncork.com/rum/?page=1",
      discoveryStartPage: 1,
    },
  ];

  return {
    key: "kegncork",
    name: "Keg N Cork",
    host: "kegncork.com",
    ua: defaultUa,
    parseProducts: parseProductsKegNCork,
    makePageUrl: makePageUrlKegNCork,
    categories,
  };
}
// Public surface of the Keg N Cork store module.
module.exports = { createStore };

View file

@ -6,184 +6,186 @@ const { normalizeCspc } = require("../utils/sku");
const { normalizeBaseUrl } = require("../utils/url"); const { normalizeBaseUrl } = require("../utils/url");
/**
 * Builds the URL of listing page `pageNum` for Kensington Wine Market.
 *
 * The fragment is always removed. Page 1 (or lower) has no `page` query
 * parameter; later pages carry `page=<pageNum>`.
 *
 * @param {string} baseUrl - Category listing URL (passed through normalizeBaseUrl).
 * @param {number} pageNum - Page number to address.
 * @returns {string} Absolute page URL.
 */
function makePageUrlKWM(baseUrl, pageNum) {
  const u = new URL(normalizeBaseUrl(baseUrl));
  u.hash = "";

  if (pageNum <= 1) u.searchParams.delete("page");
  else u.searchParams.set("page", String(pageNum));

  // Rebuild the query string explicitly so an empty query leaves no "?".
  const qs = u.searchParams.toString();
  u.search = qs ? `?${qs}` : "";
  return u.toString();
}
/**
 * Collects the full HTML (opening tag through matching `</div>`) of each
 * `<div>` whose class attribute contains `className` as a whole word.
 *
 * Nesting is tracked by counting `<div` and `</div>` occurrences after the
 * opening tag. A div whose closing tag is never found is skipped. After a
 * block is collected, the search resumes after its closing tag.
 *
 * @param {string} html - HTML to scan.
 * @param {string} className - Class token to look for (regex-escaped via escapeRe).
 * @param {number} maxBlocks - Stop once this many blocks have been collected.
 * @returns {string[]} Matching div blocks in document order.
 */
function extractDivBlocksByExactClass(html, className, maxBlocks) {
  const OPEN = "<div";
  const CLOSE = "</div>";

  const s = String(html || "");
  const openRe = new RegExp(`<div\\b[^>]*class=["'][^"']*\\b${escapeRe(className)}\\b[^"']*["'][^>]*>`, "gi");
  const out = [];

  let m;
  while ((m = openRe.exec(s))) {
    if (out.length >= maxBlocks) break;

    let i = m.index + m[0].length; // first character after the opening tag
    let depth = 1;

    while (i < s.length) {
      const nextOpen = s.indexOf(OPEN, i);
      const nextClose = s.indexOf(CLOSE, i);
      if (nextClose === -1) break; // unbalanced: give up on this div

      if (nextOpen !== -1 && nextOpen < nextClose) {
        depth++;
        i = nextOpen + OPEN.length;
        continue;
      }

      depth--;
      if (depth === 0) {
        const end = nextClose + CLOSE.length;
        out.push(s.slice(m.index, end));
        openRe.lastIndex = end; // resume the search after this block
        break;
      }
      i = nextClose + CLOSE.length;
    }
  }

  return out;
}
/**
 * Finds the href of the `product-link` anchor in a product block.
 *
 * An empty anchor (`<a …></a>`) is preferred; if there is none, any anchor
 * carrying the `product-link` class is accepted. `class` and `href` may
 * appear in either order.
 *
 * @param {string} block - HTML of one product block (nullish is treated as "").
 * @returns {string} Trimmed href, or "" when no product link is found.
 */
function kwmExtractProductLinkHref(block) {
  const s = String(block || "");

  // Tried in order; the first pattern that captures an href wins.
  const patterns = [
    /<a\b[^>]*class=["'][^"']*\bproduct-link\b[^"']*["'][^>]*href=["']([^"']+)["'][^>]*>\s*<\/a>/i,
    /<a\b[^>]*href=["']([^"']+)["'][^>]*class=["'][^"']*\bproduct-link\b[^"']*["'][^>]*>\s*<\/a>/i,
    /<a\b[^>]*class=["'][^"']*\bproduct-link\b[^"']*["'][^>]*href=["']([^"']+)["']/i,
    /<a\b[^>]*href=["']([^"']+)["'][^>]*class=["'][^"']*\bproduct-link\b[^"']*["']/i,
  ];

  for (const re of patterns) {
    const m = s.match(re);
    if (m && m[1]) return m[1].trim();
  }
  return "";
}
/**
 * Extracts the product name from a product block.
 *
 * Uses the `data-item` attribute when present, otherwise the tag-stripped
 * contents of the first `<h6>`. Both go through sanitizeName.
 *
 * @param {string} block - HTML of one product block.
 * @returns {string} Sanitized name, or "" when neither source is found.
 */
function kwmExtractName(block) {
  const dataItem = extractHtmlAttr(block, "data-item");
  if (dataItem) return sanitizeName(dataItem);

  const heading = block.match(/<h6\b[^>]*>\s*([\s\S]*?)\s*<\/h6>/i);
  return heading && heading[1] ? sanitizeName(stripTags(heading[1])) : "";
}
/**
 * Returns the inner HTML of the first `<div>` whose class attribute contains
 * `className` as a whole word.
 *
 * Nesting is tracked by counting `<div` and `</div>` occurrences after the
 * opening tag.
 *
 * @param {string} html - HTML to scan.
 * @param {string} className - Class token to look for (regex-escaped via escapeRe).
 * @returns {string} Inner HTML (without the div's own tags), or "" when there
 *   is no such div or its closing tag is never found.
 */
function kwmExtractFirstDivByClass(html, className) {
  const OPEN = "<div";
  const CLOSE = "</div>";

  const s = String(html || "");
  const openRe = new RegExp(`<div\\b[^>]*class=["'][^"']*\\b${escapeRe(className)}\\b[^"']*["'][^>]*>`, "i");
  const m = openRe.exec(s);
  if (!m) return "";

  const start = m.index + m[0].length; // first character of the inner HTML
  let i = start;
  let depth = 1;

  while (i < s.length) {
    const nextOpen = s.indexOf(OPEN, i);
    const nextClose = s.indexOf(CLOSE, i);
    if (nextClose === -1) break; // unbalanced markup

    if (nextOpen !== -1 && nextOpen < nextClose) {
      depth++;
      i = nextOpen + OPEN.length;
      continue;
    }

    depth--;
    if (depth === 0) return s.slice(start, nextClose);
    i = nextClose + CLOSE.length;
  }

  return "";
}
/**
 * Extracts the price from a product block.
 *
 * Prefers the `data-price` attribute. Otherwise it reads the `product-price`
 * div, drops struck-through spans, and takes the first "$<amount>" in the
 * remaining text.
 *
 * @param {string} block - HTML of one product block (nullish is treated as "").
 * @returns {string} Price such as "$24.50", or "" when none is found.
 */
function kwmExtractPrice(block) {
  const s = String(block || "");

  const attr = s.match(/\bdata-price=["']([^"']+)["']/i);
  if (attr && attr[1]) {
    const digits = String(attr[1]).trim().replace(/[^0-9.]/g, "");
    if (digits) {
      // Values like "." or "1.2.3" are not numbers; fall through to the
      // visible price instead of returning "$NaN".
      const value = Number(digits);
      if (Number.isFinite(value)) return `$${value.toFixed(2)}`;
    }
  }

  const priceDiv = kwmExtractFirstDivByClass(s, "product-price");
  if (!priceDiv) return "";

  // Remove struck-through spans before reading the price text.
  const cleaned = String(priceDiv).replace(
    /<span\b[^>]*class=["'][^"']*\bstrike\b[^"']*["'][^>]*>[\s\S]*?<\/span>/gi,
    " ",
  );

  const txt = cleanText(decodeHtml(stripTags(cleaned)));
  const firstDollar = txt.match(/\$\s*\d+(?:\.\d{2})?/);
  return firstDollar ? firstDollar[0].replace(/\s+/g, "") : "";
}
/**
 * Parses a Kensington Wine Market listing page into product items.
 *
 * Each `product-wrap` div is one product. A block is skipped when it contains
 * "OUT OF STOCK", has no product link, has an href that does not resolve to a
 * URL, or has no name. Items are de-duplicated by URL; the last occurrence wins.
 *
 * @param {string} html - Listing page HTML.
 * @param {object} [ctx] - Scan context; `ctx.store.host` overrides the default
 *   host and `ctx.logger.dbg` receives a debug line when present.
 * @returns {Array<{name: string, price: string, url: string, sku: string, img: string}>}
 */
function parseProductsKWM(html, ctx) {
  const s = String(html || "");
  const base = `https://${ctx?.store?.host || "kensingtonwinemarket.com"}/`;

  const blocks = extractDivBlocksByExactClass(s, "product-wrap", 5000);
  // ctx is optional (see `base` above), so guard it here as well.
  ctx?.logger?.dbg?.(`parseProductsKWM: productWrapBlocks=${blocks.length} bytes=${s.length}`);

  const byUrl = new Map();
  for (const block of blocks) {
    if (/OUT OF STOCK/i.test(block)) continue;

    const href = kwmExtractProductLinkHref(block);
    if (!href) continue;

    let url;
    try {
      url = new URL(decodeHtml(href), base).toString();
    } catch {
      continue; // unusable href: skip the block
    }

    const name = kwmExtractName(block);
    if (!name) continue;

    const price = kwmExtractPrice(block);
    const sku = normalizeCspc(url);
    const img = extractFirstImgUrl(block, base);

    byUrl.set(url, { name, price, url, sku, img });
  }

  return [...byUrl.values()];
}
/**
 * Builds the store definition for Kensington Wine Market.
 *
 * @param {string} defaultUa - User-agent string stored on the definition as `ua`.
 * @returns {object} Store definition: identity fields, parse/paging hooks and
 *   the list of categories to crawl.
 */
function createStore(defaultUa) {
  const categories = [
    {
      key: "scotch",
      label: "Scotch",
      startUrl: "https://kensingtonwinemarket.com/products/scotch/",
      discoveryStartPage: 200,
    },
    {
      key: "rum",
      label: "Rum",
      startUrl: "https://kensingtonwinemarket.com/products/liqu/rum/",
      discoveryStartPage: 20,
    },
  ];

  return {
    key: "kwm",
    name: "Kensington Wine Market",
    host: "kensingtonwinemarket.com",
    ua: defaultUa,
    parseProducts: parseProductsKWM,
    makePageUrl: makePageUrlKWM,
    categories,
  };
}
// Public surface of the Kensington Wine Market store module.
module.exports = { createStore };

View file

@ -9,57 +9,57 @@ const { buildDbObject, writeJsonAtomic } = require("../tracker/db");
const { addCategoryResultToReport } = require("../tracker/report"); const { addCategoryResultToReport } = require("../tracker/report");
/**
 * Formats a byte count with humanBytes and left-pads it to 8 characters.
 *
 * @param {number} bytes - Byte count.
 * @returns {string} Padded size string.
 */
function kbStr(bytes) {
  const text = humanBytes(bytes);
  return text.padStart(8, " ");
}
/**
 * Normalizes a raw SKU into the tracker's SKU key form.
 *
 * Order of preference: a truthy normalizeCspc result, then `id:<digits>` for
 * the first standalone run of 1-11 digits, then normalizeSkuKey.
 *
 * @param {*} rawSku - Raw SKU value; null/undefined are treated as "".
 * @param {object} [opts] - Context passed to normalizeSkuKey.
 * @param {string} [opts.storeLabel] - Store label.
 * @param {string} [opts.url] - Product URL.
 * @returns {string} Normalized key, or "" when `rawSku` is blank.
 */
function normalizeLegacySku(rawSku, { storeLabel, url } = {}) {
  const raw = String(rawSku ?? "").trim();
  if (!raw) return "";

  const cspc = normalizeCspc(raw);
  if (cspc) return cspc;

  const digits = raw.match(/\b(\d{1,11})\b/)?.[1];
  if (digits) return `id:${digits}`;

  return normalizeSkuKey(raw, { storeLabel, url });
}
/**
 * Formats a duration in milliseconds as seconds, left-padded to 7 characters.
 *
 * Durations that round to under 10 s show one decimal ("1.2s"); longer ones
 * show whole seconds ("12s"). Non-finite input is shown as "0.0s".
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Padded duration string.
 */
function secStr(ms) {
  const seconds = Number.isFinite(ms) ? ms / 1000 : 0;
  const tenths = Math.round(seconds * 10) / 10;
  const text = tenths < 10 ? `${tenths.toFixed(1)}s` : `${Math.round(seconds)}s`;
  return text.padStart(7, " ");
}
/**
 * Formats a progress counter as "<i>/<total>", with `i` padded via padLeft to
 * the number of characters in `total`.
 *
 * @param {number} i - Current page index.
 * @param {number} total - Total number of pages.
 * @returns {string} Counter string.
 */
function pageStr(i, total) {
  const width = String(total).length;
  return `${padLeft(i, width)}/${total}`;
}
/**
 * Formats `done / total` as a whole-number percentage (rounded down), padded
 * via padLeft to width 3 and followed by "%". A falsy `total` gives 0.
 *
 * @param {number} done - Completed count.
 * @param {number} total - Total count.
 * @returns {string} Percentage string.
 */
function pctStr(done, total) {
  const pct = total ? Math.floor((done / total) * 100) : 0;
  return `${padLeft(pct, 3)}%`;
}
/**
 * Formats a numeric value as a dollar amount with two decimals.
 *
 * Missing values (null, undefined, blank strings) and non-numeric values
 * return "" so that `cad(a) || cad(b)` chains fall through to the next
 * candidate. A real zero is still formatted as "$0.00".
 *
 * @param {*} n - Number or numeric string.
 * @returns {string} "$<amount>" or "".
 */
function cad(n) {
  // Number(null) and Number("") are both 0, which would otherwise come out as
  // a truthy "$0.00" and hide the fallback prices.
  if (n == null) return "";
  if (typeof n === "string" && n.trim() === "") return "";

  const x = Number(n);
  return Number.isFinite(x) ? `$${x.toFixed(2)}` : "";
}
/**
 * Turns a possibly relative URL into an absolute one.
 *
 * Protocol-relative values ("//host/…") get an "https:" prefix, http(s) URLs
 * are returned as given, and anything else is resolved against
 * https://www.legacyliquorstore.com/.
 *
 * @param {string} raw - URL text (nullish is treated as "").
 * @returns {string} Absolute URL, "" for blank input, or the trimmed input if
 *   it cannot be resolved.
 */
function normalizeAbsUrl(raw) {
  const s = String(raw || "").trim();
  if (!s) return "";
  if (s.startsWith("//")) return `https:${s}`;
  if (/^https?:\/\//i.test(s)) return s;

  try {
    return new URL(s, "https://www.legacyliquorstore.com/").toString();
  } catch {
    return s; // not resolvable: hand back the input unchanged
  }
}
// URL of the storefront /graphql endpoint used by this module.
const LEGACY_GQL_URL = "https://production-storefront-api-hagnfhf3sq-uc.a.run.app/graphql";
@ -140,184 +140,201 @@ query(
`; `;
/**
 * Returns the first variant of a product whose `quantity` is a finite number
 * greater than zero.
 *
 * @param {object} p - Product; `p.variants` is used only when it is an array.
 * @returns {object|null} The first in-stock variant, or null when there is none.
 */
function pickInStockVariant(p) {
  const variants = Array.isArray(p?.variants) ? p.variants : [];
  const inStock = variants.find((v) => {
    const qty = Number(v?.quantity);
    return Number.isFinite(qty) && qty > 0;
  });
  return inStock ?? null;
}
/**
 * Convert one product from the storefront API response into a tracker item.
 *
 * Returns null (item skipped) when the product has no in-stock variant, no
 * slug, or no usable name.
 *
 * @param {object} p - Product object from the API (`slug`, `name`, `variants`, `priceFrom`, `priceTo`).
 * @param {object} ctx - Scan context; reads `ctx.cat.key` and `ctx.store.name`.
 * @returns {{name: string, price: string, url: string, sku: string, img: string}|null}
 */
function legacyProductToItem(p, ctx) {
  const v = pickInStockVariant(p);
  if (!v) return null;

  const slug = String(p?.slug || "").trim();
  if (!slug) return null;

  const base = "https://www.legacyliquorstore.com";
  // Matches observed pattern: /LL/product/spirits/<category>/<slug>
  const url = new URL(
    `/LL/product/spirits/${encodeURIComponent(ctx.cat.key)}/${encodeURIComponent(slug)}`,
    base,
  ).toString();

  // Prefer the variant's full name; otherwise join product name and variant short name.
  const nameRaw =
    String(v?.fullName || "").trim() ||
    [String(p?.name || "").trim(), String(v?.shortName || "").trim()].filter(Boolean).join(" | ");
  const name = String(nameRaw || "").trim();
  if (!name) return null;

  // First candidate that formats to a non-empty string wins.
  const price = cad(v?.price) || cad(p?.priceFrom) || cad(p?.priceTo) || "";

  // Fall back to deriving the SKU key from the product URL when the variant SKU yields nothing.
  const sku =
    normalizeLegacySku(v?.sku, { storeLabel: ctx.store.name, url }) ||
    normalizeLegacySku(url, { storeLabel: ctx.store.name, url }) ||
    "";
  const img = normalizeAbsUrl(v?.image || "");

  return { name, price, url, sku, img };
}
/**
 * Fetch one page of products for the current category from the GraphQL endpoint.
 *
 * Only `allTags`, `pageCursor` and `pageLimit` vary between calls; every other
 * filter variable is sent as null, with a fixed name-ascending sort and store
 * id "LL".
 *
 * @param {object} ctx - Scan context; reads `ctx.cat.allTags`, `ctx.cat.key`, `ctx.store.ua`, `ctx.http`.
 * @param {string|null} pageCursor - Cursor from the previous page; falsy for the first page.
 * @param {number} pageLimit - Page size to request.
 * @returns {Promise<*>} Whatever `ctx.http.fetchJsonWithRetry` resolves to.
 */
async function legacyFetchPage(ctx, pageCursor, pageLimit) {
  const body = {
    query: PRODUCTS_QUERY,
    variables: {
      allTags: ctx.cat.allTags || null,
      anyTags: null,
      collectionSlug: null,
      countries: null,
      isBestSeller: null,
      isNewArrival: null,
      isFeatured: null,
      isFeaturedOnHomepage: null,
      isOnSale: null,
      isStaffPick: null,
      pageCursor: pageCursor || null,
      pageLimit: pageLimit,
      pointsMin: null,
      priceMin: null,
      priceMax: null,
      quantityMin: null,
      regions: null,
      brandValue: null,
      searchValue: null,
      sortOrder: "asc",
      sortBy: "name",
      storeId: "LL",
    },
  };

  return await ctx.http.fetchJsonWithRetry(
    LEGACY_GQL_URL,
    `legacy:${ctx.cat.key}:${pageCursor || "first"}`,
    ctx.store.ua,
    {
      method: "POST",
      headers: {
        Accept: "application/json",
        "content-type": "application/json",
        Origin: "https://www.legacyliquorstore.com",
        Referer: "https://www.legacyliquorstore.com/",
      },
      body: JSON.stringify(body),
    },
  );
}
/**
 * Scan one Legacy Liquor category via cursor-paginated API calls, merge the
 * result into the previous DB, persist it, and record the outcome in `report`.
 *
 * Paging stops when a fetch fails, when a page has no items or no next cursor,
 * when the cursor does not advance, or when the page cap is reached.
 *
 * @param {object} ctx - Scan context (`config`, `cat`, `store`, `http`, `logger`, `dbFile`, `catPrefixOut`).
 * @param {*} prevDb - Previous DB state, passed to `mergeDiscoveredIntoDb`.
 * @param {object} report - Run report; `categories` is appended to and `totals` incremented.
 * @returns {Promise<void>}
 */
async function scanCategoryLegacyLiquor(ctx, prevDb, report) {
  const t0 = Date.now();
  const pageLimit = 100;

  const discovered = new Map();

  let cursor = null;
  let page = 0;
  let done = 0;

  // `??` covers undefined as well as null: with `=== null`, an unset maxPages made
  // `page < undefined` false, so no page was scanned and the merge saw an empty set.
  const maxPagesCap = ctx.config.maxPages ?? 5000;

  while (page < maxPagesCap) {
    page++;

    let r;
    try {
      r = await legacyFetchPage(ctx, cursor, pageLimit);
    } catch (e) {
      ctx.logger.warn(`${ctx.catPrefixOut} | LegacyLiquor fetch failed p${page}: ${e?.message || e}`);
      break;
    }

    const items = r?.json?.data?.products?.items;
    const next = r?.json?.data?.products?.nextPageCursor;

    const arr = Array.isArray(items) ? items : [];
    let kept = 0;

    for (const p of arr) {
      const it = legacyProductToItem(p, ctx);
      if (!it) continue;
      discovered.set(it.url, it);
      kept++;
    }

    done++;
    // The total page count is unknown up front, so the counters show done/done.
    // `r` is read with `?.` here to match the guarded `r?.json` reads above.
    ctx.logger.ok(
      `${ctx.catPrefixOut} | Page ${pageStr(done, done)} | ${String(r?.status || "").padEnd(3)} | ${pctStr(done, done)} | kept=${padLeft(kept, 3)} | bytes=${kbStr(r?.bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r?.ms)}`,
    );

    if (!next || !arr.length) break;
    if (next === cursor) break; // safety
    cursor = next;
  }

  const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, {
    storeLabel: ctx.store.name,
  });

  const dbObj = buildDbObject(ctx, merged);
  writeJsonAtomic(ctx.dbFile, dbObj);

  const elapsed = Date.now() - t0;
  ctx.logger.ok(
    `${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`,
  );

  report.categories.push({
    store: ctx.store.name,
    label: ctx.cat.label,
    key: ctx.cat.key,
    dbFile: ctx.dbFile,
    scannedPages: Math.max(1, page),
    discoveredUnique: discovered.size,
    newCount: newItems.length,
    updatedCount: updatedItems.length,
    removedCount: removedItems.length,
    restoredCount: restoredItems.length,
    elapsedMs: elapsed,
  });
  report.totals.newCount += newItems.length;
  report.totals.updatedCount += updatedItems.length;
  report.totals.removedCount += removedItems.length;
  report.totals.restoredCount += restoredItems.length;

  addCategoryResultToReport(
    report,
    ctx.store.name,
    ctx.cat.label,
    newItems,
    updatedItems,
    removedItems,
    restoredItems,
  );
}
/**
 * Build the store definition for Legacy Liquor.
 *
 * The store supplies its own `scanCategory` implementation; each category
 * carries the `allTags` filter that `legacyFetchPage` sends to the API.
 *
 * @param {string} defaultUa - User-Agent string stored on the definition as `ua`.
 * @returns {object} Store definition (`key`, `name`, `host`, `ua`, `scanCategory`, `categories`).
 */
function createStore(defaultUa) {
  return {
    key: "legacyliquor",
    name: "Legacy Liquor",
    host: "www.legacyliquorstore.com",
    ua: defaultUa,
    scanCategory: scanCategoryLegacyLiquor,
    categories: [
      {
        key: "whisky",
        label: "Whisky",
        startUrl: "https://www.legacyliquorstore.com/LL/category/spirits/whisky",
        allTags: ["spirits", "whisky"],
      },
      {
        key: "rum",
        label: "Rum",
        startUrl: "https://www.legacyliquorstore.com/LL/category/spirits/rum",
        allTags: ["spirits", "rum"],
      },
    ],
  };
}
module.exports = { createStore }; module.exports = { createStore };

View file

@ -5,103 +5,104 @@ const { normalizeCspc } = require("../utils/sku");
const { extractPriceFromTmbBlock } = require("../utils/woocommerce"); const { extractPriceFromTmbBlock } = require("../utils/woocommerce");
/**
 * Category filter for Malts & Grains: keep every item except those explicitly
 * out of stock or tagged with a gin, tequila or mezcal category.
 *
 * Matching is on whole words, so a category such as "london-dry-gin" is
 * excluded while "ginger-beer" is not.
 *
 * @param {object} item - Parsed item; reads `item.inStock` and `item.cats`.
 * @returns {boolean} true when the item should be kept.
 */
function allowMaltsExcludeGinTequilaMezcal(item) {
  if (item && item.inStock === false) return false;

  const cats = Array.isArray(item?.cats) ? item.cats : [];
  const has = (re) => cats.some((c) => re.test(String(c || "")));

  if (has(/\bgin\b/i)) return false;
  if (has(/\btequila\b/i) || has(/\bmezcal\b/i)) return false;

  return true;
}
/**
 * Parse a Malts & Grains shop listing page into tracker items.
 *
 * Each `<li class="... product ...">` block becomes one item. Blocks are
 * skipped when they are marked out of stock, have no product link, have a link
 * that cannot be resolved to an http(s) URL, or have no title. Results are
 * de-duplicated by URL, with the last occurrence winning.
 *
 * @param {string} html - Listing page HTML.
 * @param {object} ctx - Scan context; reads `ctx.logger` and `ctx.store.host` (both optional).
 * @returns {Array<{name: string, price: *, url: string, sku: *, img: *, cats: string[], inStock: boolean}>}
 */
function parseProductsMaltsAndGrains(html, ctx) {
  const s = String(html || "");
  const items = [];

  const re = /<li\b[^>]*class=["'][^"']*\bproduct\b[^"']*["'][^>]*>[\s\S]*?<\/li>/gi;
  const blocks = [...s.matchAll(re)].map((m) => m[0] || "");
  // `ctx?.` matches the guarded `ctx && ctx.store` read just below.
  ctx?.logger?.dbg?.(`parseProductsMaltsAndGrains: li.product blocks=${blocks.length} bytes=${s.length}`);

  const base = `https://${(ctx && ctx.store && ctx.store.host) || "maltsandgrains.store"}/`;

  for (const block of blocks) {
    const classAttr = extractHtmlAttr(block, "class");

    const isOut =
      /\boutofstock\b/i.test(classAttr) ||
      /ast-shop-product-out-of-stock/i.test(block) ||
      />\s*out of stock\s*</i.test(block);
    if (isOut) continue;

    // Category slugs come from the block's `product_cat-<slug>` class names.
    const cats = [];
    for (const m of String(classAttr || "").matchAll(/\bproduct_cat-([a-z0-9_-]+)\b/gi)) {
      const v = String(m[1] || "")
        .trim()
        .toLowerCase();
      if (v) cats.push(v);
    }

    // Try href-before-class, then class-before-href, then any /product/ link.
    const href =
      block.match(
        /<a\b[^>]*href=["']([^"']+)["'][^>]*class=["'][^"']*\b(woocommerce-LoopProduct-link|woocommerce-loop-product__link|ast-loop-product__link)\b/i,
      )?.[1] ||
      block.match(
        /<a\b[^>]*class=["'][^"']*\b(woocommerce-LoopProduct-link|woocommerce-loop-product__link|ast-loop-product__link)\b[^"']*["'][^>]*href=["']([^"']+)["']/i,
      )?.[2] ||
      block.match(/<a\b[^>]*href=["']([^"']*\/product\/[^"']+)["']/i)?.[1];
    if (!href) continue;

    let url = "";
    try {
      url = new URL(decodeHtml(href), base).toString();
    } catch {
      // An unparseable link drops this product only, not the whole page.
      continue;
    }
    if (!/^https?:\/\//i.test(url)) continue;

    const mTitle = block.match(
      /<h2\b[^>]*class=["'][^"']*\bwoocommerce-loop-product__title\b[^"']*["'][^>]*>([\s\S]*?)<\/h2>/i,
    );
    const name = mTitle && mTitle[1] ? cleanText(decodeHtml(stripTags(mTitle[1]))) : "";
    if (!name) continue;

    const price = extractPriceFromTmbBlock(block);

    const sku = normalizeCspc(
      block.match(/\bdata-product_sku=["']([^"']+)["']/i)?.[1] ||
        block.match(/\bSKU[:\s]*([0-9]{6})\b/i)?.[1] ||
        "",
    );

    const img = extractFirstImgUrl(block, base);

    items.push({ name, price, url, sku, img, cats, inStock: true });
  }

  const uniq = new Map();
  for (const it of items) uniq.set(it.url, it);
  return [...uniq.values()];
}
/**
 * Build the store definition for Malts & Grains.
 *
 * A single category covers the whole shop; gin, tequila and mezcal are removed
 * by the category's `allowUrl` filter.
 *
 * @param {string} defaultUa - User-Agent string stored on the definition as `ua`.
 * @returns {object} Store definition (`key`, `name`, `host`, `ua`, `parseProducts`, `categories`).
 */
function createStore(defaultUa) {
  return {
    key: "maltsandgrains",
    name: "Malts & Grains",
    host: "maltsandgrains.store",
    ua: defaultUa,
    parseProducts: parseProductsMaltsAndGrains,
    categories: [
      {
        key: "all-minus-gin-tequila-mezcal",
        label: "All Spirits",
        startUrl: "https://maltsandgrains.store/shop/page/1/",
        discoveryStartPage: 15,
        allowUrl: allowMaltsExcludeGinTequilaMezcal,
      },
    ],
  };
}
module.exports = { createStore }; module.exports = { createStore };

View file

@ -10,226 +10,222 @@ const { mergeDiscoveredIntoDb } = require("../tracker/merge");
const { addCategoryResultToReport } = require("../tracker/report"); const { addCategoryResultToReport } = require("../tracker/report");
/**
 * URL filter for Sierra Springs: keep only items hosted on
 * sierraspringsliquor.ca whose URL contains "rum", "whisky" or "whiskey" as a
 * whole word. The URL is lower-cased before matching.
 *
 * @param {object} item - Parsed item; reads `item.url`.
 * @returns {boolean} true when the item should be kept.
 */
function allowSierraUrlRumWhisky(item) {
  const u = item && item.url ? String(item.url) : "";
  const s = u.toLowerCase();
  if (!/^https?:\/\/sierraspringsliquor\.ca\//.test(s)) return false;
  return /\b(rum|whisk(?:e)?y)\b/.test(s);
}

// Keep old name referenced historically in this store config
const allowSierraSpiritsLiquorUrlRumWhisky = allowSierraUrlRumWhisky;
/**
 * Format a `prices` object into a display string such as "$19.99".
 *
 * The amount is taken from `price`, then `regular_price`, then `sale_price`
 * (first non-nullish). It is read as an integer in minor units and scaled by
 * `currency_minor_unit` (default 2). Prefix comes from `currency_prefix`, then
 * `currency_symbol`, then "$"; suffix from `currency_suffix`.
 *
 * @param {object|null|undefined} prices - Price descriptor from the products payload.
 * @returns {string|null} Formatted price, or null when no amount is available.
 */
function formatWooStorePrice(prices) {
  if (!prices) return null;

  const minor = Number.isFinite(prices.currency_minor_unit) ? prices.currency_minor_unit : 2;
  const raw = prices.price ?? prices.regular_price ?? prices.sale_price;
  if (raw == null) return null;

  // Without this guard a digit-free value (e.g. "") becomes Number("") === 0
  // and would be reported as a real price of 0.
  const digits = String(raw).replace(/[^\d]/g, "");
  if (!digits) return null;

  const n = Number(digits);
  if (!Number.isFinite(n)) return null;

  const value = (n / Math.pow(10, minor)).toFixed(minor);
  const prefix = prices.currency_prefix ?? prices.currency_symbol ?? "$";
  const suffix = prices.currency_suffix ?? "";
  return `${prefix}${value}${suffix}`;
}
/**
 * Parse a JSON products payload (an array of product objects) into tracker items.
 *
 * Returns an empty array when the payload is not valid JSON or not an array.
 * Products without a permalink or a name are skipped. When the category
 * defines `allowUrl`, items it rejects are dropped. Results are de-duplicated
 * by URL, with the last occurrence winning.
 *
 * @param {string} payload - Raw response body.
 * @param {object} ctx - Scan context; reads `ctx.store.name` and `ctx.cat.allowUrl` (both optional).
 * @returns {Array<{name: string, price: string|null, url: string, sku: *, img: string|null}>}
 */
function parseWooStoreProductsJson(payload, ctx) {
  const items = [];

  let data = null;
  try {
    data = JSON.parse(payload);
  } catch (_) {
    return items;
  }

  if (!Array.isArray(data)) return items;

  for (const p of data) {
    const url = p && p.permalink ? String(p.permalink) : "";
    if (!url) continue;

    const name = p && p.name ? cleanText(decodeHtml(String(p.name))) : "";
    if (!name) continue;

    const price = formatWooStorePrice(p.prices);

    // Prefer a non-blank string SKU, otherwise fall back to the product id.
    // The previous `p.id ?? p.id === 0` parsed as `p.id ?? (p.id === 0)`, so
    // its `=== 0` arm was dead and an id of 0 produced no SKU.
    const skuText = typeof p.sku === "string" ? p.sku.trim() : "";
    const rawSku = skuText || (p.id || p.id === 0 ? String(p.id) : "");

    // Purely numeric values are tagged "id:" before normalisation.
    const taggedSku = /^\d{1,11}$/.test(rawSku) ? `id:${rawSku}` : rawSku;

    const sku = normalizeSkuKey(taggedSku, { storeLabel: ctx?.store?.name, url });

    const firstSrc = Array.isArray(p.images) ? p.images[0]?.src : null;
    const img = firstSrc ? String(firstSrc) : null;

    const item = { name, price, url, sku, img };

    const allowUrl = ctx?.cat?.allowUrl;
    if (typeof allowUrl === "function" && !allowUrl(item)) continue;

    items.push(item);
  }

  const uniq = new Map();
  for (const it of items) uniq.set(it.url, it);
  return [...uniq.values()];
}
/**
 * Parse a WooCommerce-style HTML listing (`<li class="product ...">`) into tracker items.
 *
 * `<li>` chunks without a `product` class, or with a `product-category` class,
 * are ignored. The product link is the first anchor that is not an
 * add-to-cart, cart or checkout link. Entries with no link, an unresolvable
 * link, or no name are skipped. When the category defines `allowUrl`, items it
 * rejects are dropped. Results are de-duplicated by URL, with the last
 * occurrence winning.
 *
 * @param {string} html - Listing page HTML.
 * @param {object} ctx - Scan context; reads `ctx.store.host`, `ctx.store.name`, `ctx.cat.allowUrl` (all optional).
 * @returns {Array<{name: string, price: *, url: string, sku: *, img: *}>}
 */
function parseWooProductsHtml(html, ctx) {
  const s = String(html || "");
  const items = [];

  const base = `https://${(ctx && ctx.store && ctx.store.host) || "sierraspringsliquor.ca"}/`;

  const parts = s.split(/<li\b/i);
  for (let i = 1; i < parts.length; i++) {
    const chunk = "<li" + parts[i];

    if (!/class=["'][^"']*\bproduct\b/i.test(chunk)) continue;
    if (/class=["'][^"']*\bproduct-category\b/i.test(chunk)) continue;

    // Cut at the first closing tag; 5 is the length of "</li>".
    const endIdx = chunk.search(/<\/li>/i);
    const block = endIdx >= 0 ? chunk.slice(0, endIdx + 5) : chunk;

    const hrefs = [...block.matchAll(/<a\b[^>]*href=["']([^"']+)["']/gi)].map((m) => m[1]);
    const href = hrefs.find((h) => !/add-to-cart=|\/cart\/|\/checkout\//i.test(h)) || "";
    if (!href) continue;

    let url = "";
    try {
      url = new URL(decodeHtml(href), base).toString();
    } catch {
      // A malformed href drops this product only instead of aborting the page.
      continue;
    }

    const nameHtml =
      block.match(
        /<h2\b[^>]*class=["'][^"']*woocommerce-loop-product__title[^"']*["'][^>]*>([\s\S]*?)<\/h2>/i,
      )?.[1] ||
      block.match(/<h3\b[^>]*>([\s\S]*?)<\/h3>/i)?.[1] ||
      "";
    const name = cleanText(decodeHtml(nameHtml));
    if (!name) continue;

    const price = extractPriceFromTmbBlock(block);

    const rawSku =
      block.match(/\bdata-product_sku=["']([^"']+)["']/i)?.[1] ||
      block.match(/\bdata-product_id=["']([^"']+)["']/i)?.[1] ||
      "";

    // Purely numeric values are tagged "id:" before normalisation.
    const taggedSku = /^\d{1,11}$/.test(String(rawSku).trim())
      ? `id:${String(rawSku).trim()}`
      : String(rawSku || "").trim();

    const sku = normalizeSkuKey(taggedSku, { storeLabel: ctx?.store?.name, url });

    const img = extractFirstImgUrl(block, base);

    const item = { name, price, url, sku, img };

    const allowUrl = ctx?.cat?.allowUrl;
    if (typeof allowUrl === "function" && !allowUrl(item)) continue;

    items.push(item);
  }

  const uniq = new Map();
  for (const it of items) uniq.set(it.url, it);
  return [...uniq.values()];
}
/**
 * Parse a Sierra Springs response body into tracker items, choosing the parser
 * from the body's shape:
 *
 * 1. starts with "[" or "{"            -> JSON payload (`parseWooStoreProductsJson`)
 * 2. contains `<div class="tmb` blocks -> theme "tmb" tiles, parsed inline
 * 3. otherwise                         -> `parseWooProductsHtml`
 *
 * In the tmb branch, tiles with no title link, an unresolvable link, or an
 * empty name are skipped; items rejected by `ctx.cat.allowUrl` are dropped;
 * results are de-duplicated by URL, with the last occurrence winning.
 *
 * @param {string} body - Raw response body (JSON or HTML).
 * @param {object} ctx - Scan context; reads `ctx.logger`, `ctx.store.host`, `ctx.store.name`, `ctx.cat.allowUrl`.
 * @returns {Array<{name: string, price: *, url: string, sku: *, img: *}>}
 */
function parseProductsSierra(body, ctx) {
  const s = String(body || "");
  const t = s.trimStart();

  if (t.startsWith("[") || t.startsWith("{")) {
    const jsonItems = parseWooStoreProductsJson(s, ctx);
    ctx?.logger?.dbg?.(`parseProductsSierra: storeApiItems=${jsonItems.length} bytes=${s.length}`);
    return jsonItems;
  }

  const blocks = s.split(/<div class="tmb\b/i);
  ctx?.logger?.dbg?.(`parseProductsSierra: tmbBlocks=${Math.max(0, blocks.length - 1)} bytes=${s.length}`);

  const base = `https://${(ctx && ctx.store && ctx.store.host) || "sierraspringsliquor.ca"}/`;

  if (blocks.length > 1) {
    const items = [];
    for (let i = 1; i < blocks.length; i++) {
      // Restore the delimiter that split() consumed.
      const block = '<div class="tmb' + blocks[i];

      const titleMatch = block.match(
        /<h3\b[^>]*class=["'][^"']*t-entry-title[^"']*["'][^>]*>\s*<a\b[^>]*href=["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>\s*<\/h3>/i,
      );
      if (!titleMatch) continue;

      let url = "";
      try {
        url = new URL(decodeHtml(titleMatch[1]), base).toString();
      } catch {
        // A malformed href drops this tile only instead of aborting the page.
        continue;
      }

      const name = cleanText(decodeHtml(titleMatch[2]));
      if (!name) continue;

      const price = extractPriceFromTmbBlock(block);

      const rawSku =
        block.match(/\bdata-product_sku=["']([^"']+)["']/i)?.[1] ||
        block.match(/\bSKU[:\s]*([0-9]{6})\b/i)?.[1] ||
        "";

      // Purely numeric values are tagged "id:" before normalisation.
      const taggedSku = /^\d{1,11}$/.test(String(rawSku).trim()) ? `id:${String(rawSku).trim()}` : rawSku;

      const sku = normalizeSkuKey(taggedSku, { storeLabel: ctx?.store?.name, url });

      const img = extractFirstImgUrl(block, base);

      const item = { name, price, url, sku, img };

      const allowUrl = ctx?.cat?.allowUrl;
      if (typeof allowUrl === "function" && !allowUrl(item)) continue;

      items.push(item);
    }

    const uniq = new Map();
    for (const it of items) uniq.set(it.url, it);
    return [...uniq.values()];
  }

  const woo = parseWooProductsHtml(s, ctx);
  ctx?.logger?.dbg?.(`parseProductsSierra: wooItems=${woo.length} bytes=${s.length}`);
  return woo;
}
/**
 * Extract a numeric `term-<id>` from a category page's HTML.
 *
 * A `term-<digits>` token that follows `tax-product_cat` within the same
 * quoted attribute value (up to 400 characters) is preferred; failing that,
 * the first `term-<digits>` anywhere in the document is used.
 *
 * @param {string} html - Category page HTML.
 * @returns {number|null} The term id, or null when none is found.
 */
function extractProductCatTermId(html) {
  const s = String(html || "");
  // Typical body classes contain: "tax-product_cat term-<slug> term-1131 ..."
  const m = s.match(/tax-product_cat[^"']{0,400}\bterm-(\d{1,10})\b/i) || s.match(/\bterm-(\d{1,10})\b/i);
  if (!m) return null;
  const n = Number(m[1]);
  return Number.isFinite(n) ? n : null;
}
/**
 * Resolve the numeric product-category id for the current category.
 *
 * Order of precedence: a finite `ctx.cat.wooCategoryId` override, then a finite
 * id cached on `ctx.cat._wooCategoryId`, then an id extracted from the HTML at
 * `ctx.cat.startUrl`. A found id is cached on the category object.
 *
 * @param {object} ctx - Scan context; reads `ctx.cat`, `ctx.http`, `ctx.store.ua`, `ctx.logger`, `ctx.catPrefixOut`.
 * @returns {Promise<number|null>} The category id, or null when it cannot be inferred.
 */
async function getWooCategoryIdForCat(ctx) {
  // allow manual override if you ever want it
  if (Number.isFinite(ctx?.cat?.wooCategoryId)) return ctx.cat.wooCategoryId;

  // cache per category object
  if (Number.isFinite(ctx?.cat?._wooCategoryId)) return ctx.cat._wooCategoryId;

  // infer from the HTML category page so startUrl stays stable (DB filenames stay stable)
  const { text, finalUrl } = await ctx.http.fetchTextWithRetry(ctx.cat.startUrl, "discover", ctx.store.ua);
  const id = extractProductCatTermId(text);

  if (!id) {
    ctx.logger.warn(
      `${ctx.catPrefixOut} | Could not infer product_cat term id from category page; falling back to HTML parsing only.`,
    );
    // NOTE(review): null is stored here, but the cache check above only accepts
    // finite numbers, so a failed lookup is attempted again on the next call.
    ctx.cat._wooCategoryId = null;
    return null;
  }

  ctx.logger.ok(`${ctx.catPrefixOut} | Woo category id: ${id} (${finalUrl || ctx.cat.startUrl})`);
  ctx.cat._wooCategoryId = id;
  return id;
}
/** /**
@ -237,163 +233,157 @@ async function getWooCategoryIdForCat(ctx) {
* while keeping original startUrl (so DB hashes and "source" stay unchanged). * while keeping original startUrl (so DB hashes and "source" stay unchanged).
*/ */
async function scanCategoryWooStoreApi(ctx, prevDb, report) { async function scanCategoryWooStoreApi(ctx, prevDb, report) {
const { logger } = ctx; const { logger } = ctx;
const t0 = Date.now(); const t0 = Date.now();
const perPage = Number.isFinite(ctx.cat.perPage) ? ctx.cat.perPage : 100; const perPage = Number.isFinite(ctx.cat.perPage) ? ctx.cat.perPage : 100;
const discovered = new Map(); const discovered = new Map();
const catId = await getWooCategoryIdForCat(ctx); const catId = await getWooCategoryIdForCat(ctx);
if (!catId) return; if (!catId) return;
const apiBase = new URL(`https://${ctx.store.host}/wp-json/wc/store/v1/products`); const apiBase = new URL(`https://${ctx.store.host}/wp-json/wc/store/v1/products`);
apiBase.searchParams.set("per_page", String(perPage)); apiBase.searchParams.set("per_page", String(perPage));
apiBase.searchParams.set("category", String(catId)); apiBase.searchParams.set("category", String(catId));
const hardCap = 500; const hardCap = 500;
let page = 1; let page = 1;
while (page <= hardCap) { while (page <= hardCap) {
apiBase.searchParams.set("page", String(page)); apiBase.searchParams.set("page", String(page));
const pageUrl = apiBase.toString(); const pageUrl = apiBase.toString();
const { text, status, bytes, ms, finalUrl } = await ctx.http.fetchTextWithRetry( const { text, status, bytes, ms, finalUrl } = await ctx.http.fetchTextWithRetry(
pageUrl, pageUrl,
`page:${ctx.store.key}:${ctx.cat.key}:${page}`, `page:${ctx.store.key}:${ctx.cat.key}:${page}`,
ctx.store.ua ctx.store.ua,
); );
// IMPORTANT: // IMPORTANT:
// Parse WITHOUT allowUrl so pagination is based on real API page size // Parse WITHOUT allowUrl so pagination is based on real API page size
const ctxNoFilter = const ctxNoFilter =
typeof ctx?.cat?.allowUrl === "function" typeof ctx?.cat?.allowUrl === "function" ? { ...ctx, cat: { ...ctx.cat, allowUrl: null } } : ctx;
? { ...ctx, cat: { ...ctx.cat, allowUrl: null } }
: ctx;
const itemsAll = const itemsAll = (ctx.store.parseProducts || ctx.config.defaultParseProducts)(text, ctxNoFilter, finalUrl);
(ctx.store.parseProducts || ctx.config.defaultParseProducts)(text, ctxNoFilter, finalUrl);
const rawCount = itemsAll.length; const rawCount = itemsAll.length;
// Now apply allowUrl AFTER pagination logic // Now apply allowUrl AFTER pagination logic
const items = []; const items = [];
const allow = ctx?.cat?.allowUrl; const allow = ctx?.cat?.allowUrl;
for (const it of itemsAll) { for (const it of itemsAll) {
if (typeof allow === "function" && !allow(it)) continue; if (typeof allow === "function" && !allow(it)) continue;
items.push(it); items.push(it);
} }
logger.ok( logger.ok(
`${ctx.catPrefixOut} | Page ${String(page).padStart(3, " ")} | ${String(status).padStart(3, " ")} | raw=${String(rawCount).padStart(3, " ")} kept=${String(items.length).padStart(3, " ")} | bytes=${String(bytes || 0).padStart(8, " ")} | ${(ms / 1000).toFixed(1).padStart(6, " ")}s` `${ctx.catPrefixOut} | Page ${String(page).padStart(3, " ")} | ${String(status).padStart(3, " ")} | raw=${String(rawCount).padStart(3, " ")} kept=${String(items.length).padStart(3, " ")} | bytes=${String(bytes || 0).padStart(8, " ")} | ${(ms / 1000).toFixed(1).padStart(6, " ")}s`,
); );
// Stop only when the API page itself is empty // Stop only when the API page itself is empty
if (!rawCount) break; if (!rawCount) break;
for (const it of items) discovered.set(it.url, it); for (const it of items) discovered.set(it.url, it);
// Last page if API returned fewer than perPage // Last page if API returned fewer than perPage
if (rawCount < perPage) break; if (rawCount < perPage) break;
page++; page++;
} }
logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}`); logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}`);
const { const { merged, newItems, updatedItems, removedItems, restoredItems, metaChangedItems } = mergeDiscoveredIntoDb(
merged, prevDb,
newItems, discovered,
updatedItems, { storeLabel: ctx.store.name },
removedItems, );
restoredItems,
metaChangedItems,
} = mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel: ctx.store.name });
const dbObj = buildDbObject(ctx, merged); const dbObj = buildDbObject(ctx, merged);
writeJsonAtomic(ctx.dbFile, dbObj); writeJsonAtomic(ctx.dbFile, dbObj);
logger.ok(`${ctx.catPrefixOut} | DB saved: ${logger.dim(ctx.dbFile)} (${dbObj.count} items)`); logger.ok(`${ctx.catPrefixOut} | DB saved: ${logger.dim(ctx.dbFile)} (${dbObj.count} items)`);
const elapsedMs = Date.now() - t0; const elapsedMs = Date.now() - t0;
report.categories.push({ report.categories.push({
store: ctx.store.name, store: ctx.store.name,
label: ctx.cat.label, label: ctx.cat.label,
key: ctx.cat.key, key: ctx.cat.key,
dbFile: ctx.dbFile, dbFile: ctx.dbFile,
scannedPages: Math.max(0, page), scannedPages: Math.max(0, page),
discoveredUnique: discovered.size, discoveredUnique: discovered.size,
newCount: newItems.length, newCount: newItems.length,
updatedCount: updatedItems.length, updatedCount: updatedItems.length,
removedCount: removedItems.length, removedCount: removedItems.length,
restoredCount: restoredItems.length, restoredCount: restoredItems.length,
metaChangedCount: metaChangedItems.length, metaChangedCount: metaChangedItems.length,
elapsedMs, elapsedMs,
}); });
report.totals.newCount += newItems.length; report.totals.newCount += newItems.length;
report.totals.updatedCount += updatedItems.length; report.totals.updatedCount += updatedItems.length;
report.totals.removedCount += removedItems.length; report.totals.removedCount += removedItems.length;
report.totals.restoredCount += restoredItems.length; report.totals.restoredCount += restoredItems.length;
report.totals.metaChangedCount += metaChangedItems.length; report.totals.metaChangedCount += metaChangedItems.length;
addCategoryResultToReport( addCategoryResultToReport(
report, report,
ctx.store.name, ctx.store.name,
ctx.cat.label, ctx.cat.label,
newItems, newItems,
updatedItems, updatedItems,
removedItems, removedItems,
restoredItems restoredItems,
); );
} }
/**
 * Build the store descriptor for Sierra Springs.
 *
 * @param {string} defaultUa - User-Agent string stored on the descriptor as `ua`.
 * @returns {object} Store descriptor: key, name, host, ua, parser, scanner and categories.
 */
function createStore(defaultUa) {
  return {
    key: "sierrasprings",
    name: "Sierra Springs",
    host: "sierraspringsliquor.ca",
    ua: defaultUa,
    parseProducts: parseProductsSierra,

    // store-only override (no changes outside this file)
    scanCategory: scanCategoryWooStoreApi,

    // RESTORED: original 4 categories, unchanged startUrl so DB hashes match
    categories: [
      {
        key: "whisky",
        label: "Whisky",
        startUrl: "https://sierraspringsliquor.ca/product-category/whisky-2/",
        discoveryStartPage: 1,
        perPage: 100,
      },
      {
        key: "fine-rare",
        label: "Fine & Rare",
        startUrl: "https://sierraspringsliquor.ca/product-category/fine-rare/",
        discoveryStartPage: 1,
        perPage: 100,
      },
      {
        key: "spirits-liquor",
        label: "Spirits / Liquor",
        startUrl: "https://sierraspringsliquor.ca/product-category/spirits-liquor/",
        discoveryStartPage: 1,
        perPage: 100,
        // Only this category carries an allowUrl filter.
        allowUrl: allowSierraSpiritsLiquorUrlRumWhisky,
      },
      {
        key: "spirits",
        label: "Spirits",
        startUrl: "https://sierraspringsliquor.ca/product-category/spirits/",
        discoveryStartPage: 1,
        perPage: 100,
      },
    ],
  };
}
module.exports = { createStore, parseProductsSierra }; module.exports = { createStore, parseProductsSierra };

View file

@ -10,502 +10,508 @@ const { buildDbObject, writeJsonAtomic } = require("../tracker/db");
const { addCategoryResultToReport } = require("../tracker/report"); const { addCategoryResultToReport } = require("../tracker/report");
/**
 * Format a byte count for the fixed-width log columns.
 *
 * @param {number} bytes - Byte count passed straight to humanBytes().
 * @returns {string} humanBytes(bytes), left-padded with spaces to 8 characters.
 */
function kbStr(bytes) {
  return humanBytes(bytes).padStart(8, " ");
}
/**
 * Format a duration in milliseconds as a 7-character, right-aligned seconds string.
 *
 * Durations that round to under 10 seconds keep one decimal ("1.2s"); longer
 * ones are rounded to whole seconds ("12s"). Non-finite input is treated as 0.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} The formatted duration, left-padded with spaces to 7 characters.
 */
function secStr(ms) {
  const seconds = Number.isFinite(ms) ? ms / 1000 : 0;
  const tenths = Math.round(seconds * 10) / 10;
  const label = tenths < 10 ? `${tenths.toFixed(1)}s` : `${Math.round(seconds)}s`;
  return label.padStart(7, " ");
}
/**
 * Format a "current/total" page counter, padding the current page to the
 * width of the total so successive log lines align.
 *
 * @param {number} i - Current page number.
 * @param {number} total - Total number of pages.
 * @returns {string} e.g. " 3/12".
 */
function pageStr(i, total) {
  const width = String(total).length;
  return `${padLeft(i, width)}/${total}`;
}
/**
 * Format progress as a whole-number percentage, padded to 3 characters plus "%".
 *
 * @param {number} done - Units completed.
 * @param {number} total - Units expected; a falsy total yields 0%.
 * @returns {string} e.g. " 50%".
 */
function pctStr(done, total) {
  // Guard against division by zero (and undefined totals).
  const pct = total ? Math.floor((done / total) * 100) : 0;
  return `${padLeft(pct, 3)}%`;
}
/**
 * Split an HTML document into chunks, one per `<article` opening tag.
 *
 * Each chunk runs from one `<article` to just before the next (or to the end
 * of the input). The opening tag is re-emitted in lower case regardless of
 * how it was written in the input.
 *
 * @param {*} html - HTML text; coerced with String(), falsy values become "".
 * @returns {string[]} One string per article, or [] when there are none.
 */
function extractArticles(html) {
  const parts = String(html || "").split(/<article\b/i);
  // parts[0] is whatever precedes the first <article>, so it is dropped.
  return parts.slice(1).map((rest) => `<article${rest}`);
}
/**
 * Extract the first dollar amount from a string and return it in compact form.
 *
 * Whitespace after the "$" and thousands separators are removed, so
 * "$ 1,234.56" becomes "$1234.56".
 *
 * @param {*} str - Text that may contain a price; coerced with String().
 * @returns {string} The normalized price (e.g. "$110.35"), or "" when none is found.
 */
function normalizePrice(str) {
  const text = String(str || "");
  // The comma-grouped alternative must contain at least one ",ddd" group.
  // Otherwise an ungrouped amount such as "$1234.56" is matched as "$123"
  // by the grouped form and silently truncated.
  const match = text.match(/\$\s*(?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d{2})?/);
  if (!match) return "";
  return match[0].replace(/\s+/g, "").replace(/,/g, "");
}
/**
 * Pick the price to record for one product `<article>` chunk.
 *
 * Any `<div>` with the `whiskyfolk-price` class is removed first (presumably
 * member-only pricing — confirm). The price is then taken from, in order:
 *   1. the content of an `<ins>` element,
 *   2. the text following an element with class `regular-price-card`,
 *   3. the content of a `<div class="product-price">`, or failing that the
 *      whole remaining article text.
 *
 * @param {*} articleHtml - HTML of a single article; coerced with String().
 * @returns {string} Normalized price such as "$59.99", or "" when none is found.
 */
function pickPriceFromArticle(articleHtml) {
  const html = String(articleHtml || "");
  const noMember = html.replace(
    /<div\b[^>]*class=["'][^"']*\bwhiskyfolk-price\b[^"']*["'][^>]*>[\s\S]*?<\/div>/gi,
    " ",
  );

  const ins = noMember.match(/<ins\b[^>]*>[\s\S]*?(\$[\s\S]{0,32}?)<\/ins>/i);
  if (ins && ins[1]) return normalizePrice(ins[1]);

  const reg = noMember.match(/class=["'][^"']*\bregular-price-card\b[^"']*["'][^>]*>\s*([^<]+)/i);
  if (reg && reg[1]) return normalizePrice(reg[1]);

  const priceDiv = noMember.match(/<div\b[^>]*class=["'][^"']*\bproduct-price\b[^"']*["'][^>]*>([\s\S]*?)<\/div>/i);
  const scope = priceDiv && priceDiv[1] ? priceDiv[1] : noMember;
  return normalizePrice(scope);
}
/**
 * Find the numeric product id in an article chunk.
 *
 * Sources are tried in order: the `id` attribute of the `<article>` tag, a
 * `post-<n>` token, then a `data-product_id` attribute. Ids are 1-10 digits.
 *
 * @param {*} articleHtml - HTML of a single article; coerced with String().
 * @returns {number} The product id, or 0 when none is found.
 */
function extractProductIdFromArticle(articleHtml) {
  const html = String(articleHtml || "");
  const patterns = [
    /<article\b[^>]*\bid=["'](\d{1,10})["']/i,
    /\bpost-(\d{1,10})\b/i,
    /\bdata-product_id=["'](\d{1,10})["']/i,
  ];
  for (const pattern of patterns) {
    const match = html.match(pattern);
    if (match && match[1]) return Number(match[1]);
  }
  return 0;
}
/**
 * Find a 6-digit SKU in an article chunk.
 *
 * Looks first for a `data-product_sku` attribute holding exactly six digits,
 * then for the word "SKU" followed within 20 non-digit characters by six digits.
 *
 * @param {*} articleHtml - HTML of a single article; coerced with String().
 * @returns {string} The six-digit SKU, or "" when none is found.
 */
function extractSkuFromArticle(articleHtml) {
  const html = String(articleHtml || "");
  const patterns = [/\bdata-product_sku=["'](\d{6})["']/i, /\bSKU\b[^0-9]{0,20}(\d{6})\b/i];
  for (const pattern of patterns) {
    const match = html.match(pattern);
    if (match && match[1]) return match[1];
  }
  return "";
}
/**
 * Derive a synthetic "id:<n>" identifier from an image URL.
 *
 * Matches the first path segment that starts with 1-11 digits immediately
 * followed by "-" or "_":
 *   /1487-1_... or /1487_... or /1487-... => "id:1487"
 *
 * @param {*} imgUrl - Image URL; coerced with String().
 * @returns {string} "id:<digits>", or "" when the URL has no such segment.
 */
function idFromImageUrl(imgUrl) {
  const match = String(imgUrl || "").match(/\/(\d{1,11})(?=[-_])/);
  return match && match[1] ? `id:${match[1]}` : "";
}
/**
 * Decide from an article chunk whether the product appears to be in stock.
 *
 * Negative markers win: an `outofstock` token or "Currently Unavailable"
 * returns false even if a positive marker is also present. Otherwise any
 * positive marker (`instock` token or a bottles-available phrase) returns
 * true. With no marker at all the product is treated as not in stock.
 *
 * @param {*} articleHtml - HTML of a single article; coerced with String().
 * @returns {boolean} True only when a positive marker is found and no negative one.
 */
function looksInStock(articleHtml) {
  const html = String(articleHtml || "");

  if (/\boutofstock\b/i.test(html)) return false;
  if (/Currently\s+Unavailable/i.test(html)) return false;

  const positiveMarkers = [
    /\binstock\b/i,
    /\bBottles\s+(?:Remaining|Available)\b/i,
    /Only\s+\d+\s+Bottle\s+Left/i,
    /10\+\s*Bottles\s+Available/i,
  ];
  // No marker matched: the `instock` token was already tested above, so the
  // answer here is always false.
  return positiveMarkers.some((marker) => marker.test(html));
}
/**
 * Parse one listing `<article>` chunk into a product record.
 *
 * Returns null when the article does not look in stock, has no link, has a
 * link that cannot be resolved to a URL, or yields an empty name.
 *
 * The name is "<product-title> - <product-subtitle>" with either part
 * omitted when missing. The SKU prefers the six-digit SKU found in the HTML,
 * then an "id:<n>" derived from the image URL, then normalizeCspc(url).
 *
 * @param {*} articleHtml - HTML of a single article; coerced with String().
 * @returns {{name: string, price: string, url: string, sku: string, productId: number, img: string} | null}
 */
function parseProductFromArticle(articleHtml) {
  const a = String(articleHtml || "");
  if (!looksInStock(a)) return null;

  // The first anchor in the article is taken as the product link.
  const hrefM = a.match(/<a\b[^>]*href=["']([^"']+)["']/i);
  if (!hrefM || !hrefM[1]) return null;

  let url;
  try {
    // Relative links are resolved against the store origin.
    url = new URL(decodeHtml(hrefM[1]), "https://www.strathliquor.com/").toString();
  } catch {
    return null;
  }

  const t2 = a.match(/<h2\b[^>]*class=["'][^"']*\bproduct-title\b[^"']*["'][^>]*>([\s\S]*?)<\/h2>/i);
  const t3 = a.match(/<h3\b[^>]*class=["'][^"']*\bproduct-subtitle\b[^"']*["'][^>]*>([\s\S]*?)<\/h3>/i);
  const title = cleanText(decodeHtml(stripTags((t2 && t2[1]) || "")));
  const sub = cleanText(decodeHtml(stripTags((t3 && t3[1]) || "")));
  const name = cleanText([title, sub].filter(Boolean).join(" - "));
  if (!name) return null;

  const price = pickPriceFromArticle(a);
  const productId = extractProductIdFromArticle(a);
  const img = extractFirstImgUrl(a, "https://www.strathliquor.com/");

  const skuFromHtml = extractSkuFromArticle(a);
  const skuFromImg = idFromImageUrl(img);
  const fallbackSku = normalizeCspc(url) || "";

  return {
    name,
    price,
    url,
    sku: skuFromHtml || skuFromImg || fallbackSku,
    productId,
    img,
  };
}
/* ---------------- Store API paging ---------------- */ /* ---------------- Store API paging ---------------- */
/**
 * Build the Store API products URL (without paging parameters) that
 * corresponds to a category listing URL.
 *
 * The result targets `/wp-json/wc/store/v1/products` on the same hostname,
 * ordered by date descending. Listing filters are carried over where the
 * code recognizes them:
 *   - `_sfm__stock_status` containing "instock" -> `stock_status=instock`
 *   - `_sfm__regular_price` of the form "<min>+<max>" -> `min_price` / `max_price`
 *
 * @param {string} startUrl - Absolute category listing URL.
 * @returns {URL} A new URL object for the products endpoint.
 * @throws {TypeError} If startUrl is not a valid absolute URL.
 */
function buildStoreApiBaseUrlFromCategoryUrl(startUrl) {
  const listing = new URL(startUrl);
  const api = new URL(`https://${listing.hostname}/wp-json/wc/store/v1/products`);
  api.searchParams.set("order", "desc");
  api.searchParams.set("orderby", "date");

  const stock = listing.searchParams.get("_sfm__stock_status");
  if (stock && /instock/i.test(stock)) api.searchParams.set("stock_status", "instock");

  const priceRange = listing.searchParams.get("_sfm__regular_price");
  if (priceRange) {
    // NOTE(review): searchParams.get() decodes a raw "+" in the query string
    // to a space, so "...regular_price=0+6000" arrives here as "0 6000" and
    // this pattern only matches when the plus sign was sent as "%2B".
    // Before loosening it, confirm the units the Store API expects for
    // min_price/max_price (it appears to use minor units, e.g. cents).
    const range = String(priceRange).match(/^\s*([0-9]+)\s*\+\s*([0-9]+)\s*$/);
    if (range) {
      api.searchParams.set("min_price", range[1]);
      api.searchParams.set("max_price", range[2]);
    }
  }

  return api;
}
/**
 * Check whether a Store API product belongs to a category with the given slug.
 *
 * Comparison is case-insensitive and ignores surrounding whitespace. An
 * empty `wanted` means "no filter" and always matches.
 *
 * @param {object} p - Product object; `p.categories` is read if it is an array.
 * @param {*} wanted - Category slug to look for.
 * @returns {boolean} True when no slug is wanted or some category slug equals it.
 */
function hasCategorySlug(p, wanted) {
  const target = String(wanted || "")
    .trim()
    .toLowerCase();
  if (!target) return true;

  const categories = Array.isArray(p?.categories) ? p.categories : [];
  return categories.some(
    (category) =>
      String(category?.slug || "")
        .trim()
        .toLowerCase() === target,
  );
}
/**
 * Read a product's page URL from a Store API product object.
 *
 * @param {object} p - Product object; `permalink` is preferred over `link`.
 * @returns {string} The trimmed URL when it starts with "http", otherwise "".
 */
function normalizeProductUrl(p) {
  const candidate = String(p?.permalink || p?.link || "").trim();
  return candidate.startsWith("http") ? candidate : "";
}
/**
 * Produce a plain-text product name from a Store API product object.
 *
 * @param {object} p - Product object; `p.name` is read.
 * @returns {string} The name after stripTags, decodeHtml and cleanText.
 */
function normalizeProductName(p) {
  // Store API "name" can contain HTML entities like &#8211; and sometimes markup like <em>
  const rawName = String(p?.name || "");
  return cleanText(decodeHtml(stripTags(rawName)));
}
/**
 * Pick an image URL from a Store API product object.
 *
 * The first entry of `p.images` that yields a non-blank string wins. An
 * entry may be a string itself or an object with `src`, `thumbnail` or
 * `url` (tried in that order). If no entry qualifies, `p.image`,
 * `p.image_url` and `p.imageUrl` are tried. Protocol-relative URLs
 * ("//host/...") are given an "https:" scheme.
 *
 * @param {object} p - Product object.
 * @returns {string} The image URL, or "" when none is available.
 */
function normalizeProductImage(p) {
  const withScheme = (url) => (url.startsWith("//") ? `https:${url}` : url);

  const images = Array.isArray(p?.images) ? p.images : [];
  for (const image of images) {
    if (!image) continue;

    const raw =
      (typeof image === "string" ? image : "") ||
      (typeof image?.src === "string" ? image.src : "") ||
      (typeof image?.thumbnail === "string" ? image.thumbnail : "") ||
      (typeof image?.url === "string" ? image.url : "");
    const trimmed = String(raw || "").trim();
    if (!trimmed) continue;

    return withScheme(trimmed);
  }

  const direct = String(p?.image || p?.image_url || p?.imageUrl || "").trim();
  return direct ? withScheme(direct) : "";
}
/**
 * Convert an integer amount in minor currency units to a decimal string.
 *
 * Example: ("11035", 2) => "110.35"; ("5", 2) => "0.05"; ("7", 0) => "7".
 *
 * @param {*} valueStr - Amount in minor units; must be digits only after trimming.
 * @param {*} minorUnit - Number of decimal places; must be an integer from 0 to 6.
 * @returns {string} The decimal string, or "" when either argument is invalid.
 */
function toMoneyStringFromMinorUnits(valueStr, minorUnit) {
  const places = Number(minorUnit);
  // A fractional place count would be silently truncated by slice/padStart
  // and shift the decimal point, so it is rejected along with out-of-range values.
  if (!Number.isInteger(places) || places < 0 || places > 6) return "";

  const digits = String(valueStr || "").trim();
  if (!/^\d+$/.test(digits)) return "";

  // Use string slicing to avoid float rounding issues
  if (places === 0) return digits;

  // Left-pad so there is always at least one digit before the decimal point.
  const padded = digits.padStart(places + 1, "0");
  const whole = padded.slice(0, padded.length - places);
  const frac = padded.slice(padded.length - places);
  return `${whole}.${frac}`;
}
/**
 * Produce a "$<amount>" price string from a Store API product object.
 *
 * When `p.prices` is an object, `sale_price` is preferred over
 * `regular_price`. A digits-only value is converted from minor units using
 * `currency_minor_unit` when that is present. If that path yields no usable
 * number, the price is parsed out of `p.price` or `p.price_html` instead.
 *
 * @param {object} p - Product object.
 * @returns {string} e.g. "$110.35", or "" when no price can be determined.
 */
function normalizeProductPrice(p) {
  const prices = p?.prices;

  // Woo store API commonly returns minor units (e.g., "11035" with minor_unit=2 => 110.35)
  if (prices && typeof prices === "object") {
    const minor = prices.currency_minor_unit;
    const sale = String(prices.sale_price || "").trim();
    const regular = String(prices.regular_price || "").trim();
    const chosen = sale || regular;

    if (chosen) {
      let numeric = chosen;
      if (/^\d+$/.test(chosen) && minor !== undefined && minor !== null) {
        // If conversion is rejected the raw digits are used as they are.
        const converted = toMoneyStringFromMinorUnits(chosen, minor);
        if (converted) numeric = converted;
      }

      const amount = Number(numeric);
      if (Number.isFinite(amount) && amount >= 0) return `$${amount.toFixed(2)}`;
    }
  }

  const raw = String(p?.price || p?.price_html || "").trim();
  return normalizePrice(raw);
}
/**
 * Read a product's SKU, accepting only exactly six digits.
 *
 * @param {object} p - Product object; `p.sku` is read and trimmed.
 * @returns {string} The six-digit SKU, or "" for any other value.
 */
function normalizeProductSku(p) {
  const sku = String(p?.sku || "").trim();
  return /^\d{6}$/.test(sku) ? sku : "";
}
/**
 * Read a product's numeric id.
 *
 * @param {object} p - Product object; `p.id` is converted with Number().
 * @returns {number} The id, or 0 when it does not convert to a finite number.
 */
function normalizeProductId(p) {
  const productId = Number(p?.id);
  return Number.isFinite(productId) ? productId : 0;
}
/**
 * Fetch one page of products from the Store API.
 *
 * The base URL is copied before `page` and `per_page` are set, so the
 * caller's URL object is not modified.
 *
 * @param {object} ctx - Scan context; reads ctx.http.fetchJsonWithRetry,
 *   ctx.cat.key, ctx.cat.startUrl and ctx.store.ua.
 * @param {URL|string} apiBaseUrl - Products endpoint URL without paging parameters.
 * @param {number} page - 1-based page number (as passed by the caller).
 * @param {number} perPage - Page size to request.
 * @returns {Promise<*>} Whatever ctx.http.fetchJsonWithRetry resolves to.
 */
async function fetchStoreApiPage(ctx, apiBaseUrl, page, perPage) {
  const pageUrl = new URL(apiBaseUrl.toString());
  pageUrl.searchParams.set("page", String(page));
  pageUrl.searchParams.set("per_page", String(perPage));

  const requestLabel = `strath:storeapi:${ctx.cat.key}:p${page}`;
  return ctx.http.fetchJsonWithRetry(pageUrl.toString(), requestLabel, ctx.store.ua, {
    method: "GET",
    headers: {
      Accept: "application/json",
      // The category listing page is sent as the referring page.
      Referer: ctx.cat.startUrl,
    },
  });
}
/**
 * Guard against a partial scan wiping most of the previous DB.
 *
 * If this run discovered fewer than 60% as many items as the previous DB
 * holds, a warning is logged and every previous entry whose key is missing
 * from `discovered` is copied into it, so those items are not reported as
 * removed. `discovered` is modified in place.
 *
 * Nothing happens when either collection is empty or has no numeric `size`.
 *
 * @param {Map} prevDb - Previous DB; needs `size` and `entries()`.
 * @param {Map} discovered - Items found this run; needs `size`, `has` and `set`.
 * @param {object} ctx - Scan context; reads ctx.logger.warn (optional) and ctx.catPrefixOut.
 * @param {string} reason - Free text appended to the warning.
 * @returns {boolean} True when previous entries were merged into `discovered`.
 */
function avoidMassRemoval(prevDb, discovered, ctx, reason) {
  const prevSize = prevDb && typeof prevDb.size === "number" ? prevDb.size : 0;
  const discSize = discovered && typeof discovered.size === "number" ? discovered.size : 0;
  if (prevSize <= 0 || discSize <= 0) return false;

  const ratio = discSize / Math.max(1, prevSize);
  if (ratio >= 0.6) return false;

  ctx.logger.warn?.(
    `${ctx.catPrefixOut} | Strath partial scan (${discSize}/${prevSize}); preserving DB to avoid removals (${reason}).`,
  );

  if (!prevDb || typeof prevDb.entries !== "function") return false;

  for (const [key, value] of prevDb.entries()) {
    // Items found this run keep their fresh data.
    if (!discovered.has(key)) discovered.set(key, value);
  }
  return true;
}
/**
 * Scan one Strath category: seed from the listing HTML, page through the
 * Store API, merge the result into the DB, save it, and update the report.
 *
 * Steps:
 *   1. Fetch ctx.cat.startUrl and parse its `<article>` chunks. A fetch
 *      failure is logged and the scan continues with no seed items.
 *   2. Page through the Store API (100 per page) keeping in-stock products
 *      that match ctx.cat.apiCategorySlug. API data overwrites seed data for
 *      the same URL, except that the seed image and SKU can be retained
 *      (see the inline comments).
 *   3. Call avoidMassRemoval() so a short scan does not remove most of the DB.
 *   4. Merge into prevDb, write ctx.dbFile, and append to `report`.
 *
 * @param {object} ctx - Scan context; reads cat, store, http, logger, config.maxPages,
 *   catPrefixOut and dbFile.
 * @param {Map} prevDb - Previous DB contents, passed to mergeDiscoveredIntoDb.
 * @param {object} report - Run report; `categories` and `totals` are updated in place.
 * @returns {Promise<void>}
 */
async function scanCategoryStrath(ctx, prevDb, report) {
  const t0 = Date.now();

  // Listing HTML (seed + sanity)
  let html = "";
  let listingFinalUrl = ctx.cat.startUrl;
  let listingStatus = 0;
  let listingBytes = 0;
  let listingMs = 0;

  try {
    const r = await ctx.http.fetchTextWithRetry(ctx.cat.startUrl, `strath:html:${ctx.cat.key}`, ctx.store.ua);
    html = r.text || "";
    listingFinalUrl = r.finalUrl || ctx.cat.startUrl;
    listingStatus = r.status || 0;
    listingBytes = r.bytes || 0;
    listingMs = r.ms || 0;
  } catch (e) {
    // Best effort: the Store API scan below can still populate the category.
    ctx.logger.warn?.(`${ctx.catPrefixOut} | Strath listing HTML fetch failed: ${e?.message || e}`);
  }

  const discovered = new Map();

  const listingArticles = extractArticles(html);
  let listingItems = 0;
  for (const art of listingArticles) {
    const it = parseProductFromArticle(art);
    if (it) {
      discovered.set(it.url, it);
      listingItems++;
    }
  }

  ctx.logger.ok(
    `${ctx.catPrefixOut} | Page ${pageStr(1, 1)} | ${String(listingStatus || "").padEnd(3)} | ${pctStr(1, 1)} | items=${padLeft(
      listingItems,
      3,
    )} | bytes=${kbStr(listingBytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(listingMs)}`,
  );

  const apiBase = buildStoreApiBaseUrlFromCategoryUrl(listingFinalUrl || ctx.cat.startUrl);

  const perPage = 100;
  const maxPagesCap = ctx.config.maxPages === null ? 5000 : ctx.config.maxPages;

  const wantedSlug = String(ctx.cat.apiCategorySlug || "")
    .trim()
    .toLowerCase();

  let donePages = 0;
  let emptyMatchPages = 0;

  for (let page = 1; page <= maxPagesCap; page++) {
    let r;
    try {
      r = await fetchStoreApiPage(ctx, apiBase, page, perPage);
    } catch (e) {
      // Stop paging on the first failed page; what was collected so far is kept.
      ctx.logger.warn?.(`${ctx.catPrefixOut} | Strath Store API page ${page} failed: ${e?.message || e}`);
      break;
    }

    const arr = Array.isArray(r?.json) ? r.json : [];
    donePages++;

    if (!arr.length) break;

    let kept = 0;

    for (const p of arr) {
      // A missing stock_status is treated as in stock.
      const stock = String(p?.stock_status || "").toLowerCase();
      if (stock && stock !== "instock") continue;

      if (wantedSlug && !hasCategorySlug(p, wantedSlug)) continue;

      const url = normalizeProductUrl(p);
      if (!url) continue;

      const name = normalizeProductName(p);
      if (!name) continue;

      const price = normalizeProductPrice(p);
      const sku = normalizeProductSku(p);
      const productId = normalizeProductId(p);

      const prev = discovered.get(url) || null;

      // Prefer the API image; fall back to the one seen in the listing HTML.
      const apiImg = normalizeProductImage(p) || "";
      const img = apiImg || (prev && prev.img) || "";

      // SKU preference: real 6-digit SKU, then image-derived id, then URL-derived.
      const newSku = sku || idFromImageUrl(apiImg) || normalizeCspc(url) || "";
      const mergedSku = pickBetterSku(newSku, prev && prev.sku);

      discovered.set(url, {
        name,
        price,
        url,
        sku: mergedSku,
        productId,
        img,
      });
      kept++;
    }

    ctx.logger.ok(
      `${ctx.catPrefixOut} | API Page ${pageStr(donePages, donePages)} | ${(r?.status || "").toString().padEnd(3)} | kept=${padLeft(
        kept,
        3,
      )} | bytes=${kbStr(r.bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}`,
    );

    if (wantedSlug) {
      if (kept === 0) emptyMatchPages++;
      else emptyMatchPages = 0;

      // If filter is tight (rum), stop after 2 empty pages in a row.
      if (emptyMatchPages >= 2) break;
    }

    // A short page means the API has no more results.
    if (arr.length < perPage) break;
  }

  if (prevDb && typeof prevDb.size === "number") {
    avoidMassRemoval(prevDb, discovered, ctx, `storeapi pages=${donePages} slug=${wantedSlug || "none"}`);
  }

  ctx.logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}`);

  const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, {
    storeLabel: ctx.store.name,
  });

  const dbObj = buildDbObject(ctx, merged);
  writeJsonAtomic(ctx.dbFile, dbObj);

  ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`);

  const elapsed = Date.now() - t0;
  ctx.logger.ok(
    `${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`,
  );

  report.categories.push({
    store: ctx.store.name,
    label: ctx.cat.label,
    key: ctx.cat.key,
    dbFile: ctx.dbFile,
    // The listing HTML fetch counts as one page on top of the API pages.
    scannedPages: 1 + Math.max(0, donePages),
    discoveredUnique: discovered.size,
    newCount: newItems.length,
    updatedCount: updatedItems.length,
    removedCount: removedItems.length,
    restoredCount: restoredItems.length,
    elapsedMs: elapsed,
  });
  report.totals.newCount += newItems.length;
  report.totals.updatedCount += updatedItems.length;
  report.totals.removedCount += removedItems.length;
  report.totals.restoredCount += restoredItems.length;

  addCategoryResultToReport(
    report,
    ctx.store.name,
    ctx.cat.label,
    newItems,
    updatedItems,
    removedItems,
    restoredItems,
  );
}
/**
 * Build the store descriptor for Strath Liquor.
 *
 * @param {string} defaultUa - User-Agent string stored on the descriptor as `ua`.
 * @returns {object} Descriptor holding the store identity, its category scanner
 *   and the list of categories (each with the start URL used for scanning).
 */
function createStore(defaultUa) {
	return {
		key: "strath",
		name: "Strath Liquor",
		host: "www.strathliquor.com",
		ua: defaultUa,
		scanCategory: scanCategoryStrath,
		categories: [
			{
				key: "whisky",
				label: "Whisky",
				apiCategorySlug: "whisky",
				startUrl:
					"https://www.strathliquor.com/whisky/?_sfm__stock_status=instock&_sfm__regular_price=0+6000&_sfm_product_abv=20+75&orderby=date",
			},
			{
				key: "spirits-rum",
				label: "Spirits - Rum",
				apiCategorySlug: "rum",
				startUrl:
					"https://www.strathliquor.com/spirits/?_sfm__stock_status=instock&_sfm__regular_price=0+600&_sfm_product_type=Rum&_sfm_product_abv=10+75&orderby=date",
			},
		],
	};
}

module.exports = { createStore };

View file

@ -19,192 +19,191 @@ const GQL_URL = "https://production-storefront-api-mlwv4nj3rq-uc.a.run.app/graph
/* ---------------- formatting ---------------- */
/**
 * Format a byte count for log columns: the `humanBytes` rendering, left-padded
 * with spaces to a width of 8.
 *
 * @param {number} bytes - Byte count handed to `humanBytes`.
 * @returns {string} Padded string (longer values are not truncated).
 */
function kbStr(bytes) {
	return humanBytes(bytes).padStart(8, " ");
}
/**
 * Format a duration in milliseconds as seconds for log columns.
 *
 * Values that round to less than 10s keep one decimal ("1.2s"); anything else
 * is shown as whole seconds ("12s"). Non-finite input is treated as 0.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Duration left-padded with spaces to a width of 7.
 */
function secStr(ms) {
	const s = Number.isFinite(ms) ? ms / 1000 : 0;
	// Round to tenths first so 9.96s is shown as "10s", not "10.0s".
	const t = Math.round(s * 10) / 10;
	return (t < 10 ? `${t.toFixed(1)}s` : `${Math.round(s)}s`).padStart(7, " ");
}
/**
 * Format a "current/total" page counter, padding the current page (via
 * `padLeft`) to the number of digits in `total` so successive lines align.
 *
 * @param {number} i - Current page number.
 * @param {number} total - Total (or maximum) page count.
 * @returns {string} Counter such as " 7/500".
 */
function pageStr(i, total) {
	const w = String(total).length;
	return `${padLeft(i, w)}/${total}`;
}
/* ---------------- helpers ---------------- */
/**
 * Format a numeric value as a dollar amount with two decimals.
 *
 * @param {*} n - Value coerced with `Number()`.
 * @returns {string} "$12.50"-style string, or "" when the value is not a finite number.
 */
function money(n) {
	const x = Number(n);
	return Number.isFinite(x) ? `$${x.toFixed(2)}` : "";
}
/**
 * Return the first usable string among the candidates, scanning left to right.
 *
 * A candidate may be:
 *  - a string (used if non-empty after trimming), or
 *  - an array whose elements are strings or objects carrying `url`, `src`
 *    or `image`; the first non-empty entry wins.
 * Anything else is skipped.
 *
 * @param {...*} vals - Candidate values.
 * @returns {string} Trimmed string, or "" when nothing qualifies.
 */
function firstNonEmptyStr(...vals) {
	for (const v of vals) {
		const s = typeof v === "string" ? v.trim() : "";
		if (s) return s;
		if (Array.isArray(v)) {
			for (const a of v) {
				if (typeof a === "string" && a.trim()) return a.trim();
				if (a && typeof a === "object") {
					const u = String(a.url || a.src || a.image || "").trim();
					if (u) return u;
				}
			}
		}
	}
	return "";
}
/**
 * Turn a possibly relative or protocol-relative URL into an absolute one.
 *
 * @param {*} raw - URL-ish value; coerced to a trimmed string.
 * @returns {string} "" for empty input; "https:"-prefixed for "//host/..." input;
 *   the input unchanged when it already starts with http(s)://; otherwise the
 *   input resolved against `BASE`. If resolution throws, the trimmed input is
 *   returned as-is.
 */
function normalizeAbsUrl(raw) {
	const s = String(raw || "").trim();
	if (!s) return "";
	if (s.startsWith("//")) return `https:${s}`;
	if (/^https?:\/\//i.test(s)) return s;
	try {
		return new URL(s, `${BASE}/`).toString();
	} catch {
		// Best effort: hand back the unresolved value rather than failing the scan.
		return s;
	}
}
/**
 * Tell whether a SKU is missing or synthetic.
 *
 * SKUs prefixed with "u:" (case-insensitive) are URL-hash fallbacks and are
 * treated as synthetic, which makes the item eligible for repair.
 *
 * @param {*} sku - SKU value; coerced to a trimmed string.
 * @returns {boolean} True for an empty SKU or a "u:"-prefixed one.
 */
function isSyntheticSku(sku) {
	const s = String(sku || "").trim();
	return !s || /^u:/i.test(s);
}
/**
 * Normalize a raw Tudor House SKU.
 *
 * Rules, in order:
 *  - empty input                     -> ""
 *  - already prefixed "id:" or "u:"  -> returned unchanged
 *  - all digits                      -> "id:<sku>" when shorter than 6 chars, else unchanged
 *  - any other value under 6 chars   -> "id:<sku>"
 *  - everything else                 -> `normalizeCspc(sku)`, or the raw string if that is empty
 *
 * Short SKUs are namespaced with "id:" to reduce collisions. Numeric SKUs are
 * deliberately NOT passed through `normalizeCspc` (some normalizers hash
 * arbitrary strings).
 *
 * @param {*} rawSku - SKU value; coerced to a trimmed string.
 * @returns {string} Normalized SKU.
 */
function normalizeTudorSku(rawSku) {
	const s = String(rawSku || "").trim();
	if (!s) return "";

	if (/^id:/i.test(s)) return s;
	if (/^u:/i.test(s)) return s;

	// numeric SKU like 67433
	if (/^\d+$/.test(s)) {
		return s.length < 6 ? `id:${s}` : s;
	}

	// short alnum SKU -> namespace
	if (s.length < 6) return `id:${s}`;

	// other formats go through the shared normalizer, falling back to the raw string
	return normalizeCspc(s) || s;
}
/**
 * Build the public product-page URL for a product slug.
 *
 * Site URLs look like: /TUDOR_HOUSE_0/product/spirits/<subcat>/<slug>
 *
 * @param {object} ctx - Scan context; `ctx.cat.tudorRootSlug` (default "spirits")
 *   and `ctx.cat.tudorSubSlug` (default "") supply the category path segments.
 * @param {string} slug - Product slug; URI-encoded into the path.
 * @returns {string} Absolute URL resolved against `BASE`.
 */
function tudorProductUrl(ctx, slug) {
	const root = ctx?.cat?.tudorRootSlug || "spirits";
	const sub = ctx?.cat?.tudorSubSlug || "";
	const path = `/${STORE_ID}/product/${encodeURIComponent(root)}/${encodeURIComponent(sub)}/${encodeURIComponent(slug)}`;
	return new URL(path, BASE).toString();
}
/**
 * Extract a variant's volume in millilitres.
 *
 * Reads the first non-empty of `v.volume`, `v.shortName`, `v.fullName` and looks
 * for a number followed by "ML" or "L" (case-insensitive), e.g. "1.75L", "1L",
 * "750ML".
 *
 * @param {object|null|undefined} v - Variant object.
 * @returns {number|null} Volume in ml (rounded), or null when none is found.
 */
function parseVolumeMl(v) {
	const raw = String(v?.volume || v?.shortName || v?.fullName || "").toUpperCase();
	const m = raw.match(/(\d+(?:\.\d+)?)\s*(ML|L)\b/);
	if (!m) return null;

	const n = Number(m[1]);
	if (!Number.isFinite(n)) return null;

	return m[2] === "L" ? Math.round(n * 1000) : Math.round(n);
}
/**
 * Choose the representative variant of a product.
 *
 * Only in-stock variants (quantity > 0) are considered when any exist;
 * otherwise all variants are. Within that pool:
 *  1) the largest parsed volume wins (unparseable volume ranks lowest);
 *  2) ties are broken by the higher price (non-finite price ranks lowest);
 *  3) on a full tie the earliest variant is kept.
 *
 * @param {object|null|undefined} p - Product with an optional `variants` array.
 * @returns {object|null} Selected variant, or null when there are none.
 */
function tudorPickVariant(p) {
	const vs = Array.isArray(p?.variants) ? p.variants : [];
	const inStock = vs.filter((v) => Number(v?.quantity) > 0);
	const pool = inStock.length ? inStock : vs;
	if (!pool.length) return null;
	if (pool.length === 1) return pool[0];

	let best = pool[0];
	let bestVol = parseVolumeMl(best);
	let bestPrice = Number(best?.price);

	for (let i = 1; i < pool.length; i++) {
		const v = pool[i];
		const vol = parseVolumeMl(v);
		const price = Number(v?.price);

		// Unknown volumes compare as -1 so any known volume beats them.
		const volA = bestVol == null ? -1 : bestVol;
		const volB = vol == null ? -1 : vol;

		// 1) largest volume wins
		if (volB > volA) {
			best = v;
			bestVol = vol;
			bestPrice = price;
			continue;
		}
		if (volB < volA) continue;

		// 2) tie-break: higher price wins
		const priceA = Number.isFinite(bestPrice) ? bestPrice : -1;
		const priceB = Number.isFinite(price) ? price : -1;
		if (priceB > priceA) {
			best = v;
			bestVol = vol;
			bestPrice = price;
		}
	}

	return best;
}
/**
 * Pull the displayed (live) price out of a product page.
 *
 * The search is narrowed to the first "price-container" (or, failing that,
 * "price") div when one is found, and any "retail-price" span is removed so
 * the live price is picked rather than the struck-through retail price.
 * Thousands separators are accepted ("$1,299.99" -> 1299.99).
 *
 * @param {*} html - Page HTML; coerced to string.
 * @returns {number|null} First dollar amount found, or null when there is none.
 */
function parseDisplayPriceFromHtml(html) {
	const s = String(html || "");

	// Narrow to the main price container first (avoid grabbing retail-price)
	const block =
		s.match(/<div[^>]*class=["'][^"']*price-container[^"']*["'][^>]*>([\s\S]{0,800})<\/div>/i) ||
		s.match(/<div[^>]*class=["'][^"']*\bprice\b[^"']*["'][^>]*>([\s\S]{0,800})<\/div>/i);

	const hay = block ? block[1] : s;

	// Remove retail-price spans so we pick the live price first
	const cleaned = hay.replace(/<span[^>]*class=["'][^"']*retail-price[^"']*["'][^>]*>[\s\S]*?<\/span>/gi, " ");

	// Integer part is either comma-grouped ("1,299") or plain digits ("1299").
	const m = cleaned.match(/\$\s*((?:[0-9]{1,3}(?:,[0-9]{3})+|[0-9]+)(?:\.[0-9]{2})?)/);
	if (!m) return null;

	const n = Number(m[1].replace(/,/g, ""));
	return Number.isFinite(n) ? n : null;
}
/**
 * Return the first non-empty SKU found on any of a product's variants.
 *
 * @param {object|null|undefined} p - Product with an optional `variants` array.
 * @returns {string} Trimmed SKU, or "" when no variant has one.
 */
function pickAnySkuFromProduct(p) {
	const vs = Array.isArray(p?.variants) ? p.variants : [];
	for (const v of vs) {
		const s = String(v?.sku || "").trim();
		if (s) return s;
	}
	return "";
}
/**
 * Return the first in-stock variant (quantity > 0), falling back to the first
 * variant of any kind.
 *
 * @param {object|null|undefined} p - Product with an optional `variants` array.
 * @returns {object|null} Variant, or null when the product has none.
 */
function pickInStockVariantWithFallback(p) {
	const vs = Array.isArray(p?.variants) ? p.variants : [];
	const inStock = vs.find((v) => Number(v?.quantity) > 0);
	return inStock || vs[0] || null;
}
/* ---------------- GraphQL ---------------- */
/**
 * POST a GraphQL request to the storefront API through the shared HTTP client.
 *
 * @param {object} ctx - Scan context; uses `ctx.http.fetchJsonWithRetry` and `ctx.store.ua`.
 * @param {string} label - Label passed to the HTTP client for this request.
 * @param {string} query - GraphQL query text.
 * @param {object} variables - GraphQL variables.
 * @returns {Promise<object>} Whatever `fetchJsonWithRetry` resolves to (callers
 *   in this file read `status` and `json` from it).
 */
async function tudorGql(ctx, label, query, variables) {
	return await ctx.http.fetchJsonWithRetry(GQL_URL, label, ctx.store.ua, {
		method: "POST",
		headers: {
			Accept: "application/json",
			"content-type": "application/json",
			Origin: BASE,
			Referer: `${BASE}/`,
		},
		body: JSON.stringify({ query, variables }),
	});
}
/* ---------------- GQL queries ---------------- */
@ -300,63 +299,63 @@ const PRODUCTS_BY_SKU_QUERY = `
`; `;
/**
 * Fetch one page (up to 100 products, sorted by name ascending) of the current
 * category from the GraphQL API.
 *
 * @param {object} ctx - Scan context; `ctx.cat.tudorAllTags` selects the
 *   category (defaults to ["spirits", ctx.cat.tudorSubSlug]).
 * @param {string|null} cursor - Page cursor from the previous page; falsy for the first page.
 * @returns {Promise<object>} The `data.products` payload of the response.
 * @throws {Error} When the status is not 200 or `data.products` is missing; the
 *   message carries the GraphQL error messages, or the HTTP status if there are none.
 */
async function fetchProductsPage(ctx, cursor) {
	const vars = {
		storeId: STORE_ID,
		allTags: ctx.cat.tudorAllTags || ["spirits", ctx.cat.tudorSubSlug],
		anyTags: null,
		pageCursor: cursor || null,
		pageLimit: 100,
		sortBy: "name",
		sortOrder: "asc",
		priceMin: null,
		priceMax: null,
		quantityMin: null,
	};

	const r = await tudorGql(ctx, `tudor:gql:products:${ctx.cat.key}`, PRODUCTS_QUERY, vars);

	if (r?.status !== 200 || !r?.json?.data?.products) {
		const errs = Array.isArray(r?.json?.errors) ? r.json.errors : [];
		const msg = errs.length ? errs.map((e) => e?.message || String(e)).join(" | ") : `HTTP ${r?.status}`;
		throw new Error(`Tudor products query failed: ${msg}`);
	}

	return r.json.data.products;
}
/* ---------------- GQL bySku helper (image-only within budget) ---------------- */
/**
 * Look up a product by SKU through the GraphQL API, memoized per scan context.
 *
 * Results are cached in `ctx._tudorSkuCache`, including misses: a SKU whose
 * lookup returned nothing (or a non-200 status) is not requested again for
 * this context.
 *
 * @param {object} ctx - Scan context (the cache map is created on it lazily).
 * @param {*} sku - SKU to look up; coerced to a trimmed string.
 * @returns {Promise<object|null>} First matching product, or null.
 */
async function fetchProductBySku(ctx, sku) {
	const s = String(sku || "").trim();
	if (!s) return null;

	if (!ctx._tudorSkuCache) ctx._tudorSkuCache = new Map();
	if (ctx._tudorSkuCache.has(s)) return ctx._tudorSkuCache.get(s);

	const r = await tudorGql(ctx, `tudor:gql:bySku:${ctx.cat.key}:${s}`, PRODUCTS_BY_SKU_QUERY, {
		sku: s,
		storeId: STORE_ID,
	});

	let out = null;
	if (r?.status === 200 && r?.json?.data?.productsBySku?.items?.length) {
		out = r.json.data.productsBySku.items[0] || null;
	}

	ctx._tudorSkuCache.set(s, out);
	return out;
}
/**
 * Try to find an image for a product via a by-SKU lookup.
 *
 * The image is taken from the chosen variant first, then from the product's
 * `gulpImages`, `posImages`, `customImages` and `imageIds`, in that order.
 *
 * @param {object} ctx - Scan context.
 * @param {string} skuProbe - Raw SKU to look up.
 * @returns {Promise<{img: string}|null>} Absolute image URL wrapped in an
 *   object, or null when the product or an image cannot be found.
 */
async function supplementImageFromSku(ctx, skuProbe) {
	const prod = await fetchProductBySku(ctx, skuProbe);
	if (!prod) return null;

	const v = pickInStockVariantWithFallback(prod);
	const img = normalizeAbsUrl(
		firstNonEmptyStr(v?.image, prod?.gulpImages, prod?.posImages, prod?.customImages, prod?.imageIds),
	);

	return img ? { img } : null;
}
/* ---------------- HTML product page fallback (SKU + optional image) ---------------- */
@ -366,333 +365,335 @@ const DETAIL_HTML_BUDGET_DEFAULT = 200;
// Default cap on by-SKU GraphQL detail lookups per category scan
// (used when ctx.config.tudorGqlBudget is not a finite number).
const DETAIL_GQL_BUDGET_DEFAULT = 10;
/**
 * Extract a SKU from a product page.
 *
 * Tries, in order:
 *  1) the visible block, e.g. <div class="sku ...">SKU: 67433</div>
 *     (or any "SKU: <value>" text);
 *  2) the first `"sku":"..."` pair in embedded JSON (the Sapper preloaded
 *     state carries variants in that form).
 *
 * @param {*} html - Page HTML; coerced to string.
 * @returns {string} Trimmed SKU, or "" when none is found.
 */
function parseSkuFromHtml(html) {
	const s = String(html || "");

	// 1) Visible block
	const m1 = s.match(/>\s*SKU:\s*([A-Za-z0-9._-]+)\s*</i) || s.match(/\bSKU:\s*([A-Za-z0-9._-]+)\b/i);
	if (m1 && m1[1]) return String(m1[1]).trim();

	// 2) Embedded JSON
	const m2 = s.match(/"sku"\s*:\s*"([^"]+)"/i);
	return m2 && m2[1] ? String(m2[1]).trim() : "";
}
/**
 * Extract the social-preview image URL from a page's meta tags.
 *
 * Preference order: og:image, then twitter:image, each with the identifying
 * attribute before `content`. If neither matches, the same two tags are
 * retried with `content` written first, since attribute order in HTML is
 * not significant.
 *
 * @param {*} html - Page HTML; coerced to string.
 * @returns {string} Trimmed URL as written in the page, or "" when absent.
 */
function parseOgImageFromHtml(html) {
	const s = String(html || "");
	const m =
		s.match(/property=["']og:image["'][^>]*content=["']([^"']+)["']/i) ||
		s.match(/name=["']twitter:image["'][^>]*content=["']([^"']+)["']/i) ||
		// Fallbacks: content attribute precedes property/name within the same tag.
		s.match(/content=["']([^"']+)["'][^>]*property=["']og:image["']/i) ||
		s.match(/content=["']([^"']+)["'][^>]*name=["']twitter:image["']/i);
	return m ? String(m[1] || "").trim() : "";
}
/**
 * Fetch an HTML page through the shared HTTP client so its pacing/throttling
 * is respected.
 *
 * Uses `ctx.http.fetchTextWithRetry` when available. Otherwise falls back to a
 * generic `ctx.http.fetchWithRetry` and coerces its `text`/`body`/`data` field
 * (string, Buffer, or anything with `toString`) into text.
 *
 * @param {object} ctx - Scan context; uses `ctx.http` and `ctx.store.ua`.
 * @param {string} label - Label passed to the HTTP client for this request.
 * @param {string} url - Page URL.
 * @returns {Promise<object>} The `fetchTextWithRetry` result, or
 *   `{ status, text, bytes, ms }` built from the fallback response.
 * @throws {Error} When `ctx.http` exposes neither fetch method.
 */
async function tudorFetchHtml(ctx, label, url) {
	// Same request options for both client entry points.
	const request = {
		method: "GET",
		headers: {
			Accept: "text/html,application/xhtml+xml",
			Referer: `${BASE}/`,
		},
	};

	if (ctx?.http?.fetchTextWithRetry) {
		return await ctx.http.fetchTextWithRetry(url, label, ctx.store.ua, request);
	}

	// Best-effort fallback when the wrapper only has a generic fetchWithRetry.
	if (ctx?.http?.fetchWithRetry) {
		const r = await ctx.http.fetchWithRetry(url, label, ctx.store.ua, request);

		const body = r?.text ?? r?.body ?? r?.data ?? "";
		const text =
			typeof body === "string"
				? body
				: Buffer.isBuffer(body)
					? body.toString("utf8")
					: body && typeof body === "object" && typeof body.toString === "function"
						? body.toString()
						: "";

		return { status: r?.status, text, bytes: r?.bytes, ms: r?.ms };
	}

	throw new Error("No HTML fetch method available on ctx.http (need fetchTextWithRetry or fetchWithRetry).");
}
/**
 * Scrape SKU, image and displayed price from a product page, memoized per
 * scan context.
 *
 * Results are cached in `ctx._tudorHtmlCache` keyed by URL, including failures
 * (stored as null), so each URL is fetched at most once per context.
 *
 * @param {object} ctx - Scan context (the cache map is created on it lazily).
 * @param {string} url - Product page URL.
 * @returns {Promise<{sku: string, img: string, priceNum: number|null}|null>}
 *   Parsed details, or null when the fetch fails, throws, or returns an empty
 *   or non-200 response.
 */
async function tudorDetailFromProductPage(ctx, url) {
	if (!ctx._tudorHtmlCache) ctx._tudorHtmlCache = new Map();
	if (ctx._tudorHtmlCache.has(url)) return ctx._tudorHtmlCache.get(url);

	let out = null;
	try {
		const r = await tudorFetchHtml(ctx, `tudor:html:${ctx.cat.key}`, url);
		if (r?.status === 200 && typeof r?.text === "string" && r.text.length) {
			const rawSku = parseSkuFromHtml(r.text);
			const sku = normalizeTudorSku(rawSku);
			const img = normalizeAbsUrl(parseOgImageFromHtml(r.text));
			const priceNum = parseDisplayPriceFromHtml(r.text);

			out = { sku, img, priceNum };
		}
	} catch {
		// Detail pages are a best-effort enrichment; a failure must not abort the scan.
		out = null;
	}

	ctx._tudorHtmlCache.set(url, out);
	return out;
}
/* ---------------- item builder (fast, no extra calls) ---------------- */
/**
 * Build a catalogue item from a GraphQL product without any extra requests.
 *
 * @param {object|null|undefined} p - Product from the products query.
 * @param {object} ctx - Scan context (used to build the product URL).
 * @returns {object|null} `{ name, price, url, sku, img, _skuProbe, _variants }`,
 *   or null when the product has no name or slug, or when its chosen variant
 *   is out of stock. `_skuProbe` is the raw (un-normalized) SKU and `_variants`
 *   a lightweight snapshot used later by the repair pass.
 */
function tudorItemFromProductFast(p, ctx) {
	if (!p) return null;

	const name = cleanText(p?.name || "");
	const slug = String(p?.slug || "").trim();
	if (!name || !slug) return null;

	const v = tudorPickVariant(p);
	if (v && Number(v?.quantity) <= 0) return null; // only keep in-stock

	const url = tudorProductUrl(ctx, slug);

	// NOTE: fast-path price is a best-effort; may be overridden in repair pass for multi-variant products
	const price = money(v?.price ?? p?.priceFrom ?? p?.priceTo);

	const skuRaw = String(v?.sku || "").trim() || pickAnySkuFromProduct(p);
	const sku = normalizeTudorSku(skuRaw);

	const img = normalizeAbsUrl(firstNonEmptyStr(v?.image, p?.gulpImages, p?.posImages, p?.customImages, p?.imageIds));

	// Keep a lightweight variant snapshot so repair can match HTML SKU -> exact GQL variant price
	const variants = Array.isArray(p?.variants)
		? p.variants.map((x) => ({
				sku: String(x?.sku || "").trim(),
				price: x?.price,
				retailPrice: x?.retailPrice,
				quantity: x?.quantity,
			}))
		: [];

	return { name, price, url, sku, img, _skuProbe: skuRaw, _variants: variants };
}
/* ---------------- repair (second pass, budgeted) ---------------- */
/**
 * Second-pass repair of a single item; mutates and returns `it`.
 *
 * Steps:
 *  1) When the SKU is missing/synthetic, or the product has two or more
 *     in-stock variants (so the fast path may have picked the wrong variant
 *     for this URL), scrape the product page to fix the SKU, fill a missing
 *     image and refine the price.
 *  2) When the image is still missing, try a by-SKU GraphQL lookup using the
 *     raw SKU.
 *  3) When the SKU is still synthetic, fall back to `normalizeCspc(it.url)`.
 *
 * @param {object} ctx - Scan context.
 * @param {object} it - Item produced by `tudorItemFromProductFast`.
 * @returns {Promise<object>} The same item object, updated in place.
 */
async function tudorRepairItem(ctx, it) {
	const inStockVariants = Array.isArray(it._variants) ? it._variants.filter((v) => Number(v?.quantity) > 0) : [];
	const hasMultiInStock = inStockVariants.length >= 2;

	// 1) HTML: fix SKU if missing/synthetic, AND fix price for multi-variant URLs
	if (isSyntheticSku(it.sku) || hasMultiInStock) {
		const d = await tudorDetailFromProductPage(ctx, it.url);

		// Prefer real SKU from HTML
		if (d?.sku && !isSyntheticSku(d.sku)) {
			it.sku = d.sku;
		}

		// Fill image if missing
		if (!it.img && d?.img) it.img = d.img;

		// Price precision:
		// - Best: match HTML SKU to a GQL variant sku => exact numeric variant price
		// - Fallback: use displayed HTML price
		const htmlSkuDigits = String(d?.sku || "")
			.replace(/^id:/i, "")
			.trim();

		if (htmlSkuDigits && inStockVariants.length) {
			const match = inStockVariants.find((v) => String(v?.sku || "").trim() === htmlSkuDigits);
			if (match && Number.isFinite(Number(match.price))) {
				it.price = money(match.price);
			} else if (Number.isFinite(d?.priceNum)) {
				it.price = money(d.priceNum);
			}
		} else if (Number.isFinite(d?.priceNum)) {
			it.price = money(d.priceNum);
		}
	}

	// 2) Missing image -> limited productsBySku lookup
	if (!it.img) {
		const skuProbe = String(it._skuProbe || "").trim();
		if (skuProbe) {
			const supp = await supplementImageFromSku(ctx, skuProbe);
			if (supp?.img) it.img = supp.img;
		}
	}

	// Final fallback ONLY after repair attempts (stability)
	if (isSyntheticSku(it.sku)) it.sku = normalizeCspc(it.url) || "";

	return it;
}
/* ---------------- scanner ---------------- */
/**
 * Scan one Tudor House category and merge the result into its DB file.
 *
 * Pass 1 pages through the GraphQL product list (at most 500 pages), builds
 * items with the fast path, seeds SKU/image from the previous DB, and queues
 * items that still have a synthetic SKU or no image.
 * Pass 2 repairs queued items, limited by the HTML and GQL budgets
 * (`ctx.config.tudorHtmlBudget` / `ctx.config.tudorGqlBudget`, with defaults).
 * Finally the discovered items are merged with `prevDb`, written to
 * `ctx.dbFile`, and the category result and totals are added to `report`.
 *
 * @param {object} ctx - Scan context (config, logger, http, store, cat, dbFile, ...).
 * @param {object} prevDb - Previous DB; `prevDb.byUrl` is consulted per item.
 * @param {object} report - Run report; `categories` and `totals` are updated in place.
 * @returns {Promise<void>}
 */
async function scanCategoryTudor(ctx, prevDb, report) {
	const t0 = Date.now();
	const discovered = new Map();

	// A missing or non-numeric maxPages means "no explicit limit" (capped at 500).
	// Comparing against null only would let `undefined` turn the cap into NaN, so
	// no page would be scanned and every known item would then look removed.
	const cfgMaxPages = ctx.config.maxPages;
	const cfgMaxPagesNum = Number(cfgMaxPages);
	const maxPages = cfgMaxPages == null || !Number.isFinite(cfgMaxPagesNum) ? 500 : Math.min(cfgMaxPagesNum, 500);
	let cursor = null;
	let done = 0;

	const needsDetail = [];

	for (let page = 1; page <= maxPages; page++) {
		const tPage = Date.now();

		const prod = await fetchProductsPage(ctx, cursor);
		const arr = Array.isArray(prod?.items) ? prod.items : [];

		let kept = 0;
		for (const p of arr) {
			const it = tudorItemFromProductFast(p, ctx);
			if (!it) continue;

			// Seed from cached DB to avoid repeating detail HTML
			const prev = prevDb?.byUrl?.get(it.url) || null;
			if (prev) {
				it.sku = pickBetterSku(it.sku, prev.sku);
				if (!it.img && prev.img) it.img = prev.img;
			}

			// queue only; do not do detail calls inline
			if (isSyntheticSku(it.sku) || !it.img) needsDetail.push(it);

			discovered.set(it.url, it);
			kept++;
		}

		done++;

		const ms = Date.now() - tPage;
		ctx.logger.ok(
			`${ctx.catPrefixOut} | Page ${pageStr(page, maxPages)} | 200 | items=${padLeft(
				kept,
				3,
			)} | bytes=${kbStr(0)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(ms)}`,
		);

		cursor = prod?.nextPageCursor || null;
		if (!cursor || !arr.length) break;
	}

	// second pass: repair with budgets
	const htmlBudget = Number.isFinite(ctx.config.tudorHtmlBudget)
		? ctx.config.tudorHtmlBudget
		: DETAIL_HTML_BUDGET_DEFAULT;
	const gqlBudget = Number.isFinite(ctx.config.tudorGqlBudget)
		? ctx.config.tudorGqlBudget
		: DETAIL_GQL_BUDGET_DEFAULT;

	let htmlUsed = 0;
	let gqlUsed = 0;

	for (const it of needsDetail) {
		const wantsHtml = isSyntheticSku(it.sku);
		const wantsGql = !it.img && String(it._skuProbe || "").trim();

		// enforce caps: an item is skipped only when every kind of work it wants is over budget
		// NOTE(review): an item wanting both is still repaired while either budget has room,
		// so a counter can end above its budget.
		if (wantsHtml && htmlUsed >= htmlBudget && (!wantsGql || gqlUsed >= gqlBudget)) continue;
		if (wantsGql && gqlUsed >= gqlBudget && (!wantsHtml || htmlUsed >= htmlBudget)) continue;

		// count budgets pessimistically
		if (wantsHtml) htmlUsed++;
		if (wantsGql) gqlUsed++;

		await tudorRepairItem(ctx, it);
		discovered.set(it.url, it);
	}

	ctx.logger.ok(
		`${ctx.catPrefixOut} | Unique products: ${discovered.size} | detail(html=${htmlUsed}/${htmlBudget}, gql=${gqlUsed}/${gqlBudget})`,
	);

	const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, {
		storeLabel: ctx.store.name,
	});

	const dbObj = buildDbObject(ctx, merged);
	writeJsonAtomic(ctx.dbFile, dbObj);

	const elapsed = Date.now() - t0;

	report.categories.push({
		store: ctx.store.name,
		label: ctx.cat.label,
		key: ctx.cat.key,
		dbFile: ctx.dbFile,
		scannedPages: done,
		discoveredUnique: discovered.size,
		newCount: newItems.length,
		updatedCount: updatedItems.length,
		removedCount: removedItems.length,
		restoredCount: restoredItems.length,
		elapsedMs: elapsed,
	});

	report.totals.newCount += newItems.length;
	report.totals.updatedCount += updatedItems.length;
	report.totals.removedCount += removedItems.length;
	report.totals.restoredCount += restoredItems.length;

	addCategoryResultToReport(
		report,
		ctx.store.name,
		ctx.cat.label,
		newItems,
		updatedItems,
		removedItems,
		restoredItems,
	);
}
/* ---------------- store ---------------- */
/**
 * Build the store descriptor for Tudor House.
 *
 * Every category lives under the "spirits" root; the category key doubles as the
 * sub-category slug used in the start URL and in the tag list.
 *
 * @param {string} defaultUa - User-Agent string stored on the descriptor as `ua`.
 * @returns {object} Store descriptor with `key`, `name`, `host`, `ua`, `scanCategory` and `categories`.
 */
function createStore(defaultUa) {
  const rootSlug = "spirits";

  // One category entry; key order matches the original literal objects.
  const spiritsCategory = (subSlug, label) => ({
    key: subSlug,
    label,
    startUrl: `${BASE}/${STORE_ID}/category/${rootSlug}/${subSlug}`,
    tudorRootSlug: rootSlug,
    tudorSubSlug: subSlug,
    tudorAllTags: [rootSlug, subSlug],
  });

  const categories = [
    spiritsCategory("rum", "Rum"),
    spiritsCategory("whiskey-scotch", "Whiskey / Scotch"),
    spiritsCategory("scotch-selections", "Scotch Selections"),
  ];

  return {
    key: "tudor",
    name: "Tudor House",
    host: HOST,
    ua: defaultUa,
    scanCategory: scanCategoryTudor,
    categories,
  };
}
module.exports = { createStore }; module.exports = { createStore };

View file

@ -5,169 +5,170 @@ const { normalizeCspc } = require("../utils/sku");
const { normalizeBaseUrl } = require("../utils/url"); const { normalizeBaseUrl } = require("../utils/url");
/**
 * Turn a possibly relative or protocol-relative URL into an absolute one.
 *
 * @param {*} raw - URL-ish value; falsy values are treated as "".
 * @returns {string} "" for empty input, the trimmed input when it is already http(s),
 *   "https:" + input for protocol-relative URLs, otherwise the input resolved against
 *   https://vesselliquor.com/ (or the trimmed input itself when resolution throws).
 */
function normalizeAbsUrl(raw) {
  const trimmed = String(raw || "").trim();
  if (trimmed === "") return "";
  if (trimmed.startsWith("//")) return `https:${trimmed}`;

  const alreadyAbsolute = /^https?:\/\//i.test(trimmed);
  if (alreadyAbsolute) return trimmed;

  let resolved = trimmed;
  try {
    resolved = new URL(trimmed, "https://vesselliquor.com/").toString();
  } catch {
    // Unresolvable input is returned unchanged.
  }
  return resolved;
}
/**
 * Produce a stable form of a Shopify product URL.
 *
 * Drops the fragment and every query parameter except "variant" (which can represent a
 * distinct product configuration), and removes trailing slashes from non-root paths.
 *
 * @param {*} rawUrl - Absolute URL; falsy values are treated as "".
 * @returns {string} Normalized URL, or the stringified input when it cannot be parsed.
 */
function normalizeShopifyProductUrl(rawUrl) {
  const input = String(rawUrl || "");
  try {
    const parsed = new URL(input);
    parsed.hash = "";

    const allowed = new Set(["variant"]);
    // Snapshot the keys first: deleting while iterating the live view would skip entries.
    Array.from(parsed.searchParams.keys())
      .filter((name) => !allowed.has(name))
      .forEach((name) => parsed.searchParams.delete(name));

    const noParamsLeft = parsed.searchParams.keys().next().done;
    if (noParamsLeft) parsed.search = "";

    if (parsed.pathname.length > 1) {
      parsed.pathname = parsed.pathname.replace(/\/+$/, "");
    }
    return parsed.toString();
  } catch {
    return input;
  }
}
/**
 * Build the listing URL for a given page number using a `page` query parameter.
 *
 * Existing query parameters are kept; the fragment is dropped. For pageNum <= 1 the
 * `page` parameter is removed instead of being set.
 *
 * @param {string} baseUrl - Listing URL, passed through normalizeBaseUrl before parsing.
 * @param {number} pageNum - Page number.
 * @returns {string} Absolute URL for that page.
 */
function makeVesselPageUrl(baseUrl, pageNum) {
  const target = new URL(normalizeBaseUrl(baseUrl));
  const params = target.searchParams;
  target.hash = "";

  if (pageNum <= 1) {
    params.delete("page");
  } else {
    params.set("page", String(pageNum));
  }

  const query = params.toString();
  target.search = query ? `?${params.toString()}` : "";
  return target.toString();
}
/**
 * Heuristic stock check on a product-card HTML block.
 *
 * @param {*} block - Card HTML; falsy values are treated as "".
 * @returns {boolean} false when the lower-cased block contains a sold-out phrase or a
 *   data-available="false" attribute, true otherwise.
 */
function vesselLooksInStock(block) {
  const haystack = String(block || "").toLowerCase();
  const soldOutPhrases = ["sold out", "sold-out", "out of stock"];
  const saysSoldOut = soldOutPhrases.some((phrase) => haystack.includes(phrase));
  const flaggedUnavailable = /\bdata-available=["']false["']/.test(haystack);
  return !(saysSoldOut || flaggedUnavailable);
}
/**
 * Extract the current selling price from a product-card HTML block.
 *
 * <sale-price> elements are checked first, last to first; the first one that contains a
 * dollar amount wins. Otherwise the last dollar amount inside <price-list> is used, after
 * removing <compare-at-price> (crossed-out) content.
 *
 * Amounts with thousands separators ("$1,299.99") are matched in full; the previous
 * pattern stopped at the first comma and returned "$1". Whitespace and commas are
 * removed from the returned value.
 *
 * @param {*} block - Card HTML; falsy values are treated as "".
 * @returns {string} Price such as "$49.99" or "$1299.99", or "" when none is found.
 */
function vesselExtractPrice(block) {
  const s = String(block || "");

  // "$" + either comma-grouped digits (1,299) or plain digits, plus optional cents.
  const priceRe = /\$\s*(?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d{2})?/;
  const priceReAll = new RegExp(priceRe.source, "g");
  const normalize = (raw) => raw.replace(/[\s,]+/g, "");

  const saleTags = [...s.matchAll(/<sale-price\b[^>]*>([\s\S]*?)<\/sale-price>/gi)];
  for (let i = saleTags.length - 1; i >= 0; i--) {
    const txt = cleanText(decodeHtml(saleTags[i][1] || ""));
    const m = txt.match(priceRe);
    if (m) return normalize(m[0]);
  }

  // Fallback: read price-list but ignore compare-at (crossed-out)
  const withoutCompare = s.replace(/<compare-at-price\b[^>]*>[\s\S]*?<\/compare-at-price>/gi, "");
  const pl = withoutCompare.match(/<price-list\b[^>]*>([\s\S]*?)<\/price-list>/i);
  if (pl) {
    const txt = cleanText(decodeHtml(pl[1] || ""));
    const all = [...txt.matchAll(priceReAll)];
    if (all.length) return normalize(all[all.length - 1][0]);
  }

  return "";
}
/**
 * Derive a SKU for a product card from its image URL, falling back to the card HTML.
 *
 * Order of preference:
 *   1. whatever normalizeCspc returns for the image URL (when truthy);
 *   2. "id:<digits>" from a purely numeric image filename (1-11 digits, jpg/jpeg/png/webp);
 *   3. "id:<digits>" from the first /cdn/shop/products|files/<digits>.<ext> path in the block.
 *
 * @param {string} imgUrl - Image URL of the card.
 * @param {string} block - Card HTML.
 * @returns {string} SKU, or "" when nothing matches.
 */
function vesselExtractSkuFromImgOrBlock(imgUrl, block) {
  const fromCspc = normalizeCspc(imgUrl) || "";
  if (fromCspc) return fromCspc;

  const asNumericId = (match) => (match && match[1] ? `id:${match[1]}` : "");

  let fromFilename = "";
  try {
    const { pathname } = new URL(String(imgUrl || ""));
    fromFilename = asNumericId(pathname.match(/\/(\d{1,11})\.(?:jpe?g|png|webp)$/i));
  } catch {}
  if (fromFilename) return fromFilename;

  const html = String(block || "");
  return asNumericId(html.match(/\/cdn\/shop\/(?:products|files)\/(\d{1,11})\.(?:jpe?g|png|webp)/i));
}
/**
 * Convert one product-card HTML block into an item record.
 *
 * @param {string} block - HTML of a single product card.
 * @param {string} base - Base URL used to resolve relative links and images.
 * @returns {{name: string, price: string, url: string, sku: string, img: string} | null}
 *   null when the card looks out of stock, has no link, has an unresolvable link,
 *   or has no usable title.
 */
function vesselCardToItem(block, base) {
  if (!vesselLooksInStock(block)) return null;

  const linkMatch = block.match(/<a\b[^>]*href=["']([^"']+)["'][^>]*>/i);
  const rawHref = linkMatch ? linkMatch[1] : "";
  if (!rawHref) return null;

  let url;
  try {
    const absolute = new URL(decodeHtml(rawHref), base).toString();
    url = normalizeShopifyProductUrl(absolute);
  } catch {
    return null;
  }

  // Title: link text inside the product-card__title element, else the first image's alt text.
  const titleMatch =
    block.match(/product-card__title[\s\S]*?<a\b[^>]*>([\s\S]*?)<\/a>/i) ||
    block.match(/<img\b[^>]*\balt=["']([^"']+)["']/i);
  const rawTitle = titleMatch ? titleMatch[1] : "";
  const name = cleanText(decodeHtml(rawTitle));
  if (!name) return null;

  const img = normalizeAbsUrl(extractFirstImgUrl(block, base));
  const price = vesselExtractPrice(block);
  // Prefer numeric filename SKU like 67424.jpg (works for 5-digit too)
  const sku = vesselExtractSkuFromImgOrBlock(img, block);

  return { name, price, url, sku, img };
}
/**
 * Parse a Vessel Liquor listing page into item records.
 *
 * The page is split on "<product-card" openings; each piece is handed to vesselCardToItem.
 * Items are de-duplicated by URL: the first occurrence fixes the position, the last
 * occurrence supplies the data.
 *
 * @param {*} html - Listing page HTML; falsy values are treated as "".
 * @param {object} [ctx] - Scan context; `ctx.store.host` overrides the default host.
 * @returns {object[]} Items, unique by `url`.
 */
function parseProductsVessel(html, ctx) {
  const markup = String(html || "");
  const host = (ctx && ctx.store && ctx.store.host) || "vesselliquor.com";
  const base = `https://${host}/`;

  // The first piece is whatever precedes the first card and is discarded.
  const [, ...cardTails] = markup.split(/<product-card\b/i);
  if (cardTails.length === 0) return [];

  const byUrl = new Map();
  for (const tail of cardTails) {
    const item = vesselCardToItem("<product-card" + tail, base);
    if (item) byUrl.set(item.url, item);
  }
  return [...byUrl.values()];
}
/**
 * Build the store descriptor for Vessel Liquor.
 *
 * @param {string} defaultUa - User-Agent string stored on the descriptor as `ua`.
 * @returns {object} Store descriptor with `key`, `name`, `host`, `ua`, `parseProducts`,
 *   `makePageUrl` and `categories`.
 */
function createStore(defaultUa) {
  // Each category is a collection listing sorted by title and filtered on availability.
  const collection = (key, label) => ({
    key,
    label,
    startUrl: `https://vesselliquor.com/collections/${key}?sort_by=title-ascending&filter.v.availability=1`,
    discoveryStartPage: 20,
    discoveryStep: 10,
  });

  const categories = [collection("whisky", "Whisky"), collection("rum-cane-spirit", "Rum / Cane Spirit")];

  return {
    key: "vessel",
    name: "Vessel Liquor",
    host: "vesselliquor.com",
    ua: defaultUa,
    parseProducts: parseProductsVessel,
    makePageUrl: makeVesselPageUrl, // Shopify ?page=N (preserves filter/sort params)
    categories,
  };
}
module.exports = { createStore, parseProductsVessel }; module.exports = { createStore, parseProductsVessel };

View file

@ -9,20 +9,20 @@ const { buildDbObject, writeJsonAtomic } = require("../tracker/db");
const { addCategoryResultToReport } = require("../tracker/report"); const { addCategoryResultToReport } = require("../tracker/report");
/**
 * Format a byte count for a fixed-width log column.
 *
 * @param {number} bytes - Byte count, formatted by humanBytes.
 * @returns {string} The humanBytes output, left-padded with spaces to at least 8 characters.
 */
function kbStr(bytes) {
  const human = humanBytes(bytes);
  return human.padStart(8, " ");
}
/**
 * Format a duration in milliseconds as seconds for a fixed-width log column.
 *
 * Durations that round to under 10 seconds keep one decimal ("1.2s"); longer ones are
 * shown as whole seconds ("12s"). Non-finite input is shown as "0.0s".
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Text left-padded with spaces to at least 7 characters.
 */
function secStr(ms) {
  const seconds = Number.isFinite(ms) ? ms / 1000 : 0;
  const tenths = Math.round(seconds * 10) / 10;

  let text;
  if (tenths < 10) {
    text = `${tenths.toFixed(1)}s`;
  } else {
    text = `${Math.round(seconds)}s`;
  }
  return text.padStart(7, " ");
}
/**
 * Format "current/total" with the current value padded (via padLeft) to the width of the total.
 *
 * @param {number} i - Current page number.
 * @param {number} total - Total number of pages.
 * @returns {string} e.g. " 3/12" when padLeft pads with spaces.
 */
function pageStr(i, total) {
  const width = String(total).length;
  const current = padLeft(i, width);
  return `${current}/${total}`;
}
/**
 * Format progress as a whole-number percentage, padded (via padLeft) to width 3.
 *
 * @param {number} done - Completed units.
 * @param {number} total - Total units; a falsy total yields 0%.
 * @returns {string} e.g. " 50%" when padLeft pads with spaces.
 */
function pctStr(done, total) {
  let pct = 0;
  if (total) {
    pct = Math.floor((done / total) * 100);
  }
  return `${padLeft(pct, 3)}%`;
}
const BASE = "https://shop.vintagespirits.ca"; const BASE = "https://shop.vintagespirits.ca";
@ -30,228 +30,250 @@ const SHOP_ID = "679-320"; // from your curl; can be made dynamic later
const IMG_BASE = "https://s.barnetnetwork.com/img/m/"; const IMG_BASE = "https://s.barnetnetwork.com/img/m/";
/**
 * Pick a price from an API item and format it as "$N.NN".
 *
 * The first finite, positive value among sale_price, net_price and regular_price
 * (in that order) is used.
 *
 * @param {object} [it] - Raw API item.
 * @returns {string} Formatted price, or "" when no candidate is a positive finite number.
 */
function asMoneyFromApi(it) {
  // prefer explicit sale price when present
  const candidates = [it?.sale_price, it?.net_price, it?.regular_price];
  for (const raw of candidates) {
    const amount = Number(raw);
    if (Number.isFinite(amount) && amount > 0) {
      return `$${amount.toFixed(2)}`;
    }
  }
  return "";
}
/**
 * Resolve an API item's `image` field to an absolute URL.
 *
 * @param {object} [it] - Raw API item.
 * @returns {string} "" when there is no image; the value itself when already http(s);
 *   "https:" + value for protocol-relative URLs; otherwise IMG_BASE + value with leading
 *   slashes removed.
 */
function imgUrlFromApi(it) {
  const path = String(it?.image || "").trim();
  if (path === "") return "";

  const isAbsolute = /^https?:\/\//i.test(path);
  if (isAbsolute) return path;

  // API provides "custom/goods/..."
  return path.startsWith("//") ? `https:${path}` : `${IMG_BASE}${path.replace(/^\/+/, "")}`;
}
/**
 * Convert a raw API item into an item record, skipping items that are not purchasable.
 *
 * @param {object} [it] - Raw API item.
 * @returns {{name: string, price: string, url: string, sku: string, img: string} | null}
 *   null when the item is missing, not available for sale, has a finite on_hand <= 0,
 *   or lacks a url or description.
 */
function vintageItemFromApi(it) {
  // stock gate
  if (!it || !it.available_for_sale) return null;
  const onHand = Number(it.on_hand);
  const knownEmpty = Number.isFinite(onHand) && onHand <= 0;
  if (knownEmpty) return null;

  const url = String(it.url || "").trim();
  const name = String(it.description || "").trim();
  if (!(url && name)) return null;

  const sku = normalizeCspc(it.cspcid || "");
  const price = asMoneyFromApi(it);
  const img = imgUrlFromApi(it);
  return { name, price, url, sku, img };
}
/**
 * Build the products API URL for one page of a category.
 *
 * @param {{vsCategory: string, vsSubCategory: string}} cat - Category descriptor.
 * @param {number} page - Page number, sent as the `p` parameter.
 * @returns {string} Absolute API URL. The `_dc` parameter is a random integer, so two
 *   calls with the same arguments return different URLs.
 */
function makeApiUrl(cat, page) {
  const endpoint = new URL(`${BASE}/api/shop/${SHOP_ID}/products`);

  const params = [
    ["p", String(page)],
    ["show_on_web", "true"],
    ["sort_by", "desc"],
    ["category", cat.vsCategory], // e.g. "40 SPIRITS"
    ["sub_category", cat.vsSubCategory], // e.g. "RUM"
    ["varital_name", ""],
    ["no_item_found", "No item found."],
    ["avail_for_sale", "false"],
    ["_dc", String(Math.floor(Math.random() * 1e10))],
  ];
  for (const [key, value] of params) {
    endpoint.searchParams.set(key, value);
  }

  return endpoint.toString();
}
/**
 * Fetch one page of the products API for the current category.
 *
 * @param {object} ctx - Scan context; uses `ctx.cat`, `ctx.store.ua` and `ctx.http.fetchJsonWithRetry`.
 * @param {number} page - Page number.
 * @returns {Promise<*>} Whatever `ctx.http.fetchJsonWithRetry` resolves to.
 */
async function fetchVintagePage(ctx, page) {
  const { cat, store, http } = ctx;
  const label = `vintage:api:${cat.key}:p${page}`;
  const requestOptions = {
    method: "GET",
    headers: {
      Accept: "*/*",
      Referer: cat.startUrl,
      Origin: BASE,
    },
    // Cookies are currently enabled. The original note says they were not required in
    // testing and are meant as a guard against 403/empty responses.
    cookies: true,
  };
  return await http.fetchJsonWithRetry(makeApiUrl(cat, page), label, store.ua, requestOptions);
}
async function scanCategoryVintageApi(ctx, prevDb, report) { async function scanCategoryVintageApi(ctx, prevDb, report) {
const t0 = Date.now(); const t0 = Date.now();
let first; let first;
try { try {
first = await fetchVintagePage(ctx, 1); first = await fetchVintagePage(ctx, 1);
} catch (e) { } catch (e) {
ctx.logger.warn(`${ctx.catPrefixOut} | Vintage API fetch failed: ${e?.message || e}`); ctx.logger.warn(`${ctx.catPrefixOut} | Vintage API fetch failed: ${e?.message || e}`);
const discovered = new Map(); const discovered = new Map();
const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, { const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(
storeLabel: ctx.store.name, prevDb,
}); discovered,
const dbObj = buildDbObject(ctx, merged); {
writeJsonAtomic(ctx.dbFile, dbObj); storeLabel: ctx.store.name,
},
);
const dbObj = buildDbObject(ctx, merged);
writeJsonAtomic(ctx.dbFile, dbObj);
const elapsed = Date.now() - t0; const elapsed = Date.now() - t0;
report.categories.push({ report.categories.push({
store: ctx.store.name, store: ctx.store.name,
label: ctx.cat.label, label: ctx.cat.label,
key: ctx.cat.key, key: ctx.cat.key,
dbFile: ctx.dbFile, dbFile: ctx.dbFile,
scannedPages: 1, scannedPages: 1,
discoveredUnique: 0, discoveredUnique: 0,
newCount: newItems.length, newCount: newItems.length,
updatedCount: updatedItems.length, updatedCount: updatedItems.length,
removedCount: removedItems.length, removedCount: removedItems.length,
restoredCount: restoredItems.length, restoredCount: restoredItems.length,
elapsedMs: elapsed, elapsedMs: elapsed,
}); });
report.totals.newCount += newItems.length; report.totals.newCount += newItems.length;
report.totals.updatedCount += updatedItems.length; report.totals.updatedCount += updatedItems.length;
report.totals.removedCount += removedItems.length; report.totals.removedCount += removedItems.length;
report.totals.restoredCount += restoredItems.length; report.totals.restoredCount += restoredItems.length;
addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems); addCategoryResultToReport(
return; report,
} ctx.store.name,
ctx.cat.label,
newItems,
updatedItems,
removedItems,
restoredItems,
);
return;
}
const totalPages = Math.max(1, Number(first?.json?.paginator?.pages) || 1); const totalPages = Math.max(1, Number(first?.json?.paginator?.pages) || 1);
const scanPages = ctx.config.maxPages === null ? totalPages : Math.min(ctx.config.maxPages, totalPages); const scanPages = ctx.config.maxPages === null ? totalPages : Math.min(ctx.config.maxPages, totalPages);
ctx.logger.ok( ctx.logger.ok(
`${ctx.catPrefixOut} | Pages: ${scanPages}${scanPages !== totalPages ? ` (cap from ${totalPages})` : ""}` `${ctx.catPrefixOut} | Pages: ${scanPages}${scanPages !== totalPages ? ` (cap from ${totalPages})` : ""}`,
); );
const pages = []; const pages = [];
for (let p = 1; p <= scanPages; p++) pages.push(p); for (let p = 1; p <= scanPages; p++) pages.push(p);
let donePages = 0; let donePages = 0;
const perPageItems = await require("../utils/async").parallelMapStaggered( const perPageItems = await require("../utils/async").parallelMapStaggered(
pages, pages,
ctx.config.concurrency, ctx.config.concurrency,
ctx.config.staggerMs, ctx.config.staggerMs,
async (page, idx) => { async (page, idx) => {
const r = page === 1 ? first : await fetchVintagePage(ctx, page); const r = page === 1 ? first : await fetchVintagePage(ctx, page);
const arr = Array.isArray(r?.json?.items) ? r.json.items : []; const arr = Array.isArray(r?.json?.items) ? r.json.items : [];
const items = []; const items = [];
for (const raw of arr) { for (const raw of arr) {
const it = vintageItemFromApi(raw); const it = vintageItemFromApi(raw);
if (it) items.push(it); if (it) items.push(it);
} }
donePages++; donePages++;
ctx.logger.ok( ctx.logger.ok(
`${ctx.catPrefixOut} | Page ${pageStr(idx + 1, pages.length)} | ${String(r.status || "").padEnd( `${ctx.catPrefixOut} | Page ${pageStr(idx + 1, pages.length)} | ${String(r.status || "").padEnd(
3 3,
)} | ${pctStr(donePages, pages.length)} | items=${padLeft(items.length, 3)} | bytes=${kbStr( )} | ${pctStr(donePages, pages.length)} | items=${padLeft(items.length, 3)} | bytes=${kbStr(
r.bytes r.bytes,
)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}` )} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}`,
); );
return items; return items;
} },
); );
const discovered = new Map(); const discovered = new Map();
let dups = 0; let dups = 0;
for (const arr of perPageItems) { for (const arr of perPageItems) {
for (const it of arr) { for (const it of arr) {
if (discovered.has(it.url)) dups++; if (discovered.has(it.url)) dups++;
discovered.set(it.url, it); discovered.set(it.url, it);
} }
} }
ctx.logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}${dups ? ` (${dups} dups)` : ""}`); ctx.logger.ok(
`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}${dups ? ` (${dups} dups)` : ""}`,
);
const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, { const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, {
storeLabel: ctx.store.name, storeLabel: ctx.store.name,
}); });
const dbObj = buildDbObject(ctx, merged); const dbObj = buildDbObject(ctx, merged);
writeJsonAtomic(ctx.dbFile, dbObj); writeJsonAtomic(ctx.dbFile, dbObj);
const elapsed = Date.now() - t0; const elapsed = Date.now() - t0;
ctx.logger.ok( ctx.logger.ok(
`${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}` `${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`,
); );
report.categories.push({ report.categories.push({
store: ctx.store.name, store: ctx.store.name,
label: ctx.cat.label, label: ctx.cat.label,
key: ctx.cat.key, key: ctx.cat.key,
dbFile: ctx.dbFile, dbFile: ctx.dbFile,
scannedPages: scanPages, scannedPages: scanPages,
discoveredUnique: discovered.size, discoveredUnique: discovered.size,
newCount: newItems.length, newCount: newItems.length,
updatedCount: updatedItems.length, updatedCount: updatedItems.length,
removedCount: removedItems.length, removedCount: removedItems.length,
restoredCount: restoredItems.length, restoredCount: restoredItems.length,
elapsedMs: elapsed, elapsedMs: elapsed,
}); });
report.totals.newCount += newItems.length; report.totals.newCount += newItems.length;
report.totals.updatedCount += updatedItems.length; report.totals.updatedCount += updatedItems.length;
report.totals.removedCount += removedItems.length; report.totals.removedCount += removedItems.length;
report.totals.restoredCount += restoredItems.length; report.totals.restoredCount += restoredItems.length;
addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems); addCategoryResultToReport(
report,
ctx.store.name,
ctx.cat.label,
newItems,
updatedItems,
removedItems,
restoredItems,
);
} }
/**
 * Build the store descriptor for Vintage Spirits.
 *
 * @param {string} defaultUa - User-Agent string stored on the descriptor as `ua`.
 * @returns {object} Store descriptor with `key`, `name`, `host`, `ua`, `scanCategory` and `categories`.
 */
function createStore(defaultUa) {
  const vsCategory = "40 SPIRITS";

  // startUrl is the storefront listing for the same category/sub-category pair that the
  // API is queried with; URLSearchParams yields the form encoding ("+" for spaces, "%26" for "&").
  const spirits = (key, label, vsSubCategory) => {
    const query = new URLSearchParams({ category: vsCategory, sub_category: vsSubCategory });
    return {
      key,
      label,
      startUrl: `https://shop.vintagespirits.ca/products?${query.toString()}`,
      vsCategory,
      vsSubCategory,
    };
  };

  const categories = [
    spirits("whisky-whiskey", "Whisky & Whiskey", "WHISKY & WHISKEY"),
    spirits("single-malt-whisky", "Single Malt Whisky", "SINGLE MALT WHISKY"),
    spirits("rum", "Rum", "RUM"),
  ];

  return {
    key: "vintage",
    name: "Vintage Spirits",
    host: "shop.vintagespirits.ca",
    ua: defaultUa,
    scanCategory: scanCategoryVintageApi,
    categories,
  };
}
module.exports = { createStore }; module.exports = { createStore };

View file

@ -6,120 +6,116 @@ const { makePageUrlShopifyQueryPage } = require("../utils/url");
const { needsSkuDetail, pickBetterSku, normalizeCspc } = require("../utils/sku"); const { needsSkuDetail, pickBetterSku, normalizeCspc } = require("../utils/sku");
/**
 * Read a 6-digit SKU from the end of a product URL or href.
 *
 * Matches "-NNNNNN" at the end of the path, optionally followed by a slash and a
 * query string or fragment, e.g. /products/<handle>-123456 or
 * /collections/.../products/<handle>-123456.
 *
 * @param {*} hrefOrUrl - URL or href; falsy values are treated as "".
 * @returns {string} The six digits, or "" when the pattern is absent.
 */
function extractSkuFromUrlOrHref(hrefOrUrl) {
  const trailingSku = /-(\d{6})(?:\/)?(?:[?#].*)?$/;
  const match = trailingSku.exec(String(hrefOrUrl || ""));
  if (!match) return "";
  return match[1];
}
/**
 * Read a 6-digit SKU from a product block's HTML.
 *
 * Patterns are tried in order; the first hit wins:
 *   1. image filename under /products/, e.g. /products/710296-Zaya-Gran-Reserva-16-Year_160x.png
 *   2. any standalone 6-digit run followed by "-" or "_" and a letter.
 *
 * @param {*} block - Block HTML; falsy values are treated as "".
 * @returns {string} The six digits, or "" when neither pattern matches.
 */
function extractSkuFromWillowBlock(block) {
  const html = String(block || "");
  const patterns = [
    /\/products\/(\d{6})[-_]/i, // image filename
    /\b(\d{6})[-_][A-Za-z]/, // generic fallback
  ];
  for (const pattern of patterns) {
    const hit = html.match(pattern);
    if (hit) return hit[1];
  }
  return "";
}
/**
 * Reduce a product URL to a canonical form.
 *
 * Query string and fragment are removed, and a /collections/<name>/products/<handle>
 * path is rewritten to /products/<handle>.
 *
 * @param {*} raw - Absolute URL.
 * @returns {string} Canonical URL, or `String(raw || "")` when the input cannot be parsed.
 */
function canonicalizeWillowUrl(raw) {
  let parsed;
  try {
    parsed = new URL(String(raw));
  } catch {
    return String(raw || "");
  }

  parsed.search = "";
  parsed.hash = "";

  const viaCollection = /^\/collections\/[^/]+\/products\/([^/]+)\/?$/i.exec(parsed.pathname);
  if (viaCollection) {
    const [, handle] = viaCollection;
    parsed.pathname = `/products/${handle}`;
  }
  return parsed.toString();
}
/**
 * Extract the displayed price from a product card.
 *
 * Sources, in order of preference:
 *   1. the visually-hidden span following "grid-product__price--current";
 *   2. the first visually-hidden span carrying a "$N.NN" amount;
 *   3. a price split as $39<sup>99</sup>, reassembled as "$39.99";
 *   4. the first "$N" or "$N.NN" amount anywhere in the block.
 *
 * NOTE(review): sources 1, 2 and 4 keep thousands separators ("$1,250.00") while
 * source 3 strips them ("$1299.00") — confirm that downstream price handling accepts both.
 *
 * @param {*} block - Card HTML; falsy values are treated as "".
 * @returns {string} Price without whitespace, or "" when none is found.
 */
function extractWillowCardPrice(block) {
  const html = String(block || "");
  const squeeze = (text) => text.replace(/\s+/g, "");

  const currentHidden = html.match(
    /grid-product__price--current[\s\S]*?<span\b[^>]*class=["']visually-hidden["'][^>]*>\s*(\$\s*[\d,]+\.\d{2})\s*<\/span>/i,
  );
  const anyHidden = html.match(/<span\b[^>]*class=["']visually-hidden["'][^>]*>\s*(\$\s*[\d,]+\.\d{2})\s*<\/span>/i);
  const exact = currentHidden?.[1] || anyHidden?.[1];
  if (exact) return squeeze(exact);

  const split = html.match(/\$\s*([\d,]+)\s*<sup>\s*(\d{2})\s*<\/sup>/i);
  if (split) {
    const [, dollars, cents] = split;
    return `$${dollars.replace(/,/g, "")}.${cents}`;
  }

  const loose = html.match(/\$\s*[\d,]+(?:\.\d{2})?/);
  if (!loose) return "";
  return squeeze(loose[0]);
}
/**
 * Parse a Willow Park listing page into item records.
 *
 * Each product block runs from one "grid-item grid-product" opening <div> to the next
 * (or to the end of the page). Blocks without a product link or a title are skipped.
 * Items are de-duplicated by canonical URL: the first occurrence fixes the position,
 * the last occurrence supplies the data.
 *
 * @param {*} html - Listing page HTML; falsy values are treated as "".
 * @param {object} [ctx] - Scan context; `ctx.store.host` overrides the default host.
 * @param {string} [finalUrl] - Not used by this function.
 * @returns {Array<{name: string, price: string, url: string, sku: string, img: string, pid: string}>}
 */
function parseProductsWillowPark(html, ctx, finalUrl) {
  const markup = String(html || "");
  const host = (ctx && ctx.store && ctx.store.host) || "www.willowpark.net";
  const base = `https://${host}/`;

  // Offsets of every product-card opening tag.
  const offsets = [];
  for (const hit of markup.matchAll(
    /<div\b[^>]*class=["'][^"']*\bgrid-item\b[^"']*\bgrid-product\b[^"']*["'][^>]*>/gi,
  )) {
    if (typeof hit.index === "number") offsets.push(hit.index);
  }

  const byUrl = new Map();
  offsets.forEach((start, n) => {
    const end = n + 1 < offsets.length ? offsets[n + 1] : markup.length;
    const block = markup.slice(start, end);

    // Prefer the collection-scoped product link, else any product link.
    const href =
      block.match(/<a\b[^>]*href=["']([^"']*\/collections\/[^"']*\/products\/[^"']+)["']/i)?.[1] ||
      block.match(/<a\b[^>]*href=["']([^"']*\/products\/[^"']+)["']/i)?.[1];
    if (!href) return;

    let url;
    try {
      url = new URL(decodeHtml(href), base).toString();
    } catch {
      return;
    }
    url = canonicalizeWillowUrl(url);

    const titleMatch = block.match(
      /<div\b[^>]*class=["'][^"']*\bgrid-product__title\b[^"']*["'][^>]*>([\s\S]*?)<\/div>/i,
    );
    const titleHtml = titleMatch?.[1] || "";
    const name = cleanText(decodeHtml(stripTags(titleHtml)));
    if (!name) return;

    const price = extractWillowCardPrice(block);
    const img = extractFirstImgUrl(block, base);
    const pid = block.match(/\bdata-product-id=["'](\d+)["']/i)?.[1] || "";
    // SKU: raw href first, then the canonical URL, then image filenames in the block.
    const sku = extractSkuFromUrlOrHref(href) || extractSkuFromUrlOrHref(url) || extractSkuFromWillowBlock(block);

    byUrl.set(url, { name, price, url, sku, img, pid });
  });

  return [...byUrl.values()];
}
/**
 * Detects a collection page that rendered without any products.
 *
 * @param {string} html - Raw listing-page HTML (nullish is treated as "").
 * @returns {boolean} True when any known "empty collection" marker is present.
 */
function willowIsEmptyListingPage(html) {
    const s = String(html || "");
    const emptyMarkers = [
        /Sorry,\s+there are no products in this collection\./i,
        /No products found/i,
        /collection--empty\b/i,
    ];
    return emptyMarkers.some((re) => re.test(s));
}
/* ---------------- Storefront GraphQL (token extracted from HTML) ---------------- */ /* ---------------- Storefront GraphQL (token extracted from HTML) ---------------- */
@ -137,102 +133,99 @@ query ($id: ID!) @inContext(country: CA) {
`; `;
/**
 * Chooses the most useful variant SKU from a product node.
 *
 * Preference order among variants that carry a non-blank SKU:
 *   1. first variant with quantityAvailable > 0,
 *   2. first variant flagged availableForSale,
 *   3. first variant of any kind.
 *
 * @param {object} product - Product node; only product.variants.nodes is read.
 * @returns {string} Trimmed SKU, or "" when no variant carries one.
 */
function pickBestVariantSku(product) {
    const nodes = product?.variants?.nodes;
    if (!Array.isArray(nodes) || !nodes.length) return "";

    const skuOf = (v) => String(v?.sku || "").trim();
    // Every tier requires a SKU, so filter once and rank the survivors.
    const withSku = nodes.filter((v) => skuOf(v));
    if (!withSku.length) return "";

    const best =
        withSku.find((v) => Number(v?.quantityAvailable) > 0) ||
        withSku.find((v) => Boolean(v?.availableForSale)) ||
        withSku[0];
    return skuOf(best);
}
/**
 * Pulls a storefront access token out of page HTML.
 *
 * Two sources are tried in order:
 *   1. the JSON body of <script id="shopify-features">, key "accessToken";
 *   2. the content attribute of <meta name="shopify-checkout-api-token">.
 *
 * @param {string} html - Page HTML (nullish is treated as "").
 * @returns {string} Trimmed token, or "" when neither source yields one.
 */
function extractStorefrontTokenFromHtml(html) {
    const s = String(html || "");

    // 1) script#shopify-features JSON: {"accessToken":"..."}
    const featuresJson = s.match(/<script[^>]+id=["']shopify-features["'][^>]*>([\s\S]*?)<\/script>/i)?.[1];
    if (featuresJson) {
        try {
            const token = String(JSON.parse(featuresJson)?.accessToken || "").trim();
            if (token) return token;
        } catch {
            // Malformed JSON is expected on some pages; fall through to the meta tag.
        }
    }

    // 2) meta name="shopify-checkout-api-token"
    // Isolate the tag first, then read content from inside it, so the attribute
    // order (name before content, or content before name) does not matter.
    const metaTag = s.match(/<meta\b[^>]*\bname=["']shopify-checkout-api-token["'][^>]*>/i)?.[0] || "";
    const content = metaTag.match(/\bcontent=["']([^"']+)["']/i)?.[1];
    return String(content || "").trim();
}
/**
 * Returns the storefront token for this run, fetching the homepage on first use.
 *
 * The token is memoized on ctx._willowStorefrontToken; a falsy cached value
 * (including the "" that willowGql writes after a 401/403) triggers a re-fetch.
 *
 * @param {object} ctx - Scan context; uses ctx.http.fetchTextWithRetry and ctx.store.ua.
 * @returns {Promise<string>} The token.
 * @throws {Error} When the homepage HTML contains no recognizable token.
 */
async function willowGetStorefrontToken(ctx) {
    if (ctx._willowStorefrontToken) return ctx._willowStorefrontToken;

    const res = await ctx.http.fetchTextWithRetry("https://www.willowpark.net/", "willow:token", ctx.store.ua);
    const token = extractStorefrontTokenFromHtml(res?.text || "");
    if (!token) throw new Error("Willow Park: could not find storefront token in homepage HTML");

    ctx._willowStorefrontToken = token;
    return token;
}
/**
 * POSTs a GraphQL request to WILLOW_STOREFRONT_GQL_URL using the cached storefront token.
 *
 * @param {object} ctx - Scan context; uses ctx.http.fetchJsonWithRetry and ctx.store.ua.
 * @param {string} label - Label passed through to the HTTP helper.
 * @param {string} query - GraphQL document.
 * @param {object} variables - GraphQL variables.
 * @returns {Promise<object>} Whatever fetchJsonWithRetry resolves to; callers here read .status and .json.
 */
async function willowGql(ctx, label, query, variables) {
    const token = await willowGetStorefrontToken(ctx);
    const headers = {
        Accept: "application/json",
        "content-type": "application/json",
        Origin: "https://www.willowpark.net",
        Referer: "https://www.willowpark.net/",
        "x-shopify-storefront-access-token": token,
    };
    const res = await ctx.http.fetchJsonWithRetry(WILLOW_STOREFRONT_GQL_URL, label, ctx.store.ua, {
        method: "POST",
        headers,
        body: JSON.stringify({ query, variables }),
    });

    // If token is rejected, clear it so the next call re-fetches it from the homepage.
    if (res?.status === 401 || res?.status === 403) ctx._willowStorefrontToken = "";
    return res;
}
/**
 * Normalizes a SKU string returned by the GraphQL lookup.
 *
 * - If normalizeCspc() recognizes it, that result wins (6-digit CSPC kept as-is).
 * - Values already prefixed with id:, upc: or u: are returned untouched.
 * - Any other all-digit value is namespaced as id:<NUM>.
 * - Everything else is returned trimmed but otherwise unchanged.
 *
 * @param {string} rawSku - SKU as received (nullish is treated as "").
 * @returns {string} Normalized SKU, or "" for blank input.
 */
function normalizeWillowGqlSku(rawSku) {
    const s = String(rawSku || "").trim();
    if (!s) return "";

    const cspc = normalizeCspc(s);
    if (cspc) return cspc; // 6-digit wins

    if (/^(?:id|upc|u):/i.test(s)) return s;
    return /^\d+$/.test(s) ? `id:${s}` : s;
}
/**
 * Looks up the best variant SKU for a numeric product id via GraphQL.
 *
 * Results are memoized per pid on ctx._willowSkuCacheByPid. A failed or empty
 * lookup is cached as "" too, so the same pid is not requested again on this ctx.
 *
 * @param {object} ctx - Scan context (passed through to willowGql).
 * @param {string|number} pid - Product id; blank/nullish returns "" without a request.
 * @returns {Promise<string>} Normalized SKU, or "" when none could be obtained.
 */
async function willowFetchSkuByPid(ctx, pid) {
    const id = String(pid || "").trim();
    if (!id) return "";

    if (!ctx._willowSkuCacheByPid) ctx._willowSkuCacheByPid = new Map();
    const cache = ctx._willowSkuCacheByPid;
    if (cache.has(id)) return cache.get(id);

    const gid = `gid://shopify/Product/${id}`;
    let sku = "";
    try {
        const res = await willowGql(ctx, `willow:gql:pid:${id}`, PRODUCT_BY_ID_QUERY, { id: gid });
        if (res?.status === 200) sku = normalizeWillowGqlSku(pickBestVariantSku(res?.json?.data?.product));
    } catch {
        // Best-effort repair: any failure simply leaves the SKU unresolved.
        sku = "";
    }

    cache.set(id, sku);
    return sku;
}
/** /**
@ -240,58 +233,58 @@ async function willowFetchSkuByPid(ctx, pid) {
* Budgeted to avoid exploding requests. * Budgeted to avoid exploding requests.
*/ */
async function willowRepairDiscoveredItems(ctx, discovered, prevDb) {
    // Maximum number of GQL lookups per call; config.willowparkGqlBudget overrides the default of 200.
    const budget = Number.isFinite(ctx?.config?.willowparkGqlBudget) ? ctx.config.willowparkGqlBudget : 200;
    let used = 0;

    // Items are mutated in place, so the map does not need to be re-set.
    for (const [url, it] of discovered.entries()) {
        if (!it) continue;

        // Seed from prev DB so we don't repair repeatedly if we already learned a good SKU.
        const prev = prevDb?.byUrl?.get(url);
        if (prev) it.sku = pickBetterSku(it.sku, prev.sku);

        if (!needsSkuDetail(it.sku)) continue;

        // Without a product id there is nothing to look up; don't charge the budget for it.
        if (!String(it.pid || "").trim()) continue;

        // Budget spent: skip the lookup but keep iterating, so every remaining
        // item still receives the prev-DB seeding above.
        if (used >= budget) continue;

        const repaired = await willowFetchSkuByPid(ctx, it.pid);
        if (repaired) it.sku = pickBetterSku(repaired, it.sku);
        used++;
    }

    ctx.logger.ok(`${ctx.catPrefixOut} | Willow SKU repair (GQL): used=${used}/${budget}`);
}
/**
 * Builds the Willow Park store descriptor consumed by the scanner.
 *
 * @param {string} defaultUa - User-Agent string stored as store.ua.
 * @returns {object} Store definition: identity fields, parser/URL/empty-page hooks,
 *   the SKU-repair hook, and the list of categories to scan.
 */
function createStore(defaultUa) {
    const categories = [
        {
            key: "scotch",
            label: "Scotch",
            startUrl: "https://www.willowpark.net/collections/scotch?filter.v.availability=1",
            discoveryStartPage: 5,
        },
        {
            key: "rum",
            label: "Rum",
            startUrl: "https://www.willowpark.net/collections/rum?filter.v.availability=1",
            discoveryStartPage: 3,
        },
    ];

    return {
        key: "willowpark",
        name: "Willow Park",
        host: "www.willowpark.net",
        ua: defaultUa,

        parseProducts: parseProductsWillowPark,
        makePageUrl: makePageUrlShopifyQueryPage,
        isEmptyListingPage: willowIsEmptyListingPage,

        // Hook called by the scanner after page discovery, before merging into the DB.
        repairDiscoveredItems: willowRepairDiscoveredItems,

        categories,
    };
}
module.exports = { createStore, parseProductsWillowPark }; module.exports = { createStore, parseProductsWillowPark };

View file

@ -16,84 +16,86 @@ const STATUS_W = 4;
const PROG_W = 4; const PROG_W = 4;
/**
 * Formats a byte count with humanBytes() and right-aligns it in an 8-character column.
 *
 * @param {number} bytes - Byte count, passed straight to humanBytes().
 * @returns {string} Left-padded size string.
 */
function kbStr(bytes) {
    const human = humanBytes(bytes);
    return human.padStart(8, " ");
}
/**
 * Formats a duration in milliseconds as seconds, right-aligned in a 7-character column.
 *
 * Durations that round to under 10s keep one decimal ("1.2s"); longer ones
 * are shown as whole seconds ("12s"). Non-finite input is rendered as 0.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Left-padded duration label.
 */
function secStr(ms) {
    const seconds = Number.isFinite(ms) ? ms / 1000 : 0;
    const tenths = Math.round(seconds * 10) / 10;
    const label = tenths < 10 ? `${tenths.toFixed(1)}s` : `${Math.round(seconds)}s`;
    return label.padStart(7, " ");
}
/**
 * Renders done/total as a whole-number percentage (floored), e.g. " 42%".
 *
 * @param {number} done - Completed units.
 * @param {number} total - Total units; a falsy total yields 0%.
 * @returns {string} Percentage padded via padLeft(…, 3) followed by "%".
 */
function pctStr(done, total) {
    let pct = 0;
    if (total) pct = Math.floor((done / total) * 100);
    return `${padLeft(pct, 3)}%`;
}
/**
 * Renders "i/total" with i padded to the digit count of total, so columns line up.
 *
 * @param {number} i - Current page number.
 * @param {number} total - Total page count.
 * @returns {string} e.g. " 3/12".
 */
function pageStr(i, total) {
    const width = String(total).length;
    const current = padLeft(i, width);
    return `${current}/${total}`;
}
/**
 * Formats the "action" column of a progress line.
 *
 * @param {*} s - Value to display; stringified first.
 * @returns {string} The text passed through padRightV with width ACTION_W.
 */
function actionCell(s) {
    const text = String(s);
    return padRightV(text, ACTION_W);
}
/**
 * Formats the "status" column of a progress line.
 *
 * @param {object} logger - Provides color(text, code) and the C.green / C.yellow codes.
 * @param {string} statusRaw - Status text; a falsy value yields an uncolored blank cell.
 * @param {boolean} okBool - Green when truthy, yellow otherwise.
 * @returns {string} Cell padded via padRightV to STATUS_W, colored when non-empty.
 */
function statusCell(logger, statusRaw, okBool) {
    const cell = padRightV(String(statusRaw || ""), STATUS_W);
    if (!statusRaw) return cell;
    const tint = okBool ? logger.C.green : logger.C.yellow;
    return logger.color(cell, tint);
}
/**
 * Formats the "progress" column of a progress line.
 *
 * @param {*} v - Progress value; null/undefined is shown as "----".
 * @returns {string} The text passed through padLeftV with width PROG_W.
 */
function progCell(v) {
    const text = String(v ?? "----");
    return padLeftV(text, PROG_W);
}
/**
 * Emits one pipe-delimited progress line through logger.ok().
 *
 * Columns: category prefix | action | status | progress | free-form rest.
 *
 * @param {object} logger - Logger exposing ok(), color() and C.
 * @param {object} ctx - Category context; ctx.catPrefixOut is the first column.
 * @param {string} action - Action column text.
 * @param {string} statusRaw - Status column text (may be empty).
 * @param {boolean} statusOk - Selects the status color.
 * @param {*} progVal - Progress column value.
 * @param {string} rest - Trailing detail text.
 */
function logProgressLine(logger, ctx, action, statusRaw, statusOk, progVal, rest) {
    const columns = [
        ctx.catPrefixOut,
        actionCell(action),
        statusCell(logger, statusRaw, statusOk),
        progCell(progVal),
        rest,
    ];
    logger.ok(columns.join(" | "));
}
/**
 * Builds formatters for the "Store | Category" log prefix, with column widths
 * sized to the longest store name and category label across all stores.
 *
 * @param {Array<object>} stores - Store definitions; reads name and categories[].label.
 * @param {object} logger - Provides bold(text) for the display variant.
 * @returns {{catPrefixRaw: Function, catPrefixOut: Function, width: number, catW: number}}
 *   catPrefixRaw(store, cat) gives the plain padded prefix, catPrefixOut the bold one;
 *   width and catW are the computed column widths (each at least 1).
 */
function makeCatPrefixers(stores, logger) {
    const nameLengths = stores.map((s) => String(s.name || "").length);
    const labelLengths = stores.flatMap((s) => (s.categories || []).map((c) => String(c.label || "").length));

    // The trailing 1 keeps Math.max well-defined for empty inputs.
    const storeW = Math.max(...nameLengths, 1);
    const catW = Math.max(...labelLengths, 1);

    function catPrefixRaw(store, cat) {
        return `${padRight(String(store.name || ""), storeW)} | ${padRight(String(cat.label || ""), catW)}`;
    }

    function catPrefixOut(store, cat) {
        return logger.bold(catPrefixRaw(store, cat));
    }

    return { catPrefixRaw, catPrefixOut, width: storeW, catW };
}
/**
 * Assembles the per-category context object used throughout a scan.
 *
 * @param {object} store - Store definition (reads store.key).
 * @param {object} cat - Category definition (reads cat.key and cat.startUrl).
 * @param {Function} catPrefixOutFn - (store, cat) => display prefix string.
 * @param {object} config - Run configuration (reads config.dbDir).
 * @returns {{store: object, cat: object, baseUrl: string, dbFile: string, catPrefixOut: string}}
 */
function buildCategoryContext(store, cat, catPrefixOutFn, config) {
    const baseUrl = normalizeBaseUrl(cat.startUrl);
    const dbKey = `${store.key}__${cat.key}`;
    const dbFile = dbPathFor(dbKey, baseUrl, config.dbDir);
    const catPrefixOut = catPrefixOutFn(store, cat);
    return { store, cat, baseUrl, dbFile, catPrefixOut };
}
/**
 * Reads the category's DB file and logs how many entries it holds.
 *
 * @param {object} logger - Provides ok() and dim().
 * @param {object} ctx - Category context (reads ctx.dbFile and ctx.catPrefixOut).
 * @returns {object} The value returned by readDb(); its byUrl map is read here for the count.
 */
function loadCategoryDb(logger, ctx) {
    const prevDb = readDb(ctx.dbFile);
    const count = padLeft(prevDb.byUrl.size, 5);
    logger.ok(`${ctx.catPrefixOut} | DB loaded: ${count} | ${logger.dim(ctx.dbFile)}`);
    return prevDb;
}
/**
 * Applies the category's optional allowUrl filter to a parsed item.
 *
 * @param {object} ctx - Category context; ctx.cat.allowUrl is the optional filter.
 * @param {string} finalUrl - URL of the page the item was parsed from.
 * @param {object} item - Parsed product item.
 * @returns {*} true when no filter function is configured; otherwise the filter's
 *   own return value. Note the filter is invoked as allowUrl(item, ctx, finalUrl).
 */
function shouldTrackItem(ctx, finalUrl, item) {
    const allow = ctx?.cat?.allowUrl;
    return typeof allow === "function" ? allow(item, ctx, finalUrl) : true;
}
/** /**
@ -104,285 +106,309 @@ function shouldTrackItem(ctx, finalUrl, item) {
* inside links that often have "page-numbers" class, but works even without it. * inside links that often have "page-numbers" class, but works even without it.
*/ */
function extractTotalPagesFromPaginationHtml(html) {
    const s = String(html || "");

    // Inside an HTML attribute the query separator "&" is normally entity-encoded
    // (&amp;, &#038;, &#x26;), so a bare [?&] check misses hrefs such as
    // "?filter.v.availability=1&amp;page=7". Accept raw and encoded forms alike.
    const pagePatterns = [
        // /page/23/
        /href=["'][^"']*\/page\/(\d+)\/[^"']*["']/gi,
        // ?paged=23
        /href=["'][^"']*(?:[?&]|&amp;|&#0*38;|&#x0*26;)paged=(\d+)[^"']*["']/gi,
        // Shopify: ?page=23
        /href=["'][^"']*(?:[?&]|&amp;|&#0*38;|&#x0*26;)page=(\d+)[^"']*["']/gi,
    ];

    let max = 0;
    for (const re of pagePatterns) {
        for (const m of s.matchAll(re)) {
            const n = Number(m[1]);
            if (Number.isFinite(n) && n > max) max = n;
        }
    }

    // Conservative: a lone "page 1" link says nothing about the total, so only
    // report a count when some link points past the first page.
    return max > 1 ? max : 0;
}
/**
 * Fetches a listing page and reports whether it contains any products.
 *
 * @param {object} ctx - Category context; uses ctx.http.fetchTextWithRetry, ctx.store
 *   (ua, optional isEmptyListingPage, optional parseProducts) and ctx.config.defaultParseProducts.
 * @param {string} url - Page URL to probe.
 * @returns {Promise<{ok: boolean, items: number}>} ok is true when at least one item parsed.
 *   Any fetch or parse error is reported as { ok: false, items: 0 } rather than thrown.
 */
async function pageHasProducts(ctx, url) {
    const { http, config } = ctx;
    try {
        const { text, finalUrl } = await http.fetchTextWithRetry(url, "discover", ctx.store.ua);

        if (typeof ctx.store.isEmptyListingPage === "function") {
            if (ctx.store.isEmptyListingPage(text, ctx, url)) return { ok: false, items: 0 };
        }

        const parser = ctx.store.parseProducts || config.defaultParseProducts;
        // Pass finalUrl like the other parser call sites in this file, so parsers
        // that resolve relative links see the same arguments during discovery.
        const items = parser(text, ctx, finalUrl).length;
        return { ok: items > 0, items };
    } catch {
        // Deliberate best-effort: a failed probe counts as "no products here".
        return { ok: false, items: 0 };
    }
}
/**
 * Probes one page number during discovery and logs the outcome.
 *
 * @param {object} ctx - Category context; ctx.cat.discoveryDelayMs (optional) adds a
 *   pause before the request.
 * @param {string} baseUrl - Category base URL.
 * @param {number} pageNum - Page number to probe.
 * @param {object} state - Discovery state, used only to render the progress column.
 * @returns {Promise<{ok: boolean, items: number}>} Result of pageHasProducts().
 */
async function probePage(ctx, baseUrl, pageNum, state) {
    const url = makePageUrlForCtx(ctx, baseUrl, pageNum);

    const delay = Number.isFinite(ctx?.cat?.discoveryDelayMs) ? ctx.cat.discoveryDelayMs : 0;
    if (delay > 0) await sleep(delay);

    const startedAt = Date.now();
    const result = await pageHasProducts(ctx, url);
    const elapsedMs = Date.now() - startedAt;

    // pageHasProducts does not report a byte count, so that column is left blank.
    const detail = `items=${padLeftV(result.items, 3)} | bytes=${padLeftV("", 8)} | ${padRightV(ctx.http.inflightStr(), 11)} | ${secStr(elapsedMs)}`;
    logProgressLine(
        ctx.logger,
        ctx,
        `Discover probe page=${padLeftV(pageNum, 4)}`,
        result.ok ? "OK" : "MISS",
        Boolean(result.ok),
        discoverProg(state),
        detail,
    );
    return result;
}
/**
 * Renders binary-search progress as a percentage string.
 *
 * Progress is how much of the initial [loOk, hiMiss] span has been eliminated.
 * Outside the "binary" phase (or with no state) it reports zero.
 *
 * @param {object} state - Discovery state: phase, loOk, hiMiss, binInitialSpan.
 * @returns {string} Percentage text such as " 50%" or "100%".
 */
function discoverProg(state) {
    if (!state || state.phase !== "binary") return " 0%";

    const initial = Math.max(1, state.binInitialSpan);
    // A span of 1 means the answer was known before any bisection step.
    if (initial <= 1) return "100%";

    const span = Math.max(1, state.hiMiss - state.loOk);
    const remaining = Math.max(0, span - 1);
    const total = Math.max(1, initial - 1);
    const raw = Math.floor(((total - remaining) / total) * 100);
    const pct = Math.max(0, Math.min(100, raw));
    return `${padLeft(pct, 3)}%`;
}
/**
 * Bisects between a page known to have products and one known to be empty,
 * returning the highest page number that still has products.
 *
 * @param {object} ctx - Category context (passed to probePage).
 * @param {string} baseUrl - Category base URL.
 * @param {number} loOk - Page number known to contain products.
 * @param {number} hiMiss - Page number known to be empty.
 * @param {object} state - Discovery state; phase/loOk/hiMiss/binInitialSpan are
 *   updated here so progress logging reflects the current bounds.
 * @returns {Promise<number>} The last page that probed OK.
 */
async function binaryFindLastOk(ctx, baseUrl, loOk, hiMiss, state) {
    state.phase = "binary";
    state.loOk = loOk;
    state.hiMiss = hiMiss;
    state.binInitialSpan = Math.max(1, hiMiss - loOk);

    let lo = loOk;
    let hi = hiMiss;
    while (hi - lo > 1) {
        const mid = lo + Math.floor((hi - lo) / 2);
        // Publish the bounds before probing so the logged progress is current.
        state.loOk = lo;
        state.hiMiss = hi;

        const probe = await probePage(ctx, baseUrl, mid, state);
        if (probe.ok) lo = mid;
        else hi = mid;
    }

    state.loOk = lo;
    state.hiMiss = hi;
    return lo;
}
/**
 * Determines how many listing pages a category has.
 *
 * Strategy:
 *   1. Fetch page 1 once. If it is empty or parses to zero items, return 1.
 *   2. Try to read the page count from pagination links in that HTML.
 *   3. Otherwise probe: jump to `guess`, then advance by `step` until a page
 *      misses, and bisect between the last hit and the first miss.
 *
 * @param {object} ctx - Category context (http, store, cat, config, logger, catPrefixOut).
 * @param {string} baseUrl - Category base URL.
 * @param {number} guess - First page to probe in the fallback; clamped to at least 2.
 * @param {number} step - Stride between fallback probes; clamped to at least 1.
 * @returns {Promise<number>} Total page count (at least 1).
 */
async function discoverTotalPagesFast(ctx, baseUrl, guess, step) {
    const state = { phase: "pre", loOk: 1, hiMiss: 2, binInitialSpan: 0 };
    const notAListingMsg = `${ctx.store.name} | ${ctx.cat.label} | Page 1 did not look like a listing. Defaulting to 1.`;

    // Fetch page 1 ONCE and try to extract total pages from pagination.
    const url1 = makePageUrlForCtx(ctx, baseUrl, 1);
    const t0 = Date.now();
    const { text: html1, ms, finalUrl } = await ctx.http.fetchTextWithRetry(url1, "discover", ctx.store.ua);
    const pMs = Date.now() - t0;

    if (typeof ctx.store.isEmptyListingPage === "function") {
        if (ctx.store.isEmptyListingPage(html1, ctx, url1)) {
            ctx.logger.warn(notAListingMsg);
            return 1;
        }
    }

    const parser = ctx.store.parseProducts || ctx.config.defaultParseProducts;
    const items1 = parser(html1, ctx, finalUrl).length;

    logProgressLine(
        ctx.logger,
        ctx,
        `Discover probe page=${padLeftV(1, 4)}`,
        items1 > 0 ? "OK" : "MISS",
        items1 > 0,
        discoverProg(state),
        `items=${padLeftV(items1, 3)} | bytes=${padLeftV("", 8)} | ${padRightV(ctx.http.inflightStr(), 11)} | ${secStr(ms || pMs)}`,
    );

    if (items1 <= 0) {
        ctx.logger.warn(notAListingMsg);
        return 1;
    }

    const extracted = extractTotalPagesFromPaginationHtml(html1);

    // Shopify collections with filters often lie about pagination.
    // If page 1 looks full, don't trust a tiny extracted count.
    if (extracted >= 1) {
        const looksTruncated = extracted <= 2 && items1 >= 40; // Shopify default page size ≈ 48
        if (!looksTruncated) {
            ctx.logger.ok(`${ctx.catPrefixOut} | Total pages (from pagination): ${extracted}`);
            return extracted;
        }
        ctx.logger.warn(
            `${ctx.catPrefixOut} | Pagination says ${extracted} but page looks full; falling back to probe`,
        );
    }

    // Fallback to probing if pagination parse fails.
    // Sanitize the inputs: a non-finite guess would probe page NaN, and a stride
    // below 1 would re-probe the same page forever while it keeps returning OK.
    const firstProbe = Number.isFinite(guess) ? Math.max(2, Math.floor(guess)) : 2;
    const stride = Number.isFinite(step) ? Math.max(1, Math.floor(step)) : 1;

    const pg = await probePage(ctx, baseUrl, firstProbe, state);
    if (!pg.ok) return await binaryFindLastOk(ctx, baseUrl, 1, firstProbe, state);

    let lastOk = firstProbe;
    while (true) {
        const probe = lastOk + stride;
        const pr = await probePage(ctx, baseUrl, probe, state);
        if (!pr.ok) return await binaryFindLastOk(ctx, baseUrl, lastOk, probe, state);
        lastOk = probe;
        if (lastOk > 5000) {
            ctx.logger.warn(
                `${ctx.store.name} | ${ctx.cat.label} | Discovery hit safety cap at ${lastOk}. Using that as total pages.`,
            );
            return lastOk;
        }
    }
}
/**
 * Runs a full scan of one category: discover the page count, fetch and parse
 * every page, let the store repair items, merge into the DB, persist, and report.
 *
 * If the store defines scanCategory(), that hook replaces this whole pipeline.
 *
 * @param {object} ctx - Category context (store, cat, config, http, logger, baseUrl, dbFile, catPrefixOut).
 * @param {object} prevDb - Previously persisted DB for this category.
 * @param {object} report - Run report; report.categories and report.totals are updated in place.
 * @returns {Promise<void>}
 */
async function discoverAndScanCategory(ctx, prevDb, report) {
    const { logger, config } = ctx;

    if (typeof ctx.store.scanCategory === "function") {
        await ctx.store.scanCategory(ctx, prevDb, report);
        return;
    }

    const t0 = Date.now();

    const guess = Number.isFinite(ctx.cat.discoveryStartPage) ? ctx.cat.discoveryStartPage : config.discoveryGuess;
    const step = Number.isFinite(ctx.cat.discoveryStep) ? ctx.cat.discoveryStep : config.discoveryStep;

    const totalPages = await discoverTotalPagesFast(ctx, ctx.baseUrl, guess, step);
    // Only a finite maxPages acts as a cap. null (the documented "no cap") and a
    // missing value both mean "scan everything"; previously undefined produced
    // NaN here and silently scanned zero pages.
    const scanPages = Number.isFinite(config.maxPages) ? Math.min(config.maxPages, totalPages) : totalPages;

    logger.ok(
        `${ctx.catPrefixOut} | Pages: ${scanPages}${scanPages !== totalPages ? ` (cap from ${totalPages})` : ""}`,
    );

    const pages = [];
    for (let p = 1; p <= scanPages; p++) pages.push(makePageUrlForCtx(ctx, ctx.baseUrl, p));

    let donePages = 0;

    const pageConc = Number.isFinite(ctx.cat.pageConcurrency) ? ctx.cat.pageConcurrency : config.concurrency;
    const pageStagger = Number.isFinite(ctx.cat.pageStaggerMs) ? ctx.cat.pageStaggerMs : config.staggerMs;

    const perPageItems = await parallelMapStaggered(pages, pageConc, pageStagger, async (pageUrl, idx) => {
        const pnum = idx + 1;

        const {
            text: html,
            ms,
            bytes,
            status,
            finalUrl,
        } = await ctx.http.fetchTextWithRetry(pageUrl, `page:${ctx.store.key}:${ctx.cat.key}:${pnum}`, ctx.store.ua);

        const parser = ctx.store.parseProducts || config.defaultParseProducts;
        const itemsRaw = parser(html, ctx, finalUrl);

        // Apply the category's optional allowUrl filter.
        const items = [];
        for (const it of itemsRaw) {
            if (shouldTrackItem(ctx, finalUrl, it)) items.push(it);
        }

        donePages++;
        logProgressLine(
            logger,
            ctx,
            `Page ${pageStr(pnum, pages.length)}`,
            status ? String(status) : "",
            status >= 200 && status < 400,
            pctStr(donePages, pages.length),
            `items=${padLeft(items.length, 3)} | bytes=${kbStr(bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(ms)}`,
        );

        return items;
    });

    // De-duplicate by URL; when a URL repeats, the later occurrence wins.
    const discovered = new Map();
    let dups = 0;
    for (const arr of perPageItems) {
        for (const it of arr) {
            if (discovered.has(it.url)) dups++;
            discovered.set(it.url, it);
        }
    }

    if (typeof ctx.store.repairDiscoveredItems === "function") {
        await ctx.store.repairDiscoveredItems(ctx, discovered, prevDb);
    }

    logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}${dups ? ` (${dups} dups)` : ""}`);

    const { merged, newItems, updatedItems, removedItems, restoredItems, metaChangedItems } = mergeDiscoveredIntoDb(
        prevDb,
        discovered,
        { storeLabel: ctx.store.name },
    );

    const dbObj = buildDbObject(ctx, merged);
    writeJsonAtomic(ctx.dbFile, dbObj);

    logger.ok(`${ctx.catPrefixOut} | DB saved: ${logger.dim(ctx.dbFile)} (${dbObj.count} items)`);

    const elapsed = Date.now() - t0;
    logger.ok(
        `${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`,
    );

    report.categories.push({
        store: ctx.store.name,
        label: ctx.cat.label,
        key: ctx.cat.key,
        dbFile: ctx.dbFile,
        scannedPages: scanPages,
        discoveredUnique: discovered.size,
        newCount: newItems.length,
        updatedCount: updatedItems.length,
        removedCount: removedItems.length,
        restoredCount: restoredItems.length,
        metaChangedCount: metaChangedItems.length,
        elapsedMs: elapsed,
    });

    report.totals.newCount += newItems.length;
    report.totals.updatedCount += updatedItems.length;
    report.totals.removedCount += removedItems.length;
    report.totals.restoredCount += restoredItems.length;
    report.totals.metaChangedCount += metaChangedItems.length;

    addCategoryResultToReport(
        report,
        ctx.store.name,
        ctx.cat.label,
        newItems,
        updatedItems,
        removedItems,
        restoredItems,
    );
}
module.exports = { makeCatPrefixers, buildCategoryContext, loadCategoryDb, discoverAndScanCategory }; module.exports = { makeCatPrefixers, buildCategoryContext, loadCategoryDb, discoverAndScanCategory };

View file

@ -8,87 +8,87 @@ const { normalizeSkuKey } = require("../utils/sku");
const { priceToNumber } = require("../utils/price"); const { priceToNumber } = require("../utils/price");
/**
 * Create `dir`, including any missing parent directories.
 * Calling it on a directory that already exists is a no-op.
 *
 * @param {string} dir - Directory path to create.
 * @returns {void}
 */
function ensureDir(dir) {
    fs.mkdirSync(dir, { recursive: true });
}
/**
 * Build the path of a category DB file inside `dbDir`, creating `dbDir` if needed.
 *
 * The file name is `<safeKey>__<hash>.json`:
 *  - `safeKey` is `key` with every run of characters outside `[a-zA-Z0-9_-]`
 *    collapsed into a single "-";
 *  - `hash` is the first 8 hex characters of the SHA-1 digest of `baseUrl`.
 *
 * @param {string} key - Category key; becomes the readable part of the file name.
 * @param {string} baseUrl - Source URL; only its hash ends up in the file name.
 * @param {string} dbDir - Directory that holds the DB files.
 * @returns {string} Path of the DB file (the file itself is not created).
 */
function dbPathFor(key, baseUrl, dbDir) {
    ensureDir(dbDir);
    const hash = crypto.createHash("sha1").update(String(baseUrl)).digest("hex").slice(0, 8);
    const safeKey = String(key).replace(/[^a-zA-Z0-9_-]+/g, "-");
    return path.join(dbDir, `${safeKey}__${hash}.json`);
}
/**
 * Load a category DB file into a map keyed by item URL.
 *
 * Best-effort by design: a missing, unreadable or malformed file yields an
 * empty map instead of throwing. Rows are kept only when `url` is a string
 * starting with "http"; every kept row is normalized to string fields plus a
 * boolean `removed` flag. The image is taken from `img`, falling back to the
 * `image` and `thumb` keys, and trimmed.
 *
 * @param {string} file - Path of the JSON DB file.
 * @returns {{ byUrl: Map<string, { name: string, price: string, sku: string, url: string, img: string, removed: boolean }> }}
 */
function readDb(file) {
    const byUrl = new Map();
    try {
        const obj = JSON.parse(fs.readFileSync(file, "utf8"));
        const items = obj && Array.isArray(obj.items) ? obj.items : [];
        for (const it of items) {
            // Skip rows without a usable http(s) URL; the URL is the map key.
            if (!it || typeof it.url !== "string" || !it.url.startsWith("http")) continue;
            byUrl.set(it.url, {
                name: String(it.name || ""),
                price: String(it.price || ""),
                sku: String(it.sku || ""),
                url: it.url,
                img: String(it.img || it.image || it.thumb || "").trim(),
                removed: Boolean(it.removed),
            });
        }
    } catch {
        // ignore missing or parse errors
    }
    return { byUrl };
}
/**
 * Write `obj` to `file` as pretty-printed JSON (2-space indent, trailing newline)
 * by writing `<file>.tmp` first and then renaming it over `file`, so readers
 * never observe a half-written target.
 *
 * If writing or renaming fails, the temp file is removed (best-effort) and the
 * original error is rethrown.
 *
 * @param {string} file - Destination path; its parent directory is created if missing.
 * @param {*} obj - JSON-serializable value.
 * @returns {void}
 * @throws Whatever `JSON.stringify`, `fs.writeFileSync` or `fs.renameSync` throws.
 */
function writeJsonAtomic(file, obj) {
    ensureDir(path.dirname(file));
    const tmp = `${file}.tmp`;
    // Serialize before touching the disk so a serialization error creates nothing.
    const json = JSON.stringify(obj, null, 2) + "\n";
    try {
        fs.writeFileSync(tmp, json, "utf8");
        fs.renameSync(tmp, file);
    } catch (err) {
        try {
            fs.unlinkSync(tmp);
        } catch {
            // temp file was never created or is already gone; keep the original error
        }
        throw err;
    }
}
/**
 * Build the serializable DB document (format version 6) for one category.
 *
 * Items are sorted by name with `localeCompare` (a missing name sorts as "").
 * Each row is written with a normalized SKU key, a trimmed image URL and a
 * boolean `removed` flag.
 *
 * @param {object} ctx - Category context; reads `ctx.store.host`, `ctx.store.name`,
 *   `ctx.cat.key`, `ctx.cat.label` and `ctx.baseUrl`.
 * @param {Map<string, object>} merged - URL -> item map to persist.
 * @returns {object} Document with header fields, `count` and the sorted `items` array.
 */
function buildDbObject(ctx, merged) {
    // Label handed to SKU normalization; falls back to the host when the store has no name.
    const storeLabel = ctx?.store?.name || ctx?.store?.host || "";
    const sortedItems = [...merged.values()].sort((a, b) => (a.name || "").localeCompare(b.name || ""));
    return {
        version: 6,
        store: ctx.store.host,
        storeLabel: ctx.store.name,
        category: ctx.cat.key,
        categoryLabel: ctx.cat.label,
        source: ctx.baseUrl,
        updatedAt: new Date().toISOString(),
        count: merged.size,
        items: sortedItems.map((it) => ({
            name: it.name,
            price: it.price || "",
            // IMPORTANT: keep real 6-digit when present; otherwise store stable u:hash(store|url)
            sku: normalizeSkuKey(it.sku, { storeLabel, url: it.url }) || "",
            url: it.url,
            img: String(it.img || "").trim(),
            removed: Boolean(it.removed),
        })),
    };
}
/**
 * List the `.json` files that sit directly inside `dbDir`.
 *
 * Only regular files are returned (directories are skipped, no recursion).
 * A missing or unreadable directory yields an empty list instead of throwing.
 *
 * @param {string} dbDir - Directory to scan.
 * @returns {string[]} Paths formed by joining `dbDir` with each matching file name.
 */
function listDbFiles(dbDir) {
    const out = [];
    try {
        for (const ent of fs.readdirSync(dbDir, { withFileTypes: true })) {
            if (!ent.isFile()) continue;
            const name = ent.name || "";
            if (!name.endsWith(".json")) continue;
            out.push(path.join(dbDir, name));
        }
    } catch {
        // ignore
    }
    return out;
}
/** /**
@ -96,42 +96,43 @@ function listDbFiles(dbDir) {
* but DB rows remain raw/mined skuKey. * but DB rows remain raw/mined skuKey.
*/ */
function buildCheapestSkuIndexFromAllDbs(dbDir, { skuMap } = {}) {
    // Scans every DB file in `dbDir` and returns a Map of
    // canonical sku -> { storeLabel, priceNum } holding the lowest positive price
    // seen among non-removed rows. Files that fail to read or parse are skipped.
    const cheapest = new Map(); // canonSku -> { storeLabel, priceNum }

    // Mapping is optional: without a usable `skuMap.canonicalSku` the raw skuKey is the index key.
    const hasCanonicalizer = Boolean(skuMap) && typeof skuMap.canonicalSku === "function";

    for (const file of listDbFiles(dbDir)) {
        try {
            const obj = JSON.parse(fs.readFileSync(file, "utf8"));
            const storeLabel = String(obj?.storeLabel || obj?.store || "");
            const items = Array.isArray(obj?.items) ? obj.items : [];

            for (const it of items) {
                if (it?.removed) continue;

                const skuKey = normalizeSkuKey(it?.sku || "", { storeLabel, url: it?.url || "" });
                if (!skuKey) continue;

                const canon = hasCanonicalizer ? skuMap.canonicalSku(skuKey) : skuKey;

                // Rows without a parsable, strictly positive price cannot be "cheapest".
                const p = priceToNumber(it?.price || "");
                if (!Number.isFinite(p) || p <= 0) continue;

                // Strict "<" keeps the first store seen when prices tie.
                const prev = cheapest.get(canon);
                if (!prev || p < prev.priceNum) cheapest.set(canon, { storeLabel, priceNum: p });
            }
        } catch {
            // ignore parse errors
        }
    }

    return cheapest;
}
module.exports = { module.exports = {
ensureDir, ensureDir,
dbPathFor, dbPathFor,
readDb, readDb,
writeJsonAtomic, writeJsonAtomic,
buildDbObject, buildDbObject,
listDbFiles, listDbFiles,
buildCheapestSkuIndexFromAllDbs, buildCheapestSkuIndexFromAllDbs,
}; };

View file

@ -5,207 +5,206 @@ const { normalizeSkuKey, normalizeCspc, pickBetterSku } = require("../utils/sku"
const { normPrice } = require("../utils/price"); const { normPrice } = require("../utils/price");
/**
 * Normalize an image URL for storage.
 *
 * Returns the trimmed string, or "" when the value is empty, is a `data:` URI,
 * or still contains an unexpanded `{width}` placeholder (raw or percent-encoded).
 *
 * @param {*} v - Candidate image URL.
 * @returns {string}
 */
function normImg(v) {
    const s = String(v || "").trim();
    if (!s) return "";
    if (/^data:/i.test(s)) return "";
    if (/%7Bwidth%7D|\{width\}/i.test(s)) return ""; // drop Shopify width-template URLs
    return s;
}
/**
 * Read a store label off a previously loaded DB object.
 *
 * @param {object} [prevDb] - Object that may carry `storeLabel` or `store`.
 * @returns {string} Trimmed `storeLabel`, else trimmed `store`, else "".
 */
function dbStoreLabel(prevDb) {
    return String(prevDb?.storeLabel || prevDb?.store || "").trim();
}
/**
 * Merge the items discovered in this run into the previous DB state.
 *
 * Matching order for each discovered item:
 *  1. same URL in the previous DB;
 *  2. otherwise, same non-synthetic skuKey (anything not starting with "u:")
 *     among previously active rows - treated as a URL move;
 *  3. otherwise the item is new.
 *
 * Previous rows that were neither rediscovered nor matched by skuKey are
 * flagged `removed: true` (they stay in the merged map).
 *
 * @param {{ byUrl: Map<string, object> }} prevDb - Previous DB state.
 * @param {Map<string, object>} discovered - URL -> item scraped in this run.
 * @param {{ storeLabel?: string }} [options] - `storeLabel` feeds SKU normalization;
 *   falls back to the label carried by `prevDb`.
 * @returns {{ merged: Map<string, object>, newItems: object[], updatedItems: object[],
 *   removedItems: object[], restoredItems: object[], metaChangedItems: object[] }}
 *   `updatedItems` holds price changes only; other field changes and URL moves
 *   land in `metaChangedItems`.
 * @throws {Error} When no store label can be determined.
 */
function mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel } = {}) {
    const effectiveStoreLabel = String(storeLabel || dbStoreLabel(prevDb)).trim();
    if (!effectiveStoreLabel) {
        throw new Error(
            "mergeDiscoveredIntoDb: missing storeLabel; refusing to generate synthetic SKUs with fallback 'store'",
        );
    }

    function normalizeSkuForDb(raw, url) {
        return normalizeSkuKey(raw, { storeLabel: effectiveStoreLabel, url });
    }

    // Start from a copy of the previous state; prevDb.byUrl itself is never mutated.
    const merged = new Map(prevDb.byUrl);
    const newItems = [];
    const updatedItems = [];
    const removedItems = [];
    const restoredItems = [];
    const metaChangedItems = [];

    // Choose a deterministic "best" record among dup active SKU rows.
    // Prefer: more complete fields, then lexicographically smallest URL.
    function scoreItem(it) {
        if (!it) return 0;
        const name = String(it.name || "").trim();
        const price = String(it.price || "").trim();
        const url = String(it.url || "").trim();
        const img = String(it.img || "").trim();
        return (name ? 1 : 0) + (price ? 1 : 0) + (url ? 1 : 0) + (img ? 1 : 0);
    }

    function pickBetter({ url: urlA, item: a }, { url: urlB, item: b }) {
        const sa = scoreItem(a);
        const sb = scoreItem(b);
        if (sa !== sb) return sa > sb ? { url: urlA, item: a } : { url: urlB, item: b };
        // tie-breaker: stable + deterministic
        return String(urlA || "") <= String(urlB || "") ? { url: urlA, item: a } : { url: urlB, item: b };
    }

    // Index active items by non-synthetic skuKey (CSPC / id:* / upc:* / etc).
    // Also track *all* urls per skuKey to cleanup dupes.
    const prevBySkuKey = new Map(); // skuKey -> { url, item } (best)
    const prevUrlsBySkuKey = new Map(); // skuKey -> Set(urls)

    for (const [url, it] of prevDb.byUrl.entries()) {
        if (!it || it.removed) continue;

        const skuKey = normalizeSkuForDb(it.sku, url);
        if (!skuKey || /^u:/i.test(skuKey)) continue;

        let set = prevUrlsBySkuKey.get(skuKey);
        if (!set) prevUrlsBySkuKey.set(skuKey, (set = new Set()));
        set.add(url);

        const cur = prevBySkuKey.get(skuKey);
        const next = { url, item: it };
        if (!cur) prevBySkuKey.set(skuKey, next);
        else prevBySkuKey.set(skuKey, pickBetter(cur, next));
    }

    const matchedPrevUrls = new Set(); // old URLs we "found" via skuKey even if URL changed

    for (const [url, nowRaw] of discovered.entries()) {
        let prev = prevDb.byUrl.get(url);
        let prevUrlForThisItem = url;

        // URL not found in previous DB: try to match by non-synthetic skuKey.
        if (!prev) {
            const nowSkuKey = normalizeSkuForDb(nowRaw.sku, url);
            if (nowSkuKey && !/^u:/i.test(nowSkuKey)) {
                const hit = prevBySkuKey.get(nowSkuKey);
                if (hit && hit.url && hit.url !== url) {
                    prev = hit.item;
                    prevUrlForThisItem = hit.url;

                    // Mark ALL prior URLs for this skuKey as matched, so we don't later "remove" them.
                    const allOld = prevUrlsBySkuKey.get(nowSkuKey);
                    if (allOld) {
                        for (const u of allOld) matchedPrevUrls.add(u);
                    } else {
                        matchedPrevUrls.add(hit.url);
                    }

                    // Cleanup: remove any existing active duplicates for this skuKey from the merged map.
                    // We'll re-add the chosen record at the new URL below.
                    if (allOld) {
                        for (const u of allOld) {
                            if (u !== url && merged.has(u)) merged.delete(u);
                        }
                    } else {
                        if (merged.has(hit.url)) merged.delete(hit.url);
                    }
                }
            }
        }

        // Truly new (no URL match, no skuKey match)
        if (!prev) {
            const nowSku = normalizeSkuForDb(nowRaw.sku, url);
            const now = {
                ...nowRaw,
                sku: nowSku,
                img: normImg(nowRaw.img),
                removed: false,
            };
            newItems.push(now);
            merged.set(url, now);
            continue;
        }

        // If the previous record was removed and we found it by the SAME URL, keep current behavior (restored).
        if (prevUrlForThisItem === url && prev.removed) {
            const prevSku = normalizeSkuForDb(prev.sku, prev.url);
            const rawNowSku = normalizeSkuForDb(nowRaw.sku, url);
            const nowSku = pickBetterSku(rawNowSku, prevSku);

            const now = {
                ...nowRaw,
                sku: nowSku,
                // keep the previous image when this run did not yield a usable one
                img: normImg(nowRaw.img) || normImg(prev.img),
                removed: false,
            };

            restoredItems.push({
                url,
                name: now.name || prev.name || "",
                price: now.price || prev.price || "",
                sku: now.sku || "",
            });

            merged.set(url, now);
            continue;
        }

        // Update-in-place (or URL-move-with-skuKey): update DB, report price changes normally.
        const prevPrice = normPrice(prev.price);
        const nowPrice = normPrice(nowRaw.price);

        const prevSku = normalizeSkuForDb(prev.sku, prev.url);
        const rawNowSku = normalizeSkuForDb(nowRaw.sku, url);
        const nowSku = pickBetterSku(rawNowSku, prevSku);

        const prevImg = normImg(prev.img);
        let nowImg = normImg(nowRaw.img);
        if (!nowImg) nowImg = prevImg;

        const nameChanged = String(prev.name || "") !== String(nowRaw.name || "");
        const priceChanged = prevPrice !== nowPrice;
        const skuChanged = prevSku !== nowSku;
        const imgChanged = prevImg !== nowImg;

        // Unchanged rows keep their previous object in `merged`.
        if (nameChanged || priceChanged || skuChanged || imgChanged || prevUrlForThisItem !== url) {
            merged.set(url, { ...nowRaw, sku: nowSku, img: nowImg, removed: false });
        }

        if (priceChanged) {
            updatedItems.push({
                url,
                name: nowRaw.name || prev.name || "",
                sku: nowSku || "",
                oldPrice: prev.price || "",
                newPrice: nowRaw.price || "",
            });
        } else if (nameChanged || skuChanged || imgChanged || prevUrlForThisItem !== url) {
            // Count non-price changes (SKU upgrades, name/img changes, or URL moves) as meaningful.
            metaChangedItems.push({
                url,
                name: nowRaw.name || prev.name || "",
                sku: nowSku || "",
            });
        }
    }

    for (const [url, prev] of prevDb.byUrl.entries()) {
        if (discovered.has(url)) continue;
        if (matchedPrevUrls.has(url)) continue; // de-dupe URL changes for skuKey items (and cleanup dupes)
        if (!prev.removed) {
            const removed = { ...prev, removed: true };
            merged.set(url, removed);
            removedItems.push({
                url,
                name: prev.name || "",
                price: prev.price || "",
                // NOTE(review): this list reports normalizeCspc(prev.sku) while the other lists
                // report the normalized skuKey - confirm the difference is intended.
                sku: normalizeCspc(prev.sku) || "",
            });
        }
    }

    return { merged, newItems, updatedItems, removedItems, restoredItems, metaChangedItems };
}
module.exports = { mergeDiscoveredIntoDb }; module.exports = { mergeDiscoveredIntoDb };

View file

@ -8,247 +8,279 @@ const { buildCheapestSkuIndexFromAllDbs } = require("./db");
const { loadSkuMap } = require("../utils/sku_map"); const { loadSkuMap } = require("../utils/sku_map");
/**
 * Format a millisecond duration as seconds, right-aligned in 7 columns.
 *
 * Values that round to less than 10.0s keep one decimal ("1.2s"); anything
 * larger is rounded to whole seconds ("13s"). Non-finite input is shown as 0.0s.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Left-padded string such as "   1.2s".
 */
function secStr(ms) {
    const s = Number.isFinite(ms) ? ms / 1000 : 0;
    const tenths = Math.round(s * 10) / 10;
    const out = tenths < 10 ? `${tenths.toFixed(1)}s` : `${Math.round(s)}s`;
    return out.padStart(7, " ");
}
/**
 * Create an empty run report.
 *
 * @returns {{ startedAt: Date, categories: object[],
 *   totals: { newCount: number, updatedCount: number, removedCount: number, restoredCount: number, metaChangedCount: number },
 *   newItems: object[], updatedItems: object[], removedItems: object[], restoredItems: object[] }}
 *   Fresh object; `startedAt` is the creation time, all counters are 0 and all lists are empty.
 */
function createReport() {
    return {
        startedAt: new Date(),
        categories: [],
        totals: { newCount: 0, updatedCount: 0, removedCount: 0, restoredCount: 0, metaChangedCount: 0 },
        newItems: [],
        updatedItems: [],
        removedItems: [],
        restoredItems: [],
    };
}
/**
 * Append one category's item-level results to the run report lists.
 *
 * Every appended row is tagged with `catLabel` set to "<storeName> | <catLabel>".
 * Only the item lists are touched; `report.totals` and `report.categories`
 * are not modified here.
 *
 * @param {object} report - Report whose `newItems`, `restoredItems`, `updatedItems`
 *   and `removedItems` arrays are appended to.
 * @param {string} storeName - Store display name.
 * @param {string} catLabel - Category display label.
 * @param {object[]} newItems - Rows with `name`, `price`, `sku`, `url`.
 * @param {object[]} updatedItems - Rows with `name`, `sku`, `oldPrice`, `newPrice`, `url`.
 * @param {object[]} removedItems - Rows with `name`, `price`, `sku`, `url`.
 * @param {object[]} restoredItems - Rows with `name`, `price`, `sku`, `url`.
 * @returns {void}
 */
function addCategoryResultToReport(report, storeName, catLabel, newItems, updatedItems, removedItems, restoredItems) {
    const reportCatLabel = `${storeName} | ${catLabel}`;

    // Shared row shape for the new / restored / removed lists.
    const listingRow = (it) => ({
        catLabel: reportCatLabel,
        name: it.name,
        price: it.price || "",
        sku: it.sku || "",
        url: it.url,
    });

    for (const it of newItems) report.newItems.push(listingRow(it));

    for (const it of restoredItems) report.restoredItems.push(listingRow(it));

    for (const u of updatedItems) {
        report.updatedItems.push({
            catLabel: reportCatLabel,
            name: u.name,
            sku: u.sku || "",
            oldPrice: u.oldPrice,
            newPrice: u.newPrice,
            url: u.url,
        });
    }

    for (const it of removedItems) report.removedItems.push(listingRow(it));
}
function renderFinalReport(report, { dbDir, colorize = Boolean(process.stdout && process.stdout.isTTY) } = {}) { function renderFinalReport(report, { dbDir, colorize = Boolean(process.stdout && process.stdout.isTTY) } = {}) {
const paint = (s, code) => color(s, code, colorize); const paint = (s, code) => color(s, code, colorize);
// Load mapping for comparisons only // Load mapping for comparisons only
const skuMap = loadSkuMap({ dbDir }); const skuMap = loadSkuMap({ dbDir });
// Cheapest index is keyed by canonical sku (mapped) // Cheapest index is keyed by canonical sku (mapped)
const cheapestSku = buildCheapestSkuIndexFromAllDbs(dbDir, { skuMap }); const cheapestSku = buildCheapestSkuIndexFromAllDbs(dbDir, { skuMap });
const endedAt = new Date(); const endedAt = new Date();
const durMs = endedAt - report.startedAt; const durMs = endedAt - report.startedAt;
const storesSet = new Set(report.categories.map((c) => c.store)); const storesSet = new Set(report.categories.map((c) => c.store));
const totalUnique = report.categories.reduce((acc, c) => acc + (Number.isFinite(c.discoveredUnique) ? c.discoveredUnique : 0), 0); const totalUnique = report.categories.reduce(
(acc, c) => acc + (Number.isFinite(c.discoveredUnique) ? c.discoveredUnique : 0),
0,
);
let out = ""; let out = "";
const ln = (s = "") => { const ln = (s = "") => {
out += String(s) + "\n"; out += String(s) + "\n";
}; };
ln(""); ln("");
ln(paint("========== REPORT ==========", C.bold)); ln(paint("========== REPORT ==========", C.bold));
ln( ln(
paint("[OK] ", C.green) + paint("[OK] ", C.green) +
`Totals | Stores=${storesSet.size} | Categories=${report.categories.length} | Unique=${totalUnique} | New=${report.totals.newCount} | Restored=${report.totals.restoredCount} | Removed=${report.totals.removedCount} | PriceChanges=${report.totals.updatedCount} | Runtime=${secStr( `Totals | Stores=${storesSet.size} | Categories=${report.categories.length} | Unique=${totalUnique} | New=${report.totals.newCount} | Restored=${report.totals.restoredCount} | Removed=${report.totals.removedCount} | PriceChanges=${report.totals.updatedCount} | Runtime=${secStr(
durMs durMs,
)}` )}`,
); );
ln(""); ln("");
ln(paint("Per-category summary:", C.bold)); ln(paint("Per-category summary:", C.bold));
const rows = report.categories.map((c) => ({ const rows = report.categories.map((c) => ({
cat: `${c.store} | ${c.label}`, cat: `${c.store} | ${c.label}`,
pages: c.scannedPages, pages: c.scannedPages,
uniq: c.discoveredUnique, uniq: c.discoveredUnique,
newC: c.newCount, newC: c.newCount,
resC: c.restoredCount, resC: c.restoredCount,
remC: c.removedCount, remC: c.removedCount,
updC: c.updatedCount, updC: c.updatedCount,
ms: c.elapsedMs, ms: c.elapsedMs,
})); }));
const catW = Math.min(48, Math.max(...rows.map((r) => r.cat.length), 8)); const catW = Math.min(48, Math.max(...rows.map((r) => r.cat.length), 8));
ln(`${padRight("Store | Category", catW)} ${padLeft("Pages", 5)} ${padLeft("Unique", 6)} ${padLeft("New", 4)} ${padLeft("Res", 4)} ${padLeft("Rem", 4)} ${padLeft("Upd", 4)} ${padLeft("Sec", 7)}`); ln(
ln(`${"-".repeat(catW)} ----- ------ ---- ---- ---- ---- -------`); `${padRight("Store | Category", catW)} ${padLeft("Pages", 5)} ${padLeft("Unique", 6)} ${padLeft("New", 4)} ${padLeft("Res", 4)} ${padLeft("Rem", 4)} ${padLeft("Upd", 4)} ${padLeft("Sec", 7)}`,
for (const r of rows) { );
ln( ln(`${"-".repeat(catW)} ----- ------ ---- ---- ---- ---- -------`);
`${padRight(r.cat, catW)} ${padLeft(r.pages, 5)} ${padLeft(r.uniq, 6)} ${padLeft(r.newC, 4)} ${padLeft(r.resC, 4)} ${padLeft(r.remC, 4)} ${padLeft(r.updC, 4)} ${secStr(r.ms)}` for (const r of rows) {
); ln(
} `${padRight(r.cat, catW)} ${padLeft(r.pages, 5)} ${padLeft(r.uniq, 6)} ${padLeft(r.newC, 4)} ${padLeft(r.resC, 4)} ${padLeft(r.remC, 4)} ${padLeft(r.updC, 4)} ${secStr(r.ms)}`,
ln(""); );
}
ln("");
const reportLabelW = Math.max( const reportLabelW = Math.max(
16, 16,
...report.newItems.map((x) => x.catLabel.length), ...report.newItems.map((x) => x.catLabel.length),
...report.restoredItems.map((x) => x.catLabel.length), ...report.restoredItems.map((x) => x.catLabel.length),
...report.updatedItems.map((x) => x.catLabel.length), ...report.updatedItems.map((x) => x.catLabel.length),
...report.removedItems.map((x) => x.catLabel.length) ...report.removedItems.map((x) => x.catLabel.length),
); );
function storeFromCatLabel(catLabel) { function storeFromCatLabel(catLabel) {
return String(catLabel || "").split(" | ")[0] || ""; return String(catLabel || "").split(" | ")[0] || "";
} }
function skuInline(sku) { function skuInline(sku) {
const s = normalizeCspc(sku); const s = normalizeCspc(sku);
return s ? paint(` ${s}`, C.gray) : ""; return s ? paint(` ${s}`, C.gray) : "";
} }
function canonicalKeyForReportItem(catLabel, skuRaw, url) { function canonicalKeyForReportItem(catLabel, skuRaw, url) {
const storeLabel = storeFromCatLabel(catLabel); const storeLabel = storeFromCatLabel(catLabel);
const skuKey = normalizeSkuKey(skuRaw, { storeLabel, url }); const skuKey = normalizeSkuKey(skuRaw, { storeLabel, url });
if (!skuKey) return ""; if (!skuKey) return "";
return skuMap && typeof skuMap.canonicalSku === "function" ? skuMap.canonicalSku(skuKey) : skuKey; return skuMap && typeof skuMap.canonicalSku === "function" ? skuMap.canonicalSku(skuKey) : skuKey;
} }
function cheaperAtInline(catLabel, skuRaw, url, currentPriceStr) { function cheaperAtInline(catLabel, skuRaw, url, currentPriceStr) {
const canon = canonicalKeyForReportItem(catLabel, skuRaw, url); const canon = canonicalKeyForReportItem(catLabel, skuRaw, url);
if (!canon) return ""; if (!canon) return "";
const best = cheapestSku.get(canon); const best = cheapestSku.get(canon);
if (!best || !best.storeLabel) return ""; if (!best || !best.storeLabel) return "";
const curStore = storeFromCatLabel(catLabel); const curStore = storeFromCatLabel(catLabel);
if (!curStore || best.storeLabel === curStore) return ""; if (!curStore || best.storeLabel === curStore) return "";
const curP = priceToNumber(currentPriceStr); const curP = priceToNumber(currentPriceStr);
if (!Number.isFinite(curP)) return ""; if (!Number.isFinite(curP)) return "";
if (best.priceNum >= curP) return ""; if (best.priceNum >= curP) return "";
return paint(` (Cheaper at ${best.storeLabel})`, C.gray); return paint(` (Cheaper at ${best.storeLabel})`, C.gray);
} }
function availableAtInline(catLabel, skuRaw, url) { function availableAtInline(catLabel, skuRaw, url) {
const canon = canonicalKeyForReportItem(catLabel, skuRaw, url); const canon = canonicalKeyForReportItem(catLabel, skuRaw, url);
if (!canon) return ""; if (!canon) return "";
const best = cheapestSku.get(canon); const best = cheapestSku.get(canon);
if (!best || !best.storeLabel) return ""; if (!best || !best.storeLabel) return "";
const curStore = storeFromCatLabel(catLabel); const curStore = storeFromCatLabel(catLabel);
if (curStore && best.storeLabel === curStore) return ""; if (curStore && best.storeLabel === curStore) return "";
return paint(` (Available at ${best.storeLabel})`, C.gray); return paint(` (Available at ${best.storeLabel})`, C.gray);
} }
if (report.newItems.length) { if (report.newItems.length) {
ln(paint(`NEW LISTINGS (${report.newItems.length})`, C.bold + C.green)); ln(paint(`NEW LISTINGS (${report.newItems.length})`, C.bold + C.green));
for (const it of report.newItems.sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name))) { for (const it of report.newItems.sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name))) {
const price = it.price ? paint(it.price, C.cyan) : paint("(no price)", C.gray); const price = it.price ? paint(it.price, C.cyan) : paint("(no price)", C.gray);
const sku = String(it.sku || ""); const sku = String(it.sku || "");
const cheapTag = cheaperAtInline(it.catLabel, sku, it.url, it.price || ""); const cheapTag = cheaperAtInline(it.catLabel, sku, it.url, it.price || "");
ln(`${paint("+", C.green)} ${padRight(it.catLabel, reportLabelW)} | ${paint(it.name, C.bold)}${skuInline(sku)} ${price}${cheapTag}`); ln(
ln(` ${paint(it.url, C.dim)}`); `${paint("+", C.green)} ${padRight(it.catLabel, reportLabelW)} | ${paint(it.name, C.bold)}${skuInline(sku)} ${price}${cheapTag}`,
} );
ln(""); ln(` ${paint(it.url, C.dim)}`);
} else { }
ln(paint("NEW LISTINGS (0)", C.bold)); ln("");
ln(""); } else {
} ln(paint("NEW LISTINGS (0)", C.bold));
ln("");
}
if (report.restoredItems.length) { if (report.restoredItems.length) {
ln(paint(`RESTORED (${report.restoredItems.length})`, C.bold + C.green)); ln(paint(`RESTORED (${report.restoredItems.length})`, C.bold + C.green));
for (const it of report.restoredItems.sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name))) { for (const it of report.restoredItems.sort((a, b) =>
const price = it.price ? paint(it.price, C.cyan) : paint("(no price)", C.gray); (a.catLabel + a.name).localeCompare(b.catLabel + b.name),
const sku = String(it.sku || ""); )) {
const cheapTag = cheaperAtInline(it.catLabel, sku, it.url, it.price || ""); const price = it.price ? paint(it.price, C.cyan) : paint("(no price)", C.gray);
ln(`${paint("R", C.green)} ${padRight(it.catLabel, reportLabelW)} | ${paint(it.name, C.bold)}${skuInline(sku)} ${price}${cheapTag}`); const sku = String(it.sku || "");
ln(` ${paint(it.url, C.dim)}`); const cheapTag = cheaperAtInline(it.catLabel, sku, it.url, it.price || "");
} ln(
ln(""); `${paint("R", C.green)} ${padRight(it.catLabel, reportLabelW)} | ${paint(it.name, C.bold)}${skuInline(sku)} ${price}${cheapTag}`,
} else { );
ln(paint("RESTORED (0)", C.bold)); ln(` ${paint(it.url, C.dim)}`);
ln(""); }
} ln("");
} else {
ln(paint("RESTORED (0)", C.bold));
ln("");
}
if (report.removedItems.length) { if (report.removedItems.length) {
ln(paint(`REMOVED (${report.removedItems.length})`, C.bold + C.yellow)); ln(paint(`REMOVED (${report.removedItems.length})`, C.bold + C.yellow));
for (const it of report.removedItems.sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name))) { for (const it of report.removedItems.sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name))) {
const price = it.price ? paint(it.price, C.cyan) : paint("(no price)", C.gray); const price = it.price ? paint(it.price, C.cyan) : paint("(no price)", C.gray);
const sku = String(it.sku || ""); const sku = String(it.sku || "");
const availTag = availableAtInline(it.catLabel, sku, it.url); const availTag = availableAtInline(it.catLabel, sku, it.url);
ln(`${paint("-", C.yellow)} ${padRight(it.catLabel, reportLabelW)} | ${paint(it.name, C.bold)}${skuInline(sku)} ${price}${availTag}`); ln(
ln(` ${paint(it.url, C.dim)}`); `${paint("-", C.yellow)} ${padRight(it.catLabel, reportLabelW)} | ${paint(it.name, C.bold)}${skuInline(sku)} ${price}${availTag}`,
} );
ln(""); ln(` ${paint(it.url, C.dim)}`);
} else { }
ln(paint("REMOVED (0)", C.bold)); ln("");
ln(""); } else {
} ln(paint("REMOVED (0)", C.bold));
ln("");
}
if (report.updatedItems.length) { if (report.updatedItems.length) {
ln(paint(`PRICE CHANGES (${report.updatedItems.length})`, C.bold + C.cyan)); ln(paint(`PRICE CHANGES (${report.updatedItems.length})`, C.bold + C.cyan));
for (const u of report.updatedItems.sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name))) { for (const u of report.updatedItems.sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name))) {
const oldRaw = u.oldPrice || ""; const oldRaw = u.oldPrice || "";
const newRaw = u.newPrice || ""; const newRaw = u.newPrice || "";
const oldN = priceToNumber(oldRaw); const oldN = priceToNumber(oldRaw);
const newN = priceToNumber(newRaw); const newN = priceToNumber(newRaw);
const oldP = oldRaw ? paint(oldRaw, C.yellow) : paint("(no price)", C.gray); const oldP = oldRaw ? paint(oldRaw, C.yellow) : paint("(no price)", C.gray);
let newP = newRaw ? newRaw : "(no price)"; let newP = newRaw ? newRaw : "(no price)";
let offTag = ""; let offTag = "";
if (Number.isFinite(oldN) && Number.isFinite(newN)) { if (Number.isFinite(oldN) && Number.isFinite(newN)) {
if (newN > oldN) { if (newN > oldN) {
newP = paint(newP, C.red); // increase newP = paint(newP, C.red); // increase
} else if (newN < oldN) { } else if (newN < oldN) {
newP = paint(newP, C.green); // decrease newP = paint(newP, C.green); // decrease
const pct = salePctOff(oldRaw, newRaw); const pct = salePctOff(oldRaw, newRaw);
if (pct !== null) offTag = " " + paint(`[${pct}% Off]`, C.green); if (pct !== null) offTag = " " + paint(`[${pct}% Off]`, C.green);
} else { } else {
newP = paint(newP, C.cyan); newP = paint(newP, C.cyan);
} }
} else { } else {
newP = paint(newP, C.cyan); newP = paint(newP, C.cyan);
} }
const sku = String(u.sku || ""); const sku = String(u.sku || "");
const cheapTag = cheaperAtInline(u.catLabel, sku, u.url, newRaw || ""); const cheapTag = cheaperAtInline(u.catLabel, sku, u.url, newRaw || "");
ln( ln(
`${paint("~", C.cyan)} ${padRight(u.catLabel, reportLabelW)} | ${paint(u.name, C.bold)}${skuInline(sku)} ${oldP} ${paint("->", C.gray)} ${newP}${offTag}${cheapTag}` `${paint("~", C.cyan)} ${padRight(u.catLabel, reportLabelW)} | ${paint(u.name, C.bold)}${skuInline(sku)} ${oldP} ${paint("->", C.gray)} ${newP}${offTag}${cheapTag}`,
); );
ln(` ${paint(u.url, C.dim)}`); ln(` ${paint(u.url, C.dim)}`);
} }
ln(""); ln("");
} else { } else {
ln(paint("PRICE CHANGES (0)", C.bold)); ln(paint("PRICE CHANGES (0)", C.bold));
ln(""); ln("");
} }
ln(paint("======== END REPORT ========", C.bold)); ln(paint("======== END REPORT ========", C.bold));
return out; return out;
} }
module.exports = { createReport, addCategoryResultToReport, renderFinalReport }; module.exports = { createReport, addCategoryResultToReport, renderFinalReport };

View file

@ -3,96 +3,89 @@
const { createReport } = require("./report"); const { createReport } = require("./report");
const { setTimeout: sleep } = require("timers/promises"); const { setTimeout: sleep } = require("timers/promises");
const { const { makeCatPrefixers, buildCategoryContext, loadCategoryDb, discoverAndScanCategory } = require("./category_scan");
makeCatPrefixers,
buildCategoryContext,
loadCategoryDb,
discoverAndScanCategory,
} = require("./category_scan");
// Some sites will intermittently 403/429. We don't want a single category/store // Some sites will intermittently 403/429. We don't want a single category/store
// to abort the entire run. Log and continue. // to abort the entire run. Log and continue.
/**
 * Render a thrown value as text suitable for a log line.
 *
 * @param {*} e - Whatever was thrown (Error, string, or anything else).
 * @returns {string} "Unknown error" for falsy values, the string itself for
 *   strings, `e.stack` when present, otherwise `String(e)`.
 */
function formatErr(e) {
  if (!e) return "Unknown error";
  if (typeof e === "string") return e;
  if (e.stack) return e.stack;
  return String(e);
}
/**
 * Scan every category of every store and collect the results into one report.
 *
 * Categories are processed by a pool of workers. Two categories that share a
 * host key are never processed at the same time. A failing category is logged
 * with `logger.warn` and does not abort the run.
 *
 * @param {Array} stores - Store definitions; each must expose `categories`.
 * @param {{config: object, logger: object, http: object}} deps - Run
 *   configuration, logger and HTTP client, forwarded into each category context.
 * @returns {Promise<object>} The report created by `createReport()`, after all
 *   workers have finished.
 */
async function runAllStores(stores, { config, logger, http }) {
  const report = createReport();
  const { catPrefixOut } = makeCatPrefixers(stores, logger);

  logger.info(`Debug=on`);
  logger.info(
    `Concurrency=${config.concurrency} StaggerMs=${config.staggerMs} Retries=${config.maxRetries} TimeoutMs=${config.timeoutMs}`,
  );
  logger.info(`DiscoveryGuess=${config.discoveryGuess} DiscoveryStep=${config.discoveryStep}`);
  logger.info(`MaxPages=${config.maxPages === null ? "none" : config.maxPages}`);
  logger.info(`CategoryConcurrency=${config.categoryConcurrency}`);

  const workItems = [];
  for (const store of stores) {
    for (const cat of store.categories) {
      const baseCtx = buildCategoryContext(store, cat, catPrefixOut, config);
      const ctx = { ...baseCtx, config, logger, http };
      const prevDb = loadCategoryDb(logger, ctx);
      workItems.push({ ctx, prevDb });
    }
  }

  // Host-level serialization: never run two categories from the same host concurrently.
  const maxWorkers = Math.min(config.categoryConcurrency, workItems.length);
  const queue = workItems.slice();
  const inflightHosts = new Set();

  // Items with neither host nor key share one sentinel key. Without it such an
  // item could never be picked and the workers would poll forever.
  const UNKNOWN_HOST = "(unknown-host)";
  const hostKeyOf = (w) => String(w?.ctx?.store?.host || w?.ctx?.store?.key || "") || UNKNOWN_HOST;

  async function runOne(w) {
    try {
      await discoverAndScanCategory(w.ctx, w.prevDb, report);
    } catch (e) {
      const storeName = w?.ctx?.store?.name || w?.ctx?.store?.host || "unknown-store";
      const catLabel = w?.ctx?.cat?.label || w?.ctx?.cat?.key || "unknown-category";

      // Keep it loud in logs, but do not fail the entire run.
      logger.warn(`Category failed (continuing): ${storeName} | ${catLabel}\n${formatErr(e)}`);
    }
  }

  async function worker() {
    while (true) {
      if (queue.length === 0) return;

      // Pick next item whose host isn't currently running.
      const idx = queue.findIndex((w) => !inflightHosts.has(hostKeyOf(w)));

      if (idx === -1) {
        // Everything left belongs to a busy host; wait a bit and look again.
        await sleep(50);
        continue;
      }

      const w = queue.splice(idx, 1)[0];
      const host = hostKeyOf(w);
      inflightHosts.add(host);
      try {
        await runOne(w);
      } finally {
        // Always release the host, even if runOne throws unexpectedly.
        inflightHosts.delete(host);
      }
    }
  }

  const workers = [];
  for (let i = 0; i < maxWorkers; i++) workers.push(worker());
  await Promise.all(workers);

  return report;
}
module.exports = { runAllStores }; module.exports = { runAllStores };

View file

@ -1,19 +1,19 @@
"use strict"; "use strict";
// ANSI SGR escape sequences used to style terminal output.
const C = {
  reset: "\x1b[0m",
  dim: "\x1b[2m",
  bold: "\x1b[1m",
  red: "\x1b[31m",
  green: "\x1b[32m",
  yellow: "\x1b[33m",
  cyan: "\x1b[36m",
  gray: "\x1b[90m",
};

/**
 * Wrap text in an ANSI style code followed by a reset.
 *
 * @param {*} s - Value to print; coerced with `String`.
 * @param {string} code - Escape sequence(s) to prefix; falsy means no prefix.
 * @param {boolean} enabled - When falsy, the plain string is returned.
 * @returns {string} Styled (or plain) text.
 */
function color(s, code, enabled) {
  if (!enabled) return String(s);
  return String(code || "") + String(s) + C.reset;
}
module.exports = { C, color }; module.exports = { C, color };

View file

@ -1,86 +1,86 @@
"use strict"; "use strict";
/**
 * Parse a base-10 integer and clamp it into [min, max].
 *
 * @param {*} v - Raw value (typically a CLI string).
 * @param {number|null} def - Returned when `v` does not parse. When `def` is
 *   null and `v` is null/undefined, null is returned.
 * @param {number} min - Lower bound (inclusive).
 * @param {number} max - Upper bound (inclusive).
 * @returns {number|null} Clamped integer, or `def`.
 */
function clampInt(v, def, min, max) {
  if (def === null && (v === null || v === undefined)) return null;
  const n = Number.parseInt(v ?? "", 10);
  if (!Number.isFinite(n)) return def;
  return Math.max(min, Math.min(max, n));
}
/**
 * Parse command-line arguments.
 *
 * Recognised flags: `--debug`/`-d`, and the value options `--max-pages`,
 * `--concurrency`, `--stagger-ms`/`--staggerMs`, `--guess`, `--step`,
 * `--data-dir`/`--dataDir`, `--report-dir`/`--reportDir`. A value option is
 * only honoured when followed by a value that does not start with "-".
 * If `--max-pages` was not given, the first all-digit positional argument
 * greater than zero is used instead (capped at 5000).
 *
 * @param {string[]} argv - Arguments, without the node/script entries.
 * @returns {{maxPages: number|null, debug: boolean, concurrency: number|null,
 *   staggerMs: number|null, guess: number|null, step: number|null,
 *   dataDir: string|null, reportDir: string|null}} Parsed options; unset ones are null.
 */
function parseArgs(argv) {
  const intOpt = (key, min, max) => ({ key, parse: (raw) => clampInt(raw, null, min, max) });
  const strOpt = (key) => ({ key, parse: (raw) => String(raw) });

  // Flag spelling -> which result field it fills and how its value is parsed.
  const valueOptions = new Map([
    ["--max-pages", intOpt("maxPages", 1, 5000)],
    ["--concurrency", intOpt("concurrency", 1, 64)],
    ["--stagger-ms", intOpt("staggerMs", 0, 5000)],
    ["--staggerMs", intOpt("staggerMs", 0, 5000)],
    ["--guess", intOpt("guess", 1, 5000)],
    ["--step", intOpt("step", 1, 500)],
    ["--data-dir", strOpt("dataDir")],
    ["--dataDir", strOpt("dataDir")],
    ["--report-dir", strOpt("reportDir")],
    ["--reportDir", strOpt("reportDir")],
  ]);

  const opts = {
    maxPages: null,
    debug: false,
    concurrency: null,
    staggerMs: null,
    guess: null,
    step: null,
    dataDir: null,
    reportDir: null,
  };
  const positional = [];

  for (let i = 0; i < argv.length; i++) {
    const a = argv[i];

    if (a === "--debug" || a === "-d") {
      opts.debug = true;
      continue;
    }

    const spec = valueOptions.get(a);
    const next = argv[i + 1];
    if (spec && next && !next.startsWith("-")) {
      opts[spec.key] = spec.parse(next);
      i++; // the value has been consumed
      continue;
    }

    // A value option without a usable value lands here and is dropped,
    // because it starts with "-".
    if (!String(a).startsWith("-")) positional.push(a);
  }

  if (opts.maxPages === null) {
    const cand = positional.find((x) => /^\d+$/.test(String(x)));
    if (cand) {
      const n = Number.parseInt(cand, 10);
      if (Number.isFinite(n) && n > 0) opts.maxPages = Math.min(n, 5000);
    }
  }

  return opts;
}
module.exports = { clampInt, parseArgs }; module.exports = { clampInt, parseArgs };

View file

@ -3,24 +3,24 @@
const { setTimeout: sleep } = require("timers/promises"); const { setTimeout: sleep } = require("timers/promises");
/**
 * Map `fn` over `arr` with a bounded number of concurrent workers, optionally
 * spacing out the start of workers and of individual items.
 *
 * @param {Array} arr - Input items.
 * @param {number} concurrency - Maximum number of workers. Values below 1 (or
 *   NaN) fall back to a single worker so that no item is silently skipped.
 * @param {number} staggerMs - Delay unit in milliseconds; 0 disables delays.
 *   Worker k (1-based) waits `staggerMs * (k - 1)` before starting, and every
 *   item except index 0 waits `staggerMs` before `fn` is called.
 * @param {function(*, number): Promise<*>} fn - Called as `fn(item, index)`.
 * @returns {Promise<Array>} Results in input order. Rejects if any `fn` call rejects.
 */
async function parallelMapStaggered(arr, concurrency, staggerMs, fn) {
  const out = new Array(arr.length);
  let next = 0;

  async function worker(workerId) {
    if (staggerMs > 0 && workerId > 1) await sleep(staggerMs * (workerId - 1));
    while (true) {
      // Claiming the index is synchronous, so no two workers get the same one.
      const i = next++;
      if (i >= arr.length) return;
      if (staggerMs > 0 && i > 0) await sleep(staggerMs);
      out[i] = await fn(arr[i], i);
    }
  }

  // NaN >= 1 is false, so invalid values also end up as one worker.
  const limit = concurrency >= 1 ? concurrency : 1;
  const w = Math.min(limit, arr.length);
  const workers = [];
  for (let i = 0; i < w; i++) workers.push(worker(i + 1));
  await Promise.all(workers);
  return out;
}
module.exports = { parallelMapStaggered }; module.exports = { parallelMapStaggered };

View file

@ -1,12 +1,12 @@
"use strict"; "use strict";
/**
 * Format a byte count for display.
 *
 * @param {number} n - Number of bytes.
 * @returns {string} "0B" for non-finite or non-positive input, "<n>B" below
 *   1024, otherwise KB or MB with one decimal. MB is the largest unit used.
 */
function humanBytes(n) {
  if (!Number.isFinite(n) || n <= 0) return "0B";
  if (n < 1024) return `${n}B`;
  const kb = n / 1024;
  if (kb < 1024) return `${kb.toFixed(1)}KB`;
  const mb = kb / 1024;
  return `${mb.toFixed(1)}MB`;
}
module.exports = { humanBytes }; module.exports = { humanBytes };

View file

@ -1,141 +1,137 @@
"use strict"; "use strict";
/**
 * Remove everything that looks like an HTML tag, leaving the text as-is.
 *
 * @param {*} s - Input; coerced with `String`.
 * @returns {string} Input with every `<...>` run deleted (no spacing added).
 */
function stripTags(s) {
  return String(s).replace(/<[^>]*>/g, "");
}
/**
 * Turn an HTML fragment into single-line text.
 *
 * @param {*} s - Input; coerced with `String`.
 * @returns {string} Text with tags replaced by a space, whitespace runs
 *   collapsed to one space, and both ends trimmed.
 */
function cleanText(s) {
  return String(s)
    .replace(/<[^>]+>/g, " ")
    .replace(/\s+/g, " ")
    .trim();
}
/**
 * Decode the HTML entities this scraper cares about: decimal and hex numeric
 * references plus amp, quot, apos, lt, gt, nbsp, laquo and raquo.
 *
 * Decoding is done in a single pass so the output of one replacement is never
 * decoded a second time (e.g. "&amp;lt;" yields "&lt;", not "<").
 *
 * @param {*} s - Input; coerced with `String`.
 * @returns {string} Decoded text. Unknown entities and numeric references
 *   beyond U+10FFFF are left untouched.
 */
function decodeHtml(s) {
  const named = {
    amp: "&",
    quot: '"',
    apos: "'",
    lt: "<",
    gt: ">",
    nbsp: " ",
    laquo: "«",
    raquo: "»",
  };

  return String(s).replace(/&(#\d+|#x[0-9a-fA-F]+|amp|quot|apos|lt|gt|nbsp|laquo|raquo);/g, (whole, body) => {
    if (body[0] !== "#") return named[body];
    const cp = body[1] === "x" ? Number.parseInt(body.slice(2), 16) : Number.parseInt(body.slice(1), 10);
    // fromCodePoint handles astral characters; it throws above U+10FFFF,
    // so out-of-range references are kept verbatim.
    return cp <= 0x10ffff ? String.fromCodePoint(cp) : whole;
  });
}
/**
 * Escape a string so it can be embedded literally in a regular expression.
 *
 * @param {*} s - Input; coerced with `String`.
 * @returns {string} Input with each regex metacharacter prefixed by a backslash.
 */
function escapeRe(s) {
  return String(s).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
/**
 * Read the value of one attribute out of a piece of HTML (normally a single
 * tag) using a regular expression.
 *
 * The name must not be preceded by a word character, "-", ":" or ".", so a
 * lookup for `src` does not match inside `data-src`.
 *
 * @param {string} html - HTML text to search.
 * @param {string} attrName - Attribute name, matched case-insensitively.
 * @returns {string} The double-quoted, single-quoted or unquoted value of the
 *   first match; "" when the attribute is not found.
 */
function extractHtmlAttr(html, attrName) {
  const re = new RegExp(
    `(?<![\\w:.-])${escapeRe(attrName)}\\s*=\\s*(?:"([^"]*)"|'([^']*)'|([^\\s>]+))`,
    "i",
  );
  const m = re.exec(html);
  if (!m) return "";
  // Exactly one of the three capture groups is set, depending on quoting.
  return m[1] ?? m[2] ?? m[3] ?? "";
}
/**
 * Take the URL of the first candidate in a `srcset` attribute value.
 *
 * @param {*} srcset - Raw attribute value; falsy values are treated as "".
 * @returns {string} The first whitespace-delimited token of the first
 *   comma-separated entry, with one surrounding quote stripped from each end;
 *   "" for empty input.
 */
function pickFirstUrlFromSrcset(srcset) {
  const s = String(srcset || "").trim();
  if (!s) return "";
  const first = (s.split(",")[0] || "").trim();
  const url = (first.split(/\s+/)[0] || "").trim();
  return url.replace(/^["']|["']$/g, "");
}
/**
 * Resolve a possibly relative or protocol-relative URL to an absolute one.
 *
 * @param {*} raw - URL text; falsy values are treated as "".
 * @param {string} [baseUrl] - Base used to resolve relative URLs.
 * @returns {string} Absolute URL; "" for empty input. When the URL cannot be
 *   parsed, the trimmed input (with "https:" added to a leading "//") is
 *   returned unchanged.
 */
function normalizeMaybeRelativeUrl(raw, baseUrl) {
  const r = String(raw || "").trim();
  if (!r) return "";
  let u = r;
  if (u.startsWith("//")) u = `https:${u}`;
  try {
    return baseUrl ? new URL(u, baseUrl).toString() : new URL(u).toString();
  } catch {
    // Best effort: hand back what we have rather than dropping the URL.
    return u;
  }
}
/**
 * Replace a `{width}` (or URL-encoded `%7Bwidth%7D`) placeholder in an image
 * URL with a concrete width.
 *
 * The width defaults to 400. If the tag has a `data-widths` attribute holding
 * a JSON array, 400 is used when listed, else 360 when listed, else the
 * array's first entry.
 *
 * @param {*} url - Image URL; falsy values are treated as "".
 * @param {string} tag - HTML of the tag, searched for `data-widths`.
 * @returns {string} URL with every placeholder substituted, or the URL
 *   unchanged when it contains no placeholder.
 */
function resolveShopifyWidthPlaceholder(url, tag) {
  const s = String(url || "");
  if (!/%7Bwidth%7D|\{width\}/i.test(s)) return s;

  // Pick a reasonable width from data-widths if available
  let w = 400;
  const dw = extractHtmlAttr(tag, "data-widths");
  if (dw) {
    try {
      const arr = JSON.parse(dw);
      if (Array.isArray(arr) && arr.length) {
        if (arr.includes(400)) w = 400;
        else if (arr.includes(360)) w = 360;
        else w = arr[0];
      }
    } catch {
      // Malformed data-widths: keep the default width.
    }
  }

  // The first two patterns keep the "x" that follows "_{width}"; the last two
  // catch any remaining bare placeholder.
  return s
    .replace(/_%7Bwidth%7D(x)/gi, `_${w}$1`)
    .replace(/_\{width\}(x)/gi, `_${w}$1`)
    .replace(/%7Bwidth%7D/gi, String(w))
    .replace(/\{width\}/gi, String(w));
}
/**
 * Find the first `<img>` tag in an HTML fragment and return a usable absolute
 * image URL from it.
 *
 * Attributes are tried in this order: data-src, data-lazy-src, data-original,
 * data-srcset, srcset, src. `data:` URIs are skipped. A non-srcset value that
 * still contains a `{width}` template is skipped on the first pass so a
 * srcset can win; if nothing else is found, data-src and src are retried and
 * the template is filled in with a width.
 *
 * @param {*} html - HTML fragment; falsy values are treated as "".
 * @param {string} [baseUrl] - Base for resolving relative URLs.
 * @returns {string} Absolute image URL, or "" when none is found.
 */
function extractFirstImgUrl(html, baseUrl) {
  const s = String(html || "");
  const m = s.match(/<img\b[^>]*>/i);
  if (!m) return "";

  const tag = m[0];
  const attrs = ["data-src", "data-lazy-src", "data-original", "data-srcset", "srcset", "src"];

  for (const a of attrs) {
    let v = extractHtmlAttr(tag, a);
    if (!v) continue;

    v = decodeHtml(String(v)).trim();
    if (!v) continue;

    const isSrcset = a.toLowerCase().includes("srcset");
    if (isSrcset) v = pickFirstUrlFromSrcset(v);
    v = String(v || "").trim();
    if (!v) continue;

    if (/^data:/i.test(v)) continue;

    // If this attr is a template URL, prefer trying srcset next
    if (!isSrcset && /%7Bwidth%7D|\{width\}/i.test(v)) continue;

    let abs = normalizeMaybeRelativeUrl(v, baseUrl);
    abs = resolveShopifyWidthPlaceholder(abs, tag);
    if (abs) return abs;
  }

  // Fallback: accept template URLs but force a width
  for (const a of ["data-src", "src"]) {
    let v = extractHtmlAttr(tag, a);
    if (!v) continue;
    v = decodeHtml(String(v)).trim();
    if (!v || /^data:/i.test(v)) continue;
    let abs = normalizeMaybeRelativeUrl(v, baseUrl);
    abs = resolveShopifyWidthPlaceholder(abs, tag);
    if (abs) return abs;
  }

  return "";
}
module.exports = { module.exports = {
stripTags, stripTags,
cleanText, cleanText,
decodeHtml, decodeHtml,
escapeRe, escapeRe,
extractHtmlAttr, extractHtmlAttr,
extractFirstImgUrl, extractFirstImgUrl,
}; };

View file

@ -1,21 +1,23 @@
"use strict"; "use strict";
/**
 * Normalise a price string for comparison by removing all whitespace.
 *
 * @param {*} p - Price text; falsy values are treated as "".
 * @returns {string} The text with every whitespace character removed.
 */
function normPrice(p) {
  return String(p || "")
    .trim()
    .replace(/\s+/g, "");
}
/**
 * Extract the numeric value of a price string.
 *
 * Every character other than digits and "." is discarded before parsing, so
 * currency symbols and thousands separators are ignored.
 *
 * @param {*} p - Price text; falsy values are treated as "".
 * @returns {number} The parsed number, or NaN when the text contains no
 *   digits or does not form a valid number (e.g. "1.2.3").
 */
function priceToNumber(p) {
  const digits = String(p || "").replace(/[^0-9.]/g, "");
  // Number("") is 0, which would make a missing price look like a free item.
  if (!/\d/.test(digits)) return NaN;
  const n = Number(digits);
  return Number.isFinite(n) ? n : NaN;
}
/**
 * Compute the whole-number percentage discount between two price strings.
 *
 * @param {*} oldPriceStr - Previous price text.
 * @param {*} newPriceStr - Current price text.
 * @returns {number|null} Rounded percent off, or null when either price is
 *   missing/unparseable, the old price is not positive, or the new price is
 *   not lower than the old one.
 */
function salePctOff(oldPriceStr, newPriceStr) {
  // A price without any digit is "no price", not zero; never report it as a sale.
  if (!/\d/.test(String(oldPriceStr ?? "")) || !/\d/.test(String(newPriceStr ?? ""))) return null;
  const oldN = priceToNumber(oldPriceStr);
  const newN = priceToNumber(newPriceStr);
  if (!Number.isFinite(oldN) || !Number.isFinite(newN) || oldN <= 0) return null;
  if (newN >= oldN) return null;
  return Math.round(((oldN - newN) / oldN) * 100);
}
module.exports = { normPrice, priceToNumber, salePctOff }; module.exports = { normPrice, priceToNumber, salePctOff };

View file

@ -2,72 +2,71 @@
"use strict"; "use strict";
/**
 * 32-bit FNV-1a hash over the UTF-16 code units of a string.
 *
 * @param {string} str - Text to hash.
 * @returns {string} Hash as 8 lowercase hex digits.
 */
function fnv1a32(str) {
  let h = 0x811c9dc5; // FNV offset basis
  for (let i = 0; i < str.length; i++) {
    h ^= str.charCodeAt(i);
    h = Math.imul(h, 0x01000193); // FNV prime, 32-bit wrapping multiply
  }
  // >>> 0 turns the signed 32-bit result into an unsigned value.
  return (h >>> 0).toString(16).padStart(8, "0");
}
/**
 * Left-pad a short numeric id with zeros to six digits.
 *
 * @param {*} idDigits - Candidate id; falsy values are treated as "".
 * @returns {string} Six-digit string, or "" unless the trimmed input is
 *   one to six digits.
 */
function idToCspc6(idDigits) {
  const s = String(idDigits || "").trim();
  if (!/^\d{1,6}$/.test(s)) return "";
  return s.padStart(6, "0");
}
/**
 * Pull the first standalone six-digit number out of a value.
 *
 * @param {*} v - Input; null/undefined are treated as "".
 * @returns {string} The six digits, or "" when there is no run of exactly six
 *   digits delimited by word boundaries.
 */
function normalizeCspc(v) {
  const m = String(v ?? "").match(/\b(\d{6})\b/);
  return m ? m[1] : "";
}
/**
 * Pull the first standalone run of 12 to 14 digits out of a value.
 *
 * @param {*} v - Input; null/undefined are treated as "".
 * @returns {string} The digit run, or "" when none is present.
 */
function normalizeUpcDigits(v) {
  const m = String(v ?? "").match(/\b(\d{12,14})\b/);
  return m ? m[1] : "";
}
// CHANGE: allow 1-11 digits so BCL 3-digit ids like id:141 are preserved // CHANGE: allow 1-11 digits so BCL 3-digit ids like id:141 are preserved
/**
 * Pull the first standalone run of 1 to 11 digits out of a value.
 *
 * @param {*} v - Input; null/undefined are treated as "".
 * @returns {string} The digit run, or "" when none is present.
 */
function normalizeIdDigits(v) {
  const m = String(v ?? "").match(/\b(\d{1,11})\b/);
  return m ? m[1] : "";
}
// IMPORTANT: keep old behavior exactly (no lowercasing, no url canonicalization) // IMPORTANT: keep old behavior exactly (no lowercasing, no url canonicalization)
/**
 * Build a synthetic SKU key of the form `u:<hash>` from a store label and a
 * product URL. Inputs are hashed exactly as given (no lowercasing, no URL
 * canonicalisation) so previously generated keys stay stable.
 *
 * @param {{storeLabel: *, url: *}} param0 - `storeLabel` defaults to "store"
 *   when falsy; `url` is required.
 * @returns {string} `u:` followed by the FNV-1a hash of "<store>|<url>", or ""
 *   when the URL is empty.
 */
function makeSyntheticSkuKey({ storeLabel, url }) {
  const store = String(storeLabel || "store");
  const u = String(url || "");
  if (!u) return "";
  return `u:${fnv1a32(`${store}|${u}`)}`;
}
/* ---------------- NEW: SKU quality helpers ---------------- */ /* ---------------- NEW: SKU quality helpers ---------------- */
/**
 * Rank how trustworthy a SKU value is.
 *
 * @param {*} v - SKU value; null/undefined are treated as "".
 * @returns {number} 0 for missing or synthetic (`u:`) keys, 3 when a six-digit
 *   CSPC can be extracted, 2 for `upc:` or `id:` keys, 1 for any other
 *   non-empty string.
 */
function skuQuality(v) {
  const s = String(v ?? "").trim();
  if (!s) return 0; // missing
  if (/^u:/i.test(s)) return 0; // synthetic
  if (normalizeCspc(s)) return 3; // best (6-digit CSPC)
  if (/^upc:/i.test(s)) return 2;
  if (/^id:/i.test(s)) return 2;
  return 1; // explicit non-synthetic string
}
// Prefer higher quality; on ties keep existing (stable) value // Prefer higher quality; on ties keep existing (stable) value
/**
 * Choose between a newly seen SKU and the stored one.
 *
 * @param {*} newSku - Candidate SKU.
 * @param {*} oldSku - Existing SKU.
 * @returns {string} The trimmed SKU with the higher `skuQuality`; on a tie the
 *   existing value wins, falling back to the new one if the existing is empty.
 */
function pickBetterSku(newSku, oldSku) {
  const a = String(newSku ?? "").trim();
  const b = String(oldSku ?? "").trim();
  const qa = skuQuality(a);
  const qb = skuQuality(b);
  if (qa > qb) return a;
  if (qb > qa) return b;
  return b || a;
}
// Only fetch product pages when missing/synthetic // Only fetch product pages when missing/synthetic
/**
 * Tell whether a SKU is missing or synthetic.
 *
 * @param {*} sku - SKU value; null/undefined are treated as "".
 * @returns {boolean} True when the trimmed value is empty or starts with
 *   "u:" (case-insensitive).
 */
function needsSkuDetail(sku) {
  const s = String(sku ?? "").trim();
  return !s || /^u:/i.test(s);
}
/** /**
@ -79,34 +78,34 @@ function needsSkuDetail(sku) {
* - else => u:<fnv(store|url)> (old recipe) * - else => u:<fnv(store|url)> (old recipe)
*/ */
// Resolution order: a six-digit CSPC anywhere in the value; an explicit
// "upc:" tag; an explicit "id:" tag (ids of up to six digits are zero-padded
// into a CSPC); an existing "u:" key; otherwise a synthetic key hashed from
// the store label and URL. Returns "" when nothing can be produced.
function normalizeSkuKey(v, { storeLabel, url } = {}) {
  const raw = String(v ?? "").trim();
  const cspc = normalizeCspc(raw);
  if (cspc) return cspc;

  // NEW: only if explicitly tagged, so legacy behavior doesn't change
  if (/^upc:/i.test(raw)) {
    const upc = normalizeUpcDigits(raw);
    return upc ? `upc:${upc}` : "";
  }
  if (/^id:/i.test(raw)) {
    const id = normalizeIdDigits(raw);
    if (!id) return "";
    const padded = idToCspc6(id);
    return padded ? padded : `id:${id}`;
  }

  if (raw.startsWith("u:")) return raw;

  const syn = makeSyntheticSkuKey({ storeLabel, url });
  return syn || "";
}
module.exports = { module.exports = {
normalizeCspc, normalizeCspc,
normalizeSkuKey, normalizeSkuKey,
makeSyntheticSkuKey, makeSyntheticSkuKey,
skuQuality, skuQuality,
pickBetterSku, pickBetterSku,
needsSkuDetail, needsSkuDetail,
}; };

View file

@ -6,203 +6,200 @@ const path = require("path");
/* ---------------- Union-Find (undirected grouping) ---------------- */ /* ---------------- Union-Find (undirected grouping) ---------------- */
/**
 * Disjoint-set (union-find) over string keys, with path compression and
 * union by rank. Keys are trimmed; empty keys are ignored.
 */
class DSU {
  constructor() {
    this.parent = new Map(); // key -> parent key (a root maps to itself)
    this.rank = new Map(); // key -> rank, used to keep trees shallow
  }

  /** Register `x` as its own singleton set if it has not been seen. */
  _add(x) {
    if (!this.parent.has(x)) {
      this.parent.set(x, x);
      this.rank.set(x, 0);
    }
  }

  /**
   * Return the representative of the set containing `x`, registering `x`
   * first if needed.
   *
   * @param {*} x - Key; coerced to a trimmed string.
   * @returns {string} Root key, or "" for an empty key.
   */
  find(x) {
    const key = String(x || "").trim();
    if (!key) return "";
    this._add(key);

    // Walk up to the root...
    let root = key;
    while (this.parent.get(root) !== root) root = this.parent.get(root);

    // ...then point every node on the path straight at it.
    let cur = key;
    while (cur !== root) {
      const up = this.parent.get(cur);
      this.parent.set(cur, root);
      cur = up;
    }
    return root;
  }

  /**
   * Merge the sets containing `a` and `b`. Does nothing when either key is
   * empty or both are already in the same set.
   */
  union(a, b) {
    a = String(a || "").trim();
    b = String(b || "").trim();
    if (!a || !b || a === b) return;

    const ra = this.find(a);
    const rb = this.find(b);
    if (!ra || !rb || ra === rb) return;

    const rka = this.rank.get(ra) || 0;
    const rkb = this.rank.get(rb) || 0;

    // Attach the lower-rank root under the higher one; on a tie `ra` wins.
    if (rka < rkb) this.parent.set(ra, rb);
    else if (rkb < rka) this.parent.set(rb, ra);
    else {
      this.parent.set(rb, ra);
      this.rank.set(ra, rka + 1);
    }
  }
}
/**
 * @param {*} k - SKU key; falsy values are treated as "".
 * @returns {boolean} True when the key starts with "u:" (case-sensitive).
 */
function isUnknownSkuKey(k) {
  return String(k || "").startsWith("u:");
}
/**
 * @param {*} k - SKU key; falsy values are treated as "".
 * @returns {boolean} True when the trimmed key consists of digits only.
 */
function isNumericSku(k) {
  return /^\d+$/.test(String(k || "").trim());
}
/**
 * @param {*} k - SKU key; falsy values are treated as "".
 * @returns {boolean} True when the trimmed key starts with "upc:" or is a
 *   bare run of 12 to 14 digits.
 */
function isUpcSku(k) {
  const s = String(k || "").trim();
  if (s.startsWith("upc:")) return true;
  return /^\d{12,14}$/.test(s); // keep legacy support
}
/**
 * Comparator that orders SKU keys so the preferred canonical key sorts first.
 *
 * Ordering, highest priority first:
 *   1. non-"u:" keys before "u:" keys;
 *   2. non-UPC keys before UPC keys;
 *   3. when both keys are all digits, by numeric value;
 *   4. otherwise by plain string comparison.
 *
 * @param {*} a - First key; coerced to a trimmed string.
 * @param {*} b - Second key; coerced to a trimmed string.
 * @returns {number} -1, 0 or 1.
 */
function compareSku(a, b) {
    a = String(a || "").trim();
    b = String(b || "").trim();
    if (a === b) return 0;

    const au = isUnknownSkuKey(a);
    const bu = isUnknownSkuKey(b);
    if (au !== bu) return au ? 1 : -1; // real first

    const aUpc = isUpcSku(a);
    const bUpc = isUpcSku(b);
    if (aUpc !== bUpc) return aUpc ? 1 : -1; // UPCs after other "real" keys

    if (isNumericSku(a) && isNumericSku(b)) {
        const na = Number(a);
        const nb = Number(b);
        // Equal numeric values (e.g. "007" vs "7") fall through to the string comparison.
        if (Number.isFinite(na) && Number.isFinite(nb) && na !== nb) return na < nb ? -1 : 1;
    }

    return a < b ? -1 : 1;
}
/* ---------------- File discovery ---------------- */ /* ---------------- File discovery ---------------- */
/**
 * Reads and parses a JSON file, treating any failure as "no data".
 * @param {string} file - Path of the file to read.
 * @returns {*} Parsed JSON value, or null when the file cannot be read or parsed.
 */
function tryReadJson(file) {
    try {
        const txt = fs.readFileSync(file, "utf8");
        return JSON.parse(txt);
    } catch {
        // Best-effort by design: a missing or corrupt file yields null.
        return null;
    }
}
/**
 * Lists the default locations to probe for sku_links.json, in priority order.
 * @param {string} [dbDir] - DB directory; when given, its parent directory is probed first.
 * @returns {string[]} Candidate file paths.
 */
function defaultSkuLinksCandidates(dbDir) {
    const out = [];

    // 1) next to db dir: <dbDir>/../sku_links.json (common when dbDir is .../data/db)
    if (dbDir) {
        out.push(path.join(dbDir, "..", "sku_links.json"));
    }

    // 2) repo root conventional location
    out.push(path.join(process.cwd(), "data", "sku_links.json"));

    // 3) common worktree location
    out.push(path.join(process.cwd(), ".worktrees", "data", "data", "sku_links.json"));

    return out;
}
/**
 * Resolves which sku_links.json file to use.
 *
 * Precedence: the SPIRIT_TRACKER_SKU_LINKS environment variable, then the
 * explicit `mappingFile`, then the first default candidate that exists on disk.
 *
 * @param {{dbDir?: string, mappingFile?: string}} [options]
 * @returns {string} Chosen path, or "" when nothing is configured or found.
 */
function findSkuLinksFile({ dbDir, mappingFile } = {}) {
    // env override
    const env = String(process.env.SPIRIT_TRACKER_SKU_LINKS || "").trim();
    if (env) return env;

    if (mappingFile) return mappingFile;

    for (const f of defaultSkuLinksCandidates(dbDir)) {
        if (!f) continue;
        try {
            if (fs.existsSync(f)) return f;
        } catch {
            // ignore: an unreadable candidate just means we try the next one
        }
    }

    return "";
}
/**
 * Normalizes a SKU key: "id:<1-6 digits>" (case-insensitive) becomes the
 * digits left-padded with zeros to six characters; anything else is only trimmed.
 * @param {*} k - Raw key; nullish/falsy values are treated as "".
 * @returns {string} Normalized key.
 */
function normalizeImplicitSkuKey(k) {
    const s = String(k || "").trim();
    const m = s.match(/^id:(\d{1,6})$/i);
    if (m) return String(m[1]).padStart(6, "0");
    return s;
}
/* ---------------- Public API ---------------- */ /* ---------------- Public API ---------------- */
/**
 * Builds a canonical-SKU resolver from a list of SKU link records.
 *
 * Each `{ fromSku, toSku }` pair is treated as an undirected edge. All keys in
 * one connected group resolve to the same representative: the member that
 * sorts first under `compareSku`.
 *
 * @param {Array<{fromSku?: *, toSku?: *}>} links - Link records; a non-array is treated as empty.
 * @returns {{canonicalSku: function(*): string, _canonBySku: Map<string, string>}}
 *   `canonicalSku` maps a key to its representative (or to its normalized self
 *   when unlinked); `_canonBySku` is the underlying lookup table.
 */
function buildSkuMapFromLinksArray(links) {
    const dsu = new DSU();
    const all = new Set();

    for (const x of Array.isArray(links) ? links : []) {
        const a = normalizeImplicitSkuKey(x?.fromSku);
        const b = normalizeImplicitSkuKey(x?.toSku);
        if (!a || !b) continue;

        all.add(a);
        all.add(b);

        // undirected union => hardened vs A->B->C and cycles
        dsu.union(a, b);
    }

    // root -> Set(members)
    const byRoot = new Map();
    for (const s of all) {
        const r = dsu.find(s);
        if (!r) continue;
        let set = byRoot.get(r);
        if (!set) byRoot.set(r, (set = new Set()));
        set.add(s);
    }

    // sku -> canonical rep (the member of each group that sorts first)
    const canonBySku = new Map();
    for (const [root, members] of byRoot.entries()) {
        const rep = Array.from(members).sort(compareSku)[0] || root;
        for (const s of members) canonBySku.set(s, rep);
    }

    function canonicalSku(sku) {
        const s = normalizeImplicitSkuKey(sku);
        if (!s) return s;
        return canonBySku.get(s) || s;
    }

    return { canonicalSku, _canonBySku: canonBySku };
}
/**
 * Locates the SKU links file and builds the canonical-SKU resolver from it.
 * A missing, unreadable, or malformed file yields a resolver with no links.
 *
 * @param {{dbDir?: string, mappingFile?: string}} [options] - Forwarded to `findSkuLinksFile`.
 * @returns {{canonicalSku: function(*): string, _canonBySku: Map<string, string>}}
 */
function loadSkuMap({ dbDir, mappingFile } = {}) {
    const file = findSkuLinksFile({ dbDir, mappingFile });
    if (!file) {
        return buildSkuMapFromLinksArray([]);
    }

    const obj = tryReadJson(file);
    const links = Array.isArray(obj?.links) ? obj.links : [];
    return buildSkuMapFromLinksArray(links);
}
// Public API of the SKU map module.
module.exports = { loadSkuMap };

View file

@ -1,29 +1,29 @@
"use strict"; "use strict";
/**
 * Pads a value with trailing spaces to at least `n` characters.
 * @param {*} s - Value to pad; coerced to a string.
 * @param {number} n - Minimum width in UTF-16 code units.
 * @returns {string} The padded string; unchanged when already `n` or longer.
 */
function padRight(s, n) {
    return String(s).padEnd(n, " ");
}
/**
 * Pads a value with leading spaces to at least `n` characters.
 * @param {*} s - Value to pad; coerced to a string.
 * @param {number} n - Minimum width in UTF-16 code units.
 * @returns {string} The padded string; unchanged when already `n` or longer.
 */
function padLeft(s, n) {
    return String(s).padStart(n, " ");
}
/**
 * Removes ANSI colour/style escape sequences of the form ESC [ ... m.
 * Other escape sequences are left in place.
 * @param {*} s - Value to clean; coerced to a string.
 * @returns {string}
 */
function stripAnsi(s) {
    return String(s).replace(/\x1b\[[0-9;]*m/g, "");
}
/**
 * Pads with trailing spaces to a visible width of `n`, where the visible
 * width ignores the ANSI sequences removed by `stripAnsi`.
 * @param {*} s - Value to pad; coerced to a string.
 * @param {number} n - Minimum visible width.
 * @returns {string} The original string (escape codes intact) plus padding.
 */
function padRightV(s, n) {
    s = String(s);
    const w = stripAnsi(s).length;
    return w >= n ? s : s + " ".repeat(n - w);
}
/**
 * Pads with leading spaces to a visible width of `n`, where the visible
 * width ignores the ANSI sequences removed by `stripAnsi`.
 * @param {*} s - Value to pad; coerced to a string.
 * @param {number} n - Minimum visible width.
 * @returns {string} Padding plus the original string (escape codes intact).
 */
function padLeftV(s, n) {
    s = String(s);
    const w = stripAnsi(s).length;
    return w >= n ? s : " ".repeat(n - w) + s;
}
// Public API of the string padding helpers.
module.exports = { padRight, padLeft, stripAnsi, padRightV, padLeftV };

View file

@ -3,11 +3,11 @@
const { cleanText, decodeHtml } = require("./html"); const { cleanText, decodeHtml } = require("./html");
/**
 * Produces a cleaned product name: HTML entities decoded and text cleaned via
 * the ./html helpers, quote characters removed, anything other than letters,
 * digits, whitespace and `- & ( ) . , /` removed, and whitespace collapsed.
 * @param {*} s - Raw name; nullish/falsy values are treated as "".
 * @returns {string}
 */
function sanitizeName(s) {
    return cleanText(decodeHtml(String(s || "")))
        .replace(/['"’“”`´]/g, "")
        .replace(/[^\p{L}\p{N}\s\-&().,/]/gu, "")
        .replace(/\s+/g, " ")
        .trim();
}
// Public API of the name sanitizer module.
module.exports = { sanitizeName };

View file

@ -1,16 +1,19 @@
"use strict"; "use strict";
/**
 * Formats a date's local time of day as HH:MM:SS.mmm.
 * @param {Date} [d] - Date to format; defaults to the current time.
 * @returns {string}
 */
function ts(d = new Date()) {
    const h = String(d.getHours()).padStart(2, "0");
    const m = String(d.getMinutes()).padStart(2, "0");
    const s = String(d.getSeconds()).padStart(2, "0");
    const ms = String(d.getMilliseconds()).padStart(3, "0");
    return `${h}:${m}:${s}.${ms}`;
}
/**
 * Formats a date as a UTC ISO-8601 timestamp that is safe to use in file
 * names: colons become dashes and milliseconds are dropped.
 * @param {Date} [d] - Date to format; defaults to the current time.
 * @returns {string} For example "2026-01-16T21-27-01Z".
 */
function isoTimestampFileSafe(d = new Date()) {
    // 2026-01-16T21-27-01Z
    return d
        .toISOString()
        .replace(/:/g, "-")
        .replace(/\.\d{3}Z$/, "Z");
}
// Public API of the timestamp helpers.
module.exports = { ts, isoTimestampFileSafe };

View file

@ -1,50 +1,56 @@
"use strict"; "use strict";
/**
 * Normalizes a listing URL to its first-page form: drops the fragment and the
 * `page` query parameter, ensures a trailing slash on the path, and strips a
 * trailing `/page/<n>/` path segment.
 * @param {string} startUrl - URL to normalize.
 * @returns {string} Normalized URL, or `startUrl` unchanged when it cannot be parsed.
 */
function normalizeBaseUrl(startUrl) {
    try {
        const u = new URL(startUrl);
        u.hash = "";
        if (u.searchParams && u.searchParams.has("page")) u.searchParams.delete("page");
        u.search = u.searchParams.toString() ? `?${u.searchParams.toString()}` : "";

        // Add the trailing slash first so the /page/<n>/ pattern below always matches.
        if (!u.pathname.endsWith("/")) u.pathname += "/";
        u.pathname = u.pathname.replace(/\/page\/\d+\/?$/, "/");

        return u.toString();
    } catch {
        // Unparseable input is passed through untouched.
        return startUrl;
    }
}
/**
 * Builds the URL of page `pageNum` using path-style pagination (`.../page/<n>/`).
 * Page 1 (or lower) is the normalized base URL itself.
 * @param {string} baseUrl - Listing URL; must be parseable when `pageNum > 1`.
 * @param {number} pageNum - 1-based page number.
 * @returns {string}
 * @throws {TypeError} When `pageNum > 1` and `baseUrl` is not a valid URL.
 */
function makePageUrl(baseUrl, pageNum) {
    if (pageNum <= 1) return normalizeBaseUrl(baseUrl);

    const u = new URL(baseUrl);
    if (!u.pathname.endsWith("/")) u.pathname += "/";
    // Drop any existing /page/<n>/ suffix before appending the requested one.
    u.pathname = u.pathname.replace(/\/page\/\d+\/?$/, "/");
    u.pathname = u.pathname + `page/${pageNum}/`;
    u.hash = "";
    return u.toString();
}
/**
 * Builds a page URL using the store-specific `ctx.store.makePageUrl` when it
 * is a function, falling back to the default `makePageUrl`.
 * @param {{store?: {makePageUrl?: function(string, number): string}}} ctx - Scrape context.
 * @param {string} baseUrl - Listing URL.
 * @param {number} pageNum - 1-based page number.
 * @returns {string}
 */
function makePageUrlForCtx(ctx, baseUrl, pageNum) {
    const fn = ctx?.store?.makePageUrl;
    return typeof fn === "function" ? fn(baseUrl, pageNum) : makePageUrl(baseUrl, pageNum);
}
/**
 * Builds a page URL for stores that paginate through a query parameter.
 * The parameter is always set; page numbers of 1 or lower are written as "1".
 * @param {string} baseUrl - Listing URL; normalized first via `normalizeBaseUrl`.
 * @param {string} paramName - Name of the pagination query parameter.
 * @param {number} pageNum - 1-based page number.
 * @returns {string}
 */
function makePageUrlQueryParam(baseUrl, paramName, pageNum) {
    const u = new URL(normalizeBaseUrl(baseUrl));
    u.hash = "";
    if (pageNum <= 1) u.searchParams.set(paramName, "1");
    else u.searchParams.set(paramName, String(pageNum));
    u.search = `?${u.searchParams.toString()}`;
    return u.toString();
}
/**
 * Builds a page URL using a `?page=<n>` query parameter, with the page number
 * clamped to a minimum of 1.
 * @param {string} baseUrl - Listing URL; normalized first via `normalizeBaseUrl`.
 * @param {number} pageNum - 1-based page number.
 * @returns {string}
 */
function makePageUrlShopifyQueryPage(baseUrl, pageNum) {
    const u = new URL(normalizeBaseUrl(baseUrl));
    u.hash = "";
    u.searchParams.set("page", String(Math.max(1, pageNum)));
    u.search = `?${u.searchParams.toString()}`;
    return u.toString();
}
module.exports = { normalizeBaseUrl, makePageUrl, makePageUrlForCtx, makePageUrlQueryParam, makePageUrlShopifyQueryPage }; module.exports = {
normalizeBaseUrl,
makePageUrl,
makePageUrlForCtx,
makePageUrlQueryParam,
makePageUrlShopifyQueryPage,
};

View file

@ -8,50 +8,50 @@ const { decodeHtml, stripTags, cleanText } = require("./html");
* - Else uses the normal price bdi/span content. * - Else uses the normal price bdi/span content.
*/ */
function extractPriceFromTmbBlock(block) {
    const span = matchFirstPriceSpan(block);
    if (!span) return "";

    // Prefer the last <ins> element when present; otherwise use the whole price span.
    const insMatches = [...span.matchAll(/<ins\b[^>]*>([\s\S]*?)<\/ins>/gi)];
    const scope = insMatches.length ? insMatches[insMatches.length - 1][1] : span;

    // First choice: the text of the last <bdi> element, with a "$" prefix ensured.
    const bdis = [...scope.matchAll(/<bdi\b[^>]*>([\s\S]*?)<\/bdi>/gi)];
    if (bdis.length) {
        const raw = cleanText(decodeHtml(stripTags(bdis[bdis.length - 1][1]))).replace(/\s+/g, "");
        if (raw) return raw.startsWith("$") ? raw : `$${raw}`;
    }

    // Fallback: currency symbol element plus the first number in the cleaned text.
    const sym = scope.match(/woocommerce-Price-currencySymbol[^>]*>\s*([^<\s]+)/i);
    const text = cleanText(decodeHtml(stripTags(scope)));
    const num = text.match(/(\d+(?:\.\d{2})?)/);
    if (sym && num) return `${sym[1].trim()}${num[1]}`;

    // Last resort: a literal "$<number>" anywhere in the cleaned text.
    const m = text.match(/\$\s*\d+(?:\.\d{2})?/);
    return m ? m[0].replace(/\s+/g, "") : "";
}
/**
 * Returns the inner HTML of the first <span> whose class attribute contains
 * the word "price", correctly skipping over nested <span> elements.
 * @param {string} html - HTML fragment to search.
 * @returns {string} Inner HTML of the span, or "" when no such span exists or
 *   its closing tag is missing.
 */
function matchFirstPriceSpan(html) {
    const re = /<span\b[^>]*class=["'][^"']*\bprice\b[^"']*["'][^>]*>/i;
    const m = re.exec(html);
    if (!m) return "";

    const start = m.index + m[0].length;
    let i = start;
    let depth = 1; // number of currently open <span> elements, including the price span

    while (i < html.length) {
        const nextOpen = html.indexOf("<span", i);
        const nextClose = html.indexOf("</span>", i);
        if (nextClose === -1) break;

        if (nextOpen !== -1 && nextOpen < nextClose) {
            depth++;
            i = nextOpen + 5; // skip past "<span"
            continue;
        }

        depth--;
        if (depth === 0) return html.slice(start, nextClose);
        i = nextClose + 7; // skip past "</span>"
    }

    return "";
}
// Public API of the price extraction module.
module.exports = { extractPriceFromTmbBlock };

View file

@ -22,320 +22,310 @@ const path = require("path");
/* ---------------- helpers ---------------- */ /* ---------------- helpers ---------------- */
/**
 * Creates a directory, including missing parents; a no-op when it already exists.
 * @param {string} dir - Directory path.
 */
function ensureDir(dir) {
    fs.mkdirSync(dir, { recursive: true });
}
/**
 * Reads and parses a JSON file, treating any failure as "no data".
 * @param {string} p - Path of the file to read.
 * @returns {*} Parsed JSON value, or null when the file cannot be read or parsed.
 */
function readJson(p) {
    try {
        return JSON.parse(fs.readFileSync(p, "utf8"));
    } catch {
        // Best-effort by design: callers skip files that yield null.
        return null;
    }
}
/**
 * Lists the .json files directly inside <cwd>/data/db (no recursion).
 * @returns {string[]} Absolute file paths; empty when the directory cannot be read.
 */
function listDbFiles() {
    const dir = path.join(process.cwd(), "data", "db");
    try {
        return fs
            .readdirSync(dir, { withFileTypes: true })
            .filter((e) => e.isFile() && e.name.endsWith(".json"))
            .map((e) => path.join(dir, e.name));
    } catch {
        // A missing or unreadable directory is reported as "no files".
        return [];
    }
}
/**
 * Parses a price string such as "$1,299.00" into a number.
 * @param {*} v - Raw price; everything except digits and "." is discarded.
 * @returns {number|null} The numeric price, or null when the input holds no
 *   digits or does not form a valid number (e.g. "", "n/a", "1.2.3").
 */
function priceToNumber(v) {
    const s = String(v ?? "").replace(/[^0-9.]/g, "");
    // Number("") is 0, so an absent price must be rejected explicitly;
    // otherwise it would be reported as a $0 listing.
    if (!s) return null;
    const n = Number(s);
    return Number.isFinite(n) ? n : null;
}
/**
 * Tells whether the text contains a standalone run of exactly six digits.
 * @param {*} s - Raw SKU text; nullish/falsy values are treated as "".
 * @returns {boolean}
 */
function hasRealSku6(s) {
    return /\b\d{6}\b/.test(String(s || ""));
}
/**
 * Tells whether a SKU key carries the "u:" prefix.
 * @param {*} k - SKU key; nullish/falsy values are treated as "".
 * @returns {boolean}
 */
function isSyntheticSkuKey(k) {
    return String(k || "").startsWith("u:");
}
/**
 * Derives the lower-cased store key from a DB file path.
 * For names shaped like "<store>__<rest>.json" the key is "<store>";
 * otherwise it is the file name without its ".json" extension.
 * @param {string} abs - Path to a DB file.
 * @returns {string}
 */
function storeKeyFromDbPath(abs) {
    const base = path.basename(abs);
    const m = base.match(/^([^_]+)__.+\.json$/i);
    const k = m ? m[1] : base.replace(/\.json$/i, "");
    return String(k || "").toLowerCase();
}
/* ---------------- sku helpers ---------------- */ /* ---------------- sku helpers ---------------- */
/**
 * Loads the SKU map from <cwd>/src/utils/sku_map, pointed at <cwd>/data/db.
 * @returns {object|null} The value returned by `loadSkuMap`, or null when the
 *   module cannot be loaded or loading throws.
 */
function loadSkuMapOrNull() {
    try {
        // eslint-disable-next-line node/no-missing-require
        const { loadSkuMap } = require(path.join(process.cwd(), "src/utils/sku_map"));
        return loadSkuMap({ dbDir: path.join(process.cwd(), "data/db") });
    } catch {
        // Optional dependency: the script continues without a SKU map.
        return null;
    }
}
/**
 * Normalizes a raw SKU into a key using <cwd>/src/utils/sku when available.
 *
 * When that module cannot be loaded (or throws), a local fallback is used:
 * the first standalone six-digit run in `skuRaw`, else "u:<storeLabel>:<url>"
 * when a URL is present, else "".
 *
 * @param {{skuRaw: *, storeLabel: string, url: string}} input
 * @returns {string} The SKU key, or "" when none can be derived.
 */
function normalizeSkuKeyOrEmpty({ skuRaw, storeLabel, url }) {
    try {
        // eslint-disable-next-line node/no-missing-require
        const { normalizeSkuKey } = require(path.join(process.cwd(), "src/utils/sku"));
        const k = normalizeSkuKey(skuRaw, { storeLabel, url });
        return k ? String(k) : "";
    } catch {
        const m = String(skuRaw ?? "").match(/\b(\d{6})\b/);
        if (m) return m[1];
        if (url) return `u:${storeLabel}:${url}`;
        return "";
    }
}
/**
 * Maps a SKU key to its canonical form through `skuMap.canonicalSku`.
 * @param {string} k - SKU key.
 * @param {{canonicalSku?: function(string): string}|null} skuMap - Optional SKU map.
 * @returns {string} "" for an empty key; the canonical key when the map
 *   provides one; otherwise `k` unchanged.
 */
function canonicalize(k, skuMap) {
    if (!k) return "";
    if (skuMap && typeof skuMap.canonicalSku === "function") {
        return String(skuMap.canonicalSku(k) || k);
    }
    return k;
}
/* ---------------- grouping ---------------- */ /* ---------------- grouping ---------------- */
// Store keys that belong to the "bc" group; every other store key counts as "ab".
const BC_STORE_KEYS = new Set(["gull", "strath", "bcl", "legacy", "legacyliquor", "tudor", "vessel", "vintage", "arc"]);
/**
 * Decides whether a store belongs to the requested group.
 * @param {string} group - "bc", "ab", or anything else (treated as "all").
 * @param {string} storeKey - Store key; compared case-insensitively.
 * @returns {boolean} For "bc": the key is in BC_STORE_KEYS; for "ab": it is
 *   not; otherwise always true.
 */
function groupAllowsStore(group, storeKey) {
    const k = String(storeKey || "").toLowerCase();
    if (group === "bc") return BC_STORE_KEYS.has(k);
    if (group === "ab") return !BC_STORE_KEYS.has(k);
    return true; // all
}
/* ---------------- args ---------------- */ /* ---------------- args ---------------- */
/**
 * Parses the command-line options of this script.
 *
 * Recognized flags: `--top <n>`, `--min-stores <n>`, `--require-all`,
 * `--group <all|bc|ab>`, `--out <path>`. Unknown flags are ignored.
 * Numeric options must be positive; invalid values fall back to the default
 * and fractional values are rounded down.
 *
 * @param {string[]} argv - Arguments, without the node and script entries.
 * @returns {{top: number, minStores: number, requireAll: boolean, group: string, out: string}}
 */
function parseArgs(argv) {
    // Accept only positive whole numbers; a negative --top would otherwise make
    // slice(0, top) silently drop rows from the end of the result.
    const toPositiveInt = (raw, fallback) => {
        const n = Math.floor(Number(raw));
        return Number.isFinite(n) && n > 0 ? n : fallback;
    };

    const out = { top: 50, minStores: 2, requireAll: false, group: "all", out: "" };
    for (let i = 0; i < argv.length; i++) {
        const a = argv[i];
        if (a === "--top" && argv[i + 1]) out.top = toPositiveInt(argv[++i], 50);
        else if (a === "--min-stores" && argv[i + 1]) out.minStores = toPositiveInt(argv[++i], 2);
        else if (a === "--require-all") out.requireAll = true;
        else if (a === "--group" && argv[i + 1]) out.group = String(argv[++i] || "all").toLowerCase();
        else if (a === "--out" && argv[i + 1]) out.out = String(argv[++i] || "");
    }
    if (out.group !== "all" && out.group !== "bc" && out.group !== "ab") out.group = "all";
    return out;
}
/* ---------------- main ---------------- */ /* ---------------- main ---------------- */
/**
 * Script entry point: scans the DB files under <cwd>/data/db, groups live
 * listings by canonical SKU, and writes a JSON report of the SKUs that appear
 * in the most stores (default output: <cwd>/reports/common_listings.json).
 *
 * Sets `process.exitCode = 2` and returns early when no DB files are found.
 */
function main() {
    const args = parseArgs(process.argv.slice(2));
    const repoRoot = process.cwd();
    const reportsDir = path.join(repoRoot, "reports");
    ensureDir(reportsDir);

    const outPath = args.out ? path.join(repoRoot, args.out) : path.join(reportsDir, "common_listings.json");
    ensureDir(path.dirname(outPath));

    const dbFiles = listDbFiles();
    if (!dbFiles.length) {
        console.error("No DB files found");
        process.exitCode = 2;
        return;
    }

    const skuMap = loadSkuMapOrNull();
    console.log(`[debug] skuMap: ${skuMap ? "loaded" : "missing"}`);
    console.log(`[debug] scanning ${dbFiles.length} db files`);

    const storeToCanon = new Map(); // storeKey -> Set(canonSku)
    const canonAgg = new Map(); // canonSku -> { stores:Set, listings:[], cheapest, storeMin:Map }

    let liveRows = 0;
    let removedRows = 0;

    for (const abs of dbFiles.sort()) {
        const obj = readJson(abs);
        if (!obj) continue;

        const storeLabel = String(obj.storeLabel || obj.store || "").trim();
        if (!storeLabel) continue;

        const storeKey = storeKeyFromDbPath(abs);
        if (!groupAllowsStore(args.group, storeKey)) continue;

        if (!storeToCanon.has(storeKey)) {
            storeToCanon.set(storeKey, new Set());
        }

        const rel = path.relative(repoRoot, abs).replace(/\\/g, "/");
        const items = Array.isArray(obj.items) ? obj.items : [];

        console.log(`[debug] ${rel} storeKey="${storeKey}" storeLabel="${storeLabel}" items=${items.length}`);

        for (const it of items) {
            if (!it) continue;
            if (it.removed) {
                removedRows++;
                continue;
            }
            liveRows++;

            const skuKey = normalizeSkuKeyOrEmpty({
                skuRaw: it.sku,
                storeLabel,
                url: it.url,
            });
            if (!skuKey) continue;

            const canonSku = canonicalize(skuKey, skuMap);
            if (!canonSku) continue;

            storeToCanon.get(storeKey).add(canonSku);

            let agg = canonAgg.get(canonSku);
            if (!agg) {
                agg = { stores: new Set(), listings: [], cheapest: null, storeMin: new Map() };
                canonAgg.set(canonSku, agg);
            }

            agg.stores.add(storeKey);

            // Track the lowest live price seen for this SKU in this store.
            const priceNum = priceToNumber(it.price);
            if (priceNum !== null) {
                const prev = agg.storeMin.get(storeKey);
                if (prev === undefined || priceNum < prev) agg.storeMin.set(storeKey, priceNum);
            }

            const listing = {
                canonSku,
                skuKey,
                skuRaw: String(it.sku || ""),
                name: String(it.name || ""),
                price: String(it.price || ""),
                priceNum,
                url: String(it.url || ""),
                storeKey,
                storeLabel,
                categoryLabel: String(obj.categoryLabel || obj.category || ""),
                dbFile: rel,
                hasRealSku6: hasRealSku6(it.sku) && !isSyntheticSkuKey(skuKey),
            };

            agg.listings.push(listing);

            if (priceNum !== null) {
                if (!agg.cheapest || priceNum < agg.cheapest.priceNum) {
                    agg.cheapest = { priceNum, item: listing };
                }
            }
        }
    }

    const stores = [...storeToCanon.keys()].sort();
    const storeCount = stores.length;

    console.log(`[debug] group="${args.group}" stores(${storeCount}): ${stores.join(", ")}`);
    console.log(`[debug] liveRows=${liveRows} removedRows=${removedRows} canonSkus=${canonAgg.size}`);

    // Representative listing: cheapest one with a real six-digit SKU, else the
    // overall cheapest, else simply the first listing seen.
    function pickRepresentative(agg) {
        const preferred = agg.listings
            .filter((l) => l.hasRealSku6)
            .sort((a, b) => (a.priceNum ?? Infinity) - (b.priceNum ?? Infinity));

        if (preferred.length) return preferred[0];
        if (agg.cheapest) return agg.cheapest.item;
        return agg.listings[0] || null;
    }

    const rows = [];

    for (const [canonSku, agg] of canonAgg.entries()) {
        const rep = pickRepresentative(agg);
        const missingStores = stores.filter((s) => !agg.stores.has(s));

        const storePrices = {};
        for (const s of stores) {
            const p = agg.storeMin.get(s);
            if (Number.isFinite(p)) storePrices[s] = p;
        }

        rows.push({
            canonSku,
            storeCount: agg.stores.size,
            stores: [...agg.stores].sort(),
            missingStores,
            storePrices, // { [storeKey]: number } min live price per store
            representative: rep
                ? {
                      name: rep.name,
                      price: rep.price,
                      priceNum: rep.priceNum,
                      storeKey: rep.storeKey,
                      storeLabel: rep.storeLabel,
                      skuRaw: rep.skuRaw,
                      skuKey: rep.skuKey,
                      url: rep.url,
                      categoryLabel: rep.categoryLabel,
                      dbFile: rep.dbFile,
                  }
                : null,
            cheapest: agg.cheapest
                ? {
                      price: agg.cheapest.item.price,
                      priceNum: agg.cheapest.priceNum,
                      storeKey: agg.cheapest.item.storeKey,
                      url: agg.cheapest.item.url,
                  }
                : null,
        });
    }

    // Stable-ish sort: storeCount desc, then canonSku asc (stable diffs over time)
    rows.sort((a, b) => {
        if (b.storeCount !== a.storeCount) return b.storeCount - a.storeCount;
        return String(a.canonSku).localeCompare(String(b.canonSku));
    });

    const filtered = args.requireAll
        ? rows.filter((r) => r.storeCount === storeCount)
        : rows.filter((r) => r.storeCount >= args.minStores);

    const top = filtered.slice(0, args.top);

    const payload = {
        generatedAt: new Date().toISOString(),
        args: {
            top: args.top,
            minStores: args.minStores,
            requireAll: args.requireAll,
            group: args.group,
            out: path.relative(repoRoot, outPath).replace(/\\/g, "/"),
        },
        storeCount,
        stores,
        totals: {
            liveRows,
            removedRows,
            canonSkus: canonAgg.size,
            outputCount: top.length,
        },
        rows: top,
    };

    fs.writeFileSync(outPath, JSON.stringify(payload, null, 2) + "\n", "utf8");
    console.log(`Wrote ${path.relative(repoRoot, outPath)} (${top.length} rows)`);
}
// Run the report exactly once.
main();

View file

@ -30,278 +30,278 @@ const fs = require("fs");
const path = require("path"); const path = require("path");
/**
 * Runs `git` with the given arguments and returns its standard output.
 * @param {string[]} args - Arguments passed to git.
 * @returns {string} Standard output with trailing whitespace removed.
 * @throws {Error} When git cannot be spawned or exits with a non-zero status.
 */
function runGit(args) {
    return execFileSync("git", args, { encoding: "utf8" }).trimEnd();
}
/**
 * Reads a file as it exists at a given commit (`git show <sha>:<filePath>`)
 * and parses it as JSON.
 * @param {string} sha - Commit-ish to read from.
 * @param {string} filePath - Path of the file as passed to git.
 * @returns {*} Parsed JSON value, or null when git fails or the content is not valid JSON.
 */
function gitShowJson(sha, filePath) {
    try {
        const txt = execFileSync("git", ["show", `${sha}:${filePath}`], {
            encoding: "utf8",
            // Capture stderr instead of inheriting it so git errors are not printed.
            stdio: ["ignore", "pipe", "pipe"],
        });
        return JSON.parse(txt);
    } catch {
        return null;
    }
}
/**
 * Check whether `filePath` exists in the tree of commit `sha`, using
 * `git cat-file -e`.
 *
 * @param {string} sha - Commit to inspect; an empty value yields false.
 * @param {string} filePath - Repository-relative path of the file.
 * @returns {boolean} True when git reports the object exists; false when the
 *   sha is empty or the git command fails for any reason.
 */
function gitFileExistsAtSha(sha, filePath) {
  if (!sha) return false;
  try {
    // All stdio is discarded: only the exit status matters here.
    execFileSync("git", ["cat-file", "-e", `${sha}:${filePath}`], {
      stdio: ["ignore", "ignore", "ignore"],
    });
    return true;
  } catch {
    return false;
  }
}
/**
 * Read a UTF-8 file from disk and parse it as JSON.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {*} The parsed JSON value, or null when the file cannot be read or
 *   does not contain valid JSON.
 */
function readJson(filePath) {
  try {
    return JSON.parse(fs.readFileSync(filePath, "utf8"));
  } catch {
    // Best effort: unreadable and malformed files are both reported as null.
    return null;
  }
}
/**
 * Create `dir` and any missing parent directories.
 *
 * @param {string} dir - Directory path to create.
 */
function ensureDir(dir) {
  fs.mkdirSync(dir, { recursive: true });
}
/**
 * Convert a displayed price (e.g. "$1,299.99") to a number.
 *
 * Every character other than digits and "." is stripped before conversion.
 *
 * @param {*} v - Raw price value; null/undefined are treated as empty.
 * @returns {number|null} The numeric price, or null when no usable number is
 *   present (empty value, no digits, or malformed such as "1.2.3").
 */
function priceToNumber(v) {
  const digits = String(v ?? "").replace(/[^0-9.]/g, "");
  // Number("") is 0, so without this guard a missing price would be reported
  // as a real price of 0 and slip past the callers' `=== null` checks.
  if (digits === "") return null;
  const n = Number(digits);
  return Number.isFinite(n) ? n : null;
}
/**
 * Compute the whole-number percentage discount from an old to a new price.
 *
 * @param {*} oldStr - Previous price, parsed with priceToNumber.
 * @param {*} newStr - Current price, parsed with priceToNumber.
 * @returns {number|null} Rounded percent off; 0 when the new price is not
 *   lower than the old one; null when either price cannot be parsed or the
 *   old price is not positive.
 */
function pctOff(oldStr, newStr) {
  const a = priceToNumber(oldStr);
  const b = priceToNumber(newStr);
  if (a === null || b === null) return null;
  // A non-positive base price would make the ratio meaningless (divide by 0).
  if (a <= 0) return null;
  if (b >= a) return 0;
  return Math.round(((a - b) / a) * 100);
}
/**
 * Escape a value for safe interpolation into HTML text or attribute values.
 *
 * @param {*} s - Value to escape; null/undefined become the empty string.
 * @returns {string} The value with `&`, `<`, `>`, `"` and `'` replaced by
 *   HTML entities.
 */
function htmlEscape(s) {
  return String(s ?? "")
    .replace(/&/g, "&amp;") // must run first so later entities are not re-escaped
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    // Also escape single quotes so the result stays safe inside
    // single-quoted attributes, not only double-quoted ones.
    .replace(/'/g, "&#39;");
}
/**
 * Normalize a string into a compact lowercase token.
 *
 * The value is lowercased and every character outside word characters,
 * `:`, `.`, `/` and `-` is removed. Whitespace is removed by that same
 * filter, so no separate trim or collapse step is needed.
 *
 * @param {*} s - Value to normalize; falsy values become the empty string.
 * @returns {string} The normalized token.
 */
function normToken(s) {
  return String(s || "")
    .toLowerCase()
    .replace(/[^\w:./-]+/g, "");
}
/**
 * Look up the first parent of a commit.
 *
 * `git rev-list --parents -n 1 <sha>` prints the commit followed by its
 * parents; the second whitespace-separated field is the first parent.
 *
 * @param {string} headSha - Commit whose parent is wanted.
 * @returns {string} The first parent's sha, or "" when the commit has no
 *   parent or the git command fails.
 */
function getFirstParentSha(headSha) {
  try {
    const out = runGit(["rev-list", "--parents", "-n", "1", headSha]);
    const parts = out.split(/\s+/).filter(Boolean);
    return parts.length >= 2 ? parts[1] : "";
  } catch {
    return "";
  }
}
/**
 * List the `.json` files under `data/db` that differ between two commits.
 *
 * @param {string} fromSha - Base commit of the diff.
 * @param {string} toSha - Target commit of the diff.
 * @returns {string[]} Paths as printed by `git diff --name-only`; an empty
 *   array when nothing changed or the git command fails.
 */
function listChangedDbFiles(fromSha, toSha) {
  try {
    const out = runGit(["diff", "--name-only", fromSha, toSha, "--", "data/db"]);
    return out
      .split(/\r?\n/)
      .map((s) => s.trim())
      .filter((s) => s && s.endsWith(".json"));
  } catch {
    return [];
  }
}
/**
 * List the `.json` files directly inside `<cwd>/data/db`.
 *
 * @returns {string[]} Paths of the form `data/db/<name>` (always with forward
 *   slashes), sorted so that callers whose results depend on processing
 *   order behave the same on every filesystem. Returns an empty array when
 *   the directory cannot be read.
 */
function listDbFilesOnDisk() {
  const dir = path.join(process.cwd(), "data", "db");
  try {
    return fs
      .readdirSync(dir, { withFileTypes: true })
      .filter((e) => e.isFile() && e.name.endsWith(".json"))
      .map((e) => path.posix.join("data/db", e.name))
      .sort();
  } catch {
    return [];
  }
}
// We reuse your existing canonical SKU mapping logic.
/**
 * Load the project's SKU map from `<cwd>/src/utils/sku_map`, pointing it at
 * `<cwd>/data/db`.
 *
 * @returns {*} Whatever `loadSkuMap` returns, or null when the module cannot
 *   be required or `loadSkuMap` throws.
 */
function loadSkuMapOrNull() {
  try {
    // eslint-disable-next-line node/no-missing-require
    const { loadSkuMap } = require(path.join(process.cwd(), "src", "utils", "sku_map"));
    return loadSkuMap({ dbDir: path.join(process.cwd(), "data", "db") });
  } catch {
    return null;
  }
}
/**
 * Normalize a raw SKU into a lookup key, preferring the project's
 * `normalizeSkuKey` from `<cwd>/src/utils/sku`.
 *
 * When that module cannot be loaded or throws, a local fallback is used:
 * the first standalone six-digit number in `skuRaw`, otherwise a
 * `u:<store>:<url>` key built from the normalized store label and URL.
 *
 * @param {object} params
 * @param {*} params.skuRaw - SKU as stored on the item.
 * @param {string} params.storeLabel - Store the item belongs to.
 * @param {string} params.url - Item URL, used for the fallback key.
 * @returns {string} The key, or "" when none can be derived.
 */
function normalizeSkuKeyOrEmpty({ skuRaw, storeLabel, url }) {
  try {
    // eslint-disable-next-line node/no-missing-require
    const { normalizeSkuKey } = require(path.join(process.cwd(), "src", "utils", "sku"));
    const k = normalizeSkuKey(skuRaw, { storeLabel, url });
    return k ? String(k) : "";
  } catch {
    const m = String(skuRaw ?? "").match(/\b(\d{6})\b/);
    if (m) return m[1];
    if (url) return `u:${normToken(storeLabel)}:${normToken(url)}`;
    return "";
  }
}
/**
 * Map a SKU key to its canonical SKU.
 *
 * @param {string} skuKey - Normalized SKU key; an empty key yields "".
 * @param {*} skuMap - Object exposing `canonicalSku(key)`; anything else
 *   (including null) leaves the key unchanged.
 * @returns {string} The canonical SKU, or `skuKey` itself when no map is
 *   usable or the map returns a falsy value.
 */
function canonicalize(skuKey, skuMap) {
  if (!skuKey) return "";
  if (skuMap && typeof skuMap.canonicalSku === "function") {
    return String(skuMap.canonicalSku(skuKey) || skuKey);
  }
  return skuKey;
}
/**
 * Index the items of one DB object by canonical SKU.
 *
 * Items without a derivable canonical SKU are dropped. When several items
 * share a canonical SKU, the last one in `obj.items` wins.
 *
 * @param {*} obj - Parsed DB file; reads `storeLabel`/`store`,
 *   `categoryLabel`/`category` and `items`. A missing object yields an
 *   empty map.
 * @param {*} skuMap - SKU map handed to canonicalize (may be null).
 * @param {object} options
 * @param {boolean} options.includeRemoved - Keep items flagged `removed`.
 * @returns {Map<string, object>} canonSku -> normalized item carrying
 *   canonSku, skuRaw, name, price, url, img, removed, storeLabel and
 *   categoryLabel (string fields are coerced with String()).
 */
function mapDbItems(obj, skuMap, { includeRemoved }) {
  const storeLabel = String(obj?.storeLabel || obj?.store || "");
  const categoryLabel = String(obj?.categoryLabel || obj?.category || "");
  const items = Array.isArray(obj?.items) ? obj.items : [];
  const m = new Map(); // canonSku -> item (for this store+category db)
  for (const it of items) {
    if (!it) continue;
    const removed = Boolean(it.removed);
    if (removed && !includeRemoved) continue;
    const skuKey = normalizeSkuKeyOrEmpty({ skuRaw: it.sku, storeLabel, url: it.url });
    const canon = canonicalize(skuKey, skuMap);
    if (!canon) continue;
    m.set(canon, {
      canonSku: canon,
      skuRaw: String(it.sku || ""),
      name: String(it.name || ""),
      price: String(it.price || ""),
      url: String(it.url || ""),
      img: String(it.img || it.image || it.thumb || ""),
      removed,
      storeLabel,
      categoryLabel,
    });
  }
  return m;
}
/**
 * Compare two versions of one DB file and report new items and price drops.
 *
 * - An item is "new" when it is live in `nextObj` and its canonical SKU did
 *   not appear in `prevObj` at all (removed entries included).
 * - A "price drop" is an item live in both versions whose parsed price
 *   decreased; items whose prices cannot be parsed are skipped.
 *
 * @param {*} prevObj - Parsed DB file before the change (may be null).
 * @param {*} nextObj - Parsed DB file after the change (may be null).
 * @param {*} skuMap - SKU map handed through to mapDbItems.
 * @returns {{newItems: object[], priceDown: object[]}} priceDown entries are
 *   the current item plus `oldPrice`, `newPrice` and `pct`.
 */
function diffDb(prevObj, nextObj, skuMap) {
  const prevAll = mapDbItems(prevObj, skuMap, { includeRemoved: true });
  const prevLive = mapDbItems(prevObj, skuMap, { includeRemoved: false });
  const nextLive = mapDbItems(nextObj, skuMap, { includeRemoved: false });
  const newItems = [];
  const priceDown = [];
  for (const [canon, now] of nextLive) {
    if (!prevAll.has(canon)) {
      // Unknown before, so it cannot also be a price drop.
      newItems.push(now);
      continue;
    }
    const was = prevLive.get(canon);
    if (!was) continue; // previously present only as a removed entry
    const a = String(was.price || "");
    const b = String(now.price || "");
    if (a === b) continue;
    const aN = priceToNumber(a);
    const bN = priceToNumber(b);
    if (aN === null || bN === null) continue;
    if (bN >= aN) continue;
    priceDown.push({
      ...now,
      oldPrice: a,
      newPrice: b,
      pct: pctOff(a, b),
    });
  }
  return { newItems, priceDown };
}
/**
 * Scan every DB file currently on disk and build lookup indexes over the
 * live (non-removed) items.
 *
 * @param {*} skuMap - SKU map handed through to mapDbItems.
 * @returns {{availability: Map, cheapest: Map, byStoreCanon: Map}}
 *   - availability: canonSku -> Set of store labels listing it.
 *   - cheapest: canonSku -> { priceNum, stores, example }, where `stores` is
 *     the Set of stores at the lowest parsed price and `example` describes
 *     the first item seen at that price.
 *   - byStoreCanon: storeLabel -> Map(canonSku -> item).
 */
function buildCurrentIndexes(skuMap) {
  const files = listDbFilesOnDisk();
  const availability = new Map(); // canonSku -> Set(storeLabel)
  const cheapest = new Map(); // canonSku -> { priceNum, stores:Set, example:{name,url,img,categoryLabel} }
  const byStoreCanon = new Map(); // storeLabel -> Map(canonSku -> item)
  for (const file of files) {
    const obj = readJson(file);
    if (!obj) continue;
    const storeLabel = String(obj.storeLabel || obj.store || "");
    if (!storeLabel) continue;
    const live = mapDbItems(obj, skuMap, { includeRemoved: false });
    if (!byStoreCanon.has(storeLabel)) byStoreCanon.set(storeLabel, new Map());
    for (const it of live.values()) {
      if (!availability.has(it.canonSku)) availability.set(it.canonSku, new Set());
      availability.get(it.canonSku).add(storeLabel);
      byStoreCanon.get(storeLabel).set(it.canonSku, it);
      const p = priceToNumber(it.price);
      if (p === null) continue; // unpriced items count for availability only
      const cur = cheapest.get(it.canonSku);
      if (!cur || p < cur.priceNum) {
        // First price seen, or a strictly lower one: start a fresh record.
        cheapest.set(it.canonSku, {
          priceNum: p,
          stores: new Set([storeLabel]),
          example: { name: it.name, url: it.url, img: it.img, categoryLabel: it.categoryLabel },
        });
      } else if (p === cur.priceNum) {
        // Tie: this store shares the lowest price.
        cur.stores.add(storeLabel);
      }
    }
  }
  return { availability, cheapest, byStoreCanon };
}
function renderHtml({ title, subtitle, uniqueNews, bigSales, commitUrl, pagesUrl }) { function renderHtml({ title, subtitle, uniqueNews, bigSales, commitUrl, pagesUrl }) {
const now = new Date().toISOString(); const now = new Date().toISOString();
function section(titleText, rowsHtml) { function section(titleText, rowsHtml) {
return ` return `
<div style="margin:16px 0 6px 0;font-weight:700;font-size:16px">${htmlEscape(titleText)}</div> <div style="margin:16px 0 6px 0;font-weight:700;font-size:16px">${htmlEscape(titleText)}</div>
${rowsHtml || `<div style="color:#666">None</div>`} ${rowsHtml || `<div style="color:#666">None</div>`}
`; `;
} }
function card(it, extraHtml) { function card(it, extraHtml) {
const img = it.img const img = it.img
? `<img src="${htmlEscape(it.img)}" width="84" height="84" style="object-fit:contain;border-radius:8px;border:1px solid #eee;background:#fff" />` ? `<img src="${htmlEscape(it.img)}" width="84" height="84" style="object-fit:contain;border-radius:8px;border:1px solid #eee;background:#fff" />`
: ""; : "";
const name = htmlEscape(it.name || ""); const name = htmlEscape(it.name || "");
const store = htmlEscape(it.storeLabel || ""); const store = htmlEscape(it.storeLabel || "");
const cat = htmlEscape(it.categoryLabel || ""); const cat = htmlEscape(it.categoryLabel || "");
const price = htmlEscape(it.price || ""); const price = htmlEscape(it.price || "");
const url = htmlEscape(it.url || ""); const url = htmlEscape(it.url || "");
return ` return `
<table role="presentation" width="100%" cellpadding="0" cellspacing="0" style="border:1px solid #eee;border-radius:12px;margin:10px 0"> <table role="presentation" width="100%" cellpadding="0" cellspacing="0" style="border:1px solid #eee;border-radius:12px;margin:10px 0">
<tr> <tr>
<td style="padding:12px;vertical-align:top;width:96px">${img || ""}</td> <td style="padding:12px;vertical-align:top;width:96px">${img || ""}</td>
@ -315,15 +315,15 @@ function renderHtml({ title, subtitle, uniqueNews, bigSales, commitUrl, pagesUrl
</tr> </tr>
</table> </table>
`; `;
} }
const uniqueHtml = uniqueNews.map((it) => card(it)).join(""); const uniqueHtml = uniqueNews.map((it) => card(it)).join("");
const salesHtml = bigSales const salesHtml = bigSales
.map((it) => { .map((it) => {
const pct = Number.isFinite(it.pct) ? it.pct : null; const pct = Number.isFinite(it.pct) ? it.pct : null;
const oldP = htmlEscape(it.oldPrice || ""); const oldP = htmlEscape(it.oldPrice || "");
const newP = htmlEscape(it.newPrice || ""); const newP = htmlEscape(it.newPrice || "");
const extra = ` const extra = `
<div style="margin-top:6px;font-size:13px"> <div style="margin-top:6px;font-size:13px">
<span style="color:#b00020;text-decoration:line-through">${oldP}</span> <span style="color:#b00020;text-decoration:line-through">${oldP}</span>
<span style="margin:0 6px;color:#666"></span> <span style="margin:0 6px;color:#666"></span>
@ -331,11 +331,11 @@ function renderHtml({ title, subtitle, uniqueNews, bigSales, commitUrl, pagesUrl
${pct !== null ? `<span style="margin-left:8px;color:#137333;font-weight:700">(${pct}% off)</span>` : ""} ${pct !== null ? `<span style="margin-left:8px;color:#137333;font-weight:700">(${pct}% off)</span>` : ""}
</div> </div>
`; `;
return card({ ...it, price: "" }, extra); return card({ ...it, price: "" }, extra);
}) })
.join(""); .join("");
const links = ` const links = `
<div style="margin-top:10px;font-size:12px;color:#666"> <div style="margin-top:10px;font-size:12px;color:#666">
${commitUrl ? `Commit: <a href="${htmlEscape(commitUrl)}" style="color:#0b57d0;text-decoration:none">${htmlEscape(commitUrl)}</a><br/>` : ""} ${commitUrl ? `Commit: <a href="${htmlEscape(commitUrl)}" style="color:#0b57d0;text-decoration:none">${htmlEscape(commitUrl)}</a><br/>` : ""}
${pagesUrl ? `Visualizer: <a href="${htmlEscape(pagesUrl)}" style="color:#0b57d0;text-decoration:none">${htmlEscape(pagesUrl)}</a>` : ""} ${pagesUrl ? `Visualizer: <a href="${htmlEscape(pagesUrl)}" style="color:#0b57d0;text-decoration:none">${htmlEscape(pagesUrl)}</a>` : ""}
@ -343,7 +343,7 @@ function renderHtml({ title, subtitle, uniqueNews, bigSales, commitUrl, pagesUrl
</div> </div>
`; `;
return `<!doctype html> return `<!doctype html>
<html> <html>
<head> <head>
<meta charset="utf-8" /> <meta charset="utf-8" />
@ -365,137 +365,137 @@ function renderHtml({ title, subtitle, uniqueNews, bigSales, commitUrl, pagesUrl
} }
/**
 * Append step outputs to the file named by the GITHUB_OUTPUT environment
 * variable. Does nothing when that variable is not set.
 *
 * Single-line values are written as `key=value`. Values containing a line
 * break are written in the delimiter (heredoc) form
 * `key<<DELIM` / value / `DELIM`, because a raw newline inside `key=value`
 * would corrupt the entries that follow.
 *
 * @param {Object<string, *>} kv - Output names mapped to values; values are
 *   converted with String().
 */
function writeGithubOutput(kv) {
  const outPath = process.env.GITHUB_OUTPUT;
  if (!outPath) return;
  const lines = [];
  for (const [k, v] of Object.entries(kv)) {
    const value = String(v);
    if (!/[\r\n]/.test(value)) {
      lines.push(`${k}=${value}`);
      continue;
    }
    // Extend the delimiter until it cannot collide with the value itself.
    let delim = "GH_OUTPUT_EOF";
    while (value.includes(delim)) delim += "_";
    lines.push(`${k}<<${delim}`, value, delim);
  }
  fs.appendFileSync(outPath, lines.join("\n") + "\n", "utf8");
}
/**
 * Build the alert for the HEAD commit.
 *
 * Diffs every changed `data/db/*.json` file between HEAD and its first
 * parent, then keeps:
 * - "unique new" items: new items that are currently listed by exactly one
 *   store (the store they appeared in);
 * - "big sales": price drops of at least 20% where the item's store is among
 *   the stores holding the current lowest price for that SKU.
 *
 * Writes `reports/alert.html`, `reports/alert_subject.txt` and
 * `reports/alert_should_send.txt`, and publishes `should_send`, `subject`
 * and `html_path` through writeGithubOutput. When HEAD has no parent or no
 * DB file changed, only the "0" should-send marker is written.
 */
function main() {
  const repoRoot = process.cwd();
  const reportsDir = path.join(repoRoot, "reports");
  ensureDir(reportsDir);
  const sendPath = path.join(reportsDir, "alert_should_send.txt");

  // Shared exit for the "nothing to compare" cases.
  const writeNoAlert = () => {
    fs.writeFileSync(sendPath, "0\n", "utf8");
    writeGithubOutput({ should_send: 0 });
  };

  const headSha = runGit(["rev-parse", "HEAD"]);
  const parentSha = getFirstParentSha(headSha);
  if (!parentSha) {
    writeNoAlert();
    return;
  }
  const skuMap = loadSkuMapOrNull();
  const changed = listChangedDbFiles(parentSha, headSha);
  if (!changed.length) {
    writeNoAlert();
    return;
  }

  const { availability, cheapest, byStoreCanon } = buildCurrentIndexes(skuMap);
  // Prefer the on-disk version of an item; fall back to the diffed one.
  const currentItem = (it) => byStoreCanon.get(it.storeLabel)?.get(it.canonSku) || it;

  const uniqueNews = [];
  const bigSales = [];
  for (const file of changed) {
    const existedBefore = gitFileExistsAtSha(parentSha, file);
    const existsNow = gitFileExistsAtSha(headSha, file);
    // NEW FEATURE: if this DB file is brand new, ignore its "new items" for alert.
    if (!existedBefore && existsNow) {
      continue;
    }
    const prevObj = gitShowJson(parentSha, file);
    const nextObj = gitShowJson(headSha, file);
    if (!prevObj && !nextObj) continue;
    const { newItems, priceDown } = diffDb(prevObj, nextObj, skuMap);

    for (const it of newItems) {
      const stores = availability.get(it.canonSku);
      const storeCount = stores ? stores.size : 0;
      if (storeCount !== 1) continue; // listed elsewhere too, or not live
      if (!stores.has(it.storeLabel)) continue;
      uniqueNews.push(currentItem(it));
    }

    for (const it of priceDown) {
      const pct = it.pct;
      if (!Number.isFinite(pct) || pct < 20) continue;
      const best = cheapest.get(it.canonSku);
      if (!best) continue;
      const newN = priceToNumber(it.newPrice);
      if (newN === null) continue;
      if (best.priceNum !== newN) continue; // someone else is cheaper
      if (!best.stores.has(it.storeLabel)) continue;
      bigSales.push({
        ...currentItem(it),
        oldPrice: it.oldPrice,
        newPrice: it.newPrice,
        pct,
      });
    }
  }

  // Keep the first entry for each (canonSku, store) pair.
  const dedupe = (arr) => {
    const out = [];
    const seen = new Set();
    for (const it of arr) {
      const k = `${it.canonSku}|${it.storeLabel}`;
      if (seen.has(k)) continue;
      seen.add(k);
      out.push(it);
    }
    return out;
  };

  const uniqueFinal = dedupe(uniqueNews).sort((a, b) => (a.name || "").localeCompare(b.name || ""));
  const salesFinal = dedupe(bigSales).sort((a, b) => (b.pct || 0) - (a.pct || 0));
  const shouldSend = uniqueFinal.length > 0 || salesFinal.length > 0;
  const subject = shouldSend
    ? `Spirit Tracker: ${uniqueFinal.length} unique new · ${salesFinal.length} big sales`
    : `Spirit Tracker: (no alert)`;

  const ghRepo = process.env.GITHUB_REPOSITORY || "";
  const ghUrl = process.env.GITHUB_SERVER_URL || "https://github.com";
  const commitUrl = ghRepo ? `${ghUrl}/${ghRepo}/commit/${headSha}` : "";
  const pagesUrl = process.env.PAGES_URL || "";
  const html = renderHtml({
    title: "Spirit Tracker Alert",
    subtitle: subject,
    uniqueNews: uniqueFinal,
    bigSales: salesFinal,
    commitUrl,
    pagesUrl,
  });

  const htmlPath = path.join(reportsDir, "alert.html");
  const subjPath = path.join(reportsDir, "alert_subject.txt");
  fs.writeFileSync(htmlPath, html, "utf8");
  fs.writeFileSync(subjPath, subject + "\n", "utf8");
  fs.writeFileSync(sendPath, shouldSend ? "1\n" : "0\n", "utf8");
  writeGithubOutput({
    should_send: shouldSend ? 1 : 0,
    subject,
    html_path: htmlPath,
  });
}
// Script entry point: run the alert build exactly once.
main();

View file

@ -6,134 +6,134 @@ const fs = require("fs");
const path = require("path"); const path = require("path");
/**
 * Run a git subcommand synchronously and return its stdout.
 *
 * @param {string[]} args - Arguments passed to the `git` executable.
 * @returns {string} stdout decoded as UTF-8 with trailing whitespace removed.
 * @throws {Error} Propagates the execFileSync error when git cannot be
 *   spawned or exits with a non-zero status.
 */
function runGit(args) {
  return execFileSync("git", args, { encoding: "utf8" }).trimEnd();
}
/**
 * List the `.json` files directly inside `dbDir`.
 *
 * @param {string} dbDir - Directory to scan (not recursive).
 * @returns {string[]} `dbDir` joined with each matching file name; an empty
 *   array when the directory cannot be read.
 */
function listDbFiles(dbDir) {
  try {
    return fs
      .readdirSync(dbDir, { withFileTypes: true })
      .filter((e) => e.isFile() && e.name.endsWith(".json"))
      .map((e) => path.join(dbDir, e.name));
  } catch {
    return [];
  }
}
/**
 * List the common-listings report files directly inside `reportsDir`.
 *
 * A file qualifies when its name ends with a lowercase `.json` and matches
 * `common_listings_<anything>_top<digits>.json` (the pattern itself is
 * case-insensitive).
 *
 * @param {string} reportsDir - Directory to scan (not recursive).
 * @returns {string[]} `reportsDir` joined with each matching file name; an
 *   empty array when the directory cannot be read.
 */
function listCommonListingReportFiles(reportsDir) {
  const reportName = /^common_listings_.*_top\d+\.json$/i;
  try {
    return fs
      .readdirSync(reportsDir, { withFileTypes: true })
      .filter((e) => e.isFile() && e.name.endsWith(".json") && reportName.test(e.name))
      .map((e) => path.join(reportsDir, e.name));
  } catch {
    return [];
  }
}
/**
 * Extract the leading `YYYY-MM-DD` part of a timestamp string.
 *
 * @param {*} iso - Timestamp text; null/undefined are treated as empty.
 * @returns {string} The date prefix, or "" when the value does not start
 *   with a `dddd-dd-dd` pattern.
 */
function dateOnly(iso) {
  const m = String(iso ?? "").match(/^(\d{4}-\d{2}-\d{2})/);
  return m ? m[1] : "";
}
/**
 * Build a per-file commit history with at most one commit per calendar date.
 *
 * For each file, the newest `maxRawPerFile` commits touching it are read
 * from `git log`; only the most recent commit of each date is kept, and the
 * result is trimmed to the newest `maxDaysPerFile` dates, ordered oldest to
 * newest. Files whose `git log` call fails are left out.
 *
 * @param {object} params
 * @param {string} params.repoRoot - Accepted for the callers' convenience;
 *   not read by this function.
 * @param {string[]} params.relFiles - Repository-relative paths; processed
 *   in sorted order without modifying the caller's array.
 * @param {number} params.maxRawPerFile - Commit limit passed to `git log`.
 * @param {number} params.maxDaysPerFile - Maximum dates kept per file.
 * @returns {{generatedAt: string, branch: string, files: Object}} `files`
 *   maps each path to an array of `{ sha, date, ts }`.
 */
function buildCommitPayloadForFiles({ repoRoot, relFiles, maxRawPerFile, maxDaysPerFile }) {
  const payload = {
    generatedAt: new Date().toISOString(),
    branch: "data",
    files: {},
  };
  // Sort a copy: Array.prototype.sort works in place and would otherwise
  // reorder the array owned by the caller.
  for (const rel of [...relFiles].sort()) {
    let txt = "";
    try {
      // %H = sha, %cI = committer date strict ISO 8601 (includes time + tz)
      txt = runGit(["log", "--format=%H %cI", `-${maxRawPerFile}`, "--", rel]);
    } catch {
      continue;
    }
    const lines = txt
      .split(/\r?\n/)
      .map((s) => s.trim())
      .filter(Boolean);
    // git log is newest -> oldest.
    // Keep the FIRST commit we see for each date (that is the most recent commit for that date).
    const byDate = new Map(); // date -> { sha, date, ts }
    for (const line of lines) {
      const m = line.match(/^([0-9a-f]{7,40})\s+(.+)$/i);
      if (!m) continue;
      const sha = m[1];
      const ts = m[2];
      const d = dateOnly(ts);
      if (!d) continue;
      if (!byDate.has(d)) byDate.set(d, { sha, date: d, ts });
    }
    // Convert to oldest -> newest
    let arr = [...byDate.values()].reverse();
    // Keep only the newest MAX_DAYS_PER_FILE (still oldest -> newest)
    if (arr.length > maxDaysPerFile) {
      arr = arr.slice(arr.length - maxDaysPerFile);
    }
    payload.files[rel] = arr;
  }
  return payload;
}
/**
 * Generate the commit-history indexes under `<cwd>/viz/data`:
 * `db_commits.json` for every `data/db/*.json` file and
 * `common_listings_commits.json` for the common-listings report files in
 * `reports/`. A one-line summary is printed to stdout for each file written.
 */
function main() {
  const repoRoot = process.cwd();
  const dbDir = path.join(repoRoot, "data", "db");
  const reportsDir = path.join(repoRoot, "reports");
  const outDir = path.join(repoRoot, "viz", "data");
  fs.mkdirSync(outDir, { recursive: true });

  // We want the viz to show ONE point per day (the most recent run that day).
  // So we collapse multiple commits per day down to the newest commit for that date.
  //
  // With multiple runs/day, we also want to keep a long-ish daily history.
  // Raw commits per day could be ~4, so grab a larger raw window and then collapse.
  const MAX_RAW_PER_FILE = 2400; // ~600 days @ 4 runs/day
  const MAX_DAYS_PER_FILE = 600; // daily points kept after collapsing

  // ---- Existing output (UNCHANGED): db_commits.json ----
  const outFileDb = path.join(outDir, "db_commits.json");
  // git paths are repository-relative with forward slashes.
  const dbFiles = listDbFiles(dbDir).map((abs) => path.posix.join("data/db", path.basename(abs)));
  const payloadDb = buildCommitPayloadForFiles({
    repoRoot,
    relFiles: dbFiles,
    maxRawPerFile: MAX_RAW_PER_FILE,
    maxDaysPerFile: MAX_DAYS_PER_FILE,
  });
  fs.writeFileSync(outFileDb, JSON.stringify(payloadDb, null, 2) + "\n", "utf8");
  process.stdout.write(`Wrote ${outFileDb} (${Object.keys(payloadDb.files).length} files)\n`);

  // ---- New output: common listings report commits ----
  const outFileCommon = path.join(outDir, "common_listings_commits.json");
  const reportFilesAbs = listCommonListingReportFiles(reportsDir);
  const reportFilesRel = reportFilesAbs.map((abs) => path.posix.join("reports", path.basename(abs)));
  const payloadCommon = buildCommitPayloadForFiles({
    repoRoot,
    relFiles: reportFilesRel,
    maxRawPerFile: MAX_RAW_PER_FILE,
    maxDaysPerFile: MAX_DAYS_PER_FILE,
  });
  fs.writeFileSync(outFileCommon, JSON.stringify(payloadCommon, null, 2) + "\n", "utf8");
  process.stdout.write(`Wrote ${outFileCommon} (${Object.keys(payloadCommon.files).length} files)\n`);
}
// Script entry point: generate the commit indexes exactly once.
main();

View file

@ -6,233 +6,224 @@ const path = require("path");
const { execFileSync } = require("child_process"); const { execFileSync } = require("child_process");
/**
 * Create `dir` and any missing parent directories.
 *
 * @param {string} dir - Directory path to create.
 */
function ensureDir(dir) {
  fs.mkdirSync(dir, { recursive: true });
}
/**
 * List the `.json` files directly inside `dir`.
 *
 * @param {string} dir - Directory to scan (not recursive).
 * @returns {string[]} `dir` joined with each matching file name. When the
 *   directory cannot be read, the entries collected so far (normally none)
 *   are returned.
 */
function listJsonFiles(dir) {
  const out = [];
  try {
    for (const ent of fs.readdirSync(dir, { withFileTypes: true })) {
      if (!ent.isFile()) continue;
      if (!String(ent.name || "").endsWith(".json")) continue;
      out.push(path.join(dir, ent.name));
    }
  } catch {
    // ignore
  }
  return out;
}
/**
 * Read and parse a UTF-8 JSON file.
 *
 * @param {string} file - Path of the file to read.
 * @returns {*} The parsed value, or `null` when the file cannot be read or
 *   does not contain valid JSON.
 */
function readJson(file) {
  try {
    return JSON.parse(fs.readFileSync(file, "utf8"));
  } catch {
    return null;
  }
}
/**
 * Load the commit manifest stored at `<repoRoot>/viz/data/db_commits.json`.
 *
 * @param {string} repoRoot - Repository root directory.
 * @returns {*} The parsed manifest, or `null` when the file is missing,
 *   unreadable, or not valid JSON.
 */
function readDbCommitsOrNull(repoRoot) {
  const manifestPath = path.join(repoRoot, "viz", "data", "db_commits.json");
  try {
    return JSON.parse(fs.readFileSync(manifestPath, "utf8"));
  } catch {
    return null;
  }
}
/**
 * Read `filePath` as it existed at commit `sha` (via `git show`) and parse it
 * as JSON.
 *
 * @param {string} sha - Commit identifier.
 * @param {string} filePath - Repository-relative path of the file.
 * @returns {*} The parsed JSON, or `null` when git fails (unknown commit,
 *   file absent at that commit, git unavailable) or the content is not JSON.
 */
function gitShowJson(sha, filePath) {
  try {
    const txt = execFileSync("git", ["show", `${sha}:${filePath}`], {
      encoding: "utf8",
      // Capture stderr instead of inheriting it, so git's "fatal: ..." lines
      // for missing paths do not spam the console.
      stdio: ["ignore", "pipe", "pipe"],
    });
    return JSON.parse(txt);
  } catch {
    return null;
  }
}
/**
 * Extract the first standalone run of exactly six digits from a value.
 *
 * @param {*} v - Any value; `null`/`undefined` are treated as an empty string.
 * @returns {string} The six-digit code, or `""` when none is present.
 */
function normalizeCspc(v) {
  const m = String(v ?? "").match(/\b(\d{6})\b/);
  return m ? m[1] : "";
}
/**
 * 32-bit FNV-1a hash of a string, computed over its UTF-16 code units.
 *
 * @param {string} str - Input string.
 * @returns {string} The hash as 8 lowercase hex digits.
 */
function fnv1a32(str) {
  let h = 0x811c9dc5; // FNV offset basis
  for (let i = 0; i < str.length; i++) {
    h ^= str.charCodeAt(i);
    h = Math.imul(h, 0x01000193); // FNV prime, 32-bit wrapping multiply
  }
  // `>>> 0` reinterprets the signed 32-bit result as unsigned before formatting.
  return (h >>> 0).toString(16).padStart(8, "0");
}
/**
 * Build a synthetic SKU key of the form `u:<hash>` from a store label and URL.
 *
 * @param {string} storeLabel - Store label; falls back to `"store"` when falsy.
 * @param {string} url - Item URL.
 * @returns {string} `u:` followed by the `fnv1a32` hash of `"<store>|<url>"`,
 *   or `""` when no URL is given.
 */
function makeSyntheticSku(storeLabel, url) {
  const store = String(storeLabel || "store");
  const u = String(url || "");
  if (!u) return "";
  return `u:${fnv1a32(`${store}|${u}`)}`;
}
/**
 * Compute the key used to identify an item: its six-digit code when
 * `it.sku` contains one, otherwise a synthetic key derived from the store
 * label and the item's URL.
 *
 * @param {{sku?: *, url?: *}|null|undefined} it - Item record.
 * @param {string} storeLabel - Store label used for the synthetic key.
 * @returns {string} The key, or `""` when neither a code nor a URL is available.
 */
function keySkuForItem(it, storeLabel) {
  const real = normalizeCspc(it?.sku);
  if (real) return real;
  return makeSyntheticSku(storeLabel, it?.url);
}
/**
 * Determine, for each wanted SKU key, the timestamp of the first listed commit
 * in which the item appeared LIVE (not flagged `removed`) in one db file.
 *
 * @param {object} args
 * @param {string} args.repoRoot - Accepted for call-site symmetry; not used here
 *   (the history lookup goes through `gitShowJson`).
 * @param {string} args.relDbFile - Repo-relative path of the db file.
 * @param {string} args.storeLabel - Fallback store label for synthetic keys when
 *   a historical snapshot carries none.
 * @param {Iterable<string>} args.wantSkuKeys - Keys to resolve (duplicates are fine).
 * @param {Array<{sha: string, ts: string}>|null} args.commitsArr - Commits that
 *   touched the file, oldest -> newest (from db_commits.json).
 * @param {string} args.nowIso - Timestamp assigned to keys with no history.
 * @returns {Map<string, string>} skuKey -> first-seen ISO timestamp, with an
 *   entry for every wanted key.
 */
function computeFirstSeenForDbFile({ repoRoot, relDbFile, storeLabel, wantSkuKeys, commitsArr, nowIso }) {
  const out = new Map();
  const want = new Set(wantSkuKeys);

  const hasHistory = Array.isArray(commitsArr) && commitsArr.length > 0;

  // Nothing to resolve, or no history to consult: skip the git lookups entirely.
  if (hasHistory && want.size > 0) {
    scan: for (const c of commitsArr) {
      const sha = String(c?.sha || "");
      const ts = String(c?.ts || "");
      if (!sha || !ts) continue;

      const obj = gitShowJson(sha, relDbFile);
      const items = Array.isArray(obj?.items) ? obj.items : [];
      const sLabel = String(obj?.storeLabel || obj?.store || storeLabel || "");

      for (const it of items) {
        if (!it) continue;
        if (it.removed) continue; // only count snapshots where the item was LIVE

        const k = keySkuForItem(it, sLabel);
        if (!k || !want.has(k) || out.has(k)) continue;

        out.set(k, ts);
        // Every wanted key resolved: older-to-newer scan can stop early.
        if (out.size >= want.size) break scan;
      }
    }
  }

  // Anything never seen historically (or no history at all) -> new today.
  for (const k of want) if (!out.has(k)) out.set(k, nowIso);

  return out;
}
/**
 * Build `viz/data/index.json`: one row per item found in the `data/db/*.json`
 * files (removed items included), each annotated with the time it was first
 * seen live in its db file.
 *
 * Paths are resolved relative to the parent of this script's directory.
 *
 * @returns {void}
 */
function main() {
  const repoRoot = path.resolve(__dirname, "..");
  const dbDir = path.join(repoRoot, "data", "db");
  const outDir = path.join(repoRoot, "viz", "data");
  const outFile = path.join(outDir, "index.json");

  ensureDir(outDir);

  const nowIso = new Date().toISOString();
  const commitsManifest = readDbCommitsOrNull(repoRoot);

  const items = [];
  let liveCount = 0;

  for (const file of listJsonFiles(dbDir)) {
    const obj = readJson(file);
    if (!obj) continue;

    const store = String(obj.store || "");
    const storeLabel = String(obj.storeLabel || store || "");
    const category = String(obj.category || "");
    const categoryLabel = String(obj.categoryLabel || "");
    const source = String(obj.source || "");
    const updatedAt = String(obj.updatedAt || "");

    // Forward slashes so the path matches manifest keys, e.g. data/db/foo.json
    const dbFile = path.relative(repoRoot, file).replace(/\\/g, "/");

    const arr = Array.isArray(obj.items) ? obj.items : [];

    // Key every row once; keys come from the CURRENT file contents
    // (removed rows included).
    const rows = arr.filter(Boolean).map((it) => ({ it, skuKey: keySkuForItem(it, storeLabel) }));
    const wantSkuKeys = rows.map((row) => row.skuKey).filter(Boolean);

    const commitsArr = commitsManifest?.files?.[dbFile] || null;
    const firstSeenByKey = computeFirstSeenForDbFile({
      repoRoot,
      relDbFile: dbFile,
      storeLabel,
      wantSkuKeys,
      commitsArr,
      nowIso,
    });

    for (const { it, skuKey } of rows) {
      const removed = Boolean(it.removed);
      if (!removed) liveCount++;

      const firstSeenAt = skuKey ? String(firstSeenByKey.get(skuKey) || nowIso) : nowIso;

      items.push({
        sku: String(it.sku || "").trim(),
        name: String(it.name || "").trim(),
        price: String(it.price || "").trim(),
        url: String(it.url || "").trim(),
        img: String(it.img || it.image || it.thumb || "").trim(),
        removed, // additive: allows viz to show history / removed-only items
        store,
        storeLabel,
        category,
        categoryLabel,
        source,
        updatedAt,
        firstSeenAt, // first time this item appeared LIVE in this db file (or now)
        dbFile,
      });
    }
  }

  // Stable, deterministic output order.
  const sortKey = (x) => `${x.sku}|${x.storeLabel}|${x.removed ? 1 : 0}|${x.name}|${x.url}`;
  items.sort((a, b) => sortKey(a).localeCompare(sortKey(b)));

  const outObj = {
    generatedAt: nowIso,
    // Additive metadata. Old readers can ignore.
    includesRemoved: true,
    count: items.length,
    countLive: liveCount,
    items,
  };

  fs.writeFileSync(outFile, JSON.stringify(outObj, null, 2) + "\n", "utf8");
  process.stdout.write(`Wrote ${path.relative(repoRoot, outFile)} (${items.length} rows)\n`);
}
module.exports = { main };

// Run only when this file is executed directly, not when it is required.
if (require.main === module) {
  main();
}

View file

@ -6,474 +6,467 @@ const fs = require("fs");
const path = require("path"); const path = require("path");
/**
 * Run `git` with the given arguments in the current working directory.
 *
 * @param {string[]} args - Arguments passed to git (no shell involved).
 * @returns {string} git's stdout with trailing whitespace removed.
 * @throws {Error} When git cannot be started or exits with a non-zero status.
 */
function runGit(args) {
  return execFileSync("git", args, { encoding: "utf8" }).trimEnd();
}
/**
 * Read `filePath` as it existed at commit `sha` (via `git show`) and parse it
 * as JSON.
 *
 * @param {string} sha - Commit identifier.
 * @param {string} filePath - Repository-relative path of the file.
 * @returns {*} The parsed JSON, or `null` when git fails (unknown commit,
 *   file absent at that commit, git unavailable) or the content is not JSON.
 */
function gitShowJson(sha, filePath) {
  try {
    const txt = execFileSync("git", ["show", `${sha}:${filePath}`], {
      encoding: "utf8",
      // Capture stderr instead of inheriting it, so git's "fatal: ..." lines
      // for missing paths do not spam the console.
      stdio: ["ignore", "pipe", "pipe"],
    });
    return JSON.parse(txt);
  } catch {
    return null;
  }
}
/**
 * Check whether `filePath` exists in the tree of commit `sha`
 * (using `git cat-file -e`).
 *
 * @param {string} sha - Commit identifier; a falsy value yields `false`.
 * @param {string} filePath - Repository-relative path.
 * @returns {boolean} `true` when git confirms the object exists; `false` on
 *   any failure.
 */
function gitFileExistsAtSha(sha, filePath) {
  if (!sha) return false;
  try {
    execFileSync("git", ["cat-file", "-e", `${sha}:${filePath}`], {
      stdio: ["ignore", "ignore", "ignore"], // only the exit status matters
    });
    return true;
  } catch {
    return false;
  }
}
/**
 * List all file paths under `dirRel` in the tree of commit `sha`
 * (`git ls-tree -r --name-only`).
 *
 * @param {string} sha - Commit identifier.
 * @param {string} dirRel - Repository-relative directory.
 * @returns {string[]} Non-empty, trimmed paths; an empty array when git fails.
 */
function gitListTreeFiles(sha, dirRel) {
  try {
    const out = runGit(["ls-tree", "-r", "--name-only", sha, dirRel]);
    return out
      .split(/\r?\n/)
      .map((s) => s.trim())
      .filter(Boolean);
  } catch {
    return [];
  }
}
/**
 * Read and parse a UTF-8 JSON file from disk.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {*} The parsed value, or `null` when the file cannot be read or
 *   does not contain valid JSON.
 */
function readJsonFileOrNull(filePath) {
  try {
    return JSON.parse(fs.readFileSync(filePath, "utf8"));
  } catch {
    return null;
  }
}
/**
 * Extract the first standalone run of exactly six digits from a value.
 *
 * @param {*} v - Any value; `null`/`undefined` are treated as an empty string.
 * @returns {string} The six-digit code, or `""` when none is present.
 */
function normalizeCspc(v) {
  const m = String(v ?? "").match(/\b(\d{6})\b/);
  return m ? m[1] : "";
}
/**
 * Normalize a price for string comparison: stringify and trim.
 *
 * @param {*} p - Price value; `null`/`undefined` become `""`.
 * @returns {string} The trimmed string form.
 */
function normPriceStr(p) {
  return String(p ?? "").trim();
}
/**
 * Parse a price-like value into a number by discarding every character that
 * is not a digit or a dot.
 *
 * @param {*} v - Price value such as `"$1,234.50"`.
 * @returns {number|null} The numeric value, or `null` when nothing numeric
 *   remains or the remainder is not a valid number (e.g. `"1.2.3"`).
 */
function priceToNumber(v) {
  const s = String(v ?? "").replace(/[^0-9.]/g, "");
  // Number("") is 0, which would make a missing price look like a real
  // price of zero; report it as "no number" instead.
  if (s === "") return null;
  const n = Number(s);
  return Number.isFinite(n) ? n : null;
}
/**
 * Return the leading `YYYY-MM-DD` portion of a timestamp string.
 *
 * @param {*} iso - Timestamp; `null`/`undefined` are treated as `""`.
 * @returns {string} The date part, or `""` when the value does not start with
 *   a `dddd-dd-dd` pattern.
 */
function dateOnly(iso) {
  const m = String(iso ?? "").match(/^(\d{4}-\d{2}-\d{2})/);
  return m ? m[1] : "";
}
/**
 * 32-bit FNV-1a hash of a string, computed over its UTF-16 code units.
 *
 * @param {string} str - Input string.
 * @returns {string} The hash as 8 lowercase hex digits.
 */
function fnv1a32(str) {
  let h = 0x811c9dc5; // FNV offset basis
  for (let i = 0; i < str.length; i++) {
    h ^= str.charCodeAt(i);
    h = Math.imul(h, 0x01000193); // FNV prime, 32-bit wrapping multiply
  }
  // `>>> 0` reinterprets the signed 32-bit result as unsigned before formatting.
  return (h >>> 0).toString(16).padStart(8, "0");
}
/**
 * Build a synthetic SKU key of the form `u:<hash>` from a store label and URL.
 *
 * @param {string} storeLabel - Store label; falls back to `"store"` when falsy.
 * @param {string} url - Item URL.
 * @returns {string} `u:` followed by the `fnv1a32` hash of `"<store>|<url>"`,
 *   or `""` when no URL is given.
 */
function makeSyntheticSku(storeLabel, url) {
  const store = String(storeLabel || "store");
  const u = String(url || "");
  if (!u) return "";
  return `u:${fnv1a32(`${store}|${u}`)}`;
}
/**
 * Compute the key used to identify an item: its six-digit code when
 * `it.sku` contains one, otherwise a synthetic key derived from the store
 * label and the item's URL.
 *
 * @param {{sku?: *, url?: *}|null|undefined} it - Item record.
 * @param {string} storeLabel - Store label used for the synthetic key.
 * @returns {string} The key, or `""` when neither a code nor a URL is available.
 */
function keySkuForItem(it, storeLabel) {
  const real = normalizeCspc(it?.sku);
  if (real) return real;
  return makeSyntheticSku(storeLabel, it?.url);
}
/**
 * Index a db object's items by SKU key, collapsing duplicates.
 *
 * When several items share a key: a live record beats a removed one;
 * otherwise the record with more of name/price/url filled in wins, and on a
 * tie the earlier record is kept.
 *
 * @param {{items?: Array, storeLabel?: string, store?: string}|null} obj - Db file contents.
 * @param {{includeRemoved?: boolean}} [options] - Set `includeRemoved` to keep
 *   items flagged `removed`; they are dropped by default.
 * @returns {Map<string, {sku: string, name: string, price: string, url: string, removed: boolean}>}
 */
function mapBySku(obj, { includeRemoved } = { includeRemoved: false }) {
  const bySku = new Map();
  const items = Array.isArray(obj?.items) ? obj.items : [];
  const storeLabel = String(obj?.storeLabel || obj?.store || "");

  // Completeness score: how many of name/price/url are non-empty.
  const score = (rec) => (rec.name ? 1 : 0) + (rec.price ? 1 : 0) + (rec.url ? 1 : 0);

  for (const it of items) {
    if (!it) continue;

    const sku = keySkuForItem(it, storeLabel);
    if (!sku) continue;

    const removed = Boolean(it.removed);
    if (!includeRemoved && removed) continue;

    const next = {
      sku,
      name: String(it.name || ""),
      price: String(it.price || ""),
      url: String(it.url || ""),
      removed,
    };

    const prev = bySku.get(sku);
    if (!prev) {
      bySku.set(sku, next);
      continue;
    }

    // Prefer the non-removed record if both exist.
    if (prev.removed !== next.removed) {
      if (prev.removed) bySku.set(sku, next);
      continue;
    }

    // Same removed-state: keep the more complete record (deterministic).
    if (score(next) > score(prev)) bySku.set(sku, next);
  }

  return bySku;
}
/**
 * Compare two snapshots of a db file and classify what changed.
 *
 * - new: live now, with no record (live or removed) before.
 * - restored: live now, previously present only as removed.
 * - removed: live before, now missing or flagged removed.
 * - price change: live in both snapshots with different trimmed price strings;
 *   `kind` is `price_down`/`price_up` when both prices parse to numbers that
 *   differ, otherwise `price_change`.
 *
 * @param {object|null} prevObj - Earlier snapshot (may be null).
 * @param {object|null} nextObj - Later snapshot (may be null).
 * @returns {{newItems: object[], restoredItems: object[], removedItems: object[], priceChanges: object[]}}
 */
function diffDb(prevObj, nextObj) {
  const prevAll = mapBySku(prevObj, { includeRemoved: true });
  const nextAll = mapBySku(nextObj, { includeRemoved: true });
  const prevLive = mapBySku(prevObj, { includeRemoved: false });
  const nextLive = mapBySku(nextObj, { includeRemoved: false });

  const newItems = [];
  const restoredItems = [];
  const removedItems = [];
  const priceChanges = [];

  for (const [sku, now] of nextLive.entries()) {
    const had = prevAll.get(sku);
    if (!had) {
      newItems.push({ ...now });
    } else if (had.removed) {
      restoredItems.push({ ...now });
    }
  }

  for (const [sku, was] of prevLive.entries()) {
    const nxt = nextAll.get(sku);
    if (!nxt || nxt.removed) removedItems.push({ ...was });
  }

  for (const [sku, now] of nextLive.entries()) {
    const was = prevLive.get(sku);
    if (!was) continue;

    const oldPrice = normPriceStr(was.price);
    const newPrice = normPriceStr(now.price);
    if (oldPrice === newPrice) continue;

    const oldN = priceToNumber(oldPrice);
    const newN = priceToNumber(newPrice);

    // Direction is only reported when both sides are comparable numbers.
    let kind = "price_change";
    if (oldN !== null && newN !== null) {
      if (newN < oldN) kind = "price_down";
      else if (newN > oldN) kind = "price_up";
    }

    priceChanges.push({
      kind,
      sku,
      name: now.name || was.name || "",
      oldPrice,
      newPrice,
      url: now.url || was.url || "",
    });
  }

  return { newItems, restoredItems, removedItems, priceChanges };
}
/**
 * Resolve the commit id of `HEAD`.
 *
 * @returns {string} The output of `git rev-parse --verify HEAD`, or `""` when
 *   git fails (for example when there is no HEAD commit).
 */
function getHeadShaOrEmpty() {
  try {
    return runGit(["rev-parse", "--verify", "HEAD"]);
  } catch {
    return "";
  }
}
/**
 * Look up the first parent of a commit.
 *
 * @param {string} sha - Commit identifier.
 * @returns {string} The first parent's id, or `""` when the commit has no
 *   parent or git fails.
 */
function firstParentSha(sha) {
  try {
    // Output format: "<commit> <parent1> <parent2> ..."
    const out = runGit(["rev-list", "--parents", "-n", "1", sha]);
    const parts = out.split(/\s+/).filter(Boolean);
    return parts.length >= 2 ? parts[1] : "";
  } catch {
    return "";
  }
}
/**
 * List the `data/db` files that differ between two points in history.
 *
 * @param {string} fromSha - Starting commit; falsy means "no baseline", in
 *   which case every db file present at `toSha` is returned.
 * @param {string} toSha - Ending commit, or the literal `"WORKTREE"` to compare
 *   against the files on disk under the current working directory.
 * @returns {string[]} Repo-relative paths (forward slashes); an empty array
 *   when the lookup fails.
 */
function listChangedDbFiles(fromSha, toSha) {
  const toWorktree = toSha === "WORKTREE";

  if (!fromSha && toSha && !toWorktree) {
    return gitListTreeFiles(toSha, "data/db");
  }

  if (!fromSha && toWorktree) {
    try {
      return fs
        .readdirSync(path.join(process.cwd(), "data", "db"), { withFileTypes: true })
        .filter((e) => e.isFile() && e.name.endsWith(".json"))
        .map((e) => path.posix.join("data/db", e.name));
    } catch {
      return [];
    }
  }

  try {
    // With a single revision, `git diff` compares that commit to the working tree.
    const revs = toWorktree ? [fromSha] : [fromSha, toSha];
    const out = runGit(["diff", "--name-only", ...revs, "--", "data/db"]);
    return out
      .split(/\r?\n/)
      .map((s) => s.trim())
      .filter(Boolean);
  } catch {
    return [];
  }
}
/**
 * List commits touching `data/db` since a given time, oldest first.
 *
 * @param {string} sinceIso - Lower bound passed to `git log --since=`.
 * @returns {Array<{sha: string, ts: string, date: string}>} Commit id,
 *   committer timestamp (`%cI`) and its `YYYY-MM-DD` part; an empty array when
 *   git fails.
 */
function logDbCommitsSince(sinceIso) {
  try {
    const out = runGit(["log", `--since=${sinceIso}`, "--format=%H %cI", "--", "data/db"]);
    const commits = [];
    for (const line of out.split(/\r?\n/)) {
      const m = line.trim().match(/^([0-9a-f]{7,40})\s+(.+)$/i);
      if (!m) continue; // blank or unexpected line
      const [, sha, ts] = m;
      commits.push({ sha, ts, date: dateOnly(ts) });
    }
    // `git log` prints newest first; callers want oldest -> newest.
    commits.reverse();
    return commits;
  } catch {
    return [];
  }
}
/**
 * Build `viz/data/recent.json`: a newest-first list of item-level changes
 * (new, restored, removed, price changes) found in `data/db` over a recent
 * window, plus any uncommitted changes in the working tree.
 *
 * Environment:
 * - RECENT_DAYS: window length in days (default 7, minimum 1).
 * - RECENT_MAX_ITEMS: maximum number of rows written (default 5000, minimum 1).
 * Non-numeric values fall back to the default.
 *
 * Paths are resolved against the current working directory.
 *
 * @returns {void}
 */
function main() {
  // Read a numeric env var clamped to >= 1. A non-numeric value would
  // otherwise become NaN, which breaks the date arithmetic (RECENT_DAYS) or
  // silently truncates the output to zero rows (RECENT_MAX_ITEMS).
  function envNumberAtLeastOne(name, fallback) {
    const n = Number(process.env[name] || fallback);
    return Number.isNaN(n) ? fallback : Math.max(1, n);
  }

  // True when the store label, item name or URL mentions SMWS.
  function isSmwsBottle(storeLabel, it) {
    const hay = [storeLabel, it?.name, it?.url]
      .map((x) => String(x || ""))
      .join(" | ")
      .toLowerCase();
    return hay.includes("smws") || hay.includes("scotch malt whisky society");
  }

  const repoRoot = process.cwd();
  const outDir = path.join(repoRoot, "viz", "data");
  const outFile = path.join(outDir, "recent.json");
  fs.mkdirSync(outDir, { recursive: true });

  const windowDays = envNumberAtLeastOne("RECENT_DAYS", 7);
  const maxItems = envNumberAtLeastOne("RECENT_MAX_ITEMS", 5000);

  const now = new Date();
  const nowIso = now.toISOString();
  const sinceIso = new Date(now.getTime() - windowDays * 24 * 3600 * 1000).toISOString();

  const headSha = getHeadShaOrEmpty();
  const commits = headSha ? logDbCommitsSince(sinceIso) : [];

  // Consecutive (from -> to) pairs to diff. The oldest commit in the window is
  // compared with its first parent; the last pair compares HEAD with the
  // working tree.
  const pairs = commits.map((c, i) => ({
    fromSha: i === 0 ? firstParentSha(c.sha) || "" : commits[i - 1].sha,
    toSha: c.sha,
    ts: c.ts,
    date: c.date,
  }));
  if (headSha) {
    pairs.push({ fromSha: headSha, toSha: "WORKTREE", ts: nowIso, date: dateOnly(nowIso) });
  }

  const items = [];

  for (const { fromSha, toSha, ts, date } of pairs) {
    const toWorktree = toSha === "WORKTREE";
    const common = { ts, date, fromSha: fromSha || "", toSha };

    for (const file of listChangedDbFiles(fromSha, toSha)) {
      const absFile = path.join(repoRoot, file);

      // Files deleted on the "to" side produce no rows.
      const nextExists = toWorktree ? fs.existsSync(absFile) : gitFileExistsAtSha(toSha, file);
      if (!nextExists) continue;

      const prevObj = fromSha ? gitShowJson(fromSha, file) : null;
      const nextObj = toWorktree ? readJsonFileOrNull(absFile) : gitShowJson(toSha, file);
      if (!prevObj && !nextObj) continue;

      const storeLabel = String(
        nextObj?.storeLabel || nextObj?.store || prevObj?.storeLabel || prevObj?.store || "",
      );
      const categoryLabel = String(
        nextObj?.categoryLabel || nextObj?.category || prevObj?.categoryLabel || prevObj?.category || "",
      );

      // The file exists on the "to" side (checked above), so it is newly added
      // when there is a baseline commit that does not contain it.
      const isNewStoreFile = Boolean(fromSha) && !gitFileExistsAtSha(fromSha, file);

      let { newItems, restoredItems, removedItems, priceChanges } = diffDb(prevObj, nextObj);

      // A brand-new db file would report its entire contents as "new";
      // suppress those rows.
      if (isNewStoreFile) {
        newItems = [];
        restoredItems = [];
      }

      // Key order is kept stable so the JSON output does not churn.
      const listingRow = (kind, it) => ({
        ...common,
        kind,
        sku: it.sku,
        name: it.name,
        storeLabel,
        categoryLabel,
        price: normPriceStr(it.price),
        url: it.url,
        dbFile: file,
      });

      for (const it of newItems) {
        if (isSmwsBottle(storeLabel, it)) continue;
        items.push(listingRow("new", it));
      }
      for (const it of restoredItems) items.push(listingRow("restored", it));
      for (const it of removedItems) items.push(listingRow("removed", it));

      for (const u of priceChanges) {
        items.push({
          ...common,
          kind: u.kind,
          sku: u.sku,
          name: u.name,
          storeLabel,
          categoryLabel,
          oldPrice: normPriceStr(u.oldPrice),
          newPrice: normPriceStr(u.newPrice),
          url: u.url,
          dbFile: file,
        });
      }
    }
  }

  // Newest first, then cap the row count.
  items.sort((a, b) => String(b.ts).localeCompare(String(a.ts)));
  const trimmed = items.slice(0, maxItems);

  const payload = {
    generatedAt: nowIso,
    windowDays,
    since: sinceIso,
    headSha,
    count: trimmed.length,
    items: trimmed,
  };

  fs.writeFileSync(outFile, JSON.stringify(payload, null, 2) + "\n", "utf8");
  process.stdout.write(`Wrote ${outFile} (${trimmed.length} items)\n`);
}
// Script entry point: run the generator exactly once.
main();

View file

@ -8,24 +8,24 @@ const DB_DIR = path.join(__dirname, "../data/db");
const LINKS_FILE = path.join(__dirname, "../data/sku_links.json"); const LINKS_FILE = path.join(__dirname, "../data/sku_links.json");
/**
 * Normalize an SKU key so that the implicit `id:<n>` form and the zero-padded
 * numeric form compare equal (e.g. `id:1234` -> `001234`).
 *
 * @param {*} k - Raw key; falsy values become `""`.
 * @returns {string} For `id:` followed by 1-6 digits (case-insensitive), the
 *   digits left-padded with zeros to six characters; otherwise the trimmed input.
 */
function normalizeImplicitSkuKey(k) {
  const s = String(k || "").trim();
  const m = s.match(/^id:(\d{1,6})$/i);
  if (m) return String(m[1]).padStart(6, "0");
  return s;
}
// Collect all valid SKUs from the db files, normalized so implicit `id:1234`
// keys and zero-padded `001234` keys land on the same entry.
const validSkus = new Set();
for (const file of fs.readdirSync(DB_DIR)) {
  if (!file.endsWith(".json")) continue;
  const data = JSON.parse(fs.readFileSync(path.join(DB_DIR, file), "utf8"));
  // `?.` tolerates a db file whose JSON content is literally `null`.
  if (!Array.isArray(data?.items)) continue;
  for (const item of data.items) {
    if (!item || !item.sku) continue;
    const k = normalizeImplicitSkuKey(item.sku);
    if (k) validSkus.add(k);
  }
}
// load links // load links
@ -40,40 +40,40 @@ const seen = new Set(); // dedupe after normalization
// Rebuild the link list, normalizing both SKUs of every link and dropping
// links that are incomplete, self-referential, stale, or duplicated.
const nextLinks = [];
for (const x of Array.isArray(linksData.links) ? linksData.links : []) {
  const a = normalizeImplicitSkuKey(x?.fromSku);
  const b = normalizeImplicitSkuKey(x?.toSku);

  // a link needs both endpoints
  if (!a || !b) {
    prunedMissing++;
    continue;
  }

  // drop links that are now implicit (id:1234 <-> 001234 etc)
  if (a === b) {
    prunedAuto++;
    continue;
  }

  // keep only links where BOTH normalized skus exist in db
  if (!validSkus.has(a) || !validSkus.has(b)) {
    prunedMissing++;
    continue;
  }

  // dedupe (undirected) after normalization: order the pair so that
  // a->b and b->a share one key
  const key = a < b ? `${a}|${b}` : `${b}|${a}`;
  if (seen.has(key)) {
    prunedDup++;
    continue;
  }
  seen.add(key);

  // preserve datestamps/metadata; just normalize the SKUs
  nextLinks.push({
    ...x,
    fromSku: a,
    toSku: b,
  });
}

linksData.links = nextLinks;

View file

@ -12,307 +12,340 @@ const { priceToNumber, salePctOff, normPrice } = require("../src/utils/price");
const { isoTimestampFileSafe } = require("../src/utils/time"); const { isoTimestampFileSafe } = require("../src/utils/time");
/**
 * Run `git` synchronously with the given argument vector and return its stdout.
 *
 * @param {string[]} args - Arguments passed to git (no shell is involved).
 * @returns {string} stdout decoded as UTF-8 with trailing whitespace removed.
 * @throws {Error} Whatever `execFileSync` throws (non-zero exit, git not found).
 */
function runGit(args) {
  return execFileSync("git", args, { encoding: "utf8" }).trimEnd();
}
/**
 * Read the content of `filePath` as it exists at commit `sha` (`git show sha:path`).
 *
 * @param {string} sha - Commit-ish to read from.
 * @param {string} filePath - Repository-relative path of the file.
 * @returns {string|null} File content as UTF-8 text, or null when git fails for
 *   any reason (for example the file does not exist at that commit).
 */
function gitShowText(sha, filePath) {
  try {
    return execFileSync("git", ["show", `${sha}:${filePath}`], {
      encoding: "utf8",
      // A missing file is an expected outcome here (the caller probes both commits
      // for every path), so do not leak git's "fatal: ..." message to our stderr.
      stdio: ["ignore", "pipe", "ignore"],
      // Raise the stdout cap so a large DB file is not reported as missing just
      // because it overflowed the default buffer.
      maxBuffer: 256 * 1024 * 1024,
    });
  } catch {
    return null;
  }
}
/**
 * List every file path under `dbDirRel` in the tree of commit `sha`.
 *
 * @param {string} sha - Commit-ish whose tree is listed.
 * @param {string} dbDirRel - Repository-relative directory to list recursively.
 * @returns {Set<string>} Paths as printed by `git ls-tree -r --name-only`.
 * @throws {Error} Propagates any failure from `runGit`.
 */
function gitListDbFiles(sha, dbDirRel) {
  const out = runGit(["ls-tree", "-r", "--name-only", sha, dbDirRel]);
  const lines = out
    .split(/\r?\n/)
    .map((s) => s.trim())
    .filter(Boolean);
  return new Set(lines);
}
/**
 * Parse JSON text, mapping "no text" and "invalid text" to null instead of throwing.
 *
 * @param {string|null|undefined} txt - JSON source, or null/undefined when absent.
 * @returns {*} The parsed value, or null when `txt` is nullish or not valid JSON.
 */
function parseJsonOrNull(txt) {
  if (txt == null) return null;
  try {
    return JSON.parse(txt);
  } catch {
    return null;
  }
}
/**
 * Index a DB object's items by URL, keeping only the fields the diff needs.
 *
 * Items that are falsy, or whose `url` is not a string starting with "http",
 * are skipped. When several items share a URL the last one wins.
 *
 * @param {{items?: Array<object>}|null|undefined} obj - Parsed DB file.
 * @returns {Map<string, {name: string, price: string, sku: string, url: string, removed: boolean}>}
 */
function mapItemsByUrl(obj) {
  const byUrl = new Map();
  const items = Array.isArray(obj?.items) ? obj.items : [];
  for (const it of items) {
    if (!it || typeof it.url !== "string" || !it.url.startsWith("http")) continue;
    byUrl.set(it.url, {
      name: String(it.name || ""),
      price: String(it.price || ""),
      sku: String(it.sku || ""),
      url: it.url,
      removed: Boolean(it.removed),
    });
  }
  return byUrl;
}
/**
 * Classify every item URL found in either snapshot of one DB file.
 *
 * An item is "live" when it exists and is not flagged `removed`.
 *  - new:      absent before, live now
 *  - restored: present but removed before, live now
 *  - removed:  live before, absent or removed now
 *  - updated:  live in both and the normalized price differs
 *
 * @param {object|null} prevObj - Parsed DB file at the older commit (or null).
 * @param {object|null} nextObj - Parsed DB file at the newer commit (or null).
 * @returns {{newItems: object[], restoredItems: object[], removedItems: object[], updatedItems: object[]}}
 */
function buildDiffForDb(prevObj, nextObj) {
  const prev = mapItemsByUrl(prevObj);
  const next = mapItemsByUrl(nextObj);
  const urls = new Set([...prev.keys(), ...next.keys()]);

  const newItems = [];
  const restoredItems = [];
  const removedItems = [];
  const updatedItems = [];

  for (const url of urls) {
    const a = prev.get(url);
    const b = next.get(url);
    const aLive = Boolean(a) && !a.removed;
    const bLive = Boolean(b) && !b.removed;

    if (bLive && !a) {
      newItems.push({ ...b });
    } else if (bLive && !aLive) {
      // `a` exists here (previous branch handled absence), so it was flagged removed.
      restoredItems.push({ ...b });
    } else if (aLive && !bLive) {
      removedItems.push({ ...a });
    } else if (aLive && bLive && normPrice(a.price) !== normPrice(b.price)) {
      updatedItems.push({
        name: b.name || a.name || "",
        sku: normalizeCspc(b.sku || a.sku || ""),
        oldPrice: a.price || "",
        newPrice: b.price || "",
        url,
      });
    }
  }

  return { newItems, restoredItems, removedItems, updatedItems };
}
/**
 * Parse the command line (without the `node script` prefix).
 *
 * Anything not starting with "-" is positional; the first two positionals are
 * the from/to commits. `--db-dir` and `--out` consume the following argument
 * when it does not itself start with "-". `--color` / `--no-color` are stored
 * in `flags` without the leading dashes; every other dash argument (including
 * a value-less `--db-dir` / `--out`) is stored in `flags` verbatim.
 *
 * @param {string[]} argv - Raw arguments.
 * @returns {{fromSha: string, toSha: string, dbDir: string, outFile: string, flags: Set<string>}}
 */
function parseArgs(argv) {
  const flags = new Set();
  const kv = new Map();
  const positional = [];

  for (let i = 0; i < argv.length; i++) {
    const a = argv[i];
    if (!a.startsWith("-")) {
      positional.push(a);
      continue;
    }
    if (a === "--no-color") {
      flags.add("no-color");
      continue;
    }
    if (a === "--color") {
      flags.add("color");
      continue;
    }
    if ((a === "--db-dir" || a === "--out") && argv[i + 1] && !argv[i + 1].startsWith("-")) {
      kv.set(a, argv[i + 1]);
      i++; // the value has been consumed
      continue;
    }
    flags.add(a);
  }

  const fromSha = positional[0] || "";
  const toSha = positional[1] || "";
  const dbDir = kv.get("--db-dir") || "data/db";
  const outFile = kv.get("--out") || "";
  return { fromSha, toSha, dbDir, outFile, flags };
}
/**
 * Render the aggregated diff as a text report.
 *
 * The input report is not modified: item lists are sorted on copies, so the
 * function can be called repeatedly (e.g. once colorized, once plain).
 *
 * @param {object} diffReport - `{ categories, totals, newItems, restoredItems, removedItems, updatedItems }`.
 * @param {{fromSha: string, toSha: string, colorize: boolean}} opts - Commit
 *   labels for the header and whether `color` should emit escape codes.
 * @returns {string} The report, every line terminated by "\n".
 */
function renderDiffReport(diffReport, { fromSha, toSha, colorize }) {
  const paint = (s, code) => color(s, code, colorize);

  let out = "";
  const ln = (s = "") => {
    out += String(s) + "\n";
  };

  // Sort key used by every section: category label, then item name.
  const byCatThenName = (a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name);
  const sortedCopy = (items) => [...items].sort(byCatThenName);

  const { totals } = diffReport;
  ln(paint("========== DIFF REPORT ==========", C.bold));
  ln(`${paint("From", C.bold)} ${fromSha} ${paint("to", C.bold)} ${toSha}`);
  ln(
    `${paint("Totals", C.bold)} | Categories=${diffReport.categories.length} | New=${totals.newCount} | Restored=${totals.restoredCount} | Removed=${totals.removedCount} | PriceChanges=${totals.updatedCount}`,
  );
  ln("");

  const rows = diffReport.categories;
  const catW = Math.min(56, Math.max(...rows.map((r) => r.catLabel.length), 12));

  ln(paint("Per-category summary:", C.bold));
  ln(
    `${padRight("Store | Category", catW)} ${padLeft("New", 4)} ${padLeft("Res", 4)} ${padLeft("Rem", 4)} ${padLeft("Upd", 4)}`,
  );
  ln(`${"-".repeat(catW)} ---- ---- ---- ----`);
  for (const r of rows) {
    ln(
      `${padRight(r.catLabel, catW)} ${padLeft(r.newCount, 4)} ${padLeft(r.restoredCount, 4)} ${padLeft(r.removedCount, 4)} ${padLeft(r.updatedCount, 4)}`,
    );
  }
  ln("");

  // Widest category label over all item lists (minimum 16). A loop is used
  // instead of spreading every label into one Math.max call.
  let labelW = 16;
  for (const list of [
    diffReport.newItems,
    diffReport.restoredItems,
    diffReport.removedItems,
    diffReport.updatedItems,
  ]) {
    for (const x of list) labelW = Math.max(labelW, x.catLabel.length);
  }

  const skuInline = (sku) => {
    const s = normalizeCspc(sku);
    return s ? paint(` ${s}`, C.gray) : "";
  };

  // NEW / RESTORED / REMOVED share one layout and differ only in title, marker and color.
  const renderItemSection = (title, titleCode, marker, markerCode, items) => {
    if (!items.length) return;
    ln(paint(`${title} (${items.length})`, titleCode));
    for (const it of sortedCopy(items)) {
      const price = it.price ? paint(it.price, C.cyan) : paint("(no price)", C.gray);
      ln(
        `${paint(marker, markerCode)} ${padRight(it.catLabel, labelW)} | ${paint(it.name, C.bold)}${skuInline(it.sku)} ${price}`,
      );
      ln(` ${paint(it.url, C.dim)}`);
    }
    ln("");
  };

  renderItemSection("NEW", C.bold + C.green, "+", C.green, diffReport.newItems);
  renderItemSection("RESTORED", C.bold + C.green, "R", C.green, diffReport.restoredItems);
  renderItemSection("REMOVED", C.bold + C.yellow, "-", C.yellow, diffReport.removedItems);

  if (diffReport.updatedItems.length) {
    ln(paint(`PRICE CHANGES (${diffReport.updatedItems.length})`, C.bold + C.cyan));
    for (const u of sortedCopy(diffReport.updatedItems)) {
      const oldRaw = u.oldPrice || "";
      const newRaw = u.newPrice || "";

      const oldN = priceToNumber(oldRaw);
      const newN = priceToNumber(newRaw);

      const oldP = oldRaw ? paint(oldRaw, C.yellow) : paint("(no price)", C.gray);

      // Red for an increase, green (plus a percent-off tag) for a decrease,
      // cyan when the numbers are equal or cannot be compared.
      let newP = newRaw ? newRaw : "(no price)";
      let offTag = "";
      if (Number.isFinite(oldN) && Number.isFinite(newN) && newN > oldN) {
        newP = paint(newP, C.red);
      } else if (Number.isFinite(oldN) && Number.isFinite(newN) && newN < oldN) {
        newP = paint(newP, C.green);
        const pct = salePctOff(oldRaw, newRaw);
        if (pct !== null) offTag = " " + paint(`[${pct}% Off]`, C.green);
      } else {
        newP = paint(newP, C.cyan);
      }

      ln(
        `${paint("~", C.cyan)} ${padRight(u.catLabel, labelW)} | ${paint(u.name, C.bold)}${skuInline(u.sku)} ${oldP} ${paint("->", C.gray)} ${newP}${offTag}`,
      );
      ln(` ${paint(u.url, C.dim)}`);
    }
    ln("");
  }

  ln(paint("======== END DIFF REPORT ========", C.bold));
  return out;
}
/**
 * CLI entry point: diff the DB directory between two commits, print the report
 * to stdout and optionally write an uncolored copy to `--out`.
 *
 * Color handling: `--no-color` always wins; `--color` forces color; otherwise
 * color is used only when stdout is a TTY.
 *
 * Sets `process.exitCode = 2` and returns when either commit is missing.
 */
async function main() {
  const { fromSha, toSha, dbDir, outFile, flags } = parseArgs(process.argv.slice(2));

  if (!fromSha || !toSha) {
    console.error(
      `Usage: ${path.basename(process.argv[1])} <fromSha> <toSha> [--db-dir data/db] [--out reports/<file>.txt] [--color|--no-color]`,
    );
    process.exitCode = 2;
    return;
  }

  // If user provides short SHAs, git accepts them.
  const stdoutIsTty = Boolean(process.stdout && process.stdout.isTTY);
  // `--color` is accepted by parseArgs; honor it so it is not a silent no-op.
  const colorize = flags.has("no-color") ? false : flags.has("color") || stdoutIsTty;

  // A file may exist in only one of the two commits, so diff over the union.
  const filesA = gitListDbFiles(fromSha, dbDir);
  const filesB = gitListDbFiles(toSha, dbDir);
  const files = new Set([...filesA, ...filesB]);

  const diffReport = {
    categories: [],
    totals: { newCount: 0, updatedCount: 0, removedCount: 0, restoredCount: 0 },
    newItems: [],
    restoredItems: [],
    removedItems: [],
    updatedItems: [],
  };

  for (const file of [...files].sort()) {
    const prevObj = parseJsonOrNull(gitShowText(fromSha, file));
    const nextObj = parseJsonOrNull(gitShowText(toSha, file));

    // Prefer the newer snapshot's labels, then the older one's, then raw ids.
    const storeLabel = String(
      nextObj?.storeLabel || prevObj?.storeLabel || nextObj?.store || prevObj?.store || "?",
    );
    const catLabel = String(
      nextObj?.categoryLabel ||
        prevObj?.categoryLabel ||
        nextObj?.category ||
        prevObj?.category ||
        path.basename(file),
    );
    const catLabelFull = `${storeLabel} | ${catLabel}`;

    const { newItems, restoredItems, removedItems, updatedItems } = buildDiffForDb(prevObj, nextObj);

    diffReport.categories.push({
      catLabel: catLabelFull,
      newCount: newItems.length,
      restoredCount: restoredItems.length,
      removedCount: removedItems.length,
      updatedCount: updatedItems.length,
    });

    diffReport.totals.newCount += newItems.length;
    diffReport.totals.restoredCount += restoredItems.length;
    diffReport.totals.removedCount += removedItems.length;
    diffReport.totals.updatedCount += updatedItems.length;

    for (const it of newItems) diffReport.newItems.push({ catLabel: catLabelFull, ...it });
    for (const it of restoredItems) diffReport.restoredItems.push({ catLabel: catLabelFull, ...it });
    for (const it of removedItems) diffReport.removedItems.push({ catLabel: catLabelFull, ...it });
    for (const u of updatedItems) diffReport.updatedItems.push({ catLabel: catLabelFull, ...u });
  }

  const reportText = renderDiffReport(diffReport, { fromSha, toSha, colorize });
  process.stdout.write(reportText);

  const outPath = outFile ? (path.isAbsolute(outFile) ? outFile : path.join(process.cwd(), outFile)) : "";

  if (outPath) {
    fs.mkdirSync(path.dirname(outPath), { recursive: true });
    // The file copy is always rendered without color codes.
    fs.writeFileSync(outPath, renderDiffReport(diffReport, { fromSha, toSha, colorize: false }), "utf8");
  }
}
// Run the CLI; on any rejection print the stack (or the value itself) and mark
// the process as failed without cutting off pending output.
main().catch((e) => {
  const msg = e && e.stack ? e.stack : String(e);
  console.error(msg);
  process.exitCode = 1;
});

View file

@ -13,31 +13,31 @@ const includeLinked = process.argv.includes("--include-linked");
// Load the SKUs that already take part in a link, unless --include-linked was
// given (in which case nothing is filtered and the links file is not read).
const linkedSkus = new Set();
if (!includeLinked && fs.existsSync(LINKS_FILE)) {
  const parsed = JSON.parse(fs.readFileSync(LINKS_FILE, "utf8"));
  // Tolerate a links file without a `links` array, and null entries inside it.
  const links = Array.isArray(parsed?.links) ? parsed.links : [];
  for (const link of links) {
    if (!link) continue;
    linkedSkus.add(String(link.fromSku));
    linkedSkus.add(String(link.toSku));
  }
}

// Print the URL of every live item whose SKU has the "u:" prefix and is not linked yet.
for (const file of fs.readdirSync(DB_DIR)) {
  if (!file.endsWith(".json")) continue;
  if (!includeKegNCork && file.startsWith("kegncork__")) continue;
  if (!includeCoop && file.startsWith("coop__")) continue;

  const data = JSON.parse(fs.readFileSync(path.join(DB_DIR, file), "utf8"));
  if (!Array.isArray(data?.items)) continue;

  for (const item of data.items) {
    if (!item) continue; // destructuring a null entry would throw
    const { sku, url, removed } = item;
    if (
      removed === false &&
      typeof sku === "string" &&
      sku.startsWith("u:") &&
      url &&
      (includeLinked || !linkedSkus.has(sku))
    ) {
      console.log(url);
    }
  }
}

File diff suppressed because it is too large Load diff

View file

@ -4,12 +4,12 @@ import path from "node:path";
import { execSync } from "node:child_process"; import { execSync } from "node:child_process";
/**
 * Print `msg` to stderr and terminate the process with exit code 1.
 *
 * @param {string} msg - Message to print.
 */
function die(msg) {
  console.error(msg);
  process.exit(1);
}
/**
 * Run a command line through the system shell and return its trimmed stdout.
 *
 * @param {string} cmd - Command line, interpreted by the shell; callers must
 *   not splice unescaped untrusted text into it.
 * @returns {string} stdout decoded as UTF-8, trimmed on both ends.
 * @throws {Error} Whatever `execSync` throws (e.g. non-zero exit status).
 */
function sh(cmd) {
  return execSync(cmd, { stdio: "pipe", encoding: "utf8" }).trim();
}
// Raw issue text supplied through the environment; empty when unset.
const ISSUE_BODY = process.env.ISSUE_BODY || "";
@ -20,16 +20,14 @@ const REPO = process.env.REPO || "";
if (!ISSUE_NUMBER) die("Missing ISSUE_NUMBER");
if (!REPO) die("Missing REPO");

// The payload is JSON embedded between two HTML comment markers in the issue
// body; capture group 1 is the text between them with outer whitespace removed.
const m = ISSUE_BODY.match(/<!--\s*stviz-sku-edits:BEGIN\s*-->\s*([\s\S]*?)\s*<!--\s*stviz-sku-edits:END\s*-->/);
if (!m) die("No stviz payload found in issue body.");

let payload;
try {
  payload = JSON.parse(m[1]);
} catch (e) {
  die(`Invalid JSON payload: ${e?.message || e}`);
}

if (payload?.schema !== "stviz-sku-edits-v1") die("Unsupported payload schema.");
@ -38,259 +36,246 @@ const linksIn = Array.isArray(payload?.links) ? payload.links : [];
// Requested ignore pairs from the payload; anything that is not an array counts as none.
const ignoresIn = Array.isArray(payload?.ignores) ? payload.ignores : [];
/**
 * Normalize a SKU-like value to a trimmed string.
 *
 * @param {*} s - Any value; every falsy value (including 0) becomes "".
 * @returns {string} `String(s)` with surrounding whitespace removed, or "".
 */
function normSku(s) {
  return String(s || "").trim();
}
/**
 * Build the directed dedupe key for a link from SKU `a` to SKU `b`.
 *
 * The two normalized SKUs are joined with an arrow separator. Without a
 * separator, distinct pairs such as ("ab", "c") and ("a", "bc") would share a key.
 *
 * @param {*} a - Source SKU.
 * @param {*} b - Target SKU.
 * @returns {string} The key, or "" when either SKU is empty or both are equal.
 */
function linkKeyFrom(a, b) {
  const x = normSku(a);
  const y = normSku(b);
  return x && y && x !== y ? `${x}\u2192${y}` : "";
}
/**
 * Directed dedupe key of a link object; see `linkKeyFrom`.
 *
 * @param {{fromSku?: *, toSku?: *}|null|undefined} x - Link object.
 * @returns {string} Key for `x.fromSku` -> `x.toSku`.
 */
function linkKey(x) {
  return linkKeyFrom(x?.fromSku, x?.toSku);
}
/**
 * Build the undirected dedupe key for a pair of SKUs.
 *
 * @param {*} a - First SKU.
 * @param {*} b - Second SKU.
 * @returns {string} Both normalized SKUs joined by "|" with the smaller one
 *   first, or "" when either is empty or both are equal.
 */
function pairKey(a, b) {
  const x = normSku(a);
  const y = normSku(b);
  if (!x || !y || x === y) return "";
  return x < y ? `${x}|${y}` : `${y}|${x}`;
}
/* ---------------- Minimal, merge-friendly JSON array insertion ---------------- */ /* ---------------- Minimal, merge-friendly JSON array insertion ---------------- */
/**
 * Locate the `[ ... ]` span of the first `"propName": [` field that starts a
 * line (after optional spaces/tabs) in JSON source text.
 *
 * @param {string} src - JSON document text.
 * @param {string} propName - Property name, matched literally.
 * @returns {{start: number, open: number, close: number, end: number, fieldIndent: string}|null}
 *   `start` is the index where the field's line indentation begins, `open` /
 *   `close` are the indices of "[" and its matching "]", `end` is `close + 1`,
 *   `fieldIndent` is the whitespace before the property name. Null when the
 *   field is not found or the bracket is never closed.
 */
function findJsonArraySpan(src, propName) {
  // Escape regex metacharacters so the name is matched as literal text.
  const literalName = String(propName).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const re = new RegExp(`(^[ \\t]*)"${literalName}"\\s*:\\s*\\[`, "m");
  const mm = re.exec(src);
  if (!mm) return null;

  const fieldIndent = mm[1] || "";
  const at = mm.index;
  // The pattern ends on the opening bracket, so it is the match's last character.
  const open = at + mm[0].length - 1;

  // Scan to the matching ']', ignoring brackets that occur inside string literals.
  let depth = 0;
  let inStr = false;
  let esc = false;
  for (let i = open; i < src.length; i++) {
    const ch = src[i];

    if (inStr) {
      if (esc) esc = false;
      else if (ch === "\\") esc = true;
      else if (ch === '"') inStr = false;
      continue;
    }

    if (ch === '"') {
      inStr = true;
    } else if (ch === "[") {
      depth++;
    } else if (ch === "]") {
      depth--;
      if (depth === 0) {
        return { start: at, open, close: i, end: i + 1, fieldIndent };
      }
    }
  }
  return null;
}
/**
 * Split the text between an array's "[" and "]" into the raw source text of
 * each top-level `{ ... }` element, preserving each element's formatting.
 *
 * Everything outside an object (commas, whitespace, stray characters) is
 * skipped. An object that is never closed is dropped.
 *
 * @param {string} arrayInnerText - Text between '[' and ']'.
 * @returns {string[]} One exact source slice per object, in document order.
 */
function splitArrayObjectBlocks(arrayInnerText) {
  const s = arrayInnerText;
  const blocks = [];
  let i = 0;

  while (i < s.length) {
    // Advance to the next object start.
    if (s[i] !== "{") {
      i++;
      continue;
    }

    const start = i;
    let depth = 0;
    let inStr = false;
    let esc = false;

    // Walk to the brace matching `start`; braces inside strings do not count.
    for (; i < s.length; i++) {
      const ch = s[i];

      if (inStr) {
        if (esc) esc = false;
        else if (ch === "\\") esc = true;
        else if (ch === '"') inStr = false;
        continue;
      }

      if (ch === '"') {
        inStr = true;
      } else if (ch === "{") {
        depth++;
      } else if (ch === "}") {
        depth--;
        if (depth === 0) {
          i++; // include '}'
          blocks.push(s.slice(start, i));
          break;
        }
      }
    }
  }

  return blocks;
}
/**
 * Infer the indentation used for the '{' line of items inside an array.
 *
 * @param {string} arrayInnerText - Text between '[' and ']'.
 * @param {string} fieldIndent - Indentation of the line holding the array's property name.
 * @returns {string} The whitespace preceding the first '{' that starts a line,
 *   or `fieldIndent` plus two spaces when there is none (e.g. an empty array).
 */
function detectItemIndent(arrayInnerText, fieldIndent) {
  const m = arrayInnerText.match(/\n([ \t]*)\{/);
  if (m) return m[1];
  // Two spaces per level, the step JSON.stringify(value, null, 2) produces.
  return fieldIndent + "  ";
}
/**
 * Format a link (`fromSku`/`toSku`) or ignore pair (`skuA`/`skuB`) as a
 * multi-line JSON object laid out the way JSON.stringify(..., 2) lays out an
 * object nested inside an array.
 *
 * @param {string} objIndent - Indentation placed before the braces.
 * @param {object|null|undefined} obj - Source object; SKUs are normalized with `normSku`.
 * @returns {string} The formatted object (no trailing newline or comma), or
 *   `objIndent + "{}"` when neither pair of fields is fully present.
 */
function makePrettyObjBlock(objIndent, obj) {
  const braceIndent = objIndent;
  const fieldIndent = objIndent + "  "; // fields sit one 2-space level deeper

  const fromSku = normSku(obj?.fromSku);
  const toSku = normSku(obj?.toSku);
  const skuA = normSku(obj?.skuA);
  const skuB = normSku(obj?.skuB);

  const block = (keyOne, valOne, keyTwo, valTwo) =>
    `${braceIndent}{\n` +
    `${fieldIndent}"${keyOne}": ${JSON.stringify(valOne)},\n` +
    `${fieldIndent}"${keyTwo}": ${JSON.stringify(valTwo)}\n` +
    `${braceIndent}}`;

  if (fromSku && toSku) return block("fromSku", fromSku, "toSku", toSku);
  if (skuA && skuB) return block("skuA", skuA, "skuB", skuB);
  return `${braceIndent}{}`;
}
/**
 * Append new objects to the `"propName": [ ... ]` array inside JSON source
 * text without reformatting anything that is already there.
 *
 * Existing elements are parsed only to collect their keys; incoming entries
 * whose key is empty or already present (in the file or earlier in `incoming`)
 * are skipped. Added entries are ordered by key; existing ones keep their order.
 *
 * @param {object} args
 * @param {string} args.src - Current file text.
 * @param {string} args.propName - Name of the array property to extend.
 * @param {Array<object>} args.incoming - Candidate entries.
 * @param {(obj: object) => string} args.keyFn - Dedupe key; "" means "skip".
 * @param {(obj: object) => object} args.normalizeFn - Applied to each incoming entry first.
 * @returns {string} Updated text, or `src` itself when nothing needs adding.
 *   Calls `die` when the array cannot be located.
 */
function applyInsertionsToArrayText({ src, propName, incoming, keyFn, normalizeFn }) {
  const span = findJsonArraySpan(src, propName);
  if (!span) die(`Could not find "${propName}" array in ${filePath}`);

  const before = src.slice(0, span.open + 1); // includes '['
  const inner = src.slice(span.open + 1, span.close); // between [ and ]
  const after = src.slice(span.close); // starts with ']'

  const itemIndent = detectItemIndent(inner, span.fieldIndent);

  // Keys of what is already in the file (the inner text itself is not modified).
  const seen = new Set();
  for (const raw of splitArrayObjectBlocks(inner)) {
    try {
      const k = keyFn(JSON.parse(raw));
      if (k) seen.add(k);
    } catch {
      // ignore unparsable blocks for dedupe purposes
    }
  }

  const toAdd = [];
  for (const candidate of incoming) {
    const obj = normalizeFn(candidate);
    const key = keyFn(obj);
    if (!key || seen.has(key)) continue;
    seen.add(key);
    toAdd.push({ obj, key });
  }
  if (!toAdd.length) return src;

  // Deterministic order for new items only (doesn't reorder existing).
  const addedText = toAdd
    .sort((a, b) => String(a.key).localeCompare(String(b.key)))
    .map((entry) => makePrettyObjBlock(itemIndent, entry.obj))
    .join(",\n");

  let newInner;
  if (/^\s*$/.test(inner)) {
    // "links": [] -> pretty multiline, closing bracket aligned with the field
    newInner = `\n${addedText}\n${span.fieldIndent}`;
  } else {
    // Keep existing text exactly; insert after the last non-whitespace
    // character and put the original trailing whitespace back afterwards.
    const body = inner.replace(/\s*$/, "");
    const tail = inner.slice(body.length);
    newInner = `${body},\n${addedText}${tail}`;
  }

  return before + newInner + after;
}
/* ---------------- Apply edits ---------------- */ /* ---------------- Apply edits ---------------- */
const filePath = path.join("data", "sku_links.json"); const filePath = path.join("data", "sku_links.json");
/**
 * Create the SKU links file with an empty seed document when it does not exist yet.
 * Does nothing if `filePath` is already present on disk.
 */
function ensureFileExists() {
    if (fs.existsSync(filePath)) return;
    fs.mkdirSync(path.dirname(filePath), { recursive: true });
    // Create with stable formatting; generatedAt intentionally blank (we do not mutate it later)
    const seed = { generatedAt: "", links: [], ignores: [] };
    fs.writeFileSync(filePath, JSON.stringify(seed, null, 2) + "\n", "utf8");
}
ensureFileExists(); ensureFileExists();
@ -301,42 +286,42 @@ let text = fs.readFileSync(filePath, "utf8");
// Also: do NOT re-stringify entire JSON; we only surgically insert into arrays. // Also: do NOT re-stringify entire JSON; we only surgically insert into arrays.
const normLinksIn = linksIn.map((x) => ({ const normLinksIn = linksIn.map((x) => ({
fromSku: normSku(x?.fromSku), fromSku: normSku(x?.fromSku),
toSku: normSku(x?.toSku), toSku: normSku(x?.toSku),
})); }));
const normIgnoresIn = ignoresIn.map((x) => { const normIgnoresIn = ignoresIn.map((x) => {
const a = normSku(x?.skuA); const a = normSku(x?.skuA);
const b = normSku(x?.skuB); const b = normSku(x?.skuB);
const k = pairKey(a, b); const k = pairKey(a, b);
if (!k) return { skuA: "", skuB: "" }; if (!k) return { skuA: "", skuB: "" };
const [p, q] = k.split("|"); const [p, q] = k.split("|");
return { skuA: p, skuB: q }; return { skuA: p, skuB: q };
}); });
// Insert links (sorted by from→to) // Insert links (sorted by from→to)
text = applyInsertionsToArrayText({ text = applyInsertionsToArrayText({
src: text, src: text,
propName: "links", propName: "links",
incoming: normLinksIn, incoming: normLinksIn,
keyFn: (o) => linkKeyFrom(o?.fromSku, o?.toSku), keyFn: (o) => linkKeyFrom(o?.fromSku, o?.toSku),
normalizeFn: (o) => ({ fromSku: normSku(o?.fromSku), toSku: normSku(o?.toSku) }), normalizeFn: (o) => ({ fromSku: normSku(o?.fromSku), toSku: normSku(o?.toSku) }),
}); });
// Insert ignores (sorted by canonical pair) // Insert ignores (sorted by canonical pair)
text = applyInsertionsToArrayText({ text = applyInsertionsToArrayText({
src: text, src: text,
propName: "ignores", propName: "ignores",
incoming: normIgnoresIn, incoming: normIgnoresIn,
keyFn: (o) => pairKey(o?.skuA, o?.skuB), keyFn: (o) => pairKey(o?.skuA, o?.skuB),
normalizeFn: (o) => { normalizeFn: (o) => {
const a = normSku(o?.skuA); const a = normSku(o?.skuA);
const b = normSku(o?.skuB); const b = normSku(o?.skuB);
const k = pairKey(a, b); const k = pairKey(a, b);
if (!k) return { skuA: "", skuB: "" }; if (!k) return { skuA: "", skuB: "" };
const [p, q] = k.split("|"); const [p, q] = k.split("|");
return { skuA: p, skuB: q }; return { skuA: p, skuB: q };
}, },
}); });
fs.writeFileSync(filePath, text, "utf8"); fs.writeFileSync(filePath, text, "utf8");
@ -345,10 +330,10 @@ fs.writeFileSync(filePath, text, "utf8");
// Ensure git identity is set for commit (Actions runners often lack it) // Ensure git identity is set for commit (Actions runners often lack it)
try { try {
sh(`git config user.name "github-actions[bot]"`); sh(`git config user.name "github-actions[bot]"`);
sh(`git config user.email "41898282+github-actions[bot]@users.noreply.github.com"`); sh(`git config user.email "41898282+github-actions[bot]@users.noreply.github.com"`);
} catch { } catch {
// ignore // ignore
} }
const ts = new Date().toISOString().replace(/[:.]/g, "-"); const ts = new Date().toISOString().replace(/[:.]/g, "-");
@ -360,8 +345,8 @@ sh(`git add "${filePath}"`);
// If no diffs (all edits were duplicates), don't create PR or close issue. // If no diffs (all edits were duplicates), don't create PR or close issue.
const diff = sh(`git status --porcelain "${filePath}"`); const diff = sh(`git status --porcelain "${filePath}"`);
if (!diff) { if (!diff) {
console.log("No changes to commit (all edits already present). Leaving issue open."); console.log("No changes to commit (all edits already present). Leaving issue open.");
process.exit(0); process.exit(0);
} }
sh(`git commit -m "stviz: apply sku edits (issue #${ISSUE_NUMBER})"`); sh(`git commit -m "stviz: apply sku edits (issue #${ISSUE_NUMBER})"`);
@ -371,21 +356,20 @@ const prTitle = `STVIZ: SKU link updates (issue #${ISSUE_NUMBER})`;
const prBody = `Automated PR created from issue #${ISSUE_NUMBER}: ${ISSUE_TITLE}`; const prBody = `Automated PR created from issue #${ISSUE_NUMBER}: ${ISSUE_TITLE}`;
/**
 * Pull the first pull-request URL out of `gh pr create` output.
 *
 * @param {string} out - raw stdout captured from the gh command
 * @returns {string} the first substring that looks like `http(s)://…/pull/<number>…`
 * Calls die() with the captured output when no such URL is present.
 */
function extractPrUrl(out) {
    // gh pr create usually prints the PR URL to stdout; be robust in case extra text appears.
    const m = String(out || "").match(/https?:\/\/\S+\/pull\/\d+\S*/);
    if (!m) die(`Could not find PR URL in gh output:\n${out}`);
    return m[0];
}
// Create PR and capture URL/number without relying on unsupported flags.
// prBody embeds ISSUE_TITLE, which is free-form text: every interpolated value is
// single-quoted so quotes, backticks or $(...) inside it cannot alter the command.
// NOTE(review): assumes sh() hands the string to a POSIX shell — confirm.
function shellQuote(value) {
    // Close the quote, emit an escaped single quote, reopen the quote.
    return "'" + String(value).replace(/'/g, "'\\''") + "'";
}
const prCreateOut = sh(
    `gh -R ${shellQuote(REPO)} pr create --base data --head ${shellQuote(branch)} --title ${shellQuote(prTitle)} --body ${shellQuote(prBody)}`,
);
const prUrl = extractPrUrl(prCreateOut); const prUrl = extractPrUrl(prCreateOut);
const prNumber = sh(`gh -R "${REPO}" pr view "${prUrl}" --json number --jq .number`); const prNumber = sh(`gh -R "${REPO}" pr view "${prUrl}" --json number --jq .number`);
sh( sh(
`gh -R "${REPO}" issue close "${ISSUE_NUMBER}" -c "Processed by STVIZ automation. Opened PR #${prNumber}: ${prUrl}"` `gh -R "${REPO}" issue close "${ISSUE_NUMBER}" -c "Processed by STVIZ automation. Opened PR #${prNumber}: ${prUrl}"`,
); );

View file

@ -4,7 +4,7 @@
const { main } = require("./src/main"); const { main } = require("./src/main");
main().catch((e) => { main().catch((e) => {
const msg = e && e.stack ? e.stack : String(e); const msg = e && e.stack ? e.stack : String(e);
console.error(msg); console.error(msg);
process.exitCode = 1; process.exitCode = 1;
}); });

View file

@ -1,62 +1,64 @@
/**
 * Fetch `url` with the HTTP cache disabled and parse the body as JSON.
 *
 * @param {string} url
 * @returns {Promise<any>} the parsed JSON body
 * @throws {Error} `HTTP <status> for <url>` when the response is not OK
 */
export async function fetchJson(url) {
    const res = await fetch(url, { cache: "no-store" });
    if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`);
    return await res.json();
}
/**
 * Fetch `url` with the HTTP cache disabled and return the body as text.
 *
 * @param {string} url
 * @returns {Promise<string>} the response body
 * @throws {Error} `HTTP <status> for <url>` when the response is not OK
 */
export async function fetchText(url) {
    const res = await fetch(url, { cache: "no-store" });
    if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`);
    return await res.text();
}
/**
 * Derive the GitHub owner/repo from the page location.
 *
 * On `<owner>.github.io` hosts the owner comes from the hostname and the repo from
 * the first path segment (or `<owner>.github.io` when the path is empty).
 * On any other host a fixed default owner/repo is returned.
 *
 * @returns {{ owner: string, repo: string }}
 */
export function inferGithubOwnerRepo() {
    const host = location.hostname || "";
    const m = host.match(/^([a-z0-9-]+)\.github\.io$/i);
    if (!m) return { owner: "brennanwilkes", repo: "spirit-tracker" };

    const owner = m[1];
    const parts = (location.pathname || "/").split("/").filter(Boolean);
    const repo = parts.length >= 1 ? parts[0] : `${owner}.github.io`;
    return { owner, repo };
}
/**
 * True when the page is served over http(s) from `127.0.0.1` or `localhost`.
 *
 * @returns {boolean}
 */
export function isLocalWriteMode() {
    const h = String(location.hostname || "").toLowerCase();
    const isHttp = location.protocol === "http:" || location.protocol === "https:";
    return isHttp && (h === "127.0.0.1" || h === "localhost");
}
/* ---- Local disk-backed SKU link API (only on viz/serve.js) ---- */ /* ---- Local disk-backed SKU link API (only on viz/serve.js) ---- */
/**
 * Read SKU links/ignores from the `/__stviz/sku-links` endpoint.
 *
 * @returns {Promise<{ links: any[], ignores: any[] }>} each field falls back to `[]`
 *   when the response does not carry an array there
 * @throws {Error} `HTTP <status>` when the response is not OK
 */
export async function apiReadSkuMetaFromLocalServer() {
    const r = await fetch("/__stviz/sku-links", { cache: "no-store" });
    if (!r.ok) throw new Error(`HTTP ${r.status}`);
    const j = await r.json();
    return {
        links: Array.isArray(j?.links) ? j.links : [],
        ignores: Array.isArray(j?.ignores) ? j.ignores : [],
    };
}
/**
 * POST a `{ fromSku, toSku }` link as JSON to `/__stviz/sku-links`.
 *
 * @param {string} fromSku
 * @param {string} toSku
 * @returns {Promise<any>} the parsed JSON response
 * @throws {Error} `HTTP <status>` when the response is not OK
 */
export async function apiWriteSkuLink(fromSku, toSku) {
    const res = await fetch("/__stviz/sku-links", {
        method: "POST",
        headers: { "content-type": "application/json" },
        body: JSON.stringify({ fromSku, toSku }),
    });
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    return await res.json();
}
/**
 * POST a `{ skuA, skuB }` ignore pair as JSON to `/__stviz/sku-ignores`.
 *
 * @param {string} skuA
 * @param {string} skuB
 * @returns {Promise<any>} the parsed JSON response
 * @throws {Error} `HTTP <status>` when the response is not OK
 */
export async function apiWriteSkuIgnore(skuA, skuB) {
    const res = await fetch("/__stviz/sku-ignores", {
        method: "POST",
        headers: { "content-type": "application/json" },
        body: JSON.stringify({ skuA, skuB }),
    });
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    return await res.json();
}
/** /**
@ -65,50 +67,50 @@ export async function apiWriteSkuIgnore(skuA, skuB) {
* - On local server: reads via /__stviz/sku-links (disk) * - On local server: reads via /__stviz/sku-links (disk)
*/ */
/**
 * Load SKU links/ignores from the first source that answers; never throws.
 *
 * Sources are tried in order:
 *   1) `./data/sku_links.json`  (GitHub Pages / static deploy inside viz/)
 *   2) `/data/sku_links.json`   (alternate static path, e.g. viz served under a subpath)
 *   3) the local server API (disk)
 *
 * @returns {Promise<{ links: any[], ignores: any[] }>} empty arrays when every source fails
 */
export async function loadSkuMetaBestEffort() {
    const staticPaths = ["./data/sku_links.json", "/data/sku_links.json"];

    for (const p of staticPaths) {
        try {
            const j = await fetchJson(p);
            return {
                links: Array.isArray(j?.links) ? j.links : [],
                ignores: Array.isArray(j?.ignores) ? j.ignores : [],
            };
        } catch {
            // best effort: fall through to the next source
        }
    }

    try {
        return await apiReadSkuMetaFromLocalServer();
    } catch {
        // best effort: fall through to the empty default
    }

    return { links: [], ignores: [] };
}
/* ---- GitHub history helpers ---- */ /* ---- GitHub history helpers ---- */
/**
 * List commits touching `path` on `branch` through the GitHub commits API.
 *
 * Requests page 1 with 100 entries per page; a second page is requested only when
 * the first one comes back with exactly 100 entries, so at most two pages are returned.
 *
 * @param {{ owner: string, repo: string, branch: string, path: string }} args
 * @returns {Promise<any[]>} the commit entries, or `[]` when page 1 is not an array
 */
export async function githubListCommits({ owner, repo, branch, path }) {
    const base = `https://api.github.com/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}/commits`;
    const pageUrl = (page) =>
        `${base}?sha=${encodeURIComponent(branch)}&path=${encodeURIComponent(path)}&per_page=100&page=${page}`;

    const page1 = await fetchJson(pageUrl(1));
    if (!Array.isArray(page1)) return [];
    if (page1.length !== 100) return page1;

    const page2 = await fetchJson(pageUrl(2));
    return [...page1, ...(Array.isArray(page2) ? page2 : [])];
}
/**
 * Download `path` at commit `sha` from raw.githubusercontent.com and parse it as JSON.
 *
 * `owner`, `repo` and `sha` are URI-encoded; `path` is appended as-is.
 *
 * @param {{ owner: string, repo: string, sha: string, path: string }} args
 * @returns {Promise<any>} the parsed file content
 */
export async function githubFetchFileAtSha({ owner, repo, sha, path }) {
    const raw = `https://raw.githubusercontent.com/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}/${encodeURIComponent(sha)}/${path}`;
    const txt = await fetchText(raw);
    return JSON.parse(txt);
}

View file

@ -3,106 +3,106 @@ import { parsePriceToNumber, keySkuForRow, normSearchText } from "./sku.js";
// Build one row per *canonical* SKU (after applying sku map) + combined searchable text
/**
 * Collapse listing rows into one aggregate per canonical SKU.
 *
 * @param {Iterable<object>} listings - rows; reads name, url, storeLabel/store, removed,
 *   img/image/thumb and price from each one
 * @param {(sku: string) => string} [canonicalizeSkuFn] - maps a raw SKU key to its
 *   canonical key; identity when not a function
 * @returns {object[]} aggregates sorted by name then sku, each with `stores` (live only),
 *   `storesEver`, `storeCount`, `storeCountEver`, `removedEverywhere`, `searchText`,
 *   `img`, `sampleUrl` and the cheapest price fields
 */
export function aggregateBySku(listings, canonicalizeSkuFn) {
    const canon = typeof canonicalizeSkuFn === "function" ? canonicalizeSkuFn : (x) => x;
    const bySku = new Map();

    for (const r of listings || []) {
        const rawSku = keySkuForRow(r);
        const sku = canon(rawSku);

        const name = String(r?.name || "");
        const url = String(r?.url || "");
        const storeLabel = String(r?.storeLabel || r?.store || "");
        const removed = Boolean(r?.removed);

        const img = normImg(r?.img || r?.image || r?.thumb || "");

        const pNum = parsePriceToNumber(r?.price);
        const pStr = String(r?.price || "");

        let agg = bySku.get(sku);
        if (!agg) {
            agg = {
                sku, // canonical sku
                name: name || "",
                img: "",
                // Seeded from the first row so a SKU with no live priced row still shows a price.
                cheapestPriceStr: pStr || "",
                cheapestPriceNum: pNum,
                cheapestStoreLabel: storeLabel || "",
                stores: new Set(), // LIVE stores only
                storesEver: new Set(), // live + removed presence (history)
                sampleUrl: url || "",
                _searchParts: [],
                searchText: "",

                _imgByName: new Map(),
                _imgAny: "",
                // Becomes true once a LIVE priced row has set the cheapest* fields.
                _hasLivePrice: false,
            };
            bySku.set(sku, agg);
        }

        if (storeLabel) {
            agg.storesEver.add(storeLabel);
            if (!removed) agg.stores.add(storeLabel);
        }
        if (!agg.sampleUrl && url) agg.sampleUrl = url;

        // Keep first non-empty name, but keep thumbnail aligned to chosen name
        if (!agg.name && name) {
            agg.name = name;
            if (img) agg.img = img;
        } else if (agg.name && name === agg.name && img && !agg.img) {
            agg.img = img;
        }

        if (img) {
            if (!agg._imgAny) agg._imgAny = img;
            if (name) agg._imgByName.set(name, img);
        }

        // cheapest across LIVE rows only (so removed history doesn't "win").
        // The first live price always replaces the seed, which may come from a removed row.
        if (!removed && pNum !== null) {
            if (!agg._hasLivePrice || agg.cheapestPriceNum === null || pNum < agg.cheapestPriceNum) {
                agg.cheapestPriceNum = pNum;
                agg.cheapestPriceStr = pStr || "";
                agg.cheapestStoreLabel = storeLabel || agg.cheapestStoreLabel;
                agg._hasLivePrice = true;
            }
        }

        // search parts: include canonical + raw sku so searching either works
        agg._searchParts.push(sku);
        if (rawSku && rawSku !== sku) agg._searchParts.push(rawSku);
        if (name) agg._searchParts.push(name);
        if (url) agg._searchParts.push(url);
        if (storeLabel) agg._searchParts.push(storeLabel);
        if (removed) agg._searchParts.push("removed");
    }

    const out = [...bySku.values()];

    for (const it of out) {
        if (!it.img) {
            // Prefer the image seen with the chosen name, else any image seen for the SKU.
            const m = it._imgByName;
            if (it.name && m && m.has(it.name)) it.img = m.get(it.name) || "";
            else it.img = it._imgAny || "";
        }

        delete it._imgByName;
        delete it._imgAny;
        delete it._hasLivePrice;

        it.storeCount = it.stores.size;
        it.storeCountEver = it.storesEver.size;
        it.removedEverywhere = it.storeCount === 0;

        it._searchParts.push(it.sku);
        it._searchParts.push(it.name || "");
        it._searchParts.push(it.sampleUrl || "");
        it._searchParts.push(it.cheapestStoreLabel || "");

        it.searchText = normSearchText(it._searchParts.join(" | "));
        delete it._searchParts;
    }

    out.sort((a, b) => (String(a.name) + a.sku).localeCompare(String(b.name) + b.sku));
    return out;
}

View file

@ -1,59 +1,61 @@
/**
 * HTML-escape a value for use in text or attribute context.
 *
 * @param {*} s - converted with String(); null/undefined become ""
 * @returns {string} the text with & < > " ' replaced by entities
 */
export function esc(s) {
    return String(s ?? "").replace(
        /[&<>"']/g,
        (c) => ({ "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" })[c],
    );
}
/**
 * Normalize an image URL: trim it and drop empty values and `data:` URIs.
 *
 * @param {*} s
 * @returns {string} the trimmed URL, or "" when it is empty or starts with `data:`
 */
export function normImg(s) {
    const v = String(s || "").trim();
    if (!v) return "";
    if (/^data:/i.test(v)) return "";
    return v;
}
/**
 * Return the leading `YYYY-MM-DD` part of a string.
 *
 * @param {*} iso
 * @returns {string} the date prefix, or "" when the value does not start with one
 */
export function dateOnly(iso) {
    const m = String(iso ?? "").match(/^(\d{4}-\d{2}-\d{2})/);
    return m ? m[1] : "";
}
// Built on first use and reused, so repeated prettyTs() calls do not rebuild the formatter.
let prettyTsFormatter = null;

/**
 * Format a timestamp as `<Month> <day> <h>:<mm><am|pm>` in the America/Vancouver
 * time zone, after rounding to the nearest hour.
 *
 * @param {*} iso - anything `new Date()` can parse
 * @returns {string} the formatted text, or "" for empty or unparsable input
 */
export function prettyTs(iso) {
    const s = String(iso || "");
    if (!s) return "";

    const t0 = new Date(s).getTime();
    if (!Number.isFinite(t0)) return "";

    // Round to nearest hour
    const d = new Date(Math.round(t0 / 3600000) * 3600000);

    if (!prettyTsFormatter) {
        prettyTsFormatter = new Intl.DateTimeFormat("en-US", {
            timeZone: "America/Vancouver",
            month: "long",
            day: "numeric",
            hour: "numeric",
            minute: "2-digit",
            hour12: true,
        });
    }

    let month = "";
    let day = "";
    let hour = "";
    let minute = "";
    let dayPeriod = "";

    for (const p of prettyTsFormatter.formatToParts(d)) {
        if (p.type === "month") month = p.value;
        else if (p.type === "day") day = p.value;
        else if (p.type === "hour") hour = p.value;
        else if (p.type === "minute") minute = p.value;
        else if (p.type === "dayPeriod") dayPeriod = p.value;
    }

    return `${month} ${day} ${hour}:${minute}${String(dayPeriod || "").toLowerCase()}`;
}
/**
 * Build the HTML for a thumbnail.
 *
 * @param {*} imgUrl - image URL; normalized with normImg()
 * @param {string} [cls="thumb"] - class attribute for the <img>
 * @returns {string} an <img> tag (escaped class/src, hidden on load error), or a
 *   placeholder <div> when normImg() yields no URL
 */
export function renderThumbHtml(imgUrl, cls = "thumb") {
    const img = normImg(imgUrl);
    if (!img) return `<div class="thumbPlaceholder"></div>`;
    return `<img referrerpolicy="no-referrer" class="${esc(cls)}" src="${esc(img)}" alt="" loading="lazy" onerror="this.style.display='none'" />`;
}

File diff suppressed because it is too large Load diff

View file

@ -2,91 +2,88 @@
import { keySkuForRow } from "../sku.js"; import { keySkuForRow } from "../sku.js";
/** True when the trimmed key is exactly six digits. */
function isRealSkuKey(skuKey) {
    const s = String(skuKey || "").trim();
    return /^\d{6}$/.test(s);
}
/** True when the key starts with `upc:` or `id:` (the key is not trimmed first). */
function isSoftSkuKey(k) {
    const s = String(k || "");
    return s.startsWith("upc:") || s.startsWith("id:");
}
/** True when the trimmed key starts with `u:`. */
function isUnknownSkuKey2(k) {
    return String(k || "")
        .trim()
        .startsWith("u:");
}
/**
 * True when the lower-cased store label contains one of the BC store markers.
 * Matching is by plain substring, so a marker also matches inside a longer word.
 */
function isBCStoreLabel(label) {
    const s = String(label || "").toLowerCase();
    return (
        s.includes("bcl") ||
        s.includes("strath") ||
        s.includes("gull") ||
        s.includes("legacy") ||
        s.includes("tudor") ||
        s.includes("vessel") ||
        s.includes("arc") ||
        s.includes("vintagespirits")
    );
}
/**
 * True when any row whose SKU key equals `skuKey` has a BC store label.
 *
 * @param {Iterable<object>} allRows
 * @param {string} skuKey - compared with keySkuForRow(row) using ===
 * @returns {boolean}
 */
function skuIsBC(allRows, skuKey) {
    for (const r of allRows) {
        if (keySkuForRow(r) !== skuKey) continue;
        const lab = String(r.storeLabel || r.store || "");
        if (isBCStoreLabel(lab)) return true;
    }
    return false;
}
/**
 * True when the lower-cased store label mentions alberta, calgary or edmonton,
 * or contains `ab` as a standalone word.
 */
function isABStoreLabel(label) {
    const s = String(label || "").toLowerCase();
    return s.includes("alberta") || s.includes("calgary") || s.includes("edmonton") || /\bab\b/.test(s);
}
/**
 * True when any row whose SKU key equals `skuKey` has an Alberta store label.
 *
 * @param {Iterable<object>} allRows
 * @param {string} skuKey - compared with keySkuForRow(row) using ===
 * @returns {boolean}
 */
function skuIsAB(allRows, skuKey) {
    for (const r of allRows) {
        if (keySkuForRow(r) !== skuKey) continue;
        const lab = String(r.storeLabel || r.store || "");
        if (isABStoreLabel(lab)) return true;
    }
    return false;
}
/**
 * Score a SKU key as a candidate canonical key (higher is preferred).
 *
 * Base score: 1000 for a real key, 200 for a soft key, 100 for any other key that
 * is not "unknown", -1000 for an unknown key. An Alberta listing adds 25 and a
 * BC listing subtracts 10.
 *
 * @param {Iterable<object>} allRows
 * @param {string} skuKey
 * @returns {number}
 */
function scoreCanonical(allRows, skuKey) {
    const s = String(skuKey || "");

    let base;
    if (isRealSkuKey(s)) base = 1000;
    else if (isSoftSkuKey(s)) base = 200;
    else if (!isUnknownSkuKey2(s)) base = 100;
    else base = -1000;

    const abBonus = skuIsAB(allRows, s) ? 25 : 0;
    const bcPenalty = skuIsBC(allRows, s) ? 10 : 0;
    return base + abBonus - bcPenalty;
}
/**
 * Pick the best canonical key among `skuKeys`.
 *
 * Keys are trimmed and blank ones skipped. The highest scoreCanonical() value wins;
 * ties go to the key that compares smaller with `<`.
 *
 * @param {Iterable<object>} allRows - passed through to scoreCanonical()
 * @param {Iterable<string>} skuKeys
 * @returns {string} the chosen key, or "" when there is no non-blank key
 */
export function pickPreferredCanonical(allRows, skuKeys) {
    let best = "";
    let bestScore = -Infinity;

    for (const k of skuKeys) {
        const s = String(k || "").trim();
        if (!s) continue;

        const sc = scoreCanonical(allRows, s);
        if (sc > bestScore) {
            bestScore = sc;
            best = s;
        } else if (sc === bestScore && best && s < best) {
            // deterministic tie-break
            best = s;
        }
    }

    return best;
}

View file

@ -1,78 +1,77 @@
// viz/app/linker/price.js // viz/app/linker/price.js
/**
 * Build a function that scores how plausible it is, price-wise, that two SKUs are
 * the same product.
 *
 * @param {object} args
 * @param {Iterable<object>} args.allAgg - aggregates; reads `sku` and `cheapestPriceNum`
 * @param {{ canonicalSku?: (sku: string) => string }} [args.rules] - optional canonicalizer
 * @param {number} [args.kPerGroup=6] - how many of the lowest prices to keep per canonical group
 * @returns {(aSku: string, bSku: string) => number} multiplier in [0.35, 1.0]; 1.0 means
 *   no penalty (also returned when either side has no usable price)
 */
export function buildPricePenaltyForPair({ allAgg, rules, kPerGroup = 6 }) {
    // canonSku -> sorted array of up to kPerGroup lowest prices
    const groupPrices = new Map();

    // Falls back to the input sku when there is no canonicalizer or it returns a falsy value.
    const toCanon = (sku) => String((rules && rules.canonicalSku && rules.canonicalSku(sku)) || sku);

    function insertPrice(arr, p) {
        // keep sorted ascending, cap length
        let i = 0;
        while (i < arr.length && arr[i] <= p) i++;
        arr.splice(i, 0, p);
        if (arr.length > kPerGroup) arr.length = kPerGroup;
    }

    for (const it of allAgg || []) {
        if (!it) continue;
        const sku = String(it.sku || "");
        if (!sku) continue;

        // Only positive prices take part; null, NaN, zero and negatives are skipped.
        const p = it.cheapestPriceNum;
        if (p == null || !(p > 0)) continue;

        const canon = toCanon(sku);
        let arr = groupPrices.get(canon);
        if (!arr) groupPrices.set(canon, (arr = []));
        insertPrice(arr, p);
    }

    function bestRelativeGap(prA, prB) {
        // min |a-b| / min(a,b)
        let best = Infinity;
        for (let i = 0; i < prA.length; i++) {
            const a = prA[i];
            for (let j = 0; j < prB.length; j++) {
                const b = prB[j];
                const gap = Math.abs(a - b) / Math.max(1e-9, Math.min(a, b));
                if (gap < best) best = gap;
                if (best <= 0.001) return best; // close enough; stop searching
            }
        }
        return best;
    }

    function gapToMultiplier(gap) {
        // gap = 0.40 => 40% relative difference
        // <=35%: no penalty
        // 35-50%: ease down to ~0.75
        // >50%: continue down gently, floor at 0.35
        if (!(gap >= 0)) return 1.0;
        if (gap <= 0.35) return 1.0;
        if (gap <= 0.5) {
            const t = (gap - 0.35) / 0.15; // 0..1
            return 1.0 - 0.25 * t; // 1.00 -> 0.75
        }
        const m = 0.75 * (0.5 / gap);
        return Math.max(0.35, m);
    }

    return function pricePenaltyForPair(aSku, bSku) {
        const a = String(aSku || "");
        const b = String(bSku || "");
        if (!a || !b) return 1.0;

        const prA = groupPrices.get(toCanon(a));
        const prB = groupPrices.get(toCanon(b));
        if (!prA || !prB || !prA.length || !prB.length) return 1.0;

        const gap = bestRelativeGap(prA, prB);
        if (!isFinite(gap)) return 1.0;

        return gapToMultiplier(gap);
    };
}

View file

@ -3,267 +3,288 @@ import { tokenizeQuery, normSearchText } from "../sku.js";
// Ignore ultra-common / low-signal tokens in bottle names.
const SIM_STOP_TOKENS = new Set([
    "the",
    "a",
    "an",
    "and",
    "of",
    "to",
    "in",
    "for",
    "with",
    "year",
    "years",
    "yr",
    "yrs",
    "old",
    "whisky",
    "whiskey",
    "scotch",
    "single",
    "malt",
    "cask",
    "finish",
    "edition",
    "release",
    "batch",
    "strength",
    "abv",
    "proof",
    "anniversary",
]);
const SMWS_WORD_RE = /\bsmws\b/i;
const SMWS_CODE_RE = /\b(\d{1,3}\.\d{1,4})\b/;

/**
 * Extract the first "<1-3 digits>.<1-4 digits>" code from a name that also
 * contains the standalone word "smws" (case-insensitive).
 *
 * @param {*} name Product name; non-strings are coerced, falsy becomes "".
 * @returns {string} The code (e.g. "12.34"), or "" when the word or the code is absent.
 */
export function smwsKeyFromName(name) {
    const s = String(name || "");
    if (!SMWS_WORD_RE.test(s)) return "";
    const m = s.match(SMWS_CODE_RE);
    return m ? m[1] : "";
}
const ORDINAL_RE = /^(\d+)(st|nd|rd|th)$/i;

/**
 * Reduce a token to its numeric key.
 *
 * @param {*} t Token; coerced to a trimmed, lower-cased string (falsy becomes "").
 * @returns {string} The digits for an all-digit token ("12") or an ordinal
 *   ("12th" -> "12"); "" for anything else.
 */
export function numKey(t) {
    const s = String(t || "")
        .trim()
        .toLowerCase();
    if (!s) return "";
    if (/^\d+$/.test(s)) return s;
    const m = s.match(ORDINAL_RE);
    return m ? m[1] : "";
}
/**
 * @param {*} t Token to classify.
 * @returns {boolean} True when numKey(t) yields a non-empty key, i.e. the
 *   token is all digits or an ordinal such as "12th".
 */
function isNumberToken(t) {
    return !!numKey(t);
}
/**
 * Pull a 1-2 digit age statement out of a name.
 *
 * Recognises "<n> yr|yrs|year|years" (optionally preceded by "aged") first,
 * then "<n> yo". Matching is case-insensitive and whitespace between the
 * number and the unit is optional.
 *
 * @param {*} normName Name text; the parameter name suggests callers pass
 *   normalised text, but any string is accepted.
 * @returns {string} The age without leading zeros ("08" -> "8"), or "" when none is found.
 */
export function extractAgeFromText(normName) {
    const s = String(normName || "");
    if (!s) return "";

    const m = s.match(/\b(?:aged\s*)?(\d{1,2})\s*(?:yr|yrs|year|years)\b/i);
    if (m && m[1]) return String(parseInt(m[1], 10));

    const m2 = s.match(/\b(\d{1,2})\s*yo\b/i);
    if (m2 && m2[1]) return String(parseInt(m2[1], 10));

    return "";
}
// Lookup tables for filterSimTokens, built once rather than on every call.
const SIM_EQUIV = new Map([
    ["years", "yr"],
    ["year", "yr"],
    ["yrs", "yr"],
    ["yr", "yr"],
    ["whiskey", "whisky"],
    ["whisky", "whisky"],
    ["bourbon", "bourbon"],
]);
const VOL_UNIT = new Set(["ml", "l", "cl", "oz", "liter", "liters", "litre", "litres"]);
const VOL_INLINE_RE = /^\d+(?:\.\d+)?(?:ml|l|cl|oz)$/i; // 700ml, 1.14l
const PCT_INLINE_RE = /^\d+(?:\.\d+)?%$/; // 46%, 40.0%

/**
 * Normalise and prune a token list for similarity scoring.
 *
 * Per token: trim and lower-case; drop empties and tokens with no
 * alphanumeric character; drop inline volumes ("700ml") and percentages
 * ("46%"); map spelling variants through SIM_EQUIV; collapse ordinals to
 * their digits ("12th" -> "12"); drop volume units, "abv" and "proof"; drop
 * a number together with the volume unit that follows it ("700", "ml"); drop
 * non-numeric stop tokens; de-duplicate, keeping first-seen order.
 *
 * @param {Array<*>} tokens Raw tokens; a non-array is treated as empty.
 * @returns {string[]} Filtered, de-duplicated tokens.
 */
export function filterSimTokens(tokens) {
    const out = [];
    const seen = new Set();
    const arr = Array.isArray(tokens) ? tokens : [];

    for (let i = 0; i < arr.length; i++) {
        let t = String(arr[i] || "")
            .trim()
            .toLowerCase();
        if (!t) continue;
        if (!/[a-z0-9]/i.test(t)) continue;
        if (VOL_INLINE_RE.test(t)) continue;
        if (PCT_INLINE_RE.test(t)) continue;

        t = SIM_EQUIV.get(t) || t;
        const nk = numKey(t);
        if (nk) t = nk;

        if (VOL_UNIT.has(t) || t === "abv" || t === "proof") continue;

        // A bare number followed by a volume unit is a bottle size: skip both tokens.
        if (/^\d+(?:\.\d+)?$/.test(t)) {
            const next = String(arr[i + 1] || "")
                .trim()
                .toLowerCase();
            const nextNorm = SIM_EQUIV.get(next) || next;
            if (VOL_UNIT.has(nextNorm)) {
                i++;
                continue;
            }
        }

        // Numbers are never treated as stop tokens.
        if (!isNumberToken(t) && SIM_STOP_TOKENS.has(t)) continue;

        if (seen.has(t)) continue;
        seen.add(t);
        out.push(t);
    }
    return out;
}
/**
 * Penalise pairs whose names both contain numbers but share none of them.
 *
 * @param {Array<*>} aTokens Tokens of the first name (nullish treated as empty).
 * @param {Array<*>} bTokens Tokens of the second name (nullish treated as empty).
 * @returns {number} 1.0 when either side has no numeric token or at least one
 *   numeric key is shared; 0.28 otherwise.
 */
export function numberMismatchPenalty(aTokens, bTokens) {
    const aNums = new Set((aTokens || []).map(numKey).filter(Boolean));
    const bNums = new Set((bTokens || []).map(numKey).filter(Boolean));
    if (!aNums.size || !bNums.size) return 1.0;

    for (const n of aNums) {
        if (bNums.has(n)) return 1.0;
    }
    return 0.28;
}
/**
 * Levenshtein edit distance (unit-cost insert / delete / substitute), compared
 * by UTF-16 code unit, using a single rolling row of length b.length + 1.
 *
 * @param {*} a First string (falsy becomes "").
 * @param {*} b Second string (falsy becomes "").
 * @returns {number} Minimum number of single-unit edits turning a into b.
 */
export function levenshtein(a, b) {
    const s = String(a || "");
    const t = String(b || "");
    const n = s.length;
    const m = t.length;
    if (!n) return m;
    if (!m) return n;

    // dp[j] holds the distance between the current prefix of s and t[0..j).
    const dp = new Array(m + 1);
    for (let j = 0; j <= m; j++) dp[j] = j;

    for (let i = 1; i <= n; i++) {
        let prev = dp[0]; // diagonal cell from the previous row
        dp[0] = i;
        const ca = s.charCodeAt(i - 1);
        for (let j = 1; j <= m; j++) {
            const tmp = dp[j];
            const cost = ca === t.charCodeAt(j - 1) ? 0 : 1;
            dp[j] = Math.min(dp[j] + 1, dp[j - 1] + 1, prev + cost);
            prev = tmp;
        }
    }
    return dp[m];
}
/**
 * F1-style overlap between two token lists after filterSimTokens.
 *
 * With `hit` the number of tokens of the smaller filtered set found in the
 * larger one: recall = hit / |small|, precision = hit / |big|, and the result
 * is their harmonic mean.
 *
 * @param {Array<*>} aTokens Raw tokens of the first name.
 * @param {Array<*>} bTokens Raw tokens of the second name.
 * @returns {number} Score in [0, 1]; 0 when either side filters down to nothing.
 */
export function tokenContainmentScore(aTokens, bTokens) {
    const A = filterSimTokens(aTokens || []);
    const B = filterSimTokens(bTokens || []);
    if (!A.length || !B.length) return 0;

    const aSet = new Set(A);
    const bSet = new Set(B);
    const aIsSmall = aSet.size <= bSet.size;
    const small = aIsSmall ? aSet : bSet;
    const big = aIsSmall ? bSet : aSet;

    let hit = 0;
    for (const t of small) {
        if (big.has(t)) hit++;
    }

    const recall = hit / Math.max(1, small.size);
    const precision = hit / Math.max(1, big.size);
    // The 1e-9 floor avoids 0/0 when there is no overlap at all.
    return (2 * precision * recall) / Math.max(1e-9, precision + recall);
}
/**
 * Full similarity score between two product names (unbounded, >= 0).
 *
 * Combines: first filtered token equality, overlap of the remaining tokens,
 * normalised Levenshtein similarity of the normalised names, a gate that
 * damps the score when the first tokens differ, a numeric-mismatch penalty,
 * an age match/mismatch multiplier and a containment boost.
 *
 * @param {*} aName First product name.
 * @param {*} bName Second product name.
 * @returns {number} 0 when either name normalises to nothing or has no
 *   usable tokens; otherwise a positive score where larger means more similar.
 */
export function similarityScore(aName, bName) {
    const a = normSearchText(aName);
    const b = normSearchText(bName);
    if (!a || !b) return 0;

    const aAge = extractAgeFromText(a);
    const bAge = extractAgeFromText(b);
    const ageBoth = !!(aAge && bAge);
    const ageMatch = ageBoth && aAge === bAge;
    const ageMismatch = ageBoth && aAge !== bAge;

    const aToksRaw = tokenizeQuery(a);
    const bToksRaw = tokenizeQuery(b);
    const aToks = filterSimTokens(aToksRaw);
    const bToks = filterSimTokens(bToksRaw);
    if (!aToks.length || !bToks.length) return 0;

    const contain = tokenContainmentScore(aToksRaw, bToksRaw);

    const aFirst = aToks[0] || "";
    const bFirst = bToks[0] || "";
    const firstMatch = aFirst && bFirst && aFirst === bFirst ? 1 : 0;

    // Overlap of everything after the first token, relative to the longer tail.
    const A = new Set(aToks.slice(1));
    const B = new Set(bToks.slice(1));
    let inter = 0;
    for (const w of A) {
        if (B.has(w)) inter++;
    }
    const denom = Math.max(1, Math.max(A.size, B.size));
    const overlapTail = inter / denom;

    const d = levenshtein(a, b);
    const maxLen = Math.max(1, Math.max(a.length, b.length));
    const levSim = 1 - d / maxLen;

    // Without a first-token match, tail overlap only counts in proportion to
    // containment, and is damped further for short names with weak containment.
    let gate = firstMatch ? 1.0 : Math.min(0.8, 0.06 + 0.95 * contain);
    const smallN = Math.min(aToks.length, bToks.length);
    if (!firstMatch && smallN <= 3 && contain < 0.78) gate *= 0.18;

    const numGate = numberMismatchPenalty(aToks, bToks);

    let s =
        numGate *
        (firstMatch * 3.0 + overlapTail * 2.2 * gate + levSim * (firstMatch ? 1.0 : 0.1 + 0.7 * contain));

    if (ageMatch) s *= 2.2;
    else if (ageMismatch) s *= 0.18;

    s *= 1 + 0.9 * contain;
    return s;
}
/**
 * Similarity score computed from pre-tokenised input, without the
 * Levenshtein term used by similarityScore.
 *
 * @param {Array<*>} aTokens Raw tokens of the first name (nullish treated as empty).
 * @param {Array<*>} bTokens Raw tokens of the second name (nullish treated as empty).
 * @param {*} aNormName Name text of the first item; used for age extraction
 *   and the 10-character prefix bonus.
 * @param {*} bNormName Name text of the second item.
 * @returns {number} 0 when either token list filters down to nothing;
 *   otherwise a non-negative score where larger means more similar.
 */
export function fastSimilarityScore(aTokens, bTokens, aNormName, bNormName) {
    const aTokensRaw = aTokens || [];
    const bTokensRaw = bTokens || [];
    const aTokF = filterSimTokens(aTokensRaw);
    const bTokF = filterSimTokens(bTokensRaw);
    if (!aTokF.length || !bTokF.length) return 0;

    const a = String(aNormName || "");
    const b = String(bNormName || "");

    const aAge = extractAgeFromText(a);
    const bAge = extractAgeFromText(b);
    const ageBoth = !!(aAge && bAge);
    const ageMatch = ageBoth && aAge === bAge;
    const ageMismatch = ageBoth && aAge !== bAge;

    const contain = tokenContainmentScore(aTokensRaw, bTokensRaw);

    const aFirst = aTokF[0] || "";
    const bFirst = bTokF[0] || "";
    const firstMatch = aFirst && bFirst && aFirst === bFirst ? 1 : 0;

    // Overlap of everything after the first token, relative to the longer tail.
    const aTail = aTokF.slice(1);
    const bTail = bTokF.slice(1);
    const bSet = new Set(bTail);
    let inter = 0;
    for (const t of aTail) {
        if (bSet.has(t)) inter++;
    }
    const denom = Math.max(1, Math.max(aTail.length, bTail.length));
    const overlapTail = inter / denom;

    // Small bonus when the first tokens match and both names share their first 10 characters.
    const aPrefix = a.slice(0, 10);
    const bPrefix = b.slice(0, 10);
    const pref = firstMatch && aPrefix && bPrefix && aPrefix === bPrefix ? 0.2 : 0;

    let gate = firstMatch ? 1.0 : Math.min(0.8, 0.06 + 0.95 * contain);
    const smallN = Math.min(aTokF.length, bTokF.length);
    if (!firstMatch && smallN <= 3 && contain < 0.78) gate *= 0.18;

    const numGate = numberMismatchPenalty(aTokF, bTokF);

    let s = numGate * (firstMatch * 2.4 + overlapTail * 2.0 * gate + pref);

    if (ageMatch) s *= 2.0;
    else if (ageMismatch) s *= 0.2;

    s *= 1 + 0.9 * contain;
    return s;
}

View file

@ -4,43 +4,43 @@ import { keySkuForRow } from "../sku.js";
const SIZE_TOLERANCE_ML = 8;

/**
 * Find bottle sizes mentioned in free text and return them in millilitres.
 *
 * Recognises "<number> ml|cl|l|litre(s)|liter(s)" (case-insensitive, optional
 * whitespace before the unit). Values are rounded to whole millilitres and
 * kept only when between 50 and 5000 ml inclusive.
 *
 * @param {*} text Text to scan (falsy becomes "").
 * @returns {number[]} Distinct sizes in ml, in order of first appearance.
 */
export function parseSizesMlFromText(text) {
    const s = String(text || "").toLowerCase();
    if (!s) return [];

    const out = new Set();
    // Fresh /g regex per call, so lastIndex state never leaks between calls.
    const re = /\b(\d+(?:\.\d+)?)\s*(ml|cl|l|litre|litres|liter|liters)\b/g;
    let m;
    while ((m = re.exec(s))) {
        const val = parseFloat(m[1]);
        const unit = m[2];
        if (!isFinite(val) || val <= 0) continue;

        let ml = 0;
        if (unit === "ml") ml = Math.round(val);
        else if (unit === "cl") ml = Math.round(val * 10);
        else ml = Math.round(val * 1000); // every remaining unit is a litre spelling

        if (ml >= 50 && ml <= 5000) out.add(ml);
    }
    return Array.from(out);
}
/**
 * @param {Set<number>} aSet Sizes in ml for the first item.
 * @param {Set<number>} bSet Sizes in ml for the second item.
 * @returns {boolean} True when some size in aSet is within SIZE_TOLERANCE_ML
 *   of some size in bSet; false when either set is missing or empty.
 */
function sizeSetsMatch(aSet, bSet) {
    if (!aSet?.size || !bSet?.size) return false;
    for (const a of aSet) {
        for (const b of bSet) {
            if (Math.abs(a - b) <= SIZE_TOLERANCE_ML) return true;
        }
    }
    return false;
}
/**
 * Score multiplier based on bottle-size agreement.
 *
 * @param {Set<number>} aSet Sizes in ml for the first item.
 * @param {Set<number>} bSet Sizes in ml for the second item.
 * @returns {number} 1.0 when either side has no known size or the sets match
 *   within tolerance; 0.08 when both have sizes and none agree.
 */
export function sizePenalty(aSet, bSet) {
    if (!aSet?.size || !bSet?.size) return 1.0;
    if (sizeSetsMatch(aSet, bSet)) return 1.0;
    return 0.08;
}
/** /**
@ -48,61 +48,61 @@ export function sizePenalty(aSet, bSet) {
* This keeps linker_page.js clean and makes cache rebuild explicit when rules change. * This keeps linker_page.js clean and makes cache rebuild explicit when rules change.
*/ */
/**
 * Precompute bottle sizes per canonical SKU and return a pair-penalty function.
 *
 * Sizes are parsed from row names (keyed by keySkuForRow) and from aggregate
 * item names (keyed by item.sku), then merged per canonical SKU for every SKU
 * that appears in allAgg.
 *
 * @param {object} args
 * @param {Iterable<object>} args.allRows Raw rows; rows flagged `removed` are skipped. Nullish is treated as empty.
 * @param {Iterable<object>} args.allAgg Aggregated items with `sku` and `name`. Nullish is treated as empty.
 * @param {{canonicalSku: function(string): string}} args.rules SKU rules.
 * @returns {function(string, string): number} sizePenaltyForPair(aSku, bSku),
 *   yielding the sizePenalty of the two canonical groups.
 */
export function buildSizePenaltyForPair({ allRows, allAgg, rules }) {
    const rows = allRows || [];
    const agg = allAgg || [];

    // Get-or-create the Set stored under `key` in `map`.
    function ensureSet(map, key) {
        let set = map.get(key);
        if (!set) map.set(key, (set = new Set()));
        return set;
    }

    // Same fallback as used when the cache is keyed below: an unmapped SKU is its own canonical key.
    function canonOf(sku) {
        const key = String(sku || "").trim();
        return key ? String(rules.canonicalSku(key) || key) : "";
    }

    const SKU_SIZE_CACHE = new Map(); // skuKey -> Set<int ml>

    for (const r of rows) {
        if (!r || r.removed) continue;
        const skuKey = String(keySkuForRow(r) || "").trim();
        if (!skuKey) continue;
        const name = r.name || r.title || r.productName || "";
        const sizes = parseSizesMlFromText(name);
        if (!sizes.length) continue;
        const set = ensureSet(SKU_SIZE_CACHE, skuKey);
        for (const x of sizes) set.add(x);
    }

    for (const it of agg) {
        const skuKey = String(it?.sku || "").trim();
        if (!skuKey || !it?.name) continue;
        const sizes = parseSizesMlFromText(it.name);
        if (!sizes.length) continue;
        const set = ensureSet(SKU_SIZE_CACHE, skuKey);
        for (const x of sizes) set.add(x);
    }

    const CANON_SIZE_CACHE = new Map(); // canon -> Set<int ml>

    // Runs after both passes above so each SKU's size set is complete before merging.
    for (const it of agg) {
        const canon = canonOf(it?.sku);
        if (!canon) continue;
        const canonSet = ensureSet(CANON_SIZE_CACHE, canon);
        const skuSet = SKU_SIZE_CACHE.get(String(it?.sku || "").trim());
        if (skuSet) for (const x of skuSet) canonSet.add(x);
    }

    return function sizePenaltyForPair(aSku, bSku) {
        const aCanon = canonOf(aSku);
        const bCanon = canonOf(bSku);
        const A = (aCanon && CANON_SIZE_CACHE.get(aCanon)) || new Set();
        const B = (bCanon && CANON_SIZE_CACHE.get(bCanon)) || new Set();
        return sizePenalty(A, B);
    };
}

View file

@ -1,43 +1,42 @@
// viz/app/linker/store_cache.js // viz/app/linker/store_cache.js
/**
 * Resolve a SKU key to its canonical key.
 *
 * @param {{canonicalSku: function(string): string}} rules SKU rules.
 * @param {*} skuKey SKU key; trimmed before lookup.
 * @returns {string} rules.canonicalSku(key), or the trimmed key itself when
 *   that yields nothing; "" for an empty key.
 */
function canonKeyForSku(rules, skuKey) {
    const s = String(skuKey || "").trim();
    if (!s) return "";
    return String(rules.canonicalSku(s) || s);
}
/**
 * Build a map from canonical SKU to the set of store labels carrying it.
 *
 * @param {Iterable<object>} allAgg Aggregated items with `sku` and a `stores`
 *   collection exposing `.size` (a Set in practice — TODO confirm against
 *   the producer). Nullish is treated as empty.
 * @param {{canonicalSku: function(string): string}} rules SKU rules.
 * @returns {Map<string, Set<*>>} canonSku -> Set<storeLabel>. An item with a
 *   SKU but no stores still creates an (empty) entry.
 */
export function buildCanonStoreCache(allAgg, rules) {
    const m = new Map(); // canonSku -> Set<storeLabel>

    for (const it of allAgg || []) {
        if (!it) continue;
        const skuKey = String(it.sku || "").trim();
        if (!skuKey) continue;

        const canon = String(rules.canonicalSku(skuKey) || skuKey);
        let set = m.get(canon);
        if (!set) m.set(canon, (set = new Set()));

        const stores = it.stores;
        if (stores && stores.size) {
            for (const s of stores) set.add(s);
        }
    }
    return m;
}
/**
 * Look up the store set for a SKU's canonical group.
 *
 * @param {{canonicalSku: function(string): string}} rules SKU rules.
 * @param {Map<string, Set<*>>} canonStoreCache Cache from buildCanonStoreCache.
 * @param {*} skuKey SKU key.
 * @returns {Set<*>} The cached set, or a fresh empty Set when the key is
 *   empty or unknown.
 */
function canonStoresForSku(rules, canonStoreCache, skuKey) {
    const canon = canonKeyForSku(rules, skuKey);
    return canon ? canonStoreCache.get(canon) || new Set() : new Set();
}
/**
 * Create a predicate telling whether two SKUs' canonical groups share a store.
 *
 * @param {{canonicalSku: function(string): string}} rules SKU rules.
 * @param {Map<string, Set<*>>} canonStoreCache Cache from buildCanonStoreCache.
 * @returns {function(*, *): boolean} sameStoreCanon(aSku, bSku): true when the
 *   two store sets intersect; false when either is empty.
 */
export function makeSameStoreCanonFn(rules, canonStoreCache) {
    return function sameStoreCanon(aSku, bSku) {
        const A = canonStoresForSku(rules, canonStoreCache, String(aSku || ""));
        const B = canonStoresForSku(rules, canonStoreCache, String(bSku || ""));
        if (!A.size || !B.size) return false;
        for (const s of A) {
            if (B.has(s)) return true;
        }
        return false;
    };
}

File diff suppressed because it is too large Load diff

View file

@ -2,46 +2,46 @@
import { keySkuForRow } from "../sku.js"; import { keySkuForRow } from "../sku.js";
/**
 * Heuristic score used to pick the better of two URLs for the same item.
 *
 * @param {{url?: *}} r Row-like object; only `url` is read.
 * @returns {number} -1 for a missing/blank URL; otherwise the trimmed URL's
 *   length, +50 when it contains "product/<digits>/", +10 when it contains a
 *   run of 8 or more letters, digits or hyphens.
 */
function urlQuality(r) {
    const u = String(r?.url || "").trim();
    if (!u) return -1;

    let s = u.length;
    if (/\bproduct\/\d+\//.test(u)) s += 50;
    if (/[a-z0-9-]{8,}/i.test(u)) s += 10;
    return s;
}
/**
 * Index the best URL per SKU and store.
 *
 * Rows flagged `removed`, or lacking a SKU key, store label or URL, are
 * skipped. When several rows compete for the same (sku, store) slot the URL
 * with the higher urlQuality wins; ties go to the lexicographically smaller
 * URL, so the result does not depend on row order.
 *
 * @param {Iterable<object>} allRows Rows with `url` and `storeLabel` / `store`.
 * @returns {Map<string, Map<string, string>>} skuKey -> Map(storeLabel -> url).
 */
export function buildUrlBySkuStore(allRows) {
    const URL_BY_SKU_STORE = new Map(); // skuKey -> Map(storeLabel -> url)

    for (const r of allRows) {
        if (!r || r.removed) continue;

        const skuKey = String(keySkuForRow(r) || "").trim();
        if (!skuKey) continue;

        const storeLabel = String(r.storeLabel || r.store || "").trim();
        const url = String(r.url || "").trim();
        if (!storeLabel || !url) continue;

        let byStore = URL_BY_SKU_STORE.get(skuKey);
        if (!byStore) URL_BY_SKU_STORE.set(skuKey, (byStore = new Map()));

        const prevUrl = byStore.get(storeLabel);
        if (!prevUrl) {
            byStore.set(storeLabel, url);
            continue;
        }

        const prevScore = urlQuality({ url: prevUrl });
        const nextScore = urlQuality(r);
        const isBetter = nextScore > prevScore || (nextScore === prevScore && url < prevUrl);
        if (isBetter) byStore.set(storeLabel, url);
    }

    return URL_BY_SKU_STORE;
}

File diff suppressed because it is too large Load diff

View file

@ -15,23 +15,23 @@ import { renderStore } from "./store_page.js";
import { renderStats, destroyStatsChart } from "./stats_page.js"; import { renderStats, destroyStatsChart } from "./stats_page.js";
/**
 * Hash router: render the view named by location.hash into #app.
 *
 *   #/               -> search
 *   #/item/<sku>     -> item page
 *   #/store/<label>  -> store page
 *   #/link           -> SKU linker
 *   #/stats          -> stats page
 *
 * Anything else, including an item/store route whose argument is missing or
 * is not valid percent-encoding, falls back to the search view.
 */
function route() {
    const $app = document.getElementById("app");
    if (!$app) return;

    // always clean up chart when navigating
    destroyChart();
    destroyStatsChart();

    const h = location.hash || "#/";
    const parts = h.replace(/^#\/?/, "").split("/").filter(Boolean);
    const [view, rawArg] = parts;

    if ((view === "item" || view === "store") && rawArg) {
        let arg = "";
        try {
            arg = decodeURIComponent(rawArg);
        } catch {
            // decodeURIComponent throws URIError on malformed escapes such as
            // "%E0%A4%A"; treat the hash as an unknown route instead of crashing.
            arg = "";
        }
        if (arg) return view === "item" ? renderItem($app, arg) : renderStore($app, arg);
        return renderSearch($app);
    }

    if (view === "link") return renderSkuLinker($app);
    if (view === "stats") return renderStats($app);

    return renderSearch($app);
}

window.addEventListener("hashchange", route);

View file

@ -5,225 +5,225 @@ import { applyPendingToMeta } from "./pending.js";
// Memoised result of loadSkuRules(); null means "not loaded yet".
let CACHED = null;

/** Drop the memoised rules so the next loadSkuRules() call rebuilds them. */
export function clearSkuRulesCache() {
    CACHED = null;
}
/**
 * Normalise an implicit "id:<n>" key to a zero-padded 6-digit SKU.
 *
 * @param {*} k SKU key (falsy becomes "").
 * @returns {string} "id:42" -> "000042" (prefix is case-insensitive, 1-6
 *   digits only); any other key is returned trimmed and otherwise unchanged.
 */
function normalizeImplicitSkuKey(k) {
    const s = String(k || "").trim();
    const m = s.match(/^id:(\d{1,6})$/i);
    return m ? String(m[1]).padStart(6, "0") : s;
}
/**
 * Build an order-independent key for a pair of SKUs.
 *
 * @param {*} a First SKU key.
 * @param {*} b Second SKU key.
 * @returns {string} "<smaller>|<larger>" of the two normalised keys (string
 *   comparison), or "" when either normalises to empty.
 */
function canonicalPairKey(a, b) {
    const x = normalizeImplicitSkuKey(a);
    const y = normalizeImplicitSkuKey(b);
    if (!x || !y) return "";
    return x < y ? `${x}|${y}` : `${y}|${x}`;
}
/**
 * Build a directed fromSku -> toSku map from link records.
 *
 * Keep this for reference/debug; grouping no longer depends on direction.
 * Self-links and records missing either side are ignored; a later record for
 * the same fromSku overwrites an earlier one.
 *
 * @param {Array<{fromSku?: *, toSku?: *}>} links Link records; a non-array is treated as empty.
 * @returns {Map<string, string>} Normalised fromSku -> normalised toSku.
 */
function buildForwardMap(links) {
    const m = new Map();
    for (const x of Array.isArray(links) ? links : []) {
        const fromSku = normalizeImplicitSkuKey(x?.fromSku);
        const toSku = normalizeImplicitSkuKey(x?.toSku);
        if (fromSku && toSku && fromSku !== toSku) m.set(fromSku, toSku);
    }
    return m;
}
/**
 * Build the set of ignored SKU pairs.
 *
 * Each record may name its two sides as skuA/skuB, a/b or left/right; the
 * first non-empty alias on each side is used.
 *
 * @param {Array<object>} ignores Ignore records; a non-array is treated as empty.
 * @returns {Set<string>} canonicalPairKey of every record that has both sides.
 */
function buildIgnoreSet(ignores) {
    const s = new Set();
    for (const x of Array.isArray(ignores) ? ignores : []) {
        const a = String(x?.skuA || x?.a || x?.left || "").trim();
        const b = String(x?.skuB || x?.b || x?.right || "").trim();
        const k = canonicalPairKey(a, b);
        if (k) s.add(k);
    }
    return s;
}
/* ---------------- Union-Find grouping (hardened) ---------------- */

/**
 * Disjoint-set (union-find) over string keys, with union by rank and path
 * compression. Keys are trimmed; empty keys are ignored everywhere.
 */
class DSU {
    constructor() {
        this.parent = new Map(); // key -> parent key (a root is its own parent)
        this.rank = new Map(); // root key -> rank (upper bound on tree height)
    }

    /** Register `x` as a singleton set if it has not been seen yet. */
    _add(x) {
        if (!this.parent.has(x)) {
            this.parent.set(x, x);
            this.rank.set(x, 0);
        }
    }

    /**
     * @param {*} x Key to look up; it is registered if unseen.
     * @returns {string} The root of x's set, or "" for an empty key.
     */
    find(x) {
        const key = String(x || "").trim();
        if (!key) return "";
        this._add(key);

        // Walk up to the root...
        let root = key;
        for (let p = this.parent.get(root); p !== root; p = this.parent.get(root)) {
            root = p;
        }

        // ...then point every node on the path straight at it.
        let cur = key;
        while (cur !== root) {
            const next = this.parent.get(cur);
            this.parent.set(cur, root);
            cur = next;
        }
        return root;
    }

    /**
     * Merge the sets containing a and b. No-op when either key is empty or
     * both are already in the same set.
     */
    union(a, b) {
        const x = String(a || "").trim();
        const y = String(b || "").trim();
        if (!x || !y || x === y) return;

        const ra = this.find(x);
        const rb = this.find(y);
        if (!ra || !rb || ra === rb) return;

        const rka = this.rank.get(ra) || 0;
        const rkb = this.rank.get(rb) || 0;

        // Attach the shallower tree under the deeper one; on a tie rb goes under ra.
        if (rka < rkb) {
            this.parent.set(ra, rb);
        } else if (rkb < rka) {
            this.parent.set(rb, ra);
        } else {
            this.parent.set(rb, ra);
            this.rank.set(ra, rka + 1);
        }
    }
}
/** @returns {boolean} True when the key starts with the "u:" (unknown SKU) prefix. */
function isUnknownSkuKey(key) {
    return String(key || "").startsWith("u:");
}

/** @returns {boolean} True when the trimmed key consists of digits only. */
function isNumericSku(key) {
    return /^\d+$/.test(String(key || "").trim());
}

/**
 * Comparator used to choose a stable canonical representative for a group.
 *
 * Tiers, in order: real (non-"u:") before unknown ("u:"); among reals,
 * all-digit SKUs before other keys; digit SKUs by numeric value ascending;
 * everything else (and numeric ties such as "007" vs "7") by string order.
 *
 * Numeric and non-numeric keys are kept in separate tiers because mixing a
 * numeric rule with a lexicographic fallback is not transitive
 * ("10" < "5x" < "9" < "10"), which makes Array.prototype.sort's result
 * depend on input order.
 *
 * @param {*} a First SKU key.
 * @param {*} b Second SKU key.
 * @returns {number} -1, 0 or 1.
 */
function compareSku(a, b) {
    const x = String(a || "").trim();
    const y = String(b || "").trim();
    if (x === y) return 0;

    const xUnknown = isUnknownSkuKey(x);
    const yUnknown = isUnknownSkuKey(y);
    if (xUnknown !== yUnknown) return xUnknown ? 1 : -1; // real first

    const xNum = isNumericSku(x);
    const yNum = isNumericSku(y);
    if (xNum !== yNum) return xNum ? -1 : 1; // numeric first

    if (xNum) {
        // Compare digit strings by value without going through Number, so
        // arbitrarily long SKUs cannot lose precision: strip leading zeros,
        // then shorter means smaller, then same-length strings order lexically.
        const xs = x.replace(/^0+(?=\d)/, "");
        const ys = y.replace(/^0+(?=\d)/, "");
        if (xs.length !== ys.length) return xs.length < ys.length ? -1 : 1;
        if (xs !== ys) return xs < ys ? -1 : 1;
    }

    // fallback lex
    return x < y ? -1 : 1;
}
/**
 * Group linked SKUs and pick one canonical representative per group.
 *
 * Links are treated as undirected edges, so cycles and link direction do not
 * affect grouping. The representative of a group is its smallest member
 * under compareSku.
 *
 * @param {Array<{fromSku?: *, toSku?: *}>} links Link records; a non-array is
 *   treated as empty. Records missing either side are skipped.
 * @returns {{canonBySku: Map<string, string>, groupsByCanon: Map<string, Set<string>>}}
 *   canonBySku maps every linked SKU to its representative; groupsByCanon maps
 *   each representative to all members of its group (itself included).
 */
function buildGroupsAndCanonicalMap(links) {
    const dsu = new DSU();
    const all = new Set();

    for (const x of Array.isArray(links) ? links : []) {
        const a = normalizeImplicitSkuKey(x?.fromSku);
        const b = normalizeImplicitSkuKey(x?.toSku);
        if (!a || !b) continue;

        all.add(a);
        all.add(b);

        // IMPORTANT: union is undirected for grouping (hardened vs cycles)
        dsu.union(a, b);
    }

    // root -> Set(members)
    const groupsByRoot = new Map();
    for (const s of all) {
        const r = dsu.find(s);
        if (!r) continue;
        let set = groupsByRoot.get(r);
        if (!set) groupsByRoot.set(r, (set = new Set()));
        set.add(s);
    }

    // sku -> canonical rep
    const canonBySku = new Map();
    // canonical rep -> Set(members) (what the rest of the app uses)
    const groupsByCanon = new Map();

    for (const [root, members] of groupsByRoot.entries()) {
        // Choose a canonical representative per group.
        const rep = Array.from(members).sort(compareSku)[0] || root;

        let g = groupsByCanon.get(rep);
        if (!g) groupsByCanon.set(rep, (g = new Set([rep])));
        for (const s of members) {
            canonBySku.set(s, rep);
            g.add(s);
        }
    }

    return { canonBySku, groupsByCanon };
}
/**
 * Load the SKU link/ignore metadata and build the lookup helpers around it.
 *
 * The result is stored in the module-level CACHED variable; once it is set,
 * later calls return that same object.
 *
 * @returns {Promise<object>} rules object with links, ignores, forwardMap,
 *   toGroups, ignoreSet, canonicalSku, groupForCanonical, isIgnoredPair and
 *   canonicalPairKey.
 */
export async function loadSkuRules() {
  if (CACHED) return CACHED;

  let meta = await loadSkuMetaBestEffort();

  // On GitHub Pages (read-only), overlay local pending+submitted edits from localStorage
  if (!isLocalWriteMode()) {
    meta = applyPendingToMeta(meta);
  }

  const links = Array.isArray(meta?.links) ? meta.links : [];
  const ignores = Array.isArray(meta?.ignores) ? meta.ignores : [];

  // keep forwardMap for visibility/debug; grouping uses union-find
  const forwardMap = buildForwardMap(links);
  const { canonBySku, groupsByCanon } = buildGroupsAndCanonicalMap(links);
  const ignoreSet = buildIgnoreSet(ignores);

  // Normalized SKU -> its group's representative (or itself when unlinked).
  function canonicalSku(sku) {
    const s = normalizeImplicitSkuKey(sku);
    if (!s) return s;
    return canonBySku.get(s) || s;
  }

  // Always returns a fresh Set so callers cannot mutate the cached groups.
  function groupForCanonical(toSku) {
    const canon = canonicalSku(toSku);
    const g = groupsByCanon.get(canon);
    return g ? new Set(g) : new Set([canon]);
  }

  function isIgnoredPair(a, b) {
    const k = canonicalPairKey(a, b);
    return k ? ignoreSet.has(k) : false;
  }

  CACHED = {
    links,
    ignores,
    forwardMap,

    // "toGroups" retained name for compatibility with existing code
    toGroups: groupsByCanon,
    ignoreSet,

    canonicalSku,
    groupForCanonical,
    isIgnoredPair,
    canonicalPairKey,
  };

  return CACHED;
}

View file

@ -3,219 +3,217 @@ const LS_KEY = "stviz:v1:pendingSkuEdits";
const LS_SUBMITTED_KEY = "stviz:v1:submittedSkuEdits"; const LS_SUBMITTED_KEY = "stviz:v1:submittedSkuEdits";
/**
 * Parse JSON text without throwing.
 *
 * @param {*} s - text to parse; falsy values are parsed as the empty string.
 * @returns {*} the parsed value, or null when the text is not valid JSON.
 */
function safeParseJson(s) {
  try {
    return JSON.parse(String(s || ""));
  } catch {
    return null;
  }
}
/**
 * Normalize a SKU value to a trimmed string.
 *
 * @param {*} s - raw SKU; falsy values become "".
 * @returns {string}
 */
function normSku(s) {
  return String(s || "").trim();
}
/**
 * Build the directional dedupe key for a link (from→to).
 *
 * @param {*} fromSku
 * @param {*} toSku
 * @returns {string} "from→to", or "" when either side is empty or both are equal.
 */
function linkKey(fromSku, toSku) {
  const f = normSku(fromSku);
  const t = normSku(toSku);
  if (!f || !t || f === t) return "";
  // The separator keeps ("ab","c") and ("a","bc") from producing the same key.
  return `${f}→${t}`;
}
/**
 * Build the order-independent dedupe key for an ignore pair.
 *
 * @param {*} a
 * @param {*} b
 * @returns {string} "x|y" with the smaller SKU first, or "" when either side
 *   is empty or both are equal.
 */
function pairKey(a, b) {
  const x = normSku(a);
  const y = normSku(b);
  if (!x || !y || x === y) return "";
  return x < y ? `${x}|${y}` : `${y}|${x}`;
}
/**
 * Read an edits payload from localStorage and normalize it.
 *
 * @param {string} key - localStorage key to read.
 * @returns {{createdAt: string, links: Array<{fromSku: string, toSku: string}>,
 *   ignores: Array<{skuA: string, skuB: string}>}} normalized edits; entries
 *   that do not yield a valid link/pair key are dropped.
 */
function loadEditsFromKey(key) {
  let raw = "";
  try {
    raw = localStorage.getItem(key) || "";
  } catch {
    // localStorage access threw: behave as if nothing was stored.
    raw = "";
  }

  const j = safeParseJson(raw);
  const links = Array.isArray(j?.links) ? j.links : [];
  const ignores = Array.isArray(j?.ignores) ? j.ignores : [];

  return {
    createdAt: String(j?.createdAt || ""),
    links: links
      .map((x) => ({ fromSku: normSku(x?.fromSku), toSku: normSku(x?.toSku) }))
      .filter((x) => linkKey(x.fromSku, x.toSku)),
    // Ignore entries may be stored as {skuA, skuB} or as {a, b}.
    ignores: ignores
      .map((x) => ({ skuA: normSku(x?.skuA || x?.a), skuB: normSku(x?.skuB || x?.b) }))
      .filter((x) => pairKey(x.skuA, x.skuB)),
  };
}
/**
 * Write an edits payload to localStorage.
 *
 * @param {string} key - localStorage key to write.
 * @param {{createdAt?: string, links?: Array, ignores?: Array}} edits
 * @returns {{createdAt: string, links: Array, ignores: Array}} the object that
 *   was serialized (returned even when the write itself failed).
 */
function saveEditsToKey(key, edits) {
  const out = {
    createdAt: edits?.createdAt || new Date().toISOString(),
    links: Array.isArray(edits?.links) ? edits.links : [],
    ignores: Array.isArray(edits?.ignores) ? edits.ignores : [],
  };
  try {
    localStorage.setItem(key, JSON.stringify(out));
  } catch {
    // Best effort: a failed write is ignored.
  }
  return out;
}
/** Load the pending (not yet submitted) edits stored under LS_KEY. */
export function loadPendingEdits() {
  return loadEditsFromKey(LS_KEY);
}
/** Store `edits` as the pending edits (under LS_KEY) and return what was written. */
export function savePendingEdits(edits) {
  return saveEditsToKey(LS_KEY, edits);
}
/** Remove the pending edits entry from localStorage (best effort). */
export function clearPendingEdits() {
  try {
    localStorage.removeItem(LS_KEY);
  } catch {
    // Best effort: a failed removal is ignored.
  }
}
/** Load the already-submitted edits stored under LS_SUBMITTED_KEY. */
export function loadSubmittedEdits() {
  return loadEditsFromKey(LS_SUBMITTED_KEY);
}
/** Store `edits` as the submitted edits (under LS_SUBMITTED_KEY) and return what was written. */
export function saveSubmittedEdits(edits) {
  return saveEditsToKey(LS_SUBMITTED_KEY, edits);
}
/** Remove the submitted edits entry from localStorage (best effort). */
export function clearSubmittedEdits() {
  try {
    localStorage.removeItem(LS_SUBMITTED_KEY);
  } catch {
    // Best effort: a failed removal is ignored.
  }
}
/**
 * Count the pending edits.
 *
 * @returns {{links: number, ignores: number, total: number}}
 */
export function pendingCounts() {
  const { links, ignores } = loadPendingEdits();
  return {
    links: links.length,
    ignores: ignores.length,
    total: links.length + ignores.length,
  };
}
/**
 * Queue a new from→to link in the pending edits.
 *
 * @param {*} fromSku
 * @param {*} toSku
 * @returns {boolean} true when the link was added; false when it is invalid or
 *   already present in the pending or submitted edits.
 */
export function addPendingLink(fromSku, toSku) {
  const f = normSku(fromSku);
  const t = normSku(toSku);
  const k = linkKey(f, t);
  if (!k) return false;

  const pending = loadPendingEdits();
  const submitted = loadSubmittedEdits();

  const seen = new Set(
    [
      ...pending.links.map((x) => linkKey(x.fromSku, x.toSku)),
      ...submitted.links.map((x) => linkKey(x.fromSku, x.toSku)),
    ].filter(Boolean),
  );
  if (seen.has(k)) return false;

  pending.links.push({ fromSku: f, toSku: t });
  savePendingEdits(pending);
  return true;
}
/**
 * Queue a new ignore pair in the pending edits.
 *
 * @param {*} skuA
 * @param {*} skuB
 * @returns {boolean} true when the pair was added; false when it is invalid or
 *   already present (in either order) in the pending or submitted edits.
 */
export function addPendingIgnore(skuA, skuB) {
  const a = normSku(skuA);
  const b = normSku(skuB);
  const k = pairKey(a, b);
  if (!k) return false;

  const pending = loadPendingEdits();
  const submitted = loadSubmittedEdits();

  const seen = new Set(
    [
      ...pending.ignores.map((x) => pairKey(x.skuA, x.skuB)),
      ...submitted.ignores.map((x) => pairKey(x.skuA, x.skuB)),
    ].filter(Boolean),
  );
  if (seen.has(k)) return false;

  pending.ignores.push({ skuA: a, skuB: b });
  savePendingEdits(pending);
  return true;
}
/**
 * Merge PENDING + SUBMITTED local edits into a meta object {links, ignores}.
 *
 * The input is not mutated: its links/ignores arrays are copied first. Overlay
 * entries whose key already exists in the result are skipped.
 *
 * @param {{generatedAt?: string, links?: Array, ignores?: Array}} meta
 * @returns {{generatedAt: string, links: Array, ignores: Array}}
 */
export function applyPendingToMeta(meta) {
  const base = {
    generatedAt: String(meta?.generatedAt || ""),
    links: Array.isArray(meta?.links) ? meta.links.slice() : [],
    ignores: Array.isArray(meta?.ignores) ? meta.ignores.slice() : [],
  };

  const p0 = loadPendingEdits();
  const p1 = loadSubmittedEdits();
  const overlay = {
    links: [...(p0.links || []), ...(p1.links || [])],
    ignores: [...(p0.ignores || []), ...(p1.ignores || [])],
  };

  // merge links (dedupe by from→to)
  const seenL = new Set(
    base.links.map((x) => linkKey(String(x?.fromSku || "").trim(), String(x?.toSku || "").trim())).filter(Boolean),
  );
  for (const x of overlay.links) {
    const k = linkKey(x.fromSku, x.toSku);
    if (!k || seenL.has(k)) continue;
    seenL.add(k);
    base.links.push({ fromSku: x.fromSku, toSku: x.toSku });
  }

  // merge ignores (dedupe by canonical pair key); base entries may use {a, b}
  const seenI = new Set(
    base.ignores
      .map((x) => pairKey(String(x?.skuA || x?.a || "").trim(), String(x?.skuB || x?.b || "").trim()))
      .filter(Boolean),
  );
  for (const x of overlay.ignores) {
    const k = pairKey(x.skuA, x.skuB);
    if (!k || seenI.has(k)) continue;
    seenI.add(k);
    base.ignores.push({ skuA: x.skuA, skuB: x.skuB });
  }

  return base;
}
/**
 * Move everything from pending -> submitted, then clear pending.
 *
 * Entries already present in the submitted edits are not duplicated there.
 * When nothing is pending, storage is left untouched.
 *
 * @returns {{createdAt: string, links: Array, ignores: Array}} the pending
 *   payload that was moved (what should be sent in PR/issue).
 */
export function movePendingToSubmitted() {
  const pending = loadPendingEdits();
  if (!pending.links.length && !pending.ignores.length) return pending;

  const sub = loadSubmittedEdits();

  const seenL = new Set(sub.links.map((x) => linkKey(x.fromSku, x.toSku)).filter(Boolean));
  for (const x of pending.links) {
    const k = linkKey(x.fromSku, x.toSku);
    if (!k || seenL.has(k)) continue;
    seenL.add(k);
    sub.links.push({ fromSku: x.fromSku, toSku: x.toSku });
  }

  const seenI = new Set(sub.ignores.map((x) => pairKey(x.skuA, x.skuB)).filter(Boolean));
  for (const x of pending.ignores) {
    const k = pairKey(x.skuA, x.skuB);
    if (!k || seenI.has(k)) continue;
    seenI.add(k);
    sub.ignores.push({ skuA: x.skuA, skuB: x.skuB });
  }

  saveSubmittedEdits(sub);
  clearPendingEdits();
  return pending;
}

View file

@ -1,21 +1,12 @@
import { esc, renderThumbHtml, prettyTs } from "./dom.js"; import { esc, renderThumbHtml, prettyTs } from "./dom.js";
import { import { tokenizeQuery, matchesAllTokens, displaySku, keySkuForRow, parsePriceToNumber } from "./sku.js";
tokenizeQuery,
matchesAllTokens,
displaySku,
keySkuForRow,
parsePriceToNumber,
} from "./sku.js";
import { loadIndex, loadRecent, loadSavedQuery, saveQuery } from "./state.js"; import { loadIndex, loadRecent, loadSavedQuery, saveQuery } from "./state.js";
import { aggregateBySku } from "./catalog.js"; import { aggregateBySku } from "./catalog.js";
import { loadSkuRules } from "./mapping.js"; import { loadSkuRules } from "./mapping.js";
import { import { smwsDistilleryCodesForQueryPrefix, smwsDistilleryCodeFromName } from "./smws.js";
smwsDistilleryCodesForQueryPrefix,
smwsDistilleryCodeFromName,
} from "./smws.js";
export function renderSearch($app) { export function renderSearch($app) {
$app.innerHTML = ` $app.innerHTML = `
<div class="container"> <div class="container">
<div class="header"> <div class="header">
<!-- Row 1 --> <!-- Row 1 -->
@ -50,123 +41,117 @@ export function renderSearch($app) {
</div> </div>
`; `;
const $q = document.getElementById("q"); const $q = document.getElementById("q");
const $results = document.getElementById("results"); const $results = document.getElementById("results");
const $stores = document.getElementById("stores"); const $stores = document.getElementById("stores");
const $clearSearch = document.getElementById("clearSearch"); const $clearSearch = document.getElementById("clearSearch");
$q.value = loadSavedQuery(); $q.value = loadSavedQuery();
let aggBySku = new Map(); let aggBySku = new Map();
let allAgg = []; let allAgg = [];
let indexReady = false; let indexReady = false;
// canonicalSku -> storeLabel -> url // canonicalSku -> storeLabel -> url
let URL_BY_SKU_STORE = new Map(); let URL_BY_SKU_STORE = new Map();
/**
 * Index listing URLs as canonicalSku -> storeLabel -> url.
 *
 * Removed listings and rows without a SKU, store label or URL are skipped.
 * The first URL seen for a given SKU/store pair wins.
 *
 * @param {Array<object>} listings - listing rows; a non-array is treated as empty.
 * @param {(sku: string) => string} [canonicalSkuFn] - optional SKU canonicalizer.
 * @returns {Map<string, Map<string, string>>}
 */
function buildUrlMap(listings, canonicalSkuFn) {
  const out = new Map();
  for (const r of Array.isArray(listings) ? listings : []) {
    if (!r || r.removed) continue;

    const skuKey = String(keySkuForRow(r) || "").trim();
    if (!skuKey) continue;

    const sku = String(canonicalSkuFn ? canonicalSkuFn(skuKey) : skuKey);
    if (!sku) continue;

    const storeLabel = String(r.storeLabel || r.store || "").trim();
    const url = String(r.url || "").trim();
    if (!storeLabel || !url) continue;

    let m = out.get(sku);
    if (!m) out.set(sku, (m = new Map()));
    if (!m.has(storeLabel)) m.set(storeLabel, url);
  }
  return out;
}
/**
 * Look up the listing URL for an aggregate item at a given store.
 *
 * @param {{sku?: *}} it - aggregate item.
 * @param {*} storeLabel
 * @returns {string} the URL from URL_BY_SKU_STORE, or "" when there is none.
 */
function urlForAgg(it, storeLabel) {
  const sku = String(it?.sku || "");
  const s = String(storeLabel || "");
  return URL_BY_SKU_STORE.get(sku)?.get(s) || "";
}
/**
 * Normalize a store label to a trimmed string ("" for falsy input).
 *
 * @param {*} s
 * @returns {string}
 */
function normStoreLabel(s) {
  return String(s || "").trim();
}
/**
 * Render one link button per store into $stores, sorted by label.
 *
 * A "storeBreak" marker is emitted after the button at which the running
 * label-length total first reaches half of the overall total.
 *
 * @param {Array<object>} listings - listing rows; a non-array is treated as empty.
 */
function renderStoreButtons(listings) {
  // include all stores seen (live or removed) so the selector is stable
  const set = new Set();
  for (const r of Array.isArray(listings) ? listings : []) {
    const lab = normStoreLabel(r?.storeLabel || r?.store || "");
    if (lab) set.add(lab);
  }
  const stores = Array.from(set).sort((a, b) => a.localeCompare(b));

  if (!stores.length) {
    $stores.innerHTML = "";
    return;
  }

  // Find the index where the accumulated label length reaches half the total.
  const totalChars = stores.reduce((n, s) => n + s.length, 0);
  const target = totalChars / 2;

  let acc = 0;
  let breakAt = stores.length;
  for (let i = 0; i < stores.length; i++) {
    acc += stores[i].length;
    if (acc >= target) {
      breakAt = i + 1;
      break;
    }
  }

  $stores.innerHTML = stores
    .map((s, i) => {
      const btn = `<a class="storeBtn" href="#/store/${encodeURIComponent(s)}">${esc(s)}</a>`;
      const brk =
        i === breakAt - 1 && stores.length > 1 ? `<span class="storeBreak" aria-hidden="true"></span>` : "";
      return btn + brk;
    })
    .join("");
}
function renderAggregates(items) { function renderAggregates(items) {
if (!items.length) { if (!items.length) {
$results.innerHTML = `<div class="small">No matches.</div>`; $results.innerHTML = `<div class="small">No matches.</div>`;
return; return;
} }
const limited = items.slice(0, 80); const limited = items.slice(0, 80);
$results.innerHTML = limited $results.innerHTML = limited
.map((it) => { .map((it) => {
const storeCount = it.stores.size || 0; const storeCount = it.stores.size || 0;
const plus = storeCount > 1 ? ` +${storeCount - 1}` : ""; const plus = storeCount > 1 ? ` +${storeCount - 1}` : "";
const price = it.cheapestPriceStr ? it.cheapestPriceStr : "(no price)"; const price = it.cheapestPriceStr ? it.cheapestPriceStr : "(no price)";
const store = it.cheapestStoreLabel || [...it.stores][0] || "Store"; const store = it.cheapestStoreLabel || [...it.stores][0] || "Store";
// link must match the displayed store label // link must match the displayed store label
const href = urlForAgg(it, store) || String(it.sampleUrl || "").trim(); const href = urlForAgg(it, store) || String(it.sampleUrl || "").trim();
const storeBadge = href const storeBadge = href
? `<a class="badge" href="${esc( ? `<a class="badge" href="${esc(
href href,
)}" target="_blank" rel="noopener noreferrer" onclick="event.stopPropagation()">${esc( )}" target="_blank" rel="noopener noreferrer" onclick="event.stopPropagation()">${esc(
store store,
)}${esc(plus)}</a>` )}${esc(plus)}</a>`
: `<span class="badge">${esc(store)}${esc(plus)}</span>`; : `<span class="badge">${esc(store)}${esc(plus)}</span>`;
const skuLink = `#/link/?left=${encodeURIComponent( const skuLink = `#/link/?left=${encodeURIComponent(String(it.sku || ""))}`;
String(it.sku || "")
)}`;
return ` return `
<div class="item" data-sku="${esc(it.sku)}"> <div class="item" data-sku="${esc(it.sku)}">
<div class="itemRow"> <div class="itemRow">
<div class="thumbBox"> <div class="thumbBox">
@ -176,10 +161,10 @@ export function renderSearch($app) {
<div class="itemTop"> <div class="itemTop">
<div class="itemName">${esc(it.name || "(no name)")}</div> <div class="itemName">${esc(it.name || "(no name)")}</div>
<a class="badge mono skuLink" href="${esc( <a class="badge mono skuLink" href="${esc(
skuLink skuLink,
)}" target="_blank" rel="noopener noreferrer" onclick="event.stopPropagation()">${esc( )}" target="_blank" rel="noopener noreferrer" onclick="event.stopPropagation()">${esc(
displaySku(it.sku) displaySku(it.sku),
)}</a> )}</a>
</div> </div>
<div class="metaRow"> <div class="metaRow">
<span class="mono price">${esc(price)}</span> <span class="mono price">${esc(price)}</span>
@ -189,299 +174,280 @@ export function renderSearch($app) {
</div> </div>
</div> </div>
`; `;
}) })
.join(""); .join("");
for (const el of Array.from($results.querySelectorAll(".item"))) { for (const el of Array.from($results.querySelectorAll(".item"))) {
el.addEventListener("click", () => { el.addEventListener("click", () => {
const sku = el.getAttribute("data-sku") || ""; const sku = el.getAttribute("data-sku") || "";
if (!sku) return; if (!sku) return;
saveQuery($q.value); saveQuery($q.value);
sessionStorage.setItem("viz:lastRoute", location.hash); sessionStorage.setItem("viz:lastRoute", location.hash);
location.hash = `#/item/${encodeURIComponent(sku)}`; location.hash = `#/item/${encodeURIComponent(sku)}`;
}); });
} }
} }
/**
 * Whole-number percentage discount from the old price to the new price.
 *
 * @param {*} oldRaw - old price, passed to parsePriceToNumber.
 * @param {*} newRaw - new price, passed to parsePriceToNumber.
 * @returns {number|null} rounded percent off (> 0), or null when either price
 *   is not finite, the old price is not positive, the price did not drop, or
 *   the drop rounds to 0%.
 */
function salePctOff(oldRaw, newRaw) {
  const oldN = parsePriceToNumber(oldRaw);
  const newN = parsePriceToNumber(newRaw);
  if (!Number.isFinite(oldN) || !Number.isFinite(newN)) return null;
  if (!(oldN > 0)) return null;
  if (!(newN < oldN)) return null;
  const pct = Math.round(((oldN - newN) / oldN) * 100);
  return Number.isFinite(pct) && pct > 0 ? pct : null;
}
/**
 * Whole-number signed percentage change from the old price to the new price.
 *
 * @param {*} oldRaw - old price, passed to parsePriceToNumber.
 * @param {*} newRaw - new price, passed to parsePriceToNumber.
 * @returns {number|null} rounded percent change (negative for a drop), or null
 *   when either price is not finite or the old price is not positive.
 */
function pctChange(oldRaw, newRaw) {
  const oldN = parsePriceToNumber(oldRaw);
  const newN = parsePriceToNumber(newRaw);
  if (!Number.isFinite(oldN) || !Number.isFinite(newN)) return null;
  if (!(oldN > 0)) return null;
  const pct = Math.round(((newN - oldN) / oldN) * 100);
  return Number.isFinite(pct) ? pct : null;
}
/**
 * Timestamp of a recent-change record in epoch milliseconds.
 *
 * @param {{ts?: *, date?: *}} r - record; `ts` is tried first, then `date`.
 * @returns {number} Date.parse result of the first parseable field, or 0.
 */
function tsValue(r) {
  const t = String(r?.ts || "");
  const ms = t ? Date.parse(t) : NaN;
  if (Number.isFinite(ms)) return ms;

  const d = String(r?.date || "");
  const ms2 = d ? Date.parse(d) : NaN;
  return Number.isFinite(ms2) ? ms2 : 0;
}
/**
 * Custom priority (unchanged): score a recent-change record for ordering.
 *
 * Higher `score` sorts earlier; `tie` breaks ties within the same score.
 *
 * @param {object} r - recent-change record (sku, kind, oldPrice, newPrice,
 *   storeLabel/store, ts/date are read).
 * @param {(sku: string) => string} [canonSkuFn] - optional SKU canonicalizer.
 * @returns {{sku: string, kind: string, pctOff: (number|null), storeCount: number,
 *   isNewUnique: boolean, score: number, tie: number}}
 */
function rankRecent(r, canonSkuFn) {
  const rawSku = String(r?.sku || "");
  const sku = String(canonSkuFn ? canonSkuFn(rawSku) : rawSku);
  const agg = aggBySku.get(sku) || null;

  const storeLabelRaw = String(r?.storeLabel || r?.store || "").trim();
  const bestStoreRaw = String(agg?.cheapestStoreLabel || "").trim();

  const normStore = (s) =>
    String(s || "")
      .trim()
      .toLowerCase();

  // Normalize kind: a generic "price_change" becomes price_down/price_up when
  // both prices parse and differ.
  let kind = String(r?.kind || "");
  if (kind === "price_change") {
    const o = parsePriceToNumber(r?.oldPrice || "");
    const n = parsePriceToNumber(r?.newPrice || "");
    if (Number.isFinite(o) && Number.isFinite(n)) {
      if (n < o) kind = "price_down";
      else if (n > o) kind = "price_up";
    }
  }

  const pctOff = kind === "price_down" ? salePctOff(r?.oldPrice || "", r?.newPrice || "") : null;
  const pctUp = kind === "price_up" ? pctChange(r?.oldPrice || "", r?.newPrice || "") : null;

  const isNew = kind === "new";
  const storeCount = agg?.stores?.size || 0;
  const isNewUnique = isNew && storeCount <= 1;

  // Cheapest checks (use aggregate index)
  const newPriceNum = kind === "price_down" || kind === "price_up" ? parsePriceToNumber(r?.newPrice || "") : null;
  const bestPriceNum = Number.isFinite(agg?.cheapestPriceNum) ? agg.cheapestPriceNum : null;

  // Prices within EPS of each other count as equal.
  const EPS = 0.01;
  const priceMatchesBest =
    Number.isFinite(newPriceNum) && Number.isFinite(bestPriceNum)
      ? Math.abs(newPriceNum - bestPriceNum) <= EPS
      : false;

  const storeIsBest =
    normStore(storeLabelRaw) && normStore(bestStoreRaw) && normStore(storeLabelRaw) === normStore(bestStoreRaw);

  const saleIsCheapestHere = kind === "price_down" && storeIsBest && priceMatchesBest;
  const saleIsTiedCheapest = kind === "price_down" && !storeIsBest && priceMatchesBest;
  const saleIsCheapest = saleIsCheapestHere || saleIsTiedCheapest;

  // Bucketed scoring (higher = earlier)
  let score = 0;

  function saleBucketScore(isCheapest, pct) {
    const p = Number.isFinite(pct) ? pct : 0;

    if (isCheapest) {
      if (p >= 20) return 9000 + p;
      if (p >= 10) return 7000 + p;
      if (p > 0) return 6000 + p;
      return 5900;
    } else {
      if (p >= 20) return 4500 + p;
      if (p >= 10) return 1500 + p;
      if (p > 0) return 1200 + p;
      return 1000;
    }
  }

  if (kind === "price_down") {
    score = saleBucketScore(saleIsCheapest, pctOff);
  } else if (isNewUnique) {
    score = 8000;
  } else if (kind === "removed") {
    score = 3000;
  } else if (kind === "price_up") {
    score = 2000 + Math.min(99, Math.max(0, pctUp || 0));
  } else if (kind === "new") {
    score = 1100;
  } else if (kind === "restored") {
    score = 5000;
  } else {
    score = 0;
  }

  let tie = 0;
  if (kind === "price_down") tie = (pctOff || 0) * 100000 + tsValue(r);
  else if (kind === "price_up") tie = (pctUp || 0) * 100000 + tsValue(r);
  else tie = tsValue(r);

  return { sku, kind, pctOff, storeCount, isNewUnique, score, tie };
}
function renderRecent(recent, canonicalSkuFn) { function renderRecent(recent, canonicalSkuFn) {
const items = Array.isArray(recent?.items) ? recent.items : []; const items = Array.isArray(recent?.items) ? recent.items : [];
if (!items.length) { if (!items.length) {
$results.innerHTML = `<div class="small">Type to search…</div>`; $results.innerHTML = `<div class="small">Type to search…</div>`;
return; return;
} }
const canon = const canon = typeof canonicalSkuFn === "function" ? canonicalSkuFn : (x) => x;
typeof canonicalSkuFn === "function" ? canonicalSkuFn : (x) => x;
const nowMs = Date.now(); const nowMs = Date.now();
const cutoffMs = nowMs - 3 * 24 * 60 * 60 * 1000; const cutoffMs = nowMs - 3 * 24 * 60 * 60 * 1000;
function eventMs(r) { function eventMs(r) {
const t = String(r?.ts || ""); const t = String(r?.ts || "");
const ms = t ? Date.parse(t) : NaN; const ms = t ? Date.parse(t) : NaN;
if (Number.isFinite(ms)) return ms; if (Number.isFinite(ms)) return ms;
const d = String(r?.date || ""); const d = String(r?.date || "");
const ms2 = d ? Date.parse(d + "T00:00:00Z") : NaN; const ms2 = d ? Date.parse(d + "T00:00:00Z") : NaN;
return Number.isFinite(ms2) ? ms2 : 0; return Number.isFinite(ms2) ? ms2 : 0;
} }
const inWindow = items.filter((r) => { const inWindow = items.filter((r) => {
const ms = eventMs(r); const ms = eventMs(r);
return ms >= cutoffMs && ms <= nowMs; return ms >= cutoffMs && ms <= nowMs;
}); });
if (!inWindow.length) { if (!inWindow.length) {
$results.innerHTML = `<div class="small">No changes in the last 3 days.</div>`; $results.innerHTML = `<div class="small">No changes in the last 3 days.</div>`;
return; return;
} }
const bySkuStore = new Map(); const bySkuStore = new Map();
for (const r of inWindow) { for (const r of inWindow) {
const rawSku = String(r?.sku || "").trim(); const rawSku = String(r?.sku || "").trim();
if (!rawSku) continue; if (!rawSku) continue;
const sku = String(canon(rawSku) || "").trim(); const sku = String(canon(rawSku) || "").trim();
if (!sku) continue; if (!sku) continue;
const storeLabel = String(r?.storeLabel || r?.store || "Store").trim() || "Store"; const storeLabel = String(r?.storeLabel || r?.store || "Store").trim() || "Store";
const ms = eventMs(r); const ms = eventMs(r);
let storeMap = bySkuStore.get(sku); let storeMap = bySkuStore.get(sku);
if (!storeMap) bySkuStore.set(sku, (storeMap = new Map())); if (!storeMap) bySkuStore.set(sku, (storeMap = new Map()));
const prev = storeMap.get(storeLabel); const prev = storeMap.get(storeLabel);
if (!prev || eventMs(prev) < ms) storeMap.set(storeLabel, r); if (!prev || eventMs(prev) < ms) storeMap.set(storeLabel, r);
} }
const picked = []; const picked = [];
for (const [sku, storeMap] of bySkuStore.entries()) { for (const [sku, storeMap] of bySkuStore.entries()) {
let best = null; let best = null;
for (const r of storeMap.values()) { for (const r of storeMap.values()) {
const meta = rankRecent(r, canon); const meta = rankRecent(r, canon);
const ms = eventMs(r); const ms = eventMs(r);
if ( if (
!best || !best ||
meta.score > best.meta.score || meta.score > best.meta.score ||
(meta.score === best.meta.score && meta.tie > best.meta.tie) || (meta.score === best.meta.score && meta.tie > best.meta.tie) ||
(meta.score === best.meta.score && (meta.score === best.meta.score && meta.tie === best.meta.tie && ms > best.ms)
meta.tie === best.meta.tie && ) {
ms > best.ms) best = { r, meta, ms };
) { }
best = { r, meta, ms }; }
}
}
if (best) picked.push(best); if (best) picked.push(best);
} }
const ranked = picked.sort((a, b) => { const ranked = picked.sort((a, b) => {
if (b.meta.score !== a.meta.score) return b.meta.score - a.meta.score; if (b.meta.score !== a.meta.score) return b.meta.score - a.meta.score;
if (b.meta.tie !== a.meta.tie) return b.meta.tie - a.meta.tie; if (b.meta.tie !== a.meta.tie) return b.meta.tie - a.meta.tie;
return String(a.meta.sku || "").localeCompare(String(b.meta.sku || "")); return String(a.meta.sku || "").localeCompare(String(b.meta.sku || ""));
}); });
const limited = ranked.slice(0, 140); const limited = ranked.slice(0, 140);
$results.innerHTML = $results.innerHTML =
`<div class="small">Recently changed (last 3 days):</div>` + `<div class="small">Recently changed (last 3 days):</div>` +
limited limited
.map(({ r, meta }) => { .map(({ r, meta }) => {
const kindLabel = const kindLabel =
meta.kind === "new" meta.kind === "new"
? "NEW" ? "NEW"
: meta.kind === "restored" : meta.kind === "restored"
? "RESTORED" ? "RESTORED"
: meta.kind === "removed" : meta.kind === "removed"
? "REMOVED" ? "REMOVED"
: meta.kind === "price_down" : meta.kind === "price_down"
? "PRICE ↓" ? "PRICE ↓"
: meta.kind === "price_up" : meta.kind === "price_up"
? "PRICE ↑" ? "PRICE ↑"
: meta.kind === "price_change" : meta.kind === "price_change"
? "PRICE" ? "PRICE"
: "CHANGE"; : "CHANGE";
const priceLine = const priceLine =
meta.kind === "new" || meta.kind === "restored" || meta.kind === "removed" meta.kind === "new" || meta.kind === "restored" || meta.kind === "removed"
? `${esc(r.price || "")}` ? `${esc(r.price || "")}`
: `${esc(r.oldPrice || "")}${esc(r.newPrice || "")}`; : `${esc(r.oldPrice || "")}${esc(r.newPrice || "")}`;
const when = r.ts ? prettyTs(r.ts) : r.date || ""; const when = r.ts ? prettyTs(r.ts) : r.date || "";
const sku = meta.sku; // canonical SKU const sku = meta.sku; // canonical SKU
const agg = aggBySku.get(sku) || null; const agg = aggBySku.get(sku) || null;
const img = agg?.img || ""; const img = agg?.img || "";
const storeCount = agg?.stores?.size || 0; const storeCount = agg?.stores?.size || 0;
const plus = storeCount > 1 ? ` +${storeCount - 1}` : ""; const plus = storeCount > 1 ? ` +${storeCount - 1}` : "";
const href = String(r.url || "").trim(); const href = String(r.url || "").trim();
const storeBadge = href const storeBadge = href
? `<a class="badge" href="${esc( ? `<a class="badge" href="${esc(
href href,
)}" target="_blank" rel="noopener noreferrer" onclick="event.stopPropagation()">${esc( )}" target="_blank" rel="noopener noreferrer" onclick="event.stopPropagation()">${esc(
(r.storeLabel || r.store || "") + plus (r.storeLabel || r.store || "") + plus,
)}</a>` )}</a>`
: `<span class="badge">${esc( : `<span class="badge">${esc((r.storeLabel || r.store || "") + plus)}</span>`;
(r.storeLabel || r.store || "") + plus
)}</span>`;
const dateBadge = when const dateBadge = when ? `<span class="badge mono">${esc(when)}</span>` : "";
? `<span class="badge mono">${esc(when)}</span>`
: "";
const offBadge = const offBadge =
meta.kind === "price_down" && meta.pctOff !== null meta.kind === "price_down" && meta.pctOff !== null
? `<span class="badge" style="margin-left:6px; color:rgba(20,110,40,0.95); background:rgba(20,110,40,0.10); border:1px solid rgba(20,110,40,0.20);">[${esc( ? `<span class="badge" style="margin-left:6px; color:rgba(20,110,40,0.95); background:rgba(20,110,40,0.10); border:1px solid rgba(20,110,40,0.20);">[${esc(
meta.pctOff meta.pctOff,
)}% Off]</span>` )}% Off]</span>`
: ""; : "";
const kindBadgeStyle = const kindBadgeStyle =
meta.kind === "new" && meta.isNewUnique meta.kind === "new" && meta.isNewUnique
? ` style="color:rgba(20,110,40,0.95); background:rgba(20,110,40,0.10); border:1px solid rgba(20,110,40,0.20);"` ? ` style="color:rgba(20,110,40,0.95); background:rgba(20,110,40,0.10); border:1px solid rgba(20,110,40,0.20);"`
: ""; : "";
const skuLink = `#/link/?left=${encodeURIComponent(String(sku || ""))}`; const skuLink = `#/link/?left=${encodeURIComponent(String(sku || ""))}`;
return ` return `
<div class="item" data-sku="${esc(sku)}"> <div class="item" data-sku="${esc(sku)}">
<div class="itemRow"> <div class="itemRow">
<div class="thumbBox"> <div class="thumbBox">
@ -491,10 +457,10 @@ export function renderSearch($app) {
<div class="itemTop"> <div class="itemTop">
<div class="itemName">${esc(r.name || "(no name)")}</div> <div class="itemName">${esc(r.name || "(no name)")}</div>
<a class="badge mono skuLink" href="${esc( <a class="badge mono skuLink" href="${esc(
skuLink skuLink,
)}" target="_blank" rel="noopener noreferrer" onclick="event.stopPropagation()">${esc( )}" target="_blank" rel="noopener noreferrer" onclick="event.stopPropagation()">${esc(
displaySku(sku) displaySku(sku),
)}</a> )}</a>
</div> </div>
<div class="metaRow"> <div class="metaRow">
<span class="badge"${kindBadgeStyle}>${esc(kindLabel)}</span> <span class="badge"${kindBadgeStyle}>${esc(kindLabel)}</span>
@ -507,113 +473,103 @@ export function renderSearch($app) {
</div> </div>
</div> </div>
`; `;
}) })
.join(""); .join("");
for (const el of Array.from($results.querySelectorAll(".item"))) { for (const el of Array.from($results.querySelectorAll(".item"))) {
el.addEventListener("click", () => { el.addEventListener("click", () => {
const sku = el.getAttribute("data-sku") || ""; const sku = el.getAttribute("data-sku") || "";
if (!sku) return; if (!sku) return;
saveQuery($q.value); saveQuery($q.value);
sessionStorage.setItem("viz:lastRoute", location.hash); sessionStorage.setItem("viz:lastRoute", location.hash);
location.hash = `#/item/${encodeURIComponent(sku)}`; location.hash = `#/item/${encodeURIComponent(sku)}`;
}); });
} }
} }
/**
 * Runs the current search-box query against the aggregated index and renders
 * the result list.
 *
 * Does nothing until the index has been loaded (`indexReady`) or when the
 * query yields no tokens. Items whose normalized search text contains every
 * token are the base matches. When the raw query is also a prefix of one or
 * more SMWS distillery names, items whose name carries a matching SMWS
 * distillery code are added and listed ahead of the base matches.
 */
function applySearch() {
  if (!indexReady) return;

  const tokens = tokenizeQuery($q.value);
  if (!tokens.length) return;

  const matches = allAgg.filter((it) => matchesAllTokens(it.searchText, tokens));

  const wantCodes = new Set(smwsDistilleryCodesForQueryPrefix($q.value));
  if (!wantCodes.size) {
    renderAggregates(matches);
    return;
  }

  // Track SKUs already shown so a distillery-code hit is never listed twice.
  const seen = new Set(matches.map((it) => String(it?.sku || "")));
  const extra = [];
  for (const it of allAgg) {
    const sku = String(it?.sku || "");
    if (!sku || seen.has(sku)) continue;

    const dCode = smwsDistilleryCodeFromName(it?.name || "");
    if (dCode && wantCodes.has(String(dCode))) {
      extra.push(it);
      seen.add(sku);
    }
  }

  renderAggregates([...extra, ...matches]);
}
// Placeholder shown while the index and the SKU rules are being fetched.
$results.innerHTML = `<div class="small">Loading index…</div>`;

/** Returns true while the search box holds no searchable tokens. */
function searchBoxIsEmpty() {
  return tokenizeQuery($q.value).length === 0;
}

/**
 * Loads the SKU rules and the recent-changes feed, then renders the recent
 * list. Falls back to the "Type to search…" prompt when loading or rendering
 * fails. Both outcomes are skipped if the user has typed a query in the
 * meantime, so a late response never overwrites live search results.
 */
function showRecentForEmptyQuery() {
  loadSkuRules()
    .then((rules) =>
      loadRecent().then((recent) => {
        if (searchBoxIsEmpty()) renderRecent(recent, rules.canonicalSku);
      }),
    )
    .catch(() => {
      if (searchBoxIsEmpty()) {
        $results.innerHTML = `<div class="small">Type to search…</div>`;
      }
    });
}

// Initial load: build the aggregated index, then show either the results for
// a restored query or the recent-changes list.
Promise.all([loadIndex(), loadSkuRules()])
  .then(([idx, rules]) => {
    const listings = Array.isArray(idx.items) ? idx.items : [];
    renderStoreButtons(listings);
    allAgg = aggregateBySku(listings, rules.canonicalSku);
    aggBySku = new Map(allAgg.map((x) => [String(x.sku || ""), x]));
    URL_BY_SKU_STORE = buildUrlMap(listings, rules.canonicalSku);
    indexReady = true;
    $q.focus();

    if (!searchBoxIsEmpty()) {
      applySearch();
      return undefined;
    }
    // Returned so a failure while rendering reaches the catch below.
    return loadRecent().then((recent) => {
      if (searchBoxIsEmpty()) renderRecent(recent, rules.canonicalSku);
    });
  })
  .catch((e) => {
    // `e` may not be an Error instance; never dereference it blindly.
    $results.innerHTML = `<div class="small">Failed to load: ${esc(String(e?.message ?? e))}</div>`;
  });

// Clear button: wipe the stored query and go back to the recent-changes list.
$clearSearch.addEventListener("click", () => {
  if ($q.value) {
    $q.value = "";
    saveQuery("");
  }
  showRecentForEmptyQuery();
  $q.focus();
});

// Debounce handle for the input listener below.
let t = null;
$q.addEventListener("input", () => {
  saveQuery($q.value);
  if (t) clearTimeout(t);
  t = setTimeout(() => {
    if (searchBoxIsEmpty()) {
      showRecentForEmptyQuery();
      return;
    }
    applySearch();
  }, 50);
});
} }

View file

@ -1,60 +1,59 @@
/**
 * Extracts a numeric price from a display string such as "$1,299.99".
 *
 * Everything except digits and dots is stripped before conversion.
 *
 * @param {*} v - Raw price value (string, number, null or undefined).
 * @returns {number|null} The parsed number, or null when the input holds no
 *   digits or the remaining text is not a valid number (e.g. "1.2.3").
 */
export function parsePriceToNumber(v) {
  const s = String(v ?? "").replace(/[^0-9.]/g, "");
  // Number("") is 0, so inputs without any digit ("", null, "N/A") must be
  // rejected explicitly instead of being reported as a price of zero.
  if (!/\d/.test(s)) return null;
  const n = Number(s);
  return Number.isFinite(n) ? n : null;
}
/**
 * Computes the 32-bit FNV-1a hash of a string.
 *
 * The hash is fed one UTF-16 code unit at a time (`charCodeAt`), so it matches
 * the byte-oriented reference algorithm for ASCII input only.
 *
 * @param {string} str - Text to hash.
 * @returns {string} The hash as 8 lowercase hexadecimal digits.
 */
export function fnv1a32(str) {
  let h = 0x811c9dc5; // FNV offset basis
  for (let i = 0; i < str.length; i++) {
    h ^= str.charCodeAt(i);
    h = Math.imul(h, 0x01000193); // FNV prime, multiplied with 32-bit wraparound
  }
  // `>>> 0` reinterprets the signed 32-bit result as unsigned before formatting.
  return (h >>> 0).toString(16).padStart(8, "0");
}
/**
 * Builds a stand-in SKU key for a row that has no real SKU.
 *
 * The key is "u:" followed by the FNV-1a hash of "<store>|<url>", where the
 * store is `storeLabel`, else `store`, else the literal "store".
 *
 * @param {object} r - Listing row; may be null or undefined.
 * @returns {string} Synthetic key of the form "u:xxxxxxxx".
 */
export function makeSyntheticSku(r) {
  const store = String(r?.storeLabel || r?.store || "store");
  const url = String(r?.url || "");
  const key = `${store}|${url}`;
  return `u:${fnv1a32(key)}`;
}
/**
 * Returns the SKU key used to group a listing row.
 *
 * - A trimmed SKU of the form "id:<1-6 digits>" (case-insensitive) becomes the
 *   digits left-padded with zeros to six characters.
 * - Any other non-empty SKU is returned trimmed.
 * - A missing or blank SKU falls back to `makeSyntheticSku(r)`.
 *
 * @param {object} r - Listing row; may be null or undefined.
 * @returns {string} The grouping key.
 */
export function keySkuForRow(r) {
  const real0 = String(r?.sku || "").trim();
  if (!real0) return makeSyntheticSku(r);

  const m = real0.match(/^id:(\d{1,6})$/i);
  return m ? String(m[1]).padStart(6, "0") : real0;
}
/**
 * Returns the text to show for a SKU key.
 *
 * @param {*} key - SKU key; falsy values are treated as "".
 * @returns {string} "unknown" for keys starting with "u:", otherwise the key
 *   as a string.
 */
export function displaySku(key) {
  const s = String(key || "");
  return s.startsWith("u:") ? "unknown" : s;
}
/**
 * Tells whether a SKU key starts with the "u:" prefix.
 *
 * @param {*} key - SKU key; falsy values are treated as "".
 * @returns {boolean} True when the key starts with "u:".
 */
export function isUnknownSkuKey(key) {
  return String(key || "").startsWith("u:");
}
/**
 * Normalizes text for search: lowercase, every run of characters outside
 * [a-z0-9] becomes a single space, and the ends are trimmed.
 *
 * Non-ASCII letters count as separators, so "Smögen" becomes "sm gen".
 *
 * @param {*} s - Text to normalize; null and undefined are treated as "".
 * @returns {string} Space-separated lowercase alphanumeric words.
 */
export function normSearchText(s) {
  // Whitespace is itself outside [a-z0-9], so this single replacement already
  // collapses runs of spaces; no separate whitespace pass is needed.
  return String(s ?? "")
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, " ")
    .trim();
}
/**
 * Splits a raw query into normalized search tokens.
 *
 * @param {*} q - Raw query text.
 * @returns {string[]} The non-empty words of `normSearchText(q)`; an empty
 *   array when the normalized query is empty.
 */
export function tokenizeQuery(q) {
  const n = normSearchText(q);
  return n ? n.split(" ").filter(Boolean) : [];
}
/**
 * Checks that a normalized haystack contains every token as a substring.
 *
 * @param {string} hayNorm - Already-normalized text to search in; a missing
 *   value is treated as "".
 * @param {string[]} tokens - Normalized tokens to look for.
 * @returns {boolean} True when `tokens` is empty or every token occurs in
 *   `hayNorm`.
 */
export function matchesAllTokens(hayNorm, tokens) {
  // Coerce so an item without search text is a non-match rather than a TypeError.
  const hay = String(hayNorm ?? "");
  return tokens.every((t) => hay.includes(t));
}

View file

@ -2,196 +2,196 @@
import { normSearchText } from "./sku.js"; import { normSearchText } from "./sku.js";
// Distillery names keyed by numeric code (as strings), listed in ascending
// code order from "1" to "165".
// NOTE(review): "Inverleven" appears under both 20 and 98, and "Penderyn"
// under both 128 and 164 — confirm against the official SMWS code list.
const DISTILLERIES = [
  { code: "1", name: "Glenfarclas" },
  { code: "2", name: "Glenlivet" },
  { code: "3", name: "Bowmore" },
  { code: "4", name: "Highland Park" },
  { code: "5", name: "Auchentoshan" },
  { code: "6", name: "Macduff" },
  { code: "7", name: "Longmorn" },
  { code: "8", name: "Tamdhu" },
  { code: "9", name: "Glen Grant" },
  { code: "10", name: "Bunnahabhain" },
  { code: "11", name: "Tomatin" },
  { code: "12", name: "BenRiach" },
  { code: "13", name: "Dalmore" },
  { code: "14", name: "Talisker" },
  { code: "15", name: "Glenfiddich" },
  { code: "16", name: "Glenturret" },
  { code: "17", name: "Scapa" },
  { code: "18", name: "Inchgower" },
  { code: "19", name: "Glen Garioch" },
  { code: "20", name: "Inverleven" },
  { code: "21", name: "Glenglassaugh" },
  { code: "22", name: "Glenkinchie" },
  { code: "23", name: "Bruichladdich" },
  { code: "24", name: "Macallan" },
  { code: "25", name: "Rosebank" },
  { code: "26", name: "Clynelish" },
  { code: "27", name: "Springbank" },
  { code: "28", name: "Tullibardine" },
  { code: "29", name: "Laphroaig" },
  { code: "30", name: "Glenrothes" },
  { code: "31", name: "Isle of Jura" },
  { code: "32", name: "Edradour" },
  { code: "33", name: "Ardbeg" },
  { code: "34", name: "Tamnavulin" },
  { code: "35", name: "Glen Moray" },
  { code: "36", name: "Benrinnes" },
  { code: "37", name: "Cragganmore" },
  { code: "38", name: "Caperdonich" },
  { code: "39", name: "Linkwood" },
  { code: "40", name: "Balvenie" },
  { code: "41", name: "Dailuaine" },
  { code: "42", name: "Tobermory" },
  { code: "43", name: "Port Ellen" },
  { code: "44", name: "Craigellachie" },
  { code: "45", name: "Dallas Dhu" },
  { code: "46", name: "Glenlossie" },
  { code: "47", name: "Benromach" },
  { code: "48", name: "Balmenach" },
  { code: "49", name: "St. Magdalene" },
  { code: "50", name: "Bladnoch" },
  { code: "51", name: "Bushmills" },
  { code: "52", name: "Old Pulteney" },
  { code: "53", name: "Caol Ila" },
  { code: "54", name: "Aberlour" },
  { code: "55", name: "Royal Brackla" },
  { code: "56", name: "Coleburn" },
  { code: "57", name: "Glen Mhor" },
  { code: "58", name: "Strathisla" },
  { code: "59", name: "Teaninich" },
  { code: "60", name: "Aberfeldy" },
  { code: "61", name: "Brora" },
  { code: "62", name: "Glenlochy" },
  { code: "63", name: "Glentauchers" },
  { code: "64", name: "Mannochmore" },
  { code: "65", name: "Imperial" },
  { code: "66", name: "Ardmore" },
  { code: "67", name: "Banff" },
  { code: "68", name: "Blair Athol" },
  { code: "69", name: "Glen Albyn" },
  { code: "70", name: "Balblair" },
  { code: "71", name: "Glenburgie" },
  { code: "72", name: "Miltonduff" },
  { code: "73", name: "Aultmore" },
  { code: "74", name: "North Port" },
  { code: "75", name: "Glenury / Glenury Royal" },
  { code: "76", name: "Mortlach" },
  { code: "77", name: "Glen Ord" },
  { code: "78", name: "Ben Nevis" },
  { code: "79", name: "Deanston" },
  { code: "80", name: "Glen Spey" },
  { code: "81", name: "Glen Keith" },
  { code: "82", name: "Glencadam" },
  { code: "83", name: "Convalmore" },
  { code: "84", name: "Glendullan" },
  { code: "85", name: "Glen Elgin" },
  { code: "86", name: "Glenesk" },
  { code: "87", name: "Millburn" },
  { code: "88", name: "Speyburn" },
  { code: "89", name: "Tomintoul" },
  { code: "90", name: "Pittyvaich" },
  { code: "91", name: "Dufftown" },
  { code: "92", name: "Lochside" },
  { code: "93", name: "Glen Scotia" },
  { code: "94", name: "Fettercairn" },
  { code: "95", name: "Auchroisk" },
  { code: "96", name: "GlenDronach" },
  { code: "97", name: "Littlemill" },
  { code: "98", name: "Inverleven" },
  { code: "99", name: "Glenugie" },
  { code: "100", name: "Strathmill" },
  { code: "101", name: "Knockando" },
  { code: "102", name: "Dalwhinnie" },
  { code: "103", name: "Royal Lochnagar" },
  { code: "104", name: "Glenburgie (Glencraig)" },
  { code: "105", name: "Tormore" },
  { code: "106", name: "Cardhu" },
  { code: "107", name: "Glenallachie" },
  { code: "108", name: "Allt-a-Bhainne" },
  { code: "109", name: "Miltonduff (Mosstowie)" },
  { code: "110", name: "Oban" },
  { code: "111", name: "Lagavulin" },
  { code: "112", name: "Loch Lomond (Inchmurrin / Inchmoan)" },
  { code: "113", name: "Braeval (Braes of Glenlivet)" },
  { code: "114", name: "Springbank (Longrow)" },
  { code: "115", name: "Knockdhu (AnCnoc)" },
  { code: "116", name: "Yoichi" },
  { code: "117", name: "Cooley (Unpeated)" },
  { code: "118", name: "Cooley / Connemara (Peated)" },
  { code: "119", name: "Yamazaki" },
  { code: "120", name: "Hakushu" },
  { code: "121", name: "Isle of Arran" },
  { code: "122", name: "Loch Lomond (Croftengea)" },
  { code: "123", name: "Glengoyne" },
  { code: "124", name: "Miyagikyo" },
  { code: "125", name: "Glenmorangie" },
  { code: "126", name: "Springbank (Hazelburn)" },
  { code: "127", name: "Bruichladdich (Port Charlotte)" },
  { code: "128", name: "Penderyn" },
  { code: "129", name: "Kilchoman" },
  { code: "130", name: "Chichibu" },
  { code: "131", name: "Hanyu" },
  { code: "132", name: "Karuizawa" },
  { code: "133", name: "Westland" },
  { code: "134", name: "Paul John" },
  { code: "135", name: "Loch Lomond" },
  { code: "136", name: "Eden Mill" },
  { code: "137", name: "St. Georges (The English Whisky Co.)" },
  { code: "138", name: "Nantou" },
  { code: "139", name: "Kavalan" },
  { code: "140", name: "Balcones" },
  { code: "141", name: "Fary Lochan" },
  { code: "142", name: "Breuckelen Distilling" },
  { code: "143", name: "Copperworks Distilling Co." },
  { code: "144", name: "High Coast Distillery" },
  { code: "145", name: "Smögen Whisky" },
  { code: "146", name: "Cotswolds" },
  { code: "147", name: "Archie Rose" },
  { code: "148", name: "Starward" },
  { code: "149", name: "Ardnamurchan" },
  { code: "150", name: "West Cork Distillers" },
  { code: "151", name: "Mackmyra" },
  { code: "152", name: "Shelter Point" },
  { code: "153", name: "Thy Whisky" },
  { code: "154", name: "Mosgaard Whisky" },
  { code: "155", name: "Milk & Honey Distillery" },
  { code: "156", name: "Glasgow Distillery" },
  { code: "157", name: "Distillerie de Warenghem" },
  { code: "158", name: "Yuza Distillery" },
  { code: "159", name: "Mars Shinshu" },
  { code: "160", name: "Mars Tsunuki" },
  { code: "161", name: "Nc'nean Distillery" },
  { code: "162", name: "Isle of Raasay" },
  { code: "163", name: "Isle of Harris Distillery" },
  { code: "164", name: "Penderyn" },
  { code: "165", name: "Wolfburn" },
];
// Lookup rows derived once at module load: each distillery code paired with
// its search-normalized name.
const DIST = DISTILLERIES.map((d) => ({
  code: String(d.code),
  nameNorm: normSearchText(d.name),
}));

// Matches the standalone word "smws", case-insensitively.
const SMWS_WORD_RE = /\bsmws\b/i;
// Matches "<1-3 digits>.<1-4 digits>" (e.g. 35.123); group 1 is the part
// before the dot.
const SMWS_CODE_RE = /\b(\d{1,3})\.(\d{1,4})\b/;
/**
 * Finds the distillery codes whose normalized name starts with the normalized
 * query.
 *
 * @param {*} qRaw - Raw query text as typed by the user.
 * @returns {string[]} Matching codes in table order, without duplicates; an
 *   empty array when the normalized query is shorter than two characters.
 */
export function smwsDistilleryCodesForQueryPrefix(qRaw) {
  const q = normSearchText(qRaw);
  if (!q || q.length < 2) return [];

  const out = new Set();
  for (const d of DIST) {
    if (d.nameNorm.startsWith(q)) out.add(d.code);
  }
  return Array.from(out);
}
/**
 * Extracts the SMWS distillery code from a listing name.
 *
 * The name must contain the word "SMWS" and a code such as 35.123; the part
 * before the dot ("35") is returned. The first code-shaped number in the name
 * wins.
 *
 * @param {*} name - Listing name; falsy values are treated as "".
 * @returns {string} The distillery code, or "" when the name does not qualify.
 */
export function smwsDistilleryCodeFromName(name) {
  const s = String(name || "");
  if (!SMWS_WORD_RE.test(s)) return "";

  const m = s.match(SMWS_CODE_RE);
  return m ? String(m[1] || "") : "";
}

View file

@ -4,34 +4,34 @@ let INDEX = null;
// Cached recent-changes payload; null until loadRecent() has run once.
let RECENT = null;
/**
 * Loads the listing index, fetching "./data/index.json" on first use and
 * returning the cached value afterwards.
 *
 * @returns {Promise<*>} The parsed index payload.
 * @throws Whatever `fetchJson` rejects with; nothing is cached in that case,
 *   so a later call fetches again.
 */
export async function loadIndex() {
  if (INDEX) return INDEX;
  INDEX = await fetchJson("./data/index.json");
  return INDEX;
}
/**
 * Loads the recent-changes feed from "./data/recent.json", caching the result.
 *
 * The feed is treated as optional: if the fetch fails, an empty feed
 * (`{ count: 0, items: [] }`) is cached and returned instead of rejecting.
 *
 * @returns {Promise<*>} The parsed feed, or the empty fallback.
 */
export async function loadRecent() {
  if (RECENT) return RECENT;
  try {
    RECENT = await fetchJson("./data/recent.json");
  } catch {
    // Deliberate best-effort: a missing feed must not break the page.
    RECENT = { count: 0, items: [] };
  }
  return RECENT;
}
// localStorage key under which the search box value is persisted across
// navigation.
const Q_LS_KEY = "stviz:v1:search:q";
/**
 * Reads the persisted search query from localStorage.
 *
 * @returns {string} The stored query, or "" when nothing is stored or storage
 *   access throws.
 */
export function loadSavedQuery() {
  try {
    return localStorage.getItem(Q_LS_KEY) || "";
  } catch {
    // Storage access failed; behave as if nothing was saved.
    return "";
  }
}
/**
 * Persists the search query to localStorage.
 *
 * @param {*} v - Value to store; null and undefined are stored as "".
 * @returns {void}
 */
export function saveQuery(v) {
  try {
    localStorage.setItem(Q_LS_KEY, String(v ?? ""));
  } catch {
    // Deliberate best-effort: persistence is a convenience, so a storage
    // failure is ignored rather than surfaced to the caller.
  }
}

File diff suppressed because it is too large Load diff

View file

@ -1,184 +1,213 @@
/**
 * Reduces a store label to a comparison id: lowercase with every character
 * outside [a-z0-9] removed.
 *
 * @param {*} s - Store label or id; falsy values are treated as "".
 * @returns {string} The normalized id, possibly "".
 */
function normalizeId(s) {
  return String(s || "")
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "");
}
// Maps normalized store *labels* to the canonical ids used by OVERRIDES.
const ALIASES = {
  strathliquor: "strath",
  vesselliquor: "vessel",
  tudorhouse: "tudor",
  coopworldofwhisky: "coop",

  kensingtonwinemarket: "kensingtonwinemarket",
  gullliquor: "gullliquor",
  legacyliquor: "legacyliquor",
  vintagespirits: "vintagespirits",
  kegncork: "kegncork",

  // short forms
  gull: "gullliquor",
  legacy: "legacyliquor",
  vintage: "vintagespirits",
  kwm: "kensingtonwinemarket",
};
// Pinned colors: hex color per store id.
const OVERRIDES = {
  strath: "#76B7FF",
  bsw: "#E9DF7A",
  kensingtonwinemarket: "#F2C200",
  vessel: "#FFFFFF",
  gullliquor: "#6B0F1A",
  kegncork: "#111111",
  legacyliquor: "#7B4A12",
  vintagespirits: "#E34A2C",
  craftcellars: "#E31B23",
  maltsandgrains: "#A67C52",

  // aliases (short forms pinned to the same color as their full id)
  gull: "#6B0F1A",
  legacy: "#7B4A12",
  vintage: "#E34A2C",
  kwm: "#F2C200",
};
// High-contrast qualitative palette (30 hex colors).
const PALETTE = [
  "#1F77B4",
  "#FF7F0E",
  "#2CA02C",
  "#D62728",
  "#9467BD",
  "#8C564B",
  "#E377C2",
  "#7F7F7F",
  "#17BECF",
  "#BCBD22",
  "#AEC7E8",
  "#FFBB78",
  "#98DF8A",
  "#FF9896",
  "#C5B0D5",
  "#C49C94",
  "#F7B6D2",
  "#C7C7C7",
  "#9EDAE5",
  "#DBDB8D",
  "#393B79",
  "#637939",
  "#8C6D31",
  "#843C39",
  "#7B4173",
  "#3182BD",
  "#31A354",
  "#756BB1",
  "#636363",
  "#E6550D",
];
/**
 * Remove duplicates from an iterable, keeping the first occurrence of each
 * value in its original position.
 *
 * @param {Iterable<*>} arr - Values to de-duplicate.
 * @returns {Array<*>} New array of distinct values.
 */
function uniq(arr) {
  return [...new Set(arr)];
}
/**
 * Resolve a store key or label to its canonical id.
 *
 * The input is normalized with normalizeId() and then mapped through
 * ALIASES; ids without an alias are returned as normalized.
 *
 * @param {*} s - Store key or label.
 * @returns {string} Canonical id (empty string for empty input).
 */
function canonicalId(s) {
  const id = normalizeId(s);
  // Own-property check: a plain `ALIASES[id]` would also find inherited
  // members, e.g. "constructor" would resolve to the Object function.
  return Object.prototype.hasOwnProperty.call(ALIASES, id) ? ALIASES[id] : id;
}
/**
 * Merge two lists of store keys/labels into one list of distinct canonical ids.
 *
 * Non-array arguments are treated as empty lists. Entries that canonicalize
 * to an empty id are dropped. Order follows first appearance (base first).
 *
 * @param {Array<*>} base - Primary list of store keys or labels.
 * @param {Array<*>} extra - Additional store keys or labels.
 * @returns {string[]} Distinct canonical ids.
 */
function buildUniverse(base, extra) {
  const a = Array.isArray(base) ? base : [];
  const b = Array.isArray(extra) ? extra : [];
  return uniq([...a, ...b].map(canonicalId).filter(Boolean));
}
// Keep mapping stable even if page sees a subset: palette colors are handed
// out over this fixed list (plus any extras), not just the stores on screen.
const DEFAULT_UNIVERSE = buildUniverse(Object.keys(OVERRIDES), [
  "bcl",
  "bsw",
  "coop",
  "craftcellars",
  "gullliquor",
  "gull",
  "kegncork",
  "kwm",
  "kensingtonwinemarket",
  "legacy",
  "legacyliquor",
  "maltsandgrains",
  "sierrasprings",
  "strath",
  "tudor",
  "vessel",
  "vintage",
  "vintagespirits",
  "willowpark",
  "arc",
]);
/**
 * Tell whether a color is exactly "#FFFFFF", ignoring case and surrounding
 * whitespace. Other spellings of white (e.g. "#FFF") do not match.
 *
 * @param {*} c - Color value; falsy values are treated as "".
 * @returns {boolean} True only for the six-digit white hex string.
 */
function isWhiteHex(c) {
  return (
    String(c || "")
      .trim()
      .toUpperCase() === "#FFFFFF"
  );
}
/**
 * Build a stable store-id -> color mapping.
 *
 * The universe is DEFAULT_UNIVERSE plus `extraUniverse`, canonicalized and
 * sorted so the assignment does not depend on input order. Stores with an
 * entry in OVERRIDES get that color; the rest receive PALETTE colors in
 * order, skipping colors already pinned as well as white and near-black.
 * When the palette runs out, assignment wraps around and colors repeat.
 *
 * @param {Array<*>} [extraUniverse=[]] - Extra store keys or labels to include.
 * @returns {Map<string, string>} Canonical store id -> hex color.
 */
export function buildStoreColorMap(extraUniverse = []) {
  const universe = buildUniverse(DEFAULT_UNIVERSE, extraUniverse).sort();
  const used = new Set();
  const map = new Map();

  // Pin overrides first. Own-property lookup so ids such as "constructor"
  // do not pick up inherited Object members as a "color".
  for (const id of universe) {
    const c = Object.prototype.hasOwnProperty.call(OVERRIDES, id) ? OVERRIDES[id] : undefined;
    if (c) {
      map.set(id, c);
      used.add(String(c).toUpperCase());
    }
  }

  // Filter palette to avoid collisions and keep white/black reserved
  const palette = PALETTE.map((c) => String(c).toUpperCase()).filter(
    (c) => !used.has(c) && c !== "#FFFFFF" && c !== "#111111",
  );

  let pi = 0;
  for (const id of universe) {
    if (map.has(id)) continue;
    if (pi >= palette.length) pi = 0; // wrap around once the palette is exhausted
    map.set(id, palette[pi++]);
  }

  return map;
}
/**
 * Look up the display color for a store.
 *
 * Resolution order: pinned color from OVERRIDES, then `colorMap` (if it has
 * a `get` method), then a deterministic PALETTE pick derived from the id.
 *
 * @param {*} storeKeyOrLabel - Store key or human-readable label.
 * @param {Map<string, string>} [colorMap] - Mapping such as the one returned
 *   by buildStoreColorMap().
 * @returns {string} Hex color; "#7F7F7F" when the id is empty.
 */
export function storeColor(storeKeyOrLabel, colorMap) {
  const id = canonicalId(storeKeyOrLabel);
  if (!id || typeof id !== "string") return "#7F7F7F";

  // Own-property lookup: `OVERRIDES["constructor"]` would otherwise return
  // the inherited Object function instead of a color.
  if (Object.prototype.hasOwnProperty.call(OVERRIDES, id)) return OVERRIDES[id];

  if (colorMap && typeof colorMap.get === "function") {
    const c = colorMap.get(id);
    if (c) return c;
  }

  // Last resort: cheap, deterministic hash of the id into the palette
  return PALETTE[(id.length + id.charCodeAt(0)) % PALETTE.length];
}
/**
 * Stroke width to use for a dataset of the given color.
 *
 * @param {*} color - Dataset color.
 * @returns {number} 2.5 for "#FFFFFF" (per isWhiteHex), otherwise 1.5.
 */
export function datasetStrokeWidth(color) {
  return isWhiteHex(color) ? 2.5 : 1.5;
}
/**
 * Point radius to use for a dataset of the given color.
 *
 * @param {*} color - Dataset color.
 * @returns {number} 2.8 for "#FFFFFF" (per isWhiteHex), otherwise 2.2.
 */
export function datasetPointRadius(color) {
  return isWhiteHex(color) ? 2.8 : 2.2;
}
/**
 * Constrain a number to the inclusive range [lo, hi].
 *
 * @param {number} v - Value to constrain.
 * @param {number} lo - Lower bound.
 * @param {number} hi - Upper bound.
 * @returns {number} `v` limited to the range.
 */
function clamp(v, lo, hi) {
  return Math.max(lo, Math.min(hi, v));
}
/**
 * Parse a six-digit hex color into its RGB channels.
 *
 * Accepts "RRGGBB" with an optional leading "#", in either case, with
 * optional surrounding whitespace. Anything else, including three-digit
 * shorthand and strings containing non-hex characters, yields null.
 *
 * @param {*} hex - Color string to parse.
 * @returns {{r: number, g: number, b: number} | null} Channels in 0..255,
 *   or null when the input is not a six-digit hex color.
 */
function hexToRgb(hex) {
  // Validate the digits up front: parseInt alone would turn "zzzzzz" into
  // NaN (rendered as black) and "12345g" into a truncated number.
  const m = /^#?([0-9a-f]{6})$/i.exec(String(hex).trim());
  if (!m) return null;
  const n = parseInt(m[1], 16);
  return {
    r: (n >> 16) & 255,
    g: (n >> 8) & 255,
    b: n & 255,
  };
}
/**
 * Format RGB channels as a lower-case "#rrggbb" string.
 *
 * Each channel is rounded to the nearest integer and clamped to 0..255
 * before being written as two hex digits.
 *
 * @param {{r: number, g: number, b: number}} rgb - Channel values.
 * @returns {string} Hex color string.
 */
function rgbToHex({ r, g, b }) {
  const h = (x) => clamp(Math.round(x), 0, 255).toString(16).padStart(2, "0");
  return `#${h(r)}${h(g)}${h(b)}`;
}
/**
 * Lighten a color by mixing it with white.
 *
 * @param {string} hex - Six-digit hex color; any value hexToRgb() cannot
 *   parse is returned unchanged.
 * @param {number} [amount=0.25] - Mix ratio from 0 to 1: 0 keeps the color,
 *   1 yields white. A non-finite amount is treated as 0.
 * @returns {string} Lightened color as "#rrggbb", or `hex` as given when it
 *   could not be parsed.
 */
export function lighten(hex, amount = 0.25) {
  const rgb = hexToRgb(hex);
  if (!rgb) return hex;
  // NaN/Infinity would otherwise propagate into the channels and produce
  // a string like "#NaNNaNNaN".
  const t = Number.isFinite(amount) ? amount : 0;
  return rgbToHex({
    r: rgb.r + (255 - rgb.r) * t,
    g: rgb.g + (255 - rgb.g) * t,
    b: rgb.b + (255 - rgb.b) * t,
  });
}

File diff suppressed because it is too large Load diff

View file

@ -9,151 +9,151 @@ const root = path.resolve(__dirname); // viz/
// Parent of this script's directory; used below to locate shared data files.
const projectRoot = path.resolve(__dirname, ".."); // repo root
// File extension -> Content-Type for statically served files.
// Extensions not listed here are served as application/octet-stream.
const MIME = {
  ".html": "text/html; charset=utf-8",
  ".js": "application/javascript; charset=utf-8",
  ".css": "text/css; charset=utf-8",
  ".json": "application/json; charset=utf-8",
  ".png": "image/png",
  ".jpg": "image/jpeg",
  ".jpeg": "image/jpeg",
  ".svg": "image/svg+xml",
};
/**
 * Map a request URL path to an absolute file path inside the served directory.
 *
 * The query string is dropped, percent-escapes are decoded, backslashes are
 * treated as forward slashes, and the result is resolved against `base`.
 *
 * @param {string} urlPath - Raw request path, optionally with a query string.
 * @param {string} [base=root] - Absolute directory that the result must stay
 *   inside; defaults to the served root.
 * @returns {string | null} Normalized absolute path, or null when the path
 *   has malformed percent-encoding, contains a NUL byte, or escapes `base`.
 */
function safePath(urlPath, base = root) {
  let decoded;
  try {
    decoded = decodeURIComponent(String(urlPath).split("?")[0]);
  } catch {
    // decodeURIComponent throws URIError on malformed escapes such as "%E0%A4%A"
    return null;
  }
  // fs calls reject paths containing NUL with a synchronous exception
  if (decoded.includes("\0")) return null;

  const rel = decoded.replace(/\\/g, "/");
  const norm = path.normalize(path.join(base, rel));

  // Compare against "base + separator": a bare startsWith(base) would also
  // accept sibling directories such as "<base>-secret".
  const prefix = base.endsWith(path.sep) ? base : base + path.sep;
  if (norm !== base && !norm.startsWith(prefix)) return null;
  return norm;
}
// Project-level file (shared by viz + report tooling)
const LINKS_FILE = path.join(projectRoot, "data", "sku_links.json");
/**
 * Load the SKU link metadata from LINKS_FILE.
 *
 * Never throws: when the file is missing, unreadable or not valid JSON an
 * empty structure is returned. A missing file is silent; any other failure
 * is reported on stderr, because a later writeMeta() would replace the
 * unreadable file with fresh content.
 *
 * @returns {{generatedAt: string, links: Array, ignores: Array}} Stored
 *   metadata; `links`/`ignores` default to empty arrays and `generatedAt`
 *   to the current time.
 */
function readMeta() {
  try {
    const raw = fs.readFileSync(LINKS_FILE, "utf8");
    const obj = JSON.parse(raw);
    const links = obj && Array.isArray(obj.links) ? obj.links : [];
    const ignores = obj && Array.isArray(obj.ignores) ? obj.ignores : [];
    return { generatedAt: obj?.generatedAt || new Date().toISOString(), links, ignores };
  } catch (e) {
    if (!e || e.code !== "ENOENT") {
      const reason = e && e.message ? e.message : String(e);
      process.stderr.write(`Warning: could not read ${LINKS_FILE}: ${reason}\n`);
    }
  }
  return { generatedAt: new Date().toISOString(), links: [], ignores: [] };
}
/**
 * Persist the SKU link metadata to LINKS_FILE as pretty-printed JSON.
 *
 * Stamps `obj.generatedAt` with the current time (the argument is mutated)
 * and creates the target directory if needed. The data is written to a
 * temporary sibling file and renamed into place so an interrupted write
 * does not leave a truncated LINKS_FILE behind.
 *
 * @param {{links: Array, ignores: Array, generatedAt?: string}} obj - Metadata to store.
 * @throws {Error} Any filesystem error from creating, writing or renaming.
 */
function writeMeta(obj) {
  obj.generatedAt = new Date().toISOString();
  fs.mkdirSync(path.dirname(LINKS_FILE), { recursive: true });

  const tmp = `${LINKS_FILE}.${process.pid}.tmp`;
  try {
    fs.writeFileSync(tmp, JSON.stringify(obj, null, 2) + "\n", "utf8");
    fs.renameSync(tmp, LINKS_FILE);
  } catch (e) {
    try {
      fs.unlinkSync(tmp);
    } catch {
      // Best-effort cleanup; the temp file may never have been created
    }
    throw e;
  }
}
/**
 * Send a complete response, defaulting to UTF-8 plain text.
 *
 * @param {object} res - Response object providing writeHead() and end().
 * @param {number} code - HTTP status code.
 * @param {string | Buffer} body - Response body.
 * @param {object} [headers] - Extra headers; these win over the default
 *   Content-Type when they set the same key.
 */
function send(res, code, body, headers) {
  res.writeHead(code, { "Content-Type": "text/plain; charset=utf-8", ...(headers || {}) });
  res.end(body);
}
/**
 * Send a complete JSON response.
 *
 * @param {object} res - Response object providing writeHead() and end().
 * @param {number} code - HTTP status code.
 * @param {*} obj - Value serialized with JSON.stringify as the body.
 */
function sendJson(res, code, obj) {
  res.writeHead(code, { "Content-Type": "application/json; charset=utf-8" });
  res.end(JSON.stringify(obj));
}
// Upper bound on accepted POST bodies, in characters. The payloads handled
// here are tiny JSON objects.
const MAX_BODY_LENGTH = 1024 * 1024;

/**
 * Collect a request body as UTF-8 text, parse it as JSON and pass the result
 * to `handle`. Replies 413 when the body exceeds MAX_BODY_LENGTH, and 400
 * when parsing or `handle` throws.
 *
 * @param {object} req - Incoming request stream.
 * @param {object} res - Response used for error replies.
 * @param {function(*): void} handle - Receives the parsed body (`{}` when empty).
 */
function readJsonBody(req, res, handle) {
  let body = "";
  let rejected = false;
  // Decode in the stream so multi-byte characters split across chunks survive
  req.setEncoding("utf8");
  req.on("data", (chunk) => {
    if (rejected) return;
    body += chunk;
    if (body.length > MAX_BODY_LENGTH) {
      rejected = true;
      body = "";
      sendJson(res, 413, { ok: false, error: "request body too large" });
    }
  });
  req.on("end", () => {
    if (rejected) return;
    try {
      handle(JSON.parse(body || "{}"));
    } catch (e) {
      sendJson(res, 400, { ok: false, error: String(e && e.message ? e.message : e) });
    }
  });
}

// Request handler: two small JSON endpoints under /__stviz/, everything else
// is served as a static file from the viz directory.
const server = http.createServer((req, res) => {
  const u = req.url || "/";
  let url;
  try {
    url = new URL(u, "http://127.0.0.1");
  } catch {
    // Request targets the URL parser rejects must not take the process down
    return send(res, 400, "Bad request URL");
  }

  // Local API: read/write sku links + ignore pairs on disk (only exists when using this local server)
  if (url.pathname === "/__stviz/sku-links") {
    if (req.method === "GET") {
      const obj = readMeta();
      return sendJson(res, 200, { ok: true, count: obj.links.length, links: obj.links, ignores: obj.ignores });
    }

    if (req.method === "POST") {
      readJsonBody(req, res, (inp) => {
        const fromSku = String(inp.fromSku || "").trim();
        const toSku = String(inp.toSku || "").trim();
        if (!fromSku || !toSku) return sendJson(res, 400, { ok: false, error: "fromSku/toSku required" });

        const obj = readMeta();
        obj.links.push({ fromSku, toSku, createdAt: new Date().toISOString() });
        writeMeta(obj);

        return sendJson(res, 200, { ok: true, count: obj.links.length, file: "data/sku_links.json" });
      });
      return;
    }

    return send(res, 405, "Method Not Allowed");
  }

  if (url.pathname === "/__stviz/sku-ignores") {
    if (req.method === "GET") {
      const obj = readMeta();
      return sendJson(res, 200, { ok: true, count: obj.ignores.length, ignores: obj.ignores });
    }

    if (req.method === "POST") {
      readJsonBody(req, res, (inp) => {
        const skuA = String(inp.skuA || "").trim();
        const skuB = String(inp.skuB || "").trim();
        if (!skuA || !skuB) return sendJson(res, 400, { ok: false, error: "skuA/skuB required" });
        if (skuA === skuB) return sendJson(res, 400, { ok: false, error: "skuA and skuB must differ" });

        const obj = readMeta();
        obj.ignores.push({ skuA, skuB, createdAt: new Date().toISOString() });
        writeMeta(obj);

        return sendJson(res, 200, { ok: true, count: obj.ignores.length, file: "data/sku_links.json" });
      });
      return;
    }

    return send(res, 405, "Method Not Allowed");
  }

  // Static
  let file;
  try {
    file = safePath(u === "/" ? "/index.html" : u);
  } catch {
    file = null; // e.g. malformed percent-encoding in the path
  }
  // fs rejects NUL-containing paths with a synchronous exception
  if (!file || file.includes("\0")) {
    res.writeHead(400);
    res.end("Bad path");
    return;
  }

  try {
    if (fs.statSync(file).isDirectory()) {
      file = path.join(file, "index.html");
    }
  } catch {
    // Missing or unreadable path: fall through, readFile below answers 404
  }

  fs.readFile(file, (err, buf) => {
    if (err) {
      res.writeHead(404);
      res.end("Not found");
      return;
    }
    // MIME keys are lower-case; match ".PNG" and friends too
    const ext = path.extname(file).toLowerCase();
    res.writeHead(200, { "Content-Type": MIME[ext] || "application/octet-stream" });
    res.end(buf);
  });
});
// Listen on the loopback interface only; PORT overrides the default 8080.
const port = Number(process.env.PORT || 8080);
server.listen(port, "127.0.0.1", () => {
  process.stdout.write(`Serving ${root} on http://127.0.0.1:${port}\n`);
  process.stdout.write(`SKU links file: ${LINKS_FILE}\n`);
});