UX Improvements

This commit is contained in:
Brennan Wilkes (Text Groove) 2026-02-10 16:45:22 -08:00
parent e9f8f805c5
commit 7a33d51c90
73 changed files with 13094 additions and 13094 deletions

View file

@ -4,7 +4,7 @@
const { main } = require("../src/main");
// Run the CLI entry point. On rejection, print the stack trace (or the
// stringified error when there is no stack) and flag the process as failed
// via exitCode, without forcing an immediate exit.
main().catch((e) => {
  const msg = e && e.stack ? e.stack : String(e);
  console.error(msg);
  process.exitCode = 1;
});

View file

@ -7,327 +7,327 @@ const { setTimeout: setTimeoutCb, clearTimeout } = require("timers");
/* ---------------- Errors ---------------- */
/**
 * Error type used to signal that a failed request may be attempted again.
 * `isRetryable()` recognises it via `instanceof`.
 */
class RetryableError extends Error {
  /**
   * @param {string} msg - Human-readable failure description.
   */
  constructor(msg) {
    super(msg);
    this.name = "RetryableError";
  }
}
/**
 * Decides whether an error is worth retrying.
 *
 * Retryable cases: an abort (`name === "AbortError"`), a `RetryableError`,
 * or any error whose message matches a known transient network failure.
 *
 * @param {unknown} e - The thrown value.
 * @returns {boolean} True when the request may be retried.
 */
function isRetryable(e) {
  if (!e) return false;
  if (e.name === "AbortError") return true;
  if (e instanceof RetryableError) return true;
  // Fall back to message sniffing; non-Error values are stringified.
  const msg = String(e.message || e);
  return /ECONNRESET|ENOTFOUND|EAI_AGAIN|ETIMEDOUT|socket hang up|fetch failed/i.test(msg);
}
/* ---------------- Backoff ---------------- */
/**
 * Computes the delay before the next retry: exponential growth from 500ms,
 * capped at 12000ms, plus 0-399ms of random jitter.
 *
 * @param {number} attempt - Zero-based attempt index.
 * @returns {number} Delay in milliseconds.
 */
function backoffMs(attempt) {
  const base = Math.min(12000, 500 * Math.pow(2, attempt));
  const jitter = Math.floor(Math.random() * 400);
  return base + jitter;
}
/**
 * Reads the `retry-after` header of a response and converts it to a delay.
 *
 * The header value is first tried as a number of seconds, then as a date
 * string. Missing, unparseable, or already-elapsed values yield 0.
 *
 * @param {{ headers?: { get?: (name: string) => (string|null) } }} res - Response-like object.
 * @returns {number} Non-negative delay in milliseconds.
 */
function retryAfterMs(res) {
  const ra = res?.headers?.get ? res.headers.get("retry-after") : null;
  if (!ra) return 0;

  // Delta-seconds form.
  const secs = Number(String(ra).trim());
  if (Number.isFinite(secs)) return Math.max(0, secs * 1000);

  // Date form: wait until that instant.
  const dt = Date.parse(String(ra));
  if (Number.isFinite(dt)) return Math.max(0, dt - Date.now());

  return 0;
}
/* ---------------- Utils ---------------- */
/**
 * Reads a response body as text, returning "" instead of throwing when the
 * read fails.
 *
 * @param {{ text: () => Promise<string> }} res - Response-like object.
 * @returns {Promise<string>} Body text, or "" on any failure.
 */
async function safeText(res) {
  try {
    return await res.text();
  } catch {
    // Best effort: callers only use this for diagnostic snippets.
    return "";
  }
}
/**
 * Extracts the `host` component (hostname plus any explicit port) of a URL.
 *
 * @param {string} u - Absolute URL.
 * @returns {string} The host, or "" when `u` cannot be parsed.
 */
function hostFromUrl(u) {
  try {
    return new URL(u).host || "";
  } catch {
    return "";
  }
}
/* ---------------- Cookies (simple jar) ---------------- */
// host -> Map(cookieName -> "name=value")
/**
 * Creates a minimal in-memory cookie jar keyed by host (as returned by
 * `hostFromUrl`).
 *
 * Only the leading `name=value` pair of each Set-Cookie line is kept; every
 * attribute after the first `;` is ignored.
 *
 * @returns {{
 *   storeFromResponse: (url: string, res: object) => void,
 *   cookieHeaderFor: (url: string) => string
 * }}
 */
function createCookieJar() {
  // host -> Map(cookieName -> "name=value")
  const jar = new Map();

  // Parses one Set-Cookie line into { name, pair }, or null when malformed.
  function parseSetCookieLine(line) {
    const s = String(line || "").trim();
    if (!s) return null;
    const first = s.split(";")[0] || "";
    const eq = first.indexOf("=");
    // eq === 0 means an empty cookie name; eq === -1 means no "=" at all.
    if (eq <= 0) return null;
    const name = first.slice(0, eq).trim();
    const value = first.slice(eq + 1).trim();
    if (!name) return null;
    return { name, pair: `${name}=${value}` };
  }

  // Returns every Set-Cookie line of a headers object as an array of strings.
  function getSetCookieArray(headers) {
    if (headers && typeof headers.getSetCookie === "function") {
      try {
        const arr = headers.getSetCookie();
        return Array.isArray(arr) ? arr : [];
      } catch {
        // Deliberate fall-through to the combined-header fallback below.
      }
    }

    const one = headers?.get ? headers.get("set-cookie") : null;
    if (!one) return [];

    // Split a combined header only on commas that are followed by a
    // "name=" token, so commas inside attribute values are left alone.
    return String(one)
      .split(/,(?=[^;,]*=)/g)
      .map((x) => x.trim())
      .filter(Boolean);
  }

  // Records the cookies set by `res` under the host of the final response
  // URL (falling back to the requested URL).
  function storeFromResponse(url, res) {
    const host = hostFromUrl(res?.url || url);
    if (!host) return;

    const lines = getSetCookieArray(res?.headers);
    if (!lines.length) return;

    let m = jar.get(host);
    if (!m) {
      m = new Map();
      jar.set(host, m);
    }

    for (const line of lines) {
      const c = parseSetCookieLine(line);
      if (c) m.set(c.name, c.pair);
    }
  }

  // Builds the Cookie request header value for `url`, or "" when none stored.
  function cookieHeaderFor(url) {
    const host = hostFromUrl(url);
    if (!host) return "";
    const m = jar.get(host);
    if (!m || m.size === 0) return "";
    return [...m.values()].join("; ");
  }

  return { storeFromResponse, cookieHeaderFor };
}
/* ---------------- HTTP client ---------------- */
/**
 * Creates an HTTP client around the global `fetch()` with retries, per-host
 * pacing, a per-host concurrency clamp and a simple cookie jar.
 *
 * @param {object} options
 * @param {number} options.maxRetries - Extra attempts after the first one.
 * @param {number} options.timeoutMs - Abort the fetch after this many ms.
 * @param {string} options.defaultUa - User-Agent used when a call passes none.
 * @param {object} [options.logger] - Optional logger; `dbg` and `warn` are used when present.
 * @returns {{
 *   fetchTextWithRetry: Function,
 *   fetchJsonWithRetry: Function,
 *   inflightStr: () => string
 * }}
 */
function createHttpClient({ maxRetries, timeoutMs, defaultUa, logger }) {
  let inflight = 0;
  let reqSeq = 0;

  const cookieJar = createCookieJar();

  // host -> epoch ms when next request is allowed
  const hostNextOkAt = new Map();

  // Conservative pacing defaults (slow > blocked)
  const minHostIntervalMs = 2500;

  // Per-host inflight clamp (prevents bursts when global concurrency is high)
  const hostInflight = new Map();
  const maxHostInflight = 1;

  function inflightStr() {
    return `inflight=${inflight}`;
  }

  // Waits (polling every 50ms) until the host has a free slot, takes it, and
  // returns the function that releases it again.
  async function acquireHost(url) {
    const host = hostFromUrl(url);
    if (!host) return () => {};

    while (true) {
      const cur = hostInflight.get(host) || 0;
      if (cur < maxHostInflight) {
        hostInflight.set(host, cur + 1);
        return () => {
          const n = (hostInflight.get(host) || 1) - 1;
          if (n <= 0) hostInflight.delete(host);
          else hostInflight.set(host, n);
        };
      }
      await sleep(50);
    }
  }

  // Pre-pacing reservation: reserve the next slot BEFORE the fetch is sent
  async function throttleHost(url) {
    const host = hostFromUrl(url);
    if (!host) return;

    while (true) {
      const now = Date.now();
      const next = hostNextOkAt.get(host) || 0;
      const wait = next - now;

      if (wait > 0) {
        logger?.dbg?.(`THROTTLE host=${host} wait=${wait}ms`);
        await sleep(wait);
        // Re-check: the cooldown may have been extended while sleeping.
        continue;
      }

      // Reserve immediately to prevent concurrent pass-through
      hostNextOkAt.set(host, now + minHostIntervalMs);
      return;
    }
  }

  // Pushes the host's next-allowed time out by the base interval plus
  // `extraDelayMs`.
  function noteHost(url, extraDelayMs = 0) {
    const host = hostFromUrl(url);
    if (!host) return;

    const now = Date.now();
    const current = hostNextOkAt.get(host) || 0;

    // Extend (never shorten) any existing cooldown
    const target = now + minHostIntervalMs + Math.max(0, extraDelayMs);
    hostNextOkAt.set(host, Math.max(current, target));

    logger?.dbg?.(`HOST-PACE host=${host} nextOkIn=${Math.max(0, (hostNextOkAt.get(host) || 0) - Date.now())}ms`);
  }

  // Core request loop. Resolves with { text | json, ms, bytes, status, finalUrl }.
  // Throws the last error when it is not retryable or attempts are exhausted.
  async function fetchWithRetry(
    url,
    tag,
    ua,
    { mode = "text", method = "GET", headers = {}, body = null, cookies = true } = {},
  ) {
    for (let attempt = 0; attempt <= maxRetries; attempt++) {
      const reqId = ++reqSeq;
      const start = Date.now();

      inflight++;
      logger?.dbg?.(
        `REQ#${reqId} START ${tag} attempt=${attempt + 1}/${maxRetries + 1} ${url} (${inflightStr()})`,
      );

      const releaseHost = await acquireHost(url);

      try {
        await throttleHost(url);

        const ctrl = new AbortController();
        const t = setTimeoutCb(() => ctrl.abort(), timeoutMs);

        // A caller-supplied Cookie header always wins over the jar.
        const cookieHdr =
          cookies && !("Cookie" in headers) && !("cookie" in headers) ? cookieJar.cookieHeaderFor(url) : "";

        // NOTE(review): the abort timer is cleared as soon as fetch() settles,
        // so the body reads further down are not covered by timeoutMs.
        const res = await fetch(url, {
          method,
          redirect: "follow",
          headers: {
            "user-agent": ua || defaultUa,
            "accept-language": "en-US,en;q=0.9",
            ...(mode === "text"
              ? { accept: "text/html,application/xhtml+xml", "cache-control": "no-cache" }
              : { accept: "application/json, text/plain, */*" }),
            ...(cookieHdr ? { cookie: cookieHdr } : {}),
            ...headers,
          },
          body,
          signal: ctrl.signal,
        }).finally(() => clearTimeout(t));

        const status = res.status;
        const finalUrl = res.url || url;
        const elapsed = Date.now() - start;

        // Always pace the host a bit after any response
        noteHost(finalUrl);
        if (cookies) cookieJar.storeFromResponse(url, res);

        logger?.dbg?.(`REQ#${reqId} HTTP ${status} ${tag} ms=${elapsed} finalUrl=${finalUrl}`);

        if (status === 429) {
          let raMs = retryAfterMs(res);

          // If no Retry-After header, enforce a real cooldown (Shopify often omits it)
          if (raMs <= 0) raMs = 15000 + Math.floor(Math.random() * 5000);

          noteHost(finalUrl, raMs);
          logger?.dbg?.(`REQ#${reqId} 429 retryAfterMs=${raMs} host=${hostFromUrl(finalUrl)}`);
          throw new RetryableError("HTTP 429");
        }

        if (status === 408 || (status >= 500 && status <= 599)) {
          throw new RetryableError(`HTTP ${status}`);
        }

        // Remaining 4xx responses are final; include a short body excerpt.
        if (status >= 400) {
          const bodyTxt = await safeText(res);
          throw new Error(`HTTP ${status} bodyHead=${String(bodyTxt).slice(0, 160).replace(/\s+/g, " ")}`);
        }

        if (mode === "json") {
          const txt = await res.text();
          let json;
          try {
            json = JSON.parse(txt);
          } catch (e) {
            // Unparseable JSON is treated as retryable.
            throw new RetryableError(`Bad JSON: ${e?.message || e}`);
          }
          return { json, ms: elapsed, bytes: txt.length, status, finalUrl };
        }

        const text = await res.text();
        // Bodies under 200 characters are rejected as retryable.
        if (!text || text.length < 200) {
          throw new RetryableError(`Short HTML bytes=${text.length}`);
        }

        return { text, ms: elapsed, bytes: text.length, status, finalUrl };
      } catch (e) {
        const retryable = isRetryable(e);
        const host = hostFromUrl(url);
        const nextOk = hostNextOkAt.get(host) || 0;

        logger?.dbg?.(
          `REQ#${reqId} FAIL ${tag} retryable=${retryable} err=${e?.message || e} host=${host} nextOkIn=${Math.max(
            0,
            nextOk - Date.now(),
          )}ms`,
        );

        if (!retryable || attempt === maxRetries) throw e;

        // Wait at least as long as the host cooldown (e.g. after a 429).
        let delay = backoffMs(attempt);
        if (nextOk > Date.now()) delay = Math.max(delay, nextOk - Date.now());

        logger?.warn?.(`Request failed, retrying in ${delay}ms (${attempt + 1}/${maxRetries})`);
        await sleep(delay);
      } finally {
        // Runs after the backoff sleep too, so the host slot stays held
        // while this request waits to retry.
        releaseHost();
        inflight--;
        logger?.dbg?.(`REQ#${reqId} END ${tag} (${inflightStr()})`);
      }
    }

    throw new Error("unreachable");
  }

  function fetchTextWithRetry(url, tag, ua, opts) {
    return fetchWithRetry(url, tag, ua, { mode: "text", ...(opts || {}) });
  }

  function fetchJsonWithRetry(url, tag, ua, opts) {
    return fetchWithRetry(url, tag, ua, { mode: "json", ...(opts || {}) });
  }

  return { fetchTextWithRetry, fetchJsonWithRetry, inflightStr };
}
module.exports = { createHttpClient, RetryableError };

View file

@ -4,55 +4,55 @@ const { C, color } = require("../utils/ansi");
const { ts } = require("../utils/time");
/**
 * Creates a console logger with timestamped, optionally coloured prefixes.
 *
 * Colour is applied only when it was requested AND stdout is a TTY.
 * `info` and `dbg` print only when `debug` is true. `err` writes to stderr;
 * every other level writes to stdout.
 *
 * @param {object} [options]
 * @param {boolean} [options.debug=false] - Enable `info`/`dbg` output.
 * @param {boolean} [options.colorize=true] - Request coloured output.
 * @returns {object} Logger with `ok`, `warn`, `err`, `info`, `dbg`, the
 *   string helpers `dim`, `bold`, `color`, and the `debug`, `isTTY`,
 *   `colorize`, `C` fields.
 */
function createLogger({ debug = false, colorize: wantColor = true } = {}) {
  const isTTY = Boolean(process.stdout && process.stdout.isTTY);
  const enabled = Boolean(wantColor && isTTY);

  function ok(msg) {
    console.log(color(`[OK ${ts()}] `, C.green, enabled) + String(msg));
  }

  function warn(msg) {
    console.log(color(`[WARN ${ts()}] `, C.yellow, enabled) + String(msg));
  }

  function err(msg) {
    console.error(color(`[ERR ${ts()}] `, C.red, enabled) + String(msg));
  }

  function info(msg) {
    if (debug) console.log(color(`[INFO ${ts()}] `, C.cyan, enabled) + String(msg));
  }

  function dbg(msg) {
    if (debug) console.log(color(`[DEBUG ${ts()}] `, C.gray, enabled) + String(msg));
  }

  function dim(s) {
    return color(s, C.dim, enabled);
  }

  function bold(s) {
    return color(s, C.bold, enabled);
  }

  // Exposed as `color`; named differently here to avoid shadowing the import.
  function paint(s, code) {
    return color(s, code, enabled);
  }

  return {
    debug,
    isTTY,
    colorize: enabled,
    C,
    ok,
    warn,
    err,
    info,
    dbg,
    dim,
    bold,
    color: paint,
  };
}
module.exports = { createLogger };

View file

@ -16,189 +16,169 @@ const { runAllStores } = require("./tracker/run_all");
const { renderFinalReport } = require("./tracker/report");
const { ensureDir } = require("./tracker/db");
// User-Agent sent with requests unless a caller supplies its own.
const DEFAULT_UA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0 Safari/537.36";
/**
 * Resolves a user-supplied directory to an absolute path.
 *
 * @param {string|undefined|null} p - Directory from a flag or env var.
 * @param {string} fallback - Returned unchanged when `p` is blank.
 * @returns {string} `p` if already absolute, otherwise `p` joined onto the
 *   current working directory.
 */
function resolveDir(p, fallback) {
  const v = String(p || "").trim();
  if (!v) return fallback;
  return path.isAbsolute(v) ? v : path.join(process.cwd(), v);
}
/**
 * Looks up the value of a command-line flag.
 *
 * Supports:
 *   --stores=a,b
 *   --stores a,b
 * The space-separated form is checked first and wins when both are present.
 *
 * @param {string[]} argv - Arguments (without node/script entries).
 * @param {string} flag - Flag name including dashes, e.g. "--stores".
 * @returns {string} The value, or "" when the flag is absent or has no value.
 */
function getFlagValue(argv, flag) {
  const idx = argv.indexOf(flag);
  if (idx >= 0) return argv[idx + 1] || "";

  const pref = `${flag}=`;
  for (const a of argv) {
    if (a.startsWith(pref)) return a.slice(pref.length);
  }
  return "";
}
/**
 * Normalizes a token for loose comparison: lower-cased with every character
 * outside [a-z0-9] removed.
 *
 * @param {unknown} s - Value to normalize; falsy values become "".
 * @returns {string} Normalized token.
 */
function normToken(s) {
  return String(s || "")
    .toLowerCase()
    .trim()
    .replace(/[^a-z0-9]+/g, "");
}
/**
 * Splits a comma-separated store list into trimmed, non-empty entries.
 *
 * @param {unknown} raw - Raw filter string (flag or env value).
 * @returns {string[]} Entries in input order; [] when `raw` is blank.
 */
function parseStoresFilter(raw) {
  const v = String(raw || "").trim();
  if (!v) return [];
  return v
    .split(",")
    .map((x) => x.trim())
    .filter(Boolean);
}
/**
 * Restricts `stores` to the ones named in a comma-separated filter.
 *
 * Each requested token is compared, after `normToken` normalization, against
 * every store's key, name and host. Tokens that normalize to "" are ignored.
 *
 * @param {Array<{key: string, name?: string, host?: string}>} stores - All known stores.
 * @param {string} wantedListRaw - Raw filter; blank means "no filtering".
 * @returns {Array<object>} Matching stores in request order, de-duplicated by key.
 * @throws {Error} When a requested token matches no store.
 */
function filterStoresOrThrow(stores, wantedListRaw) {
  const wanted = parseStoresFilter(wantedListRaw);
  if (!wanted.length) return stores;

  // Normalize each store's identifiers once instead of once per token.
  const indexed = stores.map((s) => ({
    store: s,
    tokens: new Set([s.key, s.name, s.host].map(normToken).filter(Boolean)),
  }));

  const matched = [];
  const missing = [];

  for (const w of wanted) {
    // Normalize per token so each raw name stays paired with its own
    // normalized form, even when an earlier token normalizes to "".
    const wn = normToken(w);
    if (!wn) continue;

    // match against key/name/host (normalized)
    const hit = indexed.find((entry) => entry.tokens.has(wn));

    if (hit) matched.push(hit.store);
    else missing.push(w);
  }

  if (missing.length) {
    const avail = stores.map((s) => `${s.key}${s.name ? ` (${s.name})` : ""}`).join(", ");
    throw new Error(`Unknown store(s) in --stores: ${missing.join(", ")}\nAvailable: ${avail}`);
  }

  // de-dupe by key (in case name+key both matched)
  const uniq = [];
  const seen = new Set();
  for (const s of matched) {
    if (seen.has(s.key)) continue;
    seen.add(s.key);
    uniq.push(s);
  }
  return uniq;
}
/**
 * CLI entry point: builds the config from flags and environment variables,
 * runs the selected stores, prints the final report to stdout and, when
 * something changed, saves a plain-text copy in the report directory.
 *
 * Sets `process.exitCode = 3` when the run produced no meaningful changes.
 *
 * @returns {Promise<void>}
 * @throws {Error} When global `fetch` is unavailable or `--stores` names an
 *   unknown store.
 */
async function main() {
  if (typeof fetch !== "function") {
    throw new Error("Global fetch() not found. Please use Node.js 18+ (or newer). ");
  }

  const argv = process.argv.slice(2);
  const args = parseArgs(argv);

  const logger = createLogger({ debug: args.debug, colorize: true });

  // Flags win over environment variables; env values are clamped to a range.
  const config = {
    debug: args.debug,
    maxPages: args.maxPages,
    concurrency: args.concurrency ?? clampInt(process.env.CONCURRENCY, 6, 1, 64),
    staggerMs: args.staggerMs ?? clampInt(process.env.STAGGER_MS, 150, 0, 5000),
    maxRetries: clampInt(process.env.MAX_RETRIES, 6, 0, 20),
    timeoutMs: clampInt(process.env.TIMEOUT_MS, 25000, 1000, 120000),
    discoveryGuess: args.guess ?? clampInt(process.env.DISCOVERY_GUESS, 20, 1, 5000),
    discoveryStep: args.step ?? clampInt(process.env.DISCOVERY_STEP, 5, 1, 500),
    categoryConcurrency: clampInt(process.env.CATEGORY_CONCURRENCY, 5, 1, 64),
    defaultUa: DEFAULT_UA,
    defaultParseProducts: parseProductsSierra,
    dbDir: resolveDir(args.dataDir ?? process.env.DATA_DIR, path.join(process.cwd(), "data", "db")),
    reportDir: resolveDir(args.reportDir ?? process.env.REPORT_DIR, path.join(process.cwd(), "reports")),
  };

  ensureDir(config.dbDir);
  ensureDir(config.reportDir);

  const http = createHttpClient({
    maxRetries: config.maxRetries,
    timeoutMs: config.timeoutMs,
    defaultUa: config.defaultUa,
    logger,
  });
  const stores = createStores({ defaultUa: config.defaultUa });

  // --stores flag first, then the STORES environment variable.
  const storesFilterRaw = getFlagValue(argv, "--stores") || String(process.env.STORES || "").trim();

  const storesToRun = filterStoresOrThrow(stores, storesFilterRaw);
  if (storesFilterRaw) {
    logger.info(`Stores filter: ${storesToRun.map((s) => s.key).join(", ")}`);
  }

  const report = await runAllStores(storesToRun, { config, logger, http });

  // Any non-zero change counter makes the run worth persisting.
  const meaningful =
    (report?.totals?.newCount || 0) +
      (report?.totals?.updatedCount || 0) +
      (report?.totals?.removedCount || 0) +
      (report?.totals?.restoredCount || 0) +
      (report?.totals?.metaChangedCount || 0) >
    0;

  const reportTextColor = renderFinalReport(report, {
    dbDir: config.dbDir,
    colorize: logger.colorize,
  });
  process.stdout.write(reportTextColor);

  if (!meaningful) {
    logger.ok("No meaningful changes; skipping report write.");
    process.exitCode = 3; // special "no-op" code
    return;
  }

  // The saved copy is rendered without colour.
  const reportTextPlain = renderFinalReport(report, {
    dbDir: config.dbDir,
    colorize: false,
  });
  const file = path.join(config.reportDir, `${isoTimestampFileSafe(new Date())}.txt`);
  try {
    fs.writeFileSync(file, reportTextPlain, "utf8");
    logger.ok(`Report saved: ${logger.dim(file)}`);
  } catch (e) {
    // A failed save is reported but does not fail the run.
    logger.warn(`Report save failed: ${e?.message || e}`);
  }
}
module.exports = { main };
// Run main() exactly once, and only when this file is executed directly
// rather than required by another module.
if (require.main === module) {
  main().catch((e) => {
    const msg = e && e.stack ? e.stack : String(e);
    // no logger here; keep simple
    console.error(msg);
    process.exitCode = 1;
  });
}

View file

@ -11,368 +11,374 @@ const { buildDbObject, writeJsonAtomic } = require("../tracker/db");
const { addCategoryResultToReport } = require("../tracker/report");
/**
 * Formats a byte count with `humanBytes`, left-padded with spaces to 8 columns.
 *
 * @param {number} bytes - Byte count; falsy values are treated as 0.
 * @returns {string} Padded size string.
 */
function kbStr(bytes) {
  return humanBytes(bytes || 0).padStart(8, " ");
}
/**
 * Formats a duration in milliseconds as seconds, left-padded to 7 columns.
 * Durations that round below 10s keep one decimal ("1.5s"); longer ones are
 * rounded to whole seconds ("12s"). Non-finite input is shown as 0.0s.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Padded duration string.
 */
function secStr(ms) {
  const s = Number.isFinite(ms) ? ms / 1000 : 0;
  const tenths = Math.round(s * 10) / 10;
  const out = tenths < 10 ? `${tenths.toFixed(1)}s` : `${Math.round(s)}s`;
  return out.padStart(7, " ");
}
/**
 * Formats a page counter as "i/total", padding `i` to the width of `total`.
 *
 * @param {number} i - Current page number.
 * @param {number} total - Total number of pages.
 * @returns {string} Counter such as " 3/12".
 */
function pageStr(i, total) {
  const leftW = String(total).length;
  return `${padLeft(i, leftW)}/${total}`;
}
/**
 * Formats progress as a whole-number percentage (rounded down), padded to
 * 3 columns before the "%" sign. A zero or missing total yields 0%.
 *
 * @param {number} done - Completed units.
 * @param {number} total - Total units.
 * @returns {string} Percentage such as " 33%".
 */
function pctStr(done, total) {
  const pct = total ? Math.floor((done / total) * 100) : 0;
  return `${padLeft(pct, 3)}%`;
}
/**
 * Parses a loosely formatted number such as "$1,234.50".
 *
 * Every character other than digits and "." is dropped before parsing, so
 * a leading minus sign is discarded as well.
 *
 * @param {unknown} v - Value to parse.
 * @returns {number} The parsed number, or NaN when `v` is blank, contains no
 *   digits, or is otherwise unparseable.
 */
function toNum(v) {
  const s = String(v ?? "").trim();
  if (!s) return NaN;
  const digits = s.replace(/[^0-9.]/g, "");
  // Number("") is 0, so purely non-numeric text ("N/A") must be rejected
  // explicitly instead of being reported as a real zero.
  if (!digits) return NaN;
  const n = Number(digits);
  return Number.isFinite(n) ? n : NaN;
}
/**
 * Formats a price as "$<amount>" with two decimals.
 *
 * @param {unknown} v - Raw price, parsed with `toNum`.
 * @returns {string} Formatted price, or "" when the value is not a positive number.
 */
function money(v) {
  const n = toNum(v);
  if (!Number.isFinite(n) || n <= 0) return "";
  return `$${n.toFixed(2)}`;
}
/**
 * Picks the price to display for a product from its `sale_price`,
 * `net_price` and `regular_price` fields.
 *
 * @param {object} p - Product record; may be null/undefined.
 * @returns {string} Price formatted by `money`, or "" when none is usable.
 */
function pickBestPrice(p) {
  const reg = toNum(p?.regular_price);
  const sale = toNum(p?.sale_price);
  const net = toNum(p?.net_price);

  // Prefer sale when it looks real (is_sale OR sale < regular), otherwise net, otherwise regular.
  if (Number.isFinite(sale) && sale > 0) {
    if (p?.is_sale === true) return money(sale);
    if (Number.isFinite(reg) && reg > 0 && sale < reg) return money(sale);
    // Some feeds put the current price in sale_price even without flags:
    if (!Number.isFinite(net) || net <= 0 || sale <= net) return money(sale);
  }

  if (Number.isFinite(net) && net > 0) return money(net);
  if (Number.isFinite(reg) && reg > 0) return money(reg);

  return "";
}
/**
 * Turns a possibly relative URL into an absolute one.
 *
 * @param {unknown} raw - URL as found in the feed.
 * @param {string} base - Base URL used to resolve relative paths.
 * @returns {string} Absolute URL; "" for blank input; the trimmed input
 *   unchanged when it cannot be resolved against `base`.
 */
function normAbsUrl(raw, base) {
  const s = String(raw || "").trim();
  if (!s) return "";
  // Protocol-relative URLs are assumed to be https.
  if (s.startsWith("//")) return `https:${s}`;
  if (/^https?:\/\//i.test(s)) return s;
  try {
    // Leading slashes are stripped, so the path resolves relative to the
    // base's directory instead of the origin root.
    return new URL(s.replace(/^\/+/, ""), base).toString();
  } catch {
    return s;
  }
}
/**
 * Reports whether a product should be treated as in stock.
 *
 * @param {object} p - Product record; may be null/undefined.
 * @returns {boolean} True only when the product looks purchasable.
 */
function isInStock(p) {
  // Keep this strict: user asked "only show in stock items".
  // available_for_sale is the strongest signal; on_hand is a good secondary signal.
  if (p && p.available_for_sale === false) return false;

  // NOTE(review): Number(null) and Number("") are 0, so a null or empty
  // on_hand counts as "none on hand" rather than "missing" — confirm intended.
  const onHand = Number(p?.on_hand);
  if (Number.isFinite(onHand)) return onHand > 0;

  // If on_hand is missing, fall back to available_for_sale truthiness.
  return Boolean(p?.available_for_sale);
}
/**
 * Maps an image reference onto the public image CDN
 * (https://s.barnetnetwork.com/img/m/).
 *
 * @param {unknown} raw - Image URL or path from the feed.
 * @returns {string} CDN URL, or "" for blank input and data: URIs.
 */
function arcNormalizeImg(raw) {
  const cdnPrefix = "https://s.barnetnetwork.com/img/m/";

  const input = String(raw || "").trim();
  if (input === "") return "";

  // Already a public CDN URL: pass through untouched.
  const alreadyPublic = /^https?:\/\/s\.barnetnetwork\.com\/img\/m\//i.test(input);
  if (alreadyPublic) return input;

  // Drop any scheme+host, then leading slashes, to get a CDN-relative path.
  const relPath = input.replace(/^https?:\/\/[^/]+/i, "").replace(/^\/+/, "");

  // Known layouts ("custom/...", "bc_lrs/...") and any other non-data path
  // are all served from the same CDN prefix.
  const isKnownLayout = /^(custom\/|bc_lrs\/)/i.test(relPath);
  const isOtherPath = relPath !== "" && !/^data:/i.test(relPath);

  return isKnownLayout || isOtherPath ? `${cdnPrefix}${relPath}` : "";
}
function arcItemToTracked(p, ctx) {
if (!p) return null;
if (!isInStock(p)) return null;
const url = normAbsUrl(p.url, `https://${ctx.store.host}/`);
if (!url) return null;
const name = cleanText(p.description || p.name || "");
if (!name) return null;
const price = pickBestPrice(p);
const rawCspcId = String(p?.cspcid ?? "").trim();
const hasCspcId = /^\d{1,11}$/.test(rawCspcId);
const s = String(raw || "").trim();
if (!s) return "";
const id = Number(p?.id);
const rawSku =
hasCspcId ? `id:${rawCspcId}` :
Number.isFinite(id) ? `id:${id}` :
"";
// already public
if (/^https?:\/\/s\.barnetnetwork\.com\/img\/m\//i.test(s)) return s;
const sku =
normalizeSkuKey(rawSku, { storeLabel: ctx?.store?.name, url }) || rawSku || "";
// site-relative -> public CDN
const noProto = s.replace(/^https?:\/\/[^/]+/i, "");
const rel = noProto.replace(/^\/+/, "");
const img = arcNormalizeImg(p.image || p.image_url || p.img || "");
return { name, price, url, sku, img };
}
// common case: "custom/all/BC398280.png" OR "bc_lrs/000046/0000466854.jpg"
if (/^(custom\/|bc_lrs\/)/i.test(rel)) {
return `https://s.barnetnetwork.com/img/m/${rel}`;
}
// fallback: if it's any path, still try the CDN
if (rel && !/^data:/i.test(rel)) return `https://s.barnetnetwork.com/img/m/${rel}`;
return "";
}
/**
 * Convert one raw ARC API product into the tracker's item shape.
 *
 * Returns null when the product is missing, not in stock, or lacks a usable
 * URL or name.
 *
 * @param {object|null|undefined} p - Raw product record.
 * @param {object} ctx - Scan context; reads ctx.store.host and ctx.store.name.
 * @returns {{name: string, price: *, url: string, sku: string, img: string}|null}
 */
function arcItemToTracked(p, ctx) {
	if (!p || !isInStock(p)) return null;

	const url = normAbsUrl(p.url, `https://${ctx.store.host}/`);
	if (!url) return null;

	const name = cleanText(p.description || p.name || "");
	if (!name) return null;

	const price = pickBestPrice(p);

	// Prefer the cspcid (1-11 digits); otherwise fall back to the numeric product id.
	const cspcText = String(p?.cspcid ?? "").trim();
	const numericId = Number(p?.id);
	let rawSku = "";
	if (/^\d{1,11}$/.test(cspcText)) {
		rawSku = `id:${cspcText}`;
	} else if (Number.isFinite(numericId)) {
		rawSku = `id:${numericId}`;
	}

	const normalized = normalizeSkuKey(rawSku, { storeLabel: ctx?.store?.name, url });
	const sku = normalized || rawSku || "";

	const img = arcNormalizeImg(p.image || p.image_url || p.img || "");

	return { name, price, url, sku, img };
}
/**
 * Extract the `category` and `sub_category` query parameters from a start URL.
 *
 * @param {string} startUrl - Category landing URL.
 * @returns {{category: string, sub: string}} Values found, or empty strings when
 *   a parameter is absent or the URL cannot be parsed.
 */
function parseCategoryParamsFromStartUrl(startUrl) {
	try {
		const { searchParams } = new URL(startUrl);
		return {
			category: searchParams.get("category") || "",
			sub: searchParams.get("sub_category") || "",
		};
	} catch {
		// Unparseable URL: callers fall back to their own defaults.
		return { category: "", sub: "" };
	}
}
/**
 * Guard against a partial scan wiping most of the DB.
 *
 * When this run discovered fewer than 60% of the previously known URLs, every
 * prior item that is not flagged `removed` and was not seen this run is copied
 * into `discovered` (mutating it) so the later merge does not drop it.
 *
 * @param {{byUrl: Map<string, object>}|null|undefined} prevDb - Previous DB.
 * @param {Map<string, object>} discovered - Items found this run; mutated in place.
 * @param {object} ctx - Scan context; reads ctx.logger and ctx.catPrefixOut.
 * @param {string} reason - Free-form detail appended to the warning.
 * @returns {boolean} true when prior items were preserved, false otherwise.
 */
function avoidMassRemoval(prevDb, discovered, ctx, reason) {
	const prevSize = prevDb?.byUrl?.size || 0;
	const discSize = discovered?.size || 0;

	// Nothing to protect, or nothing found at all: leave `discovered` untouched.
	if (prevSize <= 0 || discSize <= 0) return false;

	const ratio = discSize / prevSize;
	if (ratio >= 0.6) return false;

	ctx.logger.warn?.(
		`${ctx.catPrefixOut} | ARC partial scan (${discSize}/${prevSize}); preserving DB to avoid removals (${reason}).`,
	);

	// Preserve prior active items not seen this run.
	for (const [u, it] of prevDb.byUrl.entries()) {
		if (!it || it.removed) continue;
		if (!discovered.has(u)) discovered.set(u, it);
	}
	return true;
}
/**
 * Scan one ARC category through the shop's JSON products API, merge the
 * results into the category DB, persist it, and record the outcome in `report`.
 *
 * Pages are requested sequentially starting at p=1 and the loop stops on the
 * first of: an empty page, a page shorter than the first non-empty page, a
 * page whose id/url fingerprint was already seen, two consecutive pages that
 * add no new URLs, a fetch error, or the page cap.
 *
 * @param {object} ctx - Scan context (store, cat, config, http, logger, dbFile, catPrefixOut).
 * @param {object} prevDb - Previously persisted DB for this category.
 * @param {object} report - Run report; `categories` and `totals` are mutated.
 * @returns {Promise<void>}
 */
async function scanCategoryArcApi(ctx, prevDb, report) {
	const t0 = Date.now();

	// Warm cookies / session (Barnet-based shops sometimes need this).
	// Deliberately best-effort: a failed warm-up must not block the API scan.
	try {
		await ctx.http.fetchTextWithRetry(ctx.cat.startUrl, `arc:warm:${ctx.cat.key}`, ctx.store.ua);
	} catch (_) {}

	// Explicit category config wins over whatever the start URL carries.
	const { category: urlCat, sub: urlSub } = parseCategoryParamsFromStartUrl(ctx.cat.startUrl);
	const category = String(ctx.cat.arcCategory || urlCat || "Spirits").trim();
	const subCategory = String(ctx.cat.arcSubCategory || urlSub || "").trim();

	if (!subCategory) {
		ctx.logger.warn(`${ctx.catPrefixOut} | ARC missing sub_category; skipping scan.`);
		return;
	}

	const apiBase = new URL(`https://${ctx.store.host}/api/shop/${ctx.store.shopId}/products`);
	const discovered = new Map();

	const maxPagesCap = ctx.config.maxPages === null ? 5000 : ctx.config.maxPages;
	const hardCap = Math.min(5000, Math.max(1, maxPagesCap));

	let donePages = 0;
	let aborted = false;

	// Pagination safety
	let pageSize = 0; // inferred from first non-empty page
	const seenPageFingerprints = new Set();
	let stagnantPages = 0;

	for (let page = 1; page <= hardCap; page++) {
		const u = new URL(apiBase.toString());
		u.searchParams.set("p", String(page));
		u.searchParams.set("show_on_web", "true");
		u.searchParams.set("sort_by", String(ctx.cat.sortBy || "price_desc"));
		u.searchParams.set("category", category);
		u.searchParams.set("sub_category", subCategory);
		u.searchParams.set("varital_name", "");
		u.searchParams.set("no_item_found", "No item found.");
		u.searchParams.set("avail_for_sale", "false");
		u.searchParams.set("_dc", String(Date.now())); // cache-buster

		let r;
		try {
			r = await ctx.http.fetchJsonWithRetry(u.toString(), `arc:api:${ctx.cat.key}:p${page}`, ctx.store.ua, {
				method: "GET",
				headers: {
					Accept: "application/json, */*",
					"X-Requested-With": "XMLHttpRequest",
					Referer: ctx.cat.startUrl,
				},
			});
		} catch (e) {
			ctx.logger.warn(`${ctx.catPrefixOut} | ARC API page ${page} failed: ${e?.message || e}`);
			aborted = true;
			break;
		}

		const arr = Array.isArray(r?.json?.items) ? r.json.items : [];
		donePages++;

		const rawCount = arr.length;

		// One formatter for both per-page log lines (before and after filtering).
		const logPage = (keptCount) =>
			ctx.logger.ok(
				`${ctx.catPrefixOut} | API Page ${pageStr(donePages, donePages)} | ${(r?.status || "")
					.toString()
					.padEnd(3)} | raw=${padLeft(rawCount, 3)} kept=${padLeft(keptCount, 3)} | bytes=${kbStr(
					r.bytes,
				)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}`,
			);

		// Log early (even for empty)
		logPage(0);

		if (!rawCount) break;

		// Infer page size from first non-empty page
		if (!pageSize) pageSize = rawCount;

		// Detect wrap/repeat: fingerprint by ids+urls (stable enough)
		const fp = arr
			.map((p) => `${p?.id || ""}:${p?.url || ""}`)
			.sort()
			.join("|");
		if (fp && seenPageFingerprints.has(fp)) {
			ctx.logger.warn(`${ctx.catPrefixOut} | ARC pagination repeated at p=${page}; stopping.`);
			break;
		}
		if (fp) seenPageFingerprints.add(fp);

		const before = discovered.size;

		let kept = 0;
		for (const p of arr) {
			const it = arcItemToTracked(p, ctx);
			if (!it) continue;
			discovered.set(it.url, it);
			kept++;
		}

		// Re-log with kept filled in (overwrite-style isn't possible; just emit a second line)
		logPage(kept);

		// Stop condition #1: last page (short page)
		if (pageSize && rawCount < pageSize) break;

		// Stop condition #2: no new uniques for 2 pages (safety)
		if (discovered.size === before) stagnantPages++;
		else stagnantPages = 0;

		if (stagnantPages >= 2) {
			ctx.logger.warn(`${ctx.catPrefixOut} | ARC pagination stalled (no new items); stopping.`);
			break;
		}
	}

	// A fetch error means the scan is incomplete; keep prior items if too few were seen.
	if (aborted) {
		avoidMassRemoval(prevDb, discovered, ctx, `api pages=${donePages} sub=${subCategory}`);
	}

	ctx.logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}`);

	const { merged, newItems, updatedItems, removedItems, restoredItems, metaChangedItems } = mergeDiscoveredIntoDb(
		prevDb,
		discovered,
		{ storeLabel: ctx.store.name },
	);

	const dbObj = buildDbObject(ctx, merged);
	writeJsonAtomic(ctx.dbFile, dbObj);

	ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`);

	const elapsedMs = Date.now() - t0;
	ctx.logger.ok(
		`${ctx.catPrefixOut} | Done in ${secStr(elapsedMs)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Meta=${metaChangedItems.length} Total(DB)=${merged.size}`,
	);

	report.categories.push({
		store: ctx.store.name,
		label: ctx.cat.label,
		key: ctx.cat.key,
		dbFile: ctx.dbFile,
		scannedPages: Math.max(1, donePages),
		discoveredUnique: discovered.size,
		newCount: newItems.length,
		updatedCount: updatedItems.length,
		removedCount: removedItems.length,
		restoredCount: restoredItems.length,
		metaChangedCount: metaChangedItems.length,
		elapsedMs,
	});

	report.totals.newCount += newItems.length;
	report.totals.updatedCount += updatedItems.length;
	report.totals.removedCount += removedItems.length;
	report.totals.restoredCount += restoredItems.length;
	report.totals.metaChangedCount += metaChangedItems.length;

	addCategoryResultToReport(
		report,
		ctx.store.name,
		ctx.cat.label,
		newItems,
		updatedItems,
		removedItems,
		restoredItems,
	);
}
/**
 * Build the ARC Liquor store definition consumed by the tracker.
 *
 * @param {string} defaultUa - User-Agent string used for this store's requests.
 * @returns {object} Store descriptor: identity, host, shopId, scan function and categories.
 */
function createStore(defaultUa) {
	return {
		key: "arc",
		name: "ARC Liquor",
		host: "kelownaharveyave.armstrong.coop",
		shopId: "644-290",
		ua: defaultUa,
		scanCategory: scanCategoryArcApi,
		categories: [
			{
				key: "spirits-rum",
				label: "Spirits - Rum",
				startUrl: "https://kelownaharveyave.armstrong.coop/products?category=Spirits&sub_category=Rum",
				arcCategory: "Spirits",
				arcSubCategory: "Rum",
				sortBy: "price_desc",
			},
			{
				key: "spirits-scotch",
				label: "Spirits - Scotch",
				startUrl: "https://kelownaharveyave.armstrong.coop/products?category=Spirits&sub_category=Scotch",
				arcCategory: "Spirits",
				arcSubCategory: "Scotch",
				sortBy: "price_desc",
			},
			{
				key: "spirits-whiskey",
				label: "Spirits - Whiskey",
				startUrl: "https://kelownaharveyave.armstrong.coop/products?category=Spirits&sub_category=Whiskey",
				arcCategory: "Spirits",
				arcSubCategory: "Whiskey",
				sortBy: "price_desc",
			},
		],
	};
}
module.exports = { createStore };

View file

@ -9,332 +9,360 @@ const { buildDbObject, writeJsonAtomic } = require("../tracker/db");
const { addCategoryResultToReport } = require("../tracker/report");
/**
 * Format a byte count with humanBytes, left-padded with spaces to 8 characters
 * for log columns.
 *
 * @param {number} bytes - Byte count.
 * @returns {string} Padded human-readable size.
 */
function kbStr(bytes) {
	return humanBytes(bytes).padStart(8, " ");
}
/**
 * Format a duration in milliseconds as seconds, left-padded to 7 characters.
 *
 * Durations that round to under 10s keep one decimal ("1.5s"); longer ones are
 * rounded to whole seconds ("12s"). Non-finite input is treated as 0.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Padded seconds string.
 */
function secStr(ms) {
	const s = Number.isFinite(ms) ? ms / 1000 : 0;
	const tenths = Math.round(s * 10) / 10;
	const out = tenths < 10 ? `${tenths.toFixed(1)}s` : `${Math.round(s)}s`;
	return out.padStart(7, " ");
}
/**
 * Format a "current/total" page counter, padding the current page with padLeft
 * to the width of `total` so log columns line up.
 *
 * @param {number} i - Current page number.
 * @param {number} total - Total page count.
 * @returns {string} e.g. " 3/12".
 */
function pageStr(i, total) {
	const leftW = String(total).length;
	return `${padLeft(i, leftW)}/${total}`;
}
/**
 * Format progress as a whole-number percentage, padded with padLeft to 3
 * characters plus "%".
 *
 * @param {number} done - Completed units.
 * @param {number} total - Total units; a falsy total yields 0%.
 * @returns {string} e.g. " 42%".
 */
function pctStr(done, total) {
	const pct = total ? Math.floor((done / total) * 100) : 0;
	return `${padLeft(pct, 3)}%`;
}
/**
 * Format a value as a dollar price with two decimals.
 *
 * @param {*} n - Value coerced with Number().
 * @returns {string} "$12.50"-style string, or "" when the value is not finite.
 */
function cad(n) {
	const x = Number(n);
	if (!Number.isFinite(x)) return "";
	return `$${x.toFixed(2)}`;
}
/**
 * Leniently parse a price-like value into a number.
 *
 * Numbers are returned unchanged. Other values are stringified and stripped of
 * everything except digits and "." before parsing, so "$1,234.50" becomes
 * 1234.5. Note that a leading minus sign is stripped as well.
 *
 * @param {*} n - Number, numeric string, or anything else.
 * @returns {number} Parsed number, or NaN when there is nothing numeric to parse.
 */
function asNumber(n) {
	if (n == null) return NaN;
	if (typeof n === "number") return n;

	const t = String(n).trim();
	if (!t) return NaN;

	const digits = t.replace(/[^0-9.]/g, "");
	// Number("") is 0, so text with no digits at all (e.g. "N/A") must be
	// rejected explicitly instead of being reported as a price of 0.
	if (!digits) return NaN;

	return Number(digits);
}
/**
 * Read the total hit count from a BCL browse response.
 *
 * Accepts both a plain number at `hits.total` and the object form
 * `hits.total.value`.
 *
 * @param {object|null|undefined} json - Parsed response body.
 * @returns {number} Total hits, or 0 when it cannot be determined.
 */
function bclTotalHits(json) {
	const t = json?.hits?.total;
	if (typeof t === "number") return t;
	if (t && typeof t.value === "number") return t.value; // ES-style
	return 0;
}
/**
 * Decide whether a BCL product source record is in stock.
 *
 * Availability/status text fields are checked in order and the first field
 * containing a recognised phrase decides. If none does, `availableUnits` is
 * used when numeric. With no signal at all the item is assumed to be in stock.
 *
 * @param {object|null|undefined} src - The hit's `_source` record.
 * @returns {boolean} false only when a field says out of stock or units are <= 0.
 */
function bclIsInStock(src) {
	const candidates = [
		src?.availability_override,
		src?.availability,
		src?.availabilityText,
		src?.availabilityStatus,
		src?.availability_status,
		src?.stockStatus,
		src?.stock_status,
		src?.status,
		src?.statusText,
	]
		.map((v) => (v == null ? "" : String(v)))
		.filter(Boolean);

	for (const s of candidates) {
		// "out of stock" is tested first because it also contains "stock".
		if (/out of stock/i.test(s)) return false;
		if (/\bin stock\b/i.test(s)) return true;
		if (/\bavailable\b/i.test(s)) return true; // "Available Feb 07"
	}

	const units = Number(src?.availableUnits);
	if (Number.isFinite(units)) return units > 0;

	return true;
}
/**
 * Turn a possibly relative or protocol-relative URL into an absolute one.
 *
 * @param {*} raw - URL-ish value.
 * @returns {string} "" for empty input; "//host/..." gains "https:"; http(s)
 *   URLs pass through unchanged; anything else is resolved against
 *   https://www.bcliquorstores.com/ (or returned trimmed if that fails).
 */
function bclNormalizeAbsUrl(raw) {
	const s = String(raw || "").trim();
	if (!s) return "";
	if (s.startsWith("//")) return `https:${s}`;
	if (/^https?:\/\//i.test(s)) return s;
	try {
		return new URL(s, "https://www.bcliquorstores.com/").toString();
	} catch {
		return s;
	}
}
/**
 * Pick the first usable image URL from a BCL source record.
 *
 * Single-value fields are tried first, then the first element of the
 * array-valued fields (either a string or an object with `src`/`url`).
 *
 * @param {object|null|undefined} src - The hit's `_source` record.
 * @returns {string} Absolute image URL via bclNormalizeAbsUrl, or "" when none is found.
 */
function bclPickImage(src) {
	const cands = [
		src?.imageUrl,
		src?.imageURL,
		src?.image,
		src?.thumbnail,
		src?.thumbnailUrl,
		src?.thumbnailURL,
		src?.primaryImage,
		src?.primaryImageUrl,
	];

	for (const c of cands) {
		if (typeof c === "string" && c.trim()) return bclNormalizeAbsUrl(c);
	}

	const arrs = [src?.images, src?.imageUrls, src?.image_urls];
	for (const a of arrs) {
		if (!Array.isArray(a) || !a.length) continue;
		// Only the first entry of each list is considered.
		const v = a[0];
		if (typeof v === "string" && v.trim()) return bclNormalizeAbsUrl(v);
		if (v && typeof v === "object") {
			const s = String(v.src || v.url || "").trim();
			if (s) return bclNormalizeAbsUrl(s);
		}
	}

	return "";
}
/**
 * Convert one BCL browse hit into the tracker's item shape.
 *
 * @param {object|null|undefined} hit - Search hit carrying a `_source` record.
 * @returns {{name: string, price: string, url: string, sku: string, img: string}|null}
 *   null when the hit has no source, SKU or name, or is not in stock.
 */
function bclHitToItem(hit) {
	const src = hit?._source || null;
	if (!src) return null;

	const skuRaw = src.sku != null ? String(src.sku).trim() : "";
	if (!skuRaw) return null;

	// SKU in URL (requested)
	const url = `https://www.bcliquorstores.com/product/${encodeURIComponent(skuRaw)}`;

	const name = String(src.name || "").trim();
	if (!name) return null;

	// Sale support: pick currentPrice when present; otherwise regularPrice.
	const current = asNumber(src.currentPrice);
	const regular = asNumber(src.regularPrice);
	const price = cad(Number.isFinite(current) ? current : regular);

	// SKU key:
	// - Keep the CSPC form when normalizeCspc recognises it (rare for BCL, but safe).
	// - Otherwise use an explicit soft key "id:<raw sku>". This covers short
	//   3/4/5-digit BCL product IDs and non-numeric SKUs alike.
	const sku = normalizeCspc(skuRaw) || `id:${skuRaw}`;

	if (!bclIsInStock(src)) return null;

	// BCL appears to serve .jpg (not .jpeg) for these imagecache URLs; always https.
	const img = `https://www.bcliquorstores.com/sites/default/files/imagecache/height400px/${encodeURIComponent(
		skuRaw,
	)}.jpg`;

	return { name, price, url, sku, img };
}
/**
 * Fetch one page of the BCL ajax/browse JSON listing for the current category.
 *
 * @param {object} ctx - Scan context; reads ctx.cat.bclType, ctx.cat.key, ctx.store.ua and ctx.http.
 * @param {number} page1 - 1-based page number.
 * @param {number} size - Page size to request.
 * @returns {Promise<object>} Whatever ctx.http.fetchJsonWithRetry resolves to.
 */
async function bclFetchBrowsePage(ctx, page1, size) {
	const type = ctx.cat.bclType; // e.g. "rum" or "whisky / whiskey"
	const category = "spirits";
	const sort = "featuredProducts:desc";

	const u = new URL("https://www.bcliquorstores.com/ajax/browse");
	u.searchParams.set("category", category);
	u.searchParams.set("type", type);
	u.searchParams.set("sort", sort);
	u.searchParams.set("size", String(size));
	u.searchParams.set("page", String(page1));

	// Referer points at the matching catalogue page. It is assembled by hand with
	// encodeURIComponent so spaces become %20 rather than "+".
	const referer =
		`https://www.bcliquorstores.com/product-catalogue?` +
		`category=${encodeURIComponent(category)}` +
		`&type=${encodeURIComponent(type)}` +
		`&sort=${encodeURIComponent(sort)}` +
		`&page=${encodeURIComponent(String(page1))}`;

	return await ctx.http.fetchJsonWithRetry(u.toString(), `bcl:${ctx.cat.key}:p${page1}`, ctx.store.ua, {
		method: "GET",
		headers: {
			Accept: "application/json, text/plain, */*",
			Referer: referer,
			Origin: "https://www.bcliquorstores.com",
		},
	});
}
/**
 * Append one category's merge outcome to the run report and update its totals.
 *
 * @param {object} ctx - Scan context; reads ctx.store.name, ctx.cat and ctx.dbFile.
 * @param {object} report - Run report; `categories` and `totals` are mutated.
 * @param {object} result - Merge output (newItems, updatedItems, removedItems, restoredItems).
 * @param {{scannedPages: number, discoveredUnique: number, elapsedMs: number}} stats - Scan figures.
 */
function bclRecordCategoryResult(ctx, report, result, stats) {
	const { newItems, updatedItems, removedItems, restoredItems } = result;

	report.categories.push({
		store: ctx.store.name,
		label: ctx.cat.label,
		key: ctx.cat.key,
		dbFile: ctx.dbFile,
		scannedPages: stats.scannedPages,
		discoveredUnique: stats.discoveredUnique,
		newCount: newItems.length,
		updatedCount: updatedItems.length,
		removedCount: removedItems.length,
		restoredCount: restoredItems.length,
		elapsedMs: stats.elapsedMs,
	});

	// Each total is incremented exactly once per category.
	report.totals.newCount += newItems.length;
	report.totals.updatedCount += updatedItems.length;
	report.totals.removedCount += removedItems.length;
	report.totals.restoredCount += restoredItems.length;

	addCategoryResultToReport(
		report,
		ctx.store.name,
		ctx.cat.label,
		newItems,
		updatedItems,
		removedItems,
		restoredItems,
	);
}

/**
 * Scan one BCL category through the ajax/browse JSON endpoint, merge the
 * results into the category DB, persist it, and record the outcome in `report`.
 *
 * Page 1 is fetched first to learn the total hit count; the remaining pages are
 * fetched through parallelMapStaggered using ctx.config.concurrency and
 * ctx.config.staggerMs.
 *
 * @param {object} ctx - Scan context (store, cat, config, http, logger, dbFile, catPrefixOut).
 * @param {object} prevDb - Previously persisted DB for this category.
 * @param {object} report - Run report; `categories` and `totals` are mutated.
 * @returns {Promise<void>}
 */
async function scanCategoryBCLAjax(ctx, prevDb, report) {
	const t0 = Date.now();
	const size = 24;

	let first;
	try {
		first = await bclFetchBrowsePage(ctx, 1, size);
	} catch (e) {
		ctx.logger.warn(`${ctx.catPrefixOut} | BCL browse fetch failed: ${e?.message || e}`);

		// NOTE(review): an empty discovery set is merged and saved here, which
		// presumably flags every existing item as removed on a transient fetch
		// failure. Confirm against mergeDiscoveredIntoDb before relying on it.
		const discovered = new Map();
		const result = mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel: ctx.store.name });
		const dbObj = buildDbObject(ctx, result.merged);
		writeJsonAtomic(ctx.dbFile, dbObj);

		bclRecordCategoryResult(ctx, report, result, {
			scannedPages: 1,
			discoveredUnique: 0,
			elapsedMs: Date.now() - t0,
		});
		return;
	}

	const total = bclTotalHits(first?.json);
	const totalPages = Math.max(1, Math.ceil(total / size));
	const scanPages = ctx.config.maxPages === null ? totalPages : Math.min(ctx.config.maxPages, totalPages);

	ctx.logger.ok(
		`${ctx.catPrefixOut} | Total=${total} Size=${size} Pages: ${scanPages}${scanPages !== totalPages ? ` (cap from ${totalPages})` : ""}`,
	);

	const pageNums = [];
	for (let p = 1; p <= scanPages; p++) pageNums.push(p);

	let donePages = 0;

	const perPageItems = await require("../utils/async").parallelMapStaggered(
		pageNums,
		ctx.config.concurrency,
		ctx.config.staggerMs,
		async (page1, idx) => {
			// Page 1 was already fetched above; reuse it instead of requesting again.
			const r = page1 === 1 ? first : await bclFetchBrowsePage(ctx, page1, size);
			const hits = Array.isArray(r?.json?.hits?.hits) ? r.json.hits.hits : [];

			const items = [];
			for (const h of hits) {
				const it = bclHitToItem(h);
				if (it) items.push(it);
			}

			donePages++;
			ctx.logger.ok(
				`${ctx.catPrefixOut} | Page ${pageStr(idx + 1, pageNums.length)} | ${String(r.status || "").padEnd(3)} | ${pctStr(donePages, pageNums.length)} | items=${padLeft(
					items.length,
					3,
				)} | bytes=${kbStr(r.bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}`,
			);

			return items;
		},
	);

	// Flatten pages into a URL-keyed map; a later duplicate overwrites the earlier one.
	const discovered = new Map();
	let dups = 0;
	for (const arr of perPageItems) {
		for (const it of arr) {
			if (discovered.has(it.url)) dups++;
			discovered.set(it.url, it);
		}
	}

	ctx.logger.ok(
		`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}${dups ? ` (${dups} dups)` : ""}`,
	);

	const result = mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel: ctx.store.name });
	const { merged, newItems, updatedItems, removedItems, restoredItems } = result;

	const dbObj = buildDbObject(ctx, merged);
	writeJsonAtomic(ctx.dbFile, dbObj);

	ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`);

	const elapsed = Date.now() - t0;
	ctx.logger.ok(
		`${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`,
	);

	bclRecordCategoryResult(ctx, report, result, {
		scannedPages: scanPages,
		discoveredUnique: discovered.size,
		elapsedMs: elapsed,
	});
}
/**
 * Build the store descriptor for BC Liquor Stores (BCL).
 *
 * @param {string} defaultUa - User-Agent string stored on the descriptor as `ua`.
 * @returns {object} Descriptor with identity fields (`key`, `name`, `host`),
 *   the category scanner (`scanCategory`) and the list of categories to scan.
 */
function createStore(defaultUa) {
    return {
        key: "bcl",
        name: "BCL",
        host: "www.bcliquorstores.com",
        ua: defaultUa,
        scanCategory: scanCategoryBCLAjax, // JSON-driven (async browse)
        categories: [
            {
                key: "whisky",
                label: "Whisky / Whiskey",
                // informational only; scan uses ajax/browse
                startUrl:
                    "https://www.bcliquorstores.com/product-catalogue?category=spirits&type=whisky%20/%20whiskey&sort=featuredProducts:desc&page=1",
                bclType: "whisky / whiskey",
            },
            {
                key: "rum",
                label: "Rum",
                startUrl:
                    "https://www.bcliquorstores.com/product-catalogue?category=spirits&type=rum&sort=featuredProducts:desc&page=1",
                bclType: "rum",
            },
        ],
    };
}
module.exports = { createStore };

View file

@ -14,380 +14,416 @@ const BSW_ALGOLIA_API_KEY = "1aa0c19fe6a0931340570bd358c2c9d2";
// Algolia `indexes/*/queries` endpoint; the host is the lowercased application id followed by "-dsn".
const BSW_ALGOLIA_URL = `https://${BSW_ALGOLIA_APP_ID.toLowerCase()}-dsn.algolia.net/1/indexes/*/queries`;
/**
 * Format a number as a dollar string with thousands separators and exactly
 * two decimals (en-US conventions).
 *
 * @param {number} n - Amount in dollars.
 * @returns {string} e.g. "$1,234.50"; "" when `n` is not a finite number
 *   (including non-number inputs, since Number.isFinite does not coerce).
 */
function usd(n) {
    if (!Number.isFinite(n)) return "";
    return `$${n.toLocaleString("en-US", { minimumFractionDigits: 2, maximumFractionDigits: 2 })}`;
}
/**
 * Find the numeric collection id embedded in a collection page's HTML.
 *
 * The id can appear in several shapes (URL-encoded filter, JSON property,
 * JS assignment, data attribute), so the patterns are tried in order and the
 * first match wins. Only runs of six or more digits are accepted.
 *
 * @param {*} html - Page markup; coerced to a string (nullish becomes "").
 * @returns {number|null} The parsed collection id, or null when none is found.
 */
function bswExtractCollectionIdFromHtml(html) {
    const s = String(html || "");
    const patterns = [
        /collection_ids%3A(\d{6,})/i,
        /collection_ids\s*:\s*(\d{6,})/i,
        /"collection_ids"\s*:\s*(\d{6,})/i,
        /"collection_id"\s*:\s*(\d{6,})/i,
        /collection_id\s*=\s*(\d{6,})/i,
        /collectionId["']?\s*[:=]\s*["']?(\d{6,})/i,
        /data-collection-id=["'](\d{6,})["']/i,
    ];
    for (const re of patterns) {
        const m = s.match(re);
        if (m && m[1]) return Number.parseInt(m[1], 10);
    }
    return null;
}
/**
 * Turn a raw price value from a search hit into a display string.
 *
 * - null/undefined, blank strings and unsupported types give "".
 * - Strings that already contain "$" are returned with all whitespace removed.
 * - Other strings are stripped to digits and dots, parsed, and formatted with
 *   `usd`; if the stripped text does not parse, the trimmed string is returned.
 * - Numbers are divided by 100 exactly once when `hintCents` is truthy, or
 *   when they are integers >= 100000 (treated as cents), then formatted.
 *
 * @param {string|number|null|undefined} value - Raw price.
 * @param {boolean} [hintCents] - True when `value` is known to be in cents.
 * @returns {string} Formatted price or "".
 */
function bswFormatPrice(value, hintCents) {
    if (value === null || value === undefined) return "";

    if (typeof value === "string") {
        const t = value.trim();
        if (!t) return "";
        if (t.includes("$")) return t.replace(/\s+/g, "");
        const n = Number(t.replace(/[^0-9.]/g, ""));
        if (!Number.isFinite(n)) return t;
        return usd(n);
    }

    if (typeof value === "number") {
        let n = value;

        // Convert cents to dollars a single time; repeating this step would
        // shrink the price by another factor of 100.
        if (hintCents) n = n / 100;
        else if (Number.isInteger(n) && n >= 100000) n = n / 100;

        return usd(n);
    }

    return "";
}
/**
 * Choose the best available price field from a search hit.
 *
 * Fields are checked in priority order: cent-denominated fields first, then
 * the plain price fields, then the first variant's fields. A field counts as
 * present when it is neither null nor undefined.
 *
 * @param {object|null|undefined} hit - Search hit record.
 * @returns {{val: *, cents: boolean}} The raw value and whether it is in
 *   cents; `{ val: null, cents: false }` when nothing is found.
 */
function bswPickPrice(hit) {
    const pick = (val, cents) => ({ val, cents });

    if (hit?.price_cents != null) return pick(hit.price_cents, true);
    if (hit?.compare_at_price_cents != null) return pick(hit.compare_at_price_cents, true);

    if (hit?.price != null) return pick(hit.price, false);
    if (hit?.price_min != null) return pick(hit.price_min, false);
    if (hit?.priceMin != null) return pick(hit.priceMin, false);
    if (hit?.min_price != null) return pick(hit.min_price, false);
    if (hit?.variants_min_price != null) return pick(hit.variants_min_price, false);

    // Fall back to the first variant only.
    if (Array.isArray(hit?.variants) && hit.variants[0]) {
        const v = hit.variants[0];
        if (v.price_cents != null) return pick(v.price_cents, true);
        if (v.compare_at_price_cents != null) return pick(v.compare_at_price_cents, true);
        if (v.price != null) return pick(v.price, false);
    }

    return pick(null, false);
}
/**
 * Convert one search hit into the item record stored for a product.
 *
 * The name comes from the first non-empty title-like field, the URL from an
 * explicit url field or, failing that, from the product handle (leading
 * slashes stripped). Price, SKU and image are derived by the sibling helpers.
 *
 * @param {object|null|undefined} hit - Search hit record.
 * @returns {{name: string, price: string, url: string, sku: *, img: string}|null}
 *   The item, or null when no name or URL could be determined.
 */
function bswHitToItem(hit) {
    const name = cleanText(hit && (hit.title || hit.name || hit.product_title || hit.product_name || ""));

    const handle = hit && (hit.handle || hit.product_handle || hit.slug || "");
    const url =
        (hit && (hit.url || hit.product_url)) ||
        (handle ? `https://www.bswliquor.com/products/${String(handle).replace(/^\/+/, "")}` : "");

    const { val: priceVal, cents: hintCents } = bswPickPrice(hit);
    const price = bswFormatPrice(priceVal, hintCents);

    const sku = normalizeCspc(hit?.sku || hit?.SKU || hit?.cspc || hit?.CSPC || "");

    const img = bswPickImage(hit);

    if (!name || !url) return null;
    return { name, price, url, sku, img };
}
/**
 * Request one page of in-stock products for a collection from the search API.
 *
 * Sends a single-request batch against the "shopify_products" index, filtered
 * to the given collection id and `inventory_available:"true"`.
 *
 * @param {object} ctx - Scan context; reads `ctx.http.fetchJsonWithRetry`,
 *   `ctx.cat.key` (for the request label) and `ctx.store.ua`.
 * @param {number|string} collectionId - Collection id used in the filter.
 * @param {string} ruleContext - Optional rule context; omitted when falsy.
 * @param {number} page0 - Zero-based page index.
 * @param {number} hitsPerPage - Page size.
 * @returns {Promise<*>} Whatever `ctx.http.fetchJsonWithRetry` resolves to.
 */
async function bswFetchAlgoliaPage(ctx, collectionId, ruleContext, page0, hitsPerPage) {
    const filtersExpr = `collection_ids:${collectionId} AND (inventory_available:"true")`;

    // The search parameters travel as a URL-encoded string inside the JSON body.
    const params =
        `facets=%5B%22price%22%2C%22*%22%5D` +
        `&filters=${encodeURIComponent(filtersExpr)}` +
        `&hitsPerPage=${encodeURIComponent(String(hitsPerPage))}` +
        `&page=${encodeURIComponent(String(page0))}` +
        `&query=` +
        `&clickAnalytics=true` +
        `&maxValuesPerFacet=100` +
        (ruleContext ? `&ruleContexts=${encodeURIComponent(String(ruleContext))}` : "");

    const bodyObj = { requests: [{ indexName: "shopify_products", params }] };

    return await ctx.http.fetchJsonWithRetry(BSW_ALGOLIA_URL, `algolia:${ctx.cat.key}:p${page0}`, ctx.store.ua, {
        method: "POST",
        headers: {
            Accept: "*/*",
            "content-type": "application/x-www-form-urlencoded",
            Origin: "https://www.bswliquor.com",
            Referer: "https://www.bswliquor.com/",
            "x-algolia-api-key": BSW_ALGOLIA_API_KEY,
            "x-algolia-application-id": BSW_ALGOLIA_APP_ID,
        },
        body: JSON.stringify(bodyObj),
    });
}
/**
 * Render a byte count for a fixed-width log column.
 *
 * @param {number} bytes - Byte count passed to `humanBytes`.
 * @returns {string} The `humanBytes` text left-padded with spaces to at
 *   least 8 characters.
 */
function kbStr(bytes) {
    return humanBytes(bytes).padStart(8, " ");
}
/**
 * Render a millisecond duration as seconds for a fixed-width log column.
 *
 * Durations that round to under ten seconds keep one decimal ("1.2s");
 * longer ones are rounded to whole seconds ("12s"). Non-finite input is
 * treated as 0.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Text left-padded with spaces to at least 7 characters.
 */
function secStr(ms) {
    const s = Number.isFinite(ms) ? ms / 1000 : 0;
    const tenths = Math.round(s * 10) / 10;
    const out = tenths < 10 ? `${tenths.toFixed(1)}s` : `${Math.round(s)}s`;
    return out.padStart(7, " ");
}
/**
 * Render "current/total" page progress with the current page padded (via
 * `padLeft`) to the width of the total, so successive lines align.
 *
 * @param {number} i - Current page number.
 * @param {number} total - Total page count.
 * @returns {string} e.g. " 3/12".
 */
function pageStr(i, total) {
    const leftW = String(total).length;
    return `${padLeft(i, leftW)}/${total}`;
}
/**
 * Render completion as a whole-number percentage, padded (via `padLeft`) to
 * three characters before the "%" sign.
 *
 * @param {number} done - Units completed.
 * @param {number} total - Units overall; a falsy total yields 0%.
 * @returns {string} e.g. " 33%".
 */
function pctStr(done, total) {
    const pct = total ? Math.floor((done / total) * 100) : 0;
    return `${padLeft(pct, 3)}%`;
}
/**
 * Make a URL absolute against the BSW storefront.
 *
 * @param {*} raw - URL-ish value; coerced to a trimmed string.
 * @returns {string} "" for empty input; protocol-relative URLs get "https:";
 *   http(s) URLs are returned unchanged; anything else is resolved against
 *   "https://www.bswliquor.com/". If resolution throws, the trimmed input is
 *   returned as-is.
 */
function bswNormalizeAbsUrl(raw) {
    const s = String(raw || "").trim();
    if (!s) return "";
    if (s.startsWith("//")) return `https:${s}`;
    if (/^https?:\/\//i.test(s)) return s;
    try {
        return new URL(s, "https://www.bswliquor.com/").toString();
    } catch {
        return s;
    }
}
/**
 * Extract an absolute image URL from a value that may be a URL string or an
 * image-like object.
 *
 * For objects, a fixed list of common URL properties is checked in order and
 * the first non-blank string wins.
 *
 * @param {*} v - String, object, or anything else.
 * @returns {string} Absolute URL (via `bswNormalizeAbsUrl`), or "" when no
 *   usable URL is found.
 */
function bswNormalizeImg(v) {
    if (!v) return "";
    if (typeof v === "string") return bswNormalizeAbsUrl(v);
    if (typeof v === "object") {
        const cands = [
            v.src,
            v.url,
            v.originalSrc,
            v.original_src,
            v.original,
            v.secure_url,
            v.large,
            v.medium,
            v.small,
        ];
        for (const c of cands) {
            if (typeof c === "string" && c.trim()) return bswNormalizeAbsUrl(c);
        }
    }
    return "";
}
/**
 * Find the first usable image URL on a search hit.
 *
 * Single-value image fields are tried first (in the listed order), then each
 * entry of `hit.images`, then each entry of `hit.media`. Every candidate is
 * passed through `bswNormalizeImg`; the first non-empty result is returned.
 *
 * @param {object|null|undefined} hit - Search hit record.
 * @returns {string} Image URL, or "" when none is found.
 */
function bswPickImage(hit) {
    const cands = [
        hit?.image,
        hit?.image_url,
        hit?.imageUrl,
        hit?.imageURL,
        hit?.featured_image,
        hit?.featured_image_url,
        hit?.featuredImage,
        hit?.featuredImageUrl,
        hit?.product_image,
        hit?.product_image_url,
        hit?.productImage,
        hit?.productImageUrl,
        hit?.thumbnail,
        hit?.thumbnail_url,
        hit?.thumbnailUrl,
    ];

    for (const c of cands) {
        const s = bswNormalizeImg(c);
        if (s) return s;
    }

    // Array-valued fallbacks, in priority order.
    for (const list of [hit?.images, hit?.media]) {
        if (!Array.isArray(list)) continue;
        for (const im of list) {
            const s = bswNormalizeImg(im);
            if (s) return s;
        }
    }

    return "";
}
/**
 * Scan one BSW category through the search API and persist the result.
 *
 * Steps:
 *  1. Resolve the collection id from `ctx.cat.bswCollectionId`, or discover
 *     it by fetching `ctx.cat.startUrl` and parsing the HTML.
 *  2. Without a collection id, merge an empty discovery set into the DB,
 *     save it, record the category in `report` once, and return.
 *  3. Otherwise fetch page 0 to learn the page count, fetch the remaining
 *     pages through `parallelMapStaggered` (page 0's response is reused),
 *     and convert hits to items.
 *  4. De-duplicate items by URL (later pages overwrite earlier ones), merge
 *     into the previous DB, write the DB file, and record counts in `report`
 *     exactly once.
 *
 * @param {object} ctx - Scan context (`cat`, `store`, `config`, `http`,
 *   `logger`, `dbFile`, `catPrefixOut`).
 * @param {*} prevDb - Previous DB state, passed to `mergeDiscoveredIntoDb`.
 * @param {object} report - Mutated: `categories` gets one entry and `totals`
 *   counters are incremented.
 * @returns {Promise<void>}
 */
async function scanCategoryBSWAlgolia(ctx, prevDb, report) {
    const t0 = Date.now();

    let collectionId = Number.isFinite(ctx.cat.bswCollectionId) ? ctx.cat.bswCollectionId : null;
    if (!collectionId) {
        // Best effort: a failed HTML fetch is logged and handled by the fallback below.
        try {
            const { text: html } = await ctx.http.fetchTextWithRetry(
                ctx.cat.startUrl,
                `bsw:html:${ctx.cat.key}`,
                ctx.store.ua,
            );
            collectionId = bswExtractCollectionIdFromHtml(html);
            if (collectionId) ctx.logger.ok(`${ctx.catPrefixOut} | BSW discovered collectionId=${collectionId}`);
            else ctx.logger.warn(`${ctx.catPrefixOut} | BSW could not discover collectionId from HTML.`);
        } catch (e) {
            ctx.logger.warn(
                `${ctx.catPrefixOut} | BSW HTML fetch failed for collectionId discovery: ${e?.message || e}`,
            );
        }
    }

    if (!collectionId) {
        ctx.logger.warn(`${ctx.catPrefixOut} | BSW missing collectionId; defaulting to 1 page with 0 items.`);

        const discovered = new Map();
        const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(
            prevDb,
            discovered,
            { storeLabel: ctx.store.name },
        );
        const dbObj = buildDbObject(ctx, merged);
        writeJsonAtomic(ctx.dbFile, dbObj);

        ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`);

        const elapsed = Date.now() - t0;
        report.categories.push({
            store: ctx.store.name,
            label: ctx.cat.label,
            key: ctx.cat.key,
            dbFile: ctx.dbFile,
            scannedPages: 1,
            discoveredUnique: 0,
            newCount: newItems.length,
            updatedCount: updatedItems.length,
            removedCount: removedItems.length,
            restoredCount: restoredItems.length,
            elapsedMs: elapsed,
        });
        report.totals.newCount += newItems.length;
        report.totals.updatedCount += updatedItems.length;
        report.totals.removedCount += removedItems.length;
        report.totals.restoredCount += restoredItems.length;

        addCategoryResultToReport(
            report,
            ctx.store.name,
            ctx.cat.label,
            newItems,
            updatedItems,
            removedItems,
            restoredItems,
        );
        return;
    }

    const ruleContext = ctx.cat.bswRuleContext || "";
    const hitsPerPage = 50;

    // Page 0 doubles as the probe for the total page count.
    const first = await bswFetchAlgoliaPage(ctx, collectionId, ruleContext, 0, hitsPerPage);
    const result0 = first?.json?.results?.[0] || null;
    const nbPages = result0 && Number.isFinite(result0.nbPages) ? result0.nbPages : 1;

    const totalPages = Math.max(1, nbPages);
    const scanPages = ctx.config.maxPages === null ? totalPages : Math.min(ctx.config.maxPages, totalPages);
    ctx.logger.ok(
        `${ctx.catPrefixOut} | Pages: ${scanPages}${scanPages !== totalPages ? ` (cap from ${totalPages})` : ""}`,
    );

    const pageIdxs = [];
    for (let p = 0; p < scanPages; p++) pageIdxs.push(p);

    let donePages = 0;

    const perPageItems = await require("../utils/async").parallelMapStaggered(
        pageIdxs,
        ctx.config.concurrency,
        ctx.config.staggerMs,
        async (page0, idx) => {
            const pnum = idx + 1;
            // Reuse the probe response instead of fetching page 0 again.
            const r =
                page0 === 0 ? first : await bswFetchAlgoliaPage(ctx, collectionId, ruleContext, page0, hitsPerPage);

            const res0 = r?.json?.results?.[0] || null;
            const hits = res0 && Array.isArray(res0.hits) ? res0.hits : [];

            const items = [];
            for (const h of hits) {
                const it = bswHitToItem(h);
                if (it) items.push(it);
            }

            donePages++;
            ctx.logger.ok(
                `${ctx.catPrefixOut} | Page ${pageStr(pnum, pageIdxs.length)} | ${String(r.status || "").padEnd(3)} | ${pctStr(donePages, pageIdxs.length)} | items=${padLeft(
                    items.length,
                    3,
                )} | bytes=${kbStr(r.bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}`,
            );

            return items;
        },
    );

    // Key by URL; a repeated URL counts as a duplicate and the later entry wins.
    const discovered = new Map();
    let dups = 0;
    for (const arr of perPageItems) {
        for (const it of arr) {
            if (discovered.has(it.url)) dups++;
            discovered.set(it.url, it);
        }
    }

    ctx.logger.ok(
        `${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}${dups ? ` (${dups} dups)` : ""}`,
    );

    const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, {
        storeLabel: ctx.store.name,
    });

    const dbObj = buildDbObject(ctx, merged);
    writeJsonAtomic(ctx.dbFile, dbObj);

    ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`);

    const elapsed = Date.now() - t0;
    ctx.logger.ok(
        `${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`,
    );

    report.categories.push({
        store: ctx.store.name,
        label: ctx.cat.label,
        key: ctx.cat.key,
        dbFile: ctx.dbFile,
        scannedPages: scanPages,
        discoveredUnique: discovered.size,
        newCount: newItems.length,
        updatedCount: updatedItems.length,
        removedCount: removedItems.length,
        restoredCount: restoredItems.length,
        elapsedMs: elapsed,
    });
    report.totals.newCount += newItems.length;
    report.totals.updatedCount += updatedItems.length;
    report.totals.removedCount += removedItems.length;
    report.totals.restoredCount += restoredItems.length;

    addCategoryResultToReport(
        report,
        ctx.store.name,
        ctx.cat.label,
        newItems,
        updatedItems,
        removedItems,
        restoredItems,
    );
}
/**
 * Build the store descriptor for BSW Liquor.
 *
 * @param {string} defaultUa - User-Agent string stored on the descriptor as `ua`.
 * @returns {object} Descriptor with identity fields (`key`, `name`, `host`),
 *   the category scanner (`scanCategory`) and the categories to scan; each
 *   category carries the `bswRuleContext` sent with its search requests.
 */
function createStore(defaultUa) {
    return {
        key: "bsw",
        name: "BSW",
        host: "www.bswliquor.com",
        ua: defaultUa,
        scanCategory: scanCategoryBSWAlgolia,
        categories: [
            {
                key: "scotch-whisky",
                label: "Scotch Whisky",
                startUrl: "https://www.bswliquor.com/collections/scotch-whisky?page=1",
                bswRuleContext: "scotch-whisky",
            },
            {
                key: "rum",
                label: "Rum",
                startUrl: "https://www.bswliquor.com/collections/rum?page=1",
                bswRuleContext: "rum",
            },
            {
                key: "whisky",
                label: "Whisky",
                startUrl: "https://www.bswliquor.com/collections/whisky?page=1",
                bswRuleContext: "whisky",
            },
        ],
    };
}
module.exports = { createStore };

View file

@ -11,20 +11,20 @@ const { addCategoryResultToReport } = require("../tracker/report");
/* ---------------- formatting ---------------- */
/**
 * Render a byte count for a fixed-width log column.
 *
 * @param {number} bytes - Byte count passed to `humanBytes`.
 * @returns {string} The `humanBytes` text left-padded with spaces to at
 *   least 8 characters.
 */
function kbStr(bytes) {
    return humanBytes(bytes).padStart(8, " ");
}
/**
 * Render a millisecond duration as seconds for a fixed-width log column.
 *
 * Values that round to under ten seconds keep one decimal; larger values are
 * rounded to whole seconds. Non-finite input is treated as 0.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Text left-padded with spaces to at least 7 characters.
 */
function secStr(ms) {
    const s = Number.isFinite(ms) ? ms / 1000 : 0;
    const t = Math.round(s * 10) / 10;
    return (t < 10 ? `${t.toFixed(1)}s` : `${Math.round(s)}s`).padStart(7, " ");
}
/**
 * Render "current/total" page progress with the current page padded (via
 * `padLeft`) to the width of the total.
 *
 * @param {number} i - Current page number.
 * @param {number} total - Total page count.
 * @returns {string} e.g. "  7/500".
 */
function pageStr(i, total) {
    const w = String(total).length;
    return `${padLeft(i, w)}/${total}`;
}
/**
 * Render completion as a whole-number percentage, padded (via `padLeft`) to
 * three characters before the "%" sign.
 *
 * @param {number} done - Units completed.
 * @param {number} total - Units overall; a falsy total yields 0%.
 * @returns {string} e.g. " 50%".
 */
function pctStr(done, total) {
    const pct = total ? Math.floor((done / total) * 100) : 0;
    return `${padLeft(pct, 3)}%`;
}
/* ---------------- co-op specifics ---------------- */
@ -33,327 +33,352 @@ const BASE = "https://shoponlinewhisky-wine.coopwinespiritsbeer.com";
// Storefront page under BASE; used as the Referer header and as the page fetched for bootstrap values.
const REFERER = `${BASE}/worldofwhisky`;
/**
 * Build the request headers for a co-op API call.
 *
 * @param {object} ctx - Scan context; reads the session values held on
 *   `ctx.store.coop` (`sessionKey`, `chainId`, `storeId`, `appVersion`,
 *   `sessionId`).
 * @param {string} sourcepage - Value for the `sourcepage` header.
 * @returns {object} Header map. `SESSION_ID` is the literal string "null"
 *   until a session id exists; `TIMESTAMP` is the current epoch milliseconds
 *   as a string.
 */
function coopHeaders(ctx, sourcepage) {
    const coop = ctx.store.coop;
    return {
        Accept: "application/json, text/javascript, */*; q=0.01",
        "Content-Type": "application/json",
        Origin: BASE,
        Referer: REFERER,

        // these 4 are required on their API calls (matches browser)
        SessionKey: coop.sessionKey,
        chainID: coop.chainId,
        storeID: coop.storeId,
        appVersion: coop.appVersion,

        AUTH_TOKEN: "null",
        CONNECTION_ID: "null",
        SESSION_ID: coop.sessionId || "null",
        TIMESTAMP: String(Date.now()),
        sourcepage,
    };
}
/**
 * GET a URL as text through the shared retrying HTTP client.
 *
 * @param {object} ctx - Scan context; reads `ctx.http.fetchTextWithRetry`
 *   and `ctx.store.ua`.
 * @param {string} url - Address to fetch.
 * @param {string} label - Request label passed to the HTTP client.
 * @param {{headers?: object}} [options] - Optional extra request headers.
 * @returns {Promise<*>} Whatever `ctx.http.fetchTextWithRetry` resolves to.
 */
async function coopFetchText(ctx, url, label, { headers } = {}) {
    return await ctx.http.fetchTextWithRetry(url, label, ctx.store.ua, {
        method: "GET",
        headers: headers || {},
    });
}
/**
 * Return the trimmed first capture group of `re` within `html`.
 *
 * @param {*} html - Text to search; coerced to a string (nullish becomes "").
 * @param {RegExp} re - Pattern with at least one capture group.
 * @returns {string} Captured text, or "" when there is no match.
 */
function extractVar(html, re) {
    const m = String(html || "").match(re);
    return m ? String(m[1] || "").trim() : "";
}

/**
 * Make sure the static API values (session key, chain id, store id, client
 * version) are present on `ctx.store.coop`, scraping them from the storefront
 * page when any is missing.
 *
 * @param {object} ctx - Scan context; `ctx.store.coop` is mutated in place.
 * @returns {Promise<void>}
 * @throws {Error} When the page cannot be fetched with status 200 and a
 *   non-empty body, or when any of the four values is absent from the HTML.
 */
async function ensureCoopBootstrap(ctx) {
    const coop = ctx.store.coop;

    // Already bootstrapped: nothing to fetch.
    if (coop.sessionKey && coop.chainId && coop.storeId && coop.appVersion) return;

    const r = await coopFetchText(ctx, REFERER, "coop:bootstrap", {
        headers: {
            Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            Referer: REFERER,
        },
    });

    const html = r?.text || "";
    if (r?.status !== 200 || !html) {
        // `r` may be nullish here, so read the status defensively.
        throw new Error(`coop bootstrap failed: GET ${REFERER} => ${r?.status}`);
    }

    // Values are in <script> var SESSIONKEY = "..."; etc.
    coop.sessionKey = extractVar(html, /var\s+SESSIONKEY\s*=\s*"([^"]+)"/i);
    coop.chainId = extractVar(html, /var\s+chainID\s*=\s*"([^"]+)"/i);
    coop.storeId = extractVar(html, /var\s+store_unique_id\s*=\s*"([^"]+)"/i);
    coop.appVersion = extractVar(html, /var\s+CLIENTVERSION\s*=\s*"([^"]+)"/i);

    if (!coop.sessionKey || !coop.chainId || !coop.storeId || !coop.appVersion) {
        throw new Error(
            `coop bootstrap missing values: sessionKey=${!!coop.sessionKey} chainId=${!!coop.chainId} storeId=${!!coop.storeId} appVersion=${!!coop.appVersion}`,
        );
    }
}
/**
 * Make sure `ctx.store.coop.sessionId` is set, creating a session through the
 * API when it is missing.
 *
 * Runs the bootstrap step first so the required headers can be built, then
 * POSTs to the create-session endpoint and stores the returned session id
 * and `AnonymousUserID` (null when absent) on `ctx.store.coop`.
 *
 * @param {object} ctx - Scan context; `ctx.store.coop` is mutated in place.
 * @returns {Promise<void>}
 * @throws {Error} When the response carries no session id under any of the
 *   accepted key spellings.
 */
async function ensureCoopSession(ctx) {
    const coop = ctx.store.coop;
    if (coop.sessionId) return;

    await ensureCoopBootstrap(ctx);

    const r = await ctx.http.fetchJsonWithRetry(
        `${BASE}/api/account/createsession`,
        `coop:createsession`,
        ctx.store.ua,
        {
            method: "POST",
            headers: coopHeaders(ctx, "/worldofwhisky"),
            // browser sends Content-Length: 0; easiest equivalent:
            body: "",
        },
    );

    // The key's casing varies, so accept every spelling handled so far.
    const sid = r?.json?.SessionID || r?.json?.sessionID || r?.json?.sessionId || r?.json?.SessionId || "";

    if (!sid) {
        throw new Error(`createSession: missing SessionID (status=${r?.status})`);
    }

    coop.sessionId = sid;
    coop.anonymousUserId = r?.json?.AnonymousUserID ?? null;
}
/**
 * Make a URL absolute against the co-op storefront base.
 *
 * @param {*} raw - URL-ish value; coerced to a trimmed string.
 * @returns {string} "" for empty input; protocol-relative URLs get "https:";
 *   http(s) URLs are returned unchanged; anything else is resolved against
 *   `${BASE}/`. If resolution throws, the trimmed input is returned as-is.
 */
function normalizeAbsUrl(raw) {
    const s = String(raw || "").trim();
    if (!s) return "";
    if (s.startsWith("//")) return `https:${s}`;
    if (/^https?:\/\//i.test(s)) return s;
    try {
        return new URL(s, `${BASE}/`).toString();
    } catch {
        return s;
    }
}
/**
 * Build the storefront URL for a product: a "#/product/<id>" fragment on the
 * REFERER page, with the id URI-encoded.
 *
 * @param {string|number} productId - Product identifier.
 * @returns {string} Product page URL.
 */
function productUrlFromId(productId) {
    return `${REFERER}#/product/${encodeURIComponent(String(productId))}`;
}
/**
 * Convert one product record from the co-op API into the stored item shape.
 *
 * Records are dropped (null) when they are explicitly inactive, have a blank
 * name, or lack a `ProductID`. The SKU key prefers the UPC, then
 * `ProductStoreID`, then `ProductID`, and is normalised via `normalizeSkuKey`.
 * The price uses `CountDetails.PriceText` when present, else a two-decimal
 * "$" rendering of a finite `Price`, else "".
 *
 * @param {object|null|undefined} p - Raw API product.
 * @returns {object|null} `{ name, price, url, sku, upc, productId,
 *   productStoreId, img }`, or null when the record is unusable.
 */
function productFromApi(p) {
    if (!p || p.IsActive === false) return null;

    const name = String(p.Name || "").trim();
    if (!name) return null;

    const productId = p.ProductID;
    if (!productId) return null;

    const url = productUrlFromId(productId);

    const price = p?.CountDetails?.PriceText || (Number.isFinite(p?.Price) ? `$${Number(p.Price).toFixed(2)}` : "");

    const upc = String(p.UPC || "").trim();

    // Prefer the most stable identifier available for the SKU key.
    let rawKey = "";
    if (upc) rawKey = `upc:${upc}`;
    else if (p.ProductStoreID) rawKey = `id:${String(p.ProductStoreID).trim()}`;
    else if (p.ProductID) rawKey = `id:${String(p.ProductID).trim()}`;

    const sku = normalizeSkuKey(rawKey, { storeLabel: "Co-op World of Whisky", url });

    const img = normalizeAbsUrl(p.ImageURL);

    return {
        name,
        price,
        url,
        sku,
        upc,
        productId,
        productStoreId: p.ProductStoreID || null,
        img,
    };
}
/* ---------------- scanner ---------------- */
/**
 * Fetch one page of products for a co-op category.
 *
 * Ensures a session exists, POSTs an unfiltered listing request, and, if the
 * API answers with type "invalid_session", clears the stored session id,
 * creates a new session and repeats the request once.
 *
 * @param {object} ctx - Scan context; reads `ctx.http`, `ctx.cat.key`,
 *   `ctx.cat.coopSlug`, `ctx.store.ua`; may reset `ctx.store.coop.sessionId`.
 * @param {string|number} categoryId - Category id placed in the endpoint path.
 * @param {number} page - Page number sent in the request body.
 * @returns {Promise<*>} Result of the (possibly repeated) request.
 */
async function fetchCategoryPage(ctx, categoryId, page) {
    await ensureCoopSession(ctx);

    // Headers are rebuilt on every call so a refreshed session id is picked up.
    const doReq = () =>
        ctx.http.fetchJsonWithRetry(
            `${BASE}/api/v2/products/category/${categoryId}`,
            `coop:${ctx.cat.key}:p${page}`,
            ctx.store.ua,
            {
                method: "POST",
                headers: coopHeaders(ctx, `/category/${ctx.cat.coopSlug}`),
                body: JSON.stringify({
                    page,
                    Filters: {
                        Filters: [],
                        LastSelectedFilter: null,
                        SearchWithinTerm: null,
                    },
                    orderby: null,
                }),
            },
        );

    let r = await doReq();

    // one fast retry on invalid_session: refresh SessionID and repeat
    if (r?.json?.type === "invalid_session") {
        ctx.store.coop.sessionId = "";
        await ensureCoopSession(ctx);
        r = await doReq();
    }

    return r;
}
/**
 * Guard against a partial scan wiping most of the previous DB.
 *
 * When the current scan found fewer than 60% as many entries as `prevDb`
 * holds, every previous entry that is missing from `discovered` is copied
 * back into it (mutating `discovered` in place) and a warning is logged.
 * Nothing happens when either side is empty.
 *
 * NOTE(review): this reads `prevDb.size` / `prevDb.entries()`, i.e. it expects
 * a Map-like object keyed the same way as `discovered` - confirm against caller.
 *
 * @param {Map|null|undefined} prevDb - Previous entries.
 * @param {Map} discovered - Entries found this run; may be extended.
 * @param {object} ctx - Reads `ctx.logger.warn` and `ctx.catPrefixOut`.
 * @returns {void}
 */
function avoidMassRemoval(prevDb, discovered, ctx) {
  const prev = prevDb?.size || 0;
  const curr = discovered.size;
  if (!prev || !curr) return;
  if (curr / prev >= 0.6) return;

  ctx.logger.warn(`${ctx.catPrefixOut} | Partial scan (${curr}/${prev}); preserving DB`);

  for (const [k, v] of prevDb.entries()) {
    if (!discovered.has(k)) discovered.set(k, v);
  }
}
/**
 * Scan one Co-op category: page through the API, merge the result into the
 * previous DB, persist it and record the outcome in `report`.
 *
 * Paging stops at the first failed page, the first page with no products, or
 * after `maxPages` pages (config value capped at 500; `null` means 500).
 *
 * @param {object} ctx - Scan context (`config`, `cat`, `store`, `http`, `logger`, `dbFile`, `catPrefixOut`).
 * @param {object|null} prevDb - Previous DB; passed to `avoidMassRemoval` and `mergeDiscoveredIntoDb`.
 * @param {object} report - Mutated: `categories` gets one entry and `totals` counters are incremented.
 * @returns {Promise<void>}
 */
async function scanCategoryCoop(ctx, prevDb, report) {
  const t0 = Date.now();
  const discovered = new Map();

  const maxPages = ctx.config.maxPages === null ? 500 : Math.min(ctx.config.maxPages, 500);

  let done = 0;

  for (let page = 1; page <= maxPages; page++) {
    let r;
    try {
      r = await fetchCategoryPage(ctx, ctx.cat.coopCategoryId, page);
    } catch (e) {
      // A failed page ends the scan; what was collected so far is still merged.
      ctx.logger.warn(`${ctx.catPrefixOut} | page ${page} failed: ${e?.message || e}`);
      break;
    }

    const arr = Array.isArray(r?.json?.Products?.Result) ? r.json.Products.Result : [];
    done++;

    let kept = 0;
    for (const p of arr) {
      const it = productFromApi(p);
      if (!it) continue;
      discovered.set(it.url, it);
      kept++;
    }

    ctx.logger.ok(
      `${ctx.catPrefixOut} | Page ${padLeft(page, 3)} | ${String(r.status || "").padEnd(
        3,
      )} | items=${padLeft(kept, 3)} | bytes=${kbStr(
        r.bytes,
      )} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}`,
    );

    if (!arr.length) break;
  }

  if (prevDb) avoidMassRemoval(prevDb, discovered, ctx);

  ctx.logger.ok(`${ctx.catPrefixOut} | Unique products: ${discovered.size}`);

  const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, {
    storeLabel: ctx.store.name,
  });

  const dbObj = buildDbObject(ctx, merged);
  writeJsonAtomic(ctx.dbFile, dbObj);

  const elapsed = Date.now() - t0;

  report.categories.push({
    store: ctx.store.name,
    label: ctx.cat.label,
    key: ctx.cat.key,
    dbFile: ctx.dbFile,
    scannedPages: done,
    discoveredUnique: discovered.size,
    newCount: newItems.length,
    updatedCount: updatedItems.length,
    removedCount: removedItems.length,
    restoredCount: restoredItems.length,
    elapsedMs: elapsed,
  });

  report.totals.newCount += newItems.length;
  report.totals.updatedCount += updatedItems.length;
  report.totals.removedCount += removedItems.length;
  report.totals.restoredCount += restoredItems.length;

  addCategoryResultToReport(
    report,
    ctx.store.name,
    ctx.cat.label,
    newItems,
    updatedItems,
    removedItems,
    restoredItems,
  );
}
/* ---------------- store ---------------- */
/**
 * Build the store descriptor for Co-op World of Whisky.
 *
 * @param {string} defaultUa - User-Agent string stored as `ua`.
 * @returns {object} Store definition: identity fields, `scanCategory`,
 *   a `coop` session/config bag and the `categories` list.
 */
function createStore(defaultUa) {
  // [key, label, coopSlug, coopCategoryId]; startUrl is derived from coopSlug.
  const categoryRows = [
    ["canadian-whisky", "Canadian Whisky", "canadian_whisky", 4],
    ["bourbon-whiskey", "Bourbon Whiskey", "bourbon_whiskey", 9],
    ["scottish-single-malts", "Scottish Single Malts", "scottish_single_malts", 6],
    ["scottish-blends", "Scottish Whisky Blends", "scottish_whisky_blends", 5],
    ["american-whiskey", "American Whiskey", "american_whiskey", 8],
    ["world-whisky", "World Whisky", "world_international", 10],
    ["rum", "Rum", "spirits_rum", 24],
  ];

  return {
    key: "coop",
    name: "Co-op World of Whisky",
    host: "shoponlinewhisky-wine.coopwinespiritsbeer.com",
    ua: defaultUa,
    scanCategory: scanCategoryCoop,

    // put your captured values here (or pull from env)
    coop: {
      sessionKey: "",
      chainId: "",
      storeId: "",
      appVersion: "",
      sessionId: "", // set by ensureCoopSession()
      anonymousUserId: null,
    },

    categories: categoryRows.map(([key, label, coopSlug, coopCategoryId]) => ({
      key,
      label,
      coopSlug,
      coopCategoryId,
      startUrl: `${REFERER}#/category/${coopSlug}`,
    })),
  };
}
module.exports = { createStore };

View file

@ -13,142 +13,123 @@ const { buildDbObject, writeJsonAtomic } = require("../tracker/db");
const { addCategoryResultToReport } = require("../tracker/report");
/**
 * Detect an empty collection page by its markup.
 *
 * @param {*} html - Page HTML; falsy values are treated as "".
 * @returns {boolean} True when the HTML contains the `collection--empty`
 *   marker or the text "No products found" (both case-insensitive).
 */
function craftCellarsIsEmptyListingPage(html) {
  const s = String(html || "");
  return /collection--empty\b/i.test(s) || /No products found/i.test(s);
}
/**
 * Strip query string and fragment from a product URL so the same product
 * always maps to the same key.
 *
 * @param {*} raw - Absolute URL (stringified before parsing).
 * @returns {string} The URL without `?query` and `#hash`; if `raw` cannot be
 *   parsed as an absolute URL, `String(raw || "")` is returned unchanged.
 */
function canonicalizeCraftProductUrl(raw) {
  try {
    const u = new URL(String(raw));
    u.search = "";
    u.hash = "";
    return u.toString();
  } catch {
    // Not an absolute URL: hand back the input as a string rather than throwing.
    return String(raw || "");
  }
}
/**
 * Pull a display price out of a product-card HTML block.
 *
 * Preference order: first dollar amount after the text "sale price", then
 * first amount after "regular price", then the first amount anywhere.
 *
 * @param {*} block - Card HTML; falsy values are treated as "".
 * @returns {string} Amount such as "$1,234.00" (inner whitespace removed), or "".
 */
function extractShopifyCardPrice(block) {
  const b = String(block || "");

  // All "$<digits>[.cc]" occurrences in txt, with whitespace squeezed out.
  const dollars = (txt) => [...String(txt).matchAll(/\$\s*[\d,]+(?:\.\d{2})?/g)].map((m) => m[0].replace(/\s+/g, ""));

  // split(...)[1] is the text between the first label and the next one (if any).
  const saleRegion = b.split(/sale price/i)[1] || "";
  const saleD = dollars(saleRegion);
  if (saleD.length) return saleD[0];

  const regRegion = b.split(/regular price/i)[1] || "";
  const regD = dollars(regRegion);
  if (regD.length) return regD[0];

  const any = dollars(b);
  return any[0] || "";
}
/**
 * Parse product cards from a collection page.
 *
 * Tries to narrow the HTML to the product grid container first
 * (`#ProductGridContainer` or `#product-grid`, whichever match is longer).
 * The grid regexes are non-greedy, so the candidate may be truncated at the
 * first closing div; it is only used when it contains a `/products/` link,
 * otherwise the whole page is parsed.
 *
 * @param {*} html - Page HTML; falsy values are treated as "".
 * @param {object} [ctx] - Passed through to `parseProductsCraftCellarsInner`.
 * @returns {Array<object>} Result of `parseProductsCraftCellarsInner`.
 */
function parseProductsCraftCellars(html, ctx) {
  const s = String(html || "");

  const g1 = s.match(/<div\b[^>]*id=["']ProductGridContainer["'][^>]*>[\s\S]*?<\/div>/i)?.[0] || "";
  const g2 = s.match(/<div\b[^>]*id=["']product-grid["'][^>]*>[\s\S]*?<\/div>/i)?.[0] || "";

  const gridCandidate = g1.length > g2.length ? g1 : g2;
  const grid = /\/products\//i.test(gridCandidate) ? gridCandidate : s;

  return parseProductsCraftCellarsInner(grid, ctx);
}
/**
 * Extract `{ name, price, url, img }` records from listing HTML.
 *
 * Candidate blocks are `<li>` elements; if fewer than five are found,
 * `<div class="... card ...">` elements are used instead. Blocks without a
 * `/products/` link or without a usable name are skipped. Results are
 * de-duplicated by canonical URL (last occurrence wins).
 *
 * @param {*} html - HTML fragment; falsy values are treated as "".
 * @param {object} [ctx] - `ctx.store.host` sets the base for relative links
 *   (defaults to "craftcellars.ca").
 * @returns {Array<{name: string, price: string, url: string, img: *}>}
 */
function parseProductsCraftCellarsInner(html, ctx) {
  const s = String(html || "");
  const items = [];

  let blocks = [...s.matchAll(/<li\b[^>]*>[\s\S]*?<\/li>/gi)].map((m) => m[0]);
  if (blocks.length < 5) {
    blocks = [...s.matchAll(/<div\b[^>]*class=["'][^"']*\bcard\b[^"']*["'][^>]*>[\s\S]*?<\/div>/gi)].map(
      (m) => m[0],
    );
  }

  const base = `https://${(ctx && ctx.store && ctx.store.host) || "craftcellars.ca"}/`;

  for (const block of blocks) {
    const href =
      block.match(/<a\b[^>]*href=["']([^"']*\/products\/[^"']+)["']/i)?.[1] ||
      block.match(/href=["']([^"']*\/products\/[^"']+)["']/i)?.[1];
    if (!href) continue;

    let url = "";
    try {
      url = new URL(decodeHtml(href), base).toString();
    } catch {
      // Unparseable href: skip this card.
      continue;
    }
    url = canonicalizeCraftProductUrl(url);

    // Name candidates, most specific first: text of the first child element
    // of the product link, then a link inside h2/h3, then any product link.
    const nameHtml =
      block.match(/<a\b[^>]*href=["'][^"']*\/products\/[^"']+["'][^>]*>\s*<[^>]*>\s*([^<]{2,200}?)\s*</i)?.[1] ||
      block.match(
        /<h[23]\b[^>]*>[\s\S]*?<a\b[^>]*\/products\/[^"']+[^>]*>([\s\S]*?)<\/a>[\s\S]*?<\/h[23]>/i,
      )?.[1] ||
      block.match(/<a\b[^>]*href=["'][^"']*\/products\/[^"']+["'][^>]*>([\s\S]*?)<\/a>/i)?.[1];

    const name = sanitizeName(stripTags(decodeHtml(nameHtml || "")));
    if (!name) continue;

    const price = extractShopifyCardPrice(block);
    const img = extractFirstImgUrl(block, base);

    items.push({ name, price, url, img });
  }

  const uniq = new Map();
  for (const it of items) uniq.set(it.url, it);
  return [...uniq.values()];
}
/**
 * Format a price string as "$#,###.##".
 *
 * Everything except digits and "." is removed before parsing.
 * Note: an input with no digits parses as 0 and yields "$0.00".
 *
 * @param {*} s - Price text such as "1234.5".
 * @returns {string} Formatted amount, or "" when the cleaned text is not a finite number.
 */
function usdFromShopifyPriceStr(s) {
  const n = Number(String(s || "").replace(/[^0-9.]/g, ""));
  if (!Number.isFinite(n)) return "";
  return `$${n.toLocaleString("en-US", {
    minimumFractionDigits: 2,
    maximumFractionDigits: 2,
  })}`;
}
/**
 * Return `v` when it is a finite number, otherwise `fallback`.
 * Uses `Number.isFinite`, so numeric strings are NOT accepted.
 *
 * @param {*} v - Candidate config value.
 * @param {*} fallback - Value used when `v` is not a finite number.
 * @returns {*} `v` or `fallback`.
 */
function cfgNum(v, fallback) {
  return Number.isFinite(v) ? v : fallback;
}
/* ---------- NEW: product page SKU extractor ---------- */
/**
 * Find the SKU on a product page.
 *
 * Looks for `<strong>SKU:</strong> ... <span>VALUE</span>` first, then falls
 * back to plain "SKU: VALUE" text. The captured text is tag-stripped,
 * entity-decoded and passed through `normalizeCspc`.
 *
 * @param {*} html - Product page HTML; falsy values are treated as "".
 * @returns {*} Result of `normalizeCspc` on the captured text ("" when nothing matched).
 */
function extractCraftSkuFromProductPageHtml(html) {
  const s = String(html || "");

  // allow any content between </strong> and <span> (including newlines, other tags)
  const m =
    s.match(/<strong>\s*SKU:\s*<\/strong>[\s\S]{0,200}?<span>\s*([^<]{1,80}?)\s*<\/span>/i) ||
    s.match(/\bSKU:\s*([A-Za-z0-9][A-Za-z0-9\-_/ ]{0,40})/i);

  const raw = m && m[1] ? stripTags(decodeHtml(m[1])) : "";
  return normalizeCspc(raw);
}
/**
@ -158,283 +139,233 @@ function extractCraftSkuFromProductPageHtml(html) {
* - product page HTML is final SKU fallback
*/
async function scanCategoryCraftCellars(ctx, prevDb, report) {
  // Phases:
  //   1. HTML listing pages decide WHICH products exist (htmlMap).
  //   2. Collection products.json enriches those products with sku/price/img (jsonMap).
  //   3. Product pages are fetched only for items whose SKU still needs detail.
  //   4. Merge into the previous DB, persist, and record results in `report`.
  const t0 = Date.now();

  // Delay knobs come from the category config; non-finite values fall back.
  const perPageDelayMs = Math.max(0, cfgNum(ctx?.cat?.pageStaggerMs, cfgNum(ctx?.cat?.discoveryDelayMs, 0))) || 0;
  const perJsonPageDelayMs = Math.max(0, cfgNum(ctx?.cat?.jsonPageDelayMs, perPageDelayMs));

  /* ---------- 1. HTML listing ---------- */
  const htmlMap = new Map();

  const maxPages = ctx.config.maxPages === null ? 200 : Math.min(ctx.config.maxPages, 200);

  let htmlPagesFetched = 0;
  let emptyStreak = 0;

  for (let p = 1; p <= maxPages; p++) {
    if (p > 1 && perPageDelayMs > 0) await sleep(perPageDelayMs);

    const pageUrl = makePageUrlShopifyQueryPage(ctx.cat.startUrl, p);
    const { text: html } = await ctx.http.fetchTextWithRetry(
      pageUrl,
      `craft:html:${ctx.cat.key}:p${p}`,
      ctx.store.ua,
    );
    htmlPagesFetched++;

    if (craftCellarsIsEmptyListingPage(html)) break;

    const items = parseProductsCraftCellars(html, ctx);
    if (!items.length) {
      // Tolerate one page that parses to nothing; stop after two in a row.
      emptyStreak++;
      if (emptyStreak >= 2) break;
      continue;
    }
    emptyStreak = 0;

    for (const it of items) {
      const url = canonicalizeCraftProductUrl(it.url);
      if (!url) continue;
      htmlMap.set(url, {
        name: it.name || "",
        price: it.price || "",
        url,
        img: it.img || "",
      });
    }
  }

  if (!htmlMap.size) {
    ctx.logger.warn(`${ctx.catPrefixOut} | HTML listing returned 0 items; refusing JSON-only discovery`);
  }

  /* ---------- 2. JSON enrichment (only for URLs seen in HTML) ---------- */
  const jsonMap = new Map();

  if (htmlMap.size) {
    const start = new URL(ctx.cat.startUrl);
    const m = start.pathname.match(/^\/collections\/([^/]+)/i);
    if (!m) throw new Error(`CraftCellars: couldn't extract collection handle from ${ctx.cat.startUrl}`);
    const collectionHandle = m[1];

    const limit = 250;
    let jsonPage = 1;
    let jsonPagesFetched = 0;

    while (true) {
      if (jsonPage > 1 && perJsonPageDelayMs > 0) await sleep(perJsonPageDelayMs);

      const url = `https://${ctx.store.host}/collections/${collectionHandle}/products.json?limit=${limit}&page=${jsonPage}`;
      const r = await ctx.http.fetchJsonWithRetry(url, `craft:coljson:${ctx.cat.key}:p${jsonPage}`, ctx.store.ua);

      const products = Array.isArray(r?.json?.products) ? r.json.products : [];
      jsonPagesFetched++;

      if (!products.length) break;

      for (const p of products) {
        const handle = String(p?.handle || "");
        if (!handle) continue;

        const prodUrl = canonicalizeCraftProductUrl(`https://${ctx.store.host}/products/${handle}`);
        if (!htmlMap.has(prodUrl)) continue;

        // Prefer the first available variant, else the first variant.
        const variants = Array.isArray(p?.variants) ? p.variants : [];
        const v = variants.find((x) => x && x.available === true) || variants[0] || null;

        const sku = normalizeCspc(v?.sku || "");
        const price = v?.price ? usdFromShopifyPriceStr(v.price) : "";

        let img = "";
        const images = Array.isArray(p?.images) ? p.images : [];
        if (images[0]) {
          img = typeof images[0] === "string" ? images[0] : String(images[0]?.src || images[0]?.url || "");
        }
        if (!img && p?.image) img = String(p.image?.src || p.image?.url || p.image || "");
        img = String(img || "").trim();
        // Protocol-relative URL -> absolute https.
        if (img.startsWith("//")) img = `https:${img}`;

        jsonMap.set(prodUrl, { sku, price, img });
      }

      // A short page is the last page; 200 is a hard safety cap.
      if (products.length < limit) break;
      if (++jsonPage > 200) break;
    }

    ctx.logger.ok(`${ctx.catPrefixOut} | HTML pages=${htmlPagesFetched} JSON pages=${jsonPagesFetched}`);
  }

  const discovered = new Map();
  for (const [url, it] of htmlMap.entries()) {
    const j = jsonMap.get(url);
    const prev = prevDb?.byUrl?.get(url) || null;

    discovered.set(url, {
      name: it.name,
      price: j?.price || it.price || "",
      url,
      // reuse cached SKU unless we found something better this run
      sku: pickBetterSku(j?.sku || "", prev?.sku || ""),
      // reuse cached image if we didn't find one
      img: j?.img || it.img || prev?.img || "",
    });
  }

  /* ---------- 3. product page SKU fallback (cached; only when needed) ---------- */
  const perProductSkuDelayMs = Math.max(
    0,
    cfgNum(ctx?.cat?.skuPageDelayMs, cfgNum(ctx?.cat?.jsonPageDelayMs, perPageDelayMs)),
  );

  let skuPagesFetched = 0;

  for (const it of discovered.values()) {
    // only hit product pages when missing/synthetic
    if (!needsSkuDetail(it.sku)) continue;

    if (perProductSkuDelayMs > 0) await sleep(perProductSkuDelayMs);

    try {
      const { text } = await ctx.http.fetchTextWithRetry(
        it.url,
        `craft:prodpage:${ctx.cat.key}:${Buffer.from(it.url).toString("base64").slice(0, 24)}`,
        ctx.store.ua,
      );
      skuPagesFetched++;

      const sku = extractCraftSkuFromProductPageHtml(text);
      if (sku) it.sku = sku;
    } catch {
      /* best effort */
    }
  }

  ctx.logger.ok(`${ctx.catPrefixOut} | SKU fallback pages=${skuPagesFetched}`);

  ctx.logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}`);

  /* ---------- 4. merge, persist, report ---------- */
  const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, {
    storeLabel: ctx.store.name,
  });

  const dbObj = buildDbObject(ctx, merged);
  writeJsonAtomic(ctx.dbFile, dbObj);

  const elapsed = Date.now() - t0;

  report.categories.push({
    store: ctx.store.name,
    label: ctx.cat.label,
    key: ctx.cat.key,
    dbFile: ctx.dbFile,
    scannedPages: htmlPagesFetched,
    discoveredUnique: discovered.size,
    newCount: newItems.length,
    updatedCount: updatedItems.length,
    removedCount: removedItems.length,
    restoredCount: restoredItems.length,
    elapsedMs: elapsed,
  });

  report.totals.newCount += newItems.length;
  report.totals.updatedCount += updatedItems.length;
  report.totals.removedCount += removedItems.length;
  report.totals.restoredCount += restoredItems.length;

  addCategoryResultToReport(
    report,
    ctx.store.name,
    ctx.cat.label,
    newItems,
    updatedItems,
    removedItems,
    restoredItems,
  );
}
/**
 * Build the store descriptor for Craft Cellars.
 *
 * @param {string} defaultUa - User-Agent string stored as `ua`.
 * @returns {object} Store definition: identity fields, scan/parse hooks and
 *   the `categories` list with per-category delay settings (milliseconds).
 */
function createStore(defaultUa) {
  return {
    key: "craftcellars",
    name: "Craft Cellars",
    host: "craftcellars.ca",
    ua: defaultUa,

    scanCategory: scanCategoryCraftCellars,

    parseProducts: parseProductsCraftCellars,
    makePageUrl: makePageUrlShopifyQueryPage,
    isEmptyListingPage: craftCellarsIsEmptyListingPage,

    categories: [
      {
        key: "whisky",
        label: "Whisky",
        startUrl: "https://craftcellars.ca/collections/whisky?filter.v.availability=1",
        pageConcurrency: 1,
        pageStaggerMs: 10000,
        discoveryDelayMs: 10000,
        skuPageDelayMs: 12000,
      },
      {
        key: "rum",
        label: "Rum",
        startUrl: "https://craftcellars.ca/collections/rum?filter.v.availability=1",
        pageConcurrency: 1,
        pageStaggerMs: 10000,
        discoveryDelayMs: 10000,
        skuPageDelayMs: 12000,
      },
    ],
  };
}
module.exports = { createStore };

View file

@ -6,12 +6,12 @@ const { normalizeCspc, pickBetterSku, needsSkuDetail } = require("../utils/sku")
const { makePageUrl } = require("../utils/url");
/**
 * Decide from a product tile's HTML whether the item is in stock.
 *
 * `outofstock` anywhere in the block wins. Otherwise the item counts as in
 * stock when the block contains `in-stock`, `instock`, or a text node like
 * ">3 in stock<" (all case-insensitive).
 *
 * @param {*} block - Tile HTML; falsy values are treated as "".
 * @returns {boolean}
 */
function looksInStock(block) {
  const s = String(block || "");
  if (/\boutofstock\b/i.test(s)) return false;
  if (/\bin-stock\b/i.test(s)) return true;
  if (/\binstock\b/i.test(s)) return true;
  // `in-stock` was already ruled out above, so this is the last signal left.
  return />\s*\d+\s+in\s+stock\s*</i.test(s);
}
// Gull product tiles commonly contain two amounts:
@ -19,116 +19,111 @@ function looksInStock(block) {
// - deposit (e.g. 0.10) inside the "price suffix"
// We extract all amounts and pick the last one >= 1.00 (sale price if present).
/**
 * @param {*} block - Product tile HTML; falsy values are treated as "".
 * @returns {string} Price formatted as "$12.34", or "" when no amount >= 1.00 is found.
 */
function extractGullPriceFromBlock(block) {
  const s = String(block || "");
  const nums = [];

  // Match WooCommerce "Price amount" blocks, pull out the BDI contents,
  // then strip tags/entities and parse as float.
  const re =
    /<span\b[^>]*class=["'][^"']*\bwoocommerce-Price-amount\b[^"']*["'][^>]*>\s*<bdi\b[^>]*>([\s\S]*?)<\/bdi>/gi;

  for (const m of s.matchAll(re)) {
    const raw = cleanText(decodeHtml(m[1] || "")); // e.g. "$24.05"
    const n = parseFloat(String(raw).replace(/[^0-9.]/g, ""));
    if (Number.isFinite(n)) nums.push(n);
  }

  // Filter out bottle deposits / tiny fees (usually 0.10, 0.20, etc.)
  const big = nums.filter((n) => n >= 1.0);

  if (!big.length) return "";

  // If sale price exists, Woo often renders old then new; taking the last >=1
  // typically yields the current price.
  const chosen = big[big.length - 1];

  // Normalize formatting
  return `$${chosen.toFixed(2)}`;
}
// Gull SKUs are often NOT 6 digits (e.g. 67424).
// If it's not 6 digits, represent as id:<digits> to avoid normalizeCspc turning it into u:SHA.
/**
 * @param {*} raw - SKU text as scraped (may contain entities/whitespace).
 * @returns {string} Input unchanged if it already starts with "id:" or "u:";
 *   `normalizeCspc(digits)` for a 6-digit run; "id:<digits>" for any other
 *   3-10 digit run; otherwise `normalizeCspc` of the cleaned text.
 */
function normalizeGullSku(raw) {
  const s = cleanText(decodeHtml(String(raw || ""))).trim();

  // already in a stable prefixed form
  if (/^(id:|u:)/i.test(s)) return s;

  // digits-only SKU (from page / tile)
  const digits = s.match(/\b(\d{3,10})\b/)?.[1] || "";
  if (digits) {
    if (digits.length === 6) return normalizeCspc(digits);
    return `id:${digits}`;
  }

  // fall back to existing normalizer (may yield u:...)
  return normalizeCspc(s);
}
// When we fall back to normalizeCspc(url), we may end up with a generated u:XXXXXXXX.
/**
 * @param {*} sku - SKU value to inspect; falsy values are treated as "".
 * @returns {boolean} True when `sku` is exactly "u:" followed by 8-128 hex digits.
 */
function isGeneratedUrlSku(sku) {
  const s = String(sku || "");
  // you have u:8hex in the DB, so accept 8+
  return /^u:[0-9a-f]{8,128}$/i.test(s);
}
// Extract SKU from Gull product page HTML.
/**
 * @param {*} html - Product page HTML; falsy values are treated as "".
 * @returns {string} `normalizeGullSku` of the first 3-10 digit SKU found, or "".
 */
function extractGullSkuFromProductPage(html) {
  const s = String(html || "");

  // Most reliable: <span class="sku">67424</span>
  const m1 = s.match(/<span\b[^>]*class=["'][^"']*\bsku\b[^"']*["'][^>]*>\s*([0-9]{3,10})\s*<\/span>/i);
  if (m1?.[1]) return normalizeGullSku(m1[1]);

  // Fallback: "SKU: 67424" text
  const m2 = s.match(/\bSKU:\s*([0-9]{3,10})\b/i);
  if (m2?.[1]) return normalizeGullSku(m2[1]);

  return "";
}
// Serial limiter: ensures at least minIntervalMs between request starts.
/**
 * Create a scheduler that runs tasks one at a time, in call order, with at
 * least `minIntervalMs` between the start of one task and the start of the next.
 *
 * @param {number} minIntervalMs - Minimum gap between task starts, in ms.
 * @returns {(fn: () => any) => Promise<any>} `schedule(fn)`; resolves or
 *   rejects with the outcome of `fn`.
 */
function createMinIntervalLimiter(minIntervalMs) {
  let lastStart = 0;
  let chain = Promise.resolve();

  return async function schedule(fn) {
    const run = chain.then(async () => {
      const waitMs = Math.max(0, lastStart + minIntervalMs - Date.now());
      if (waitMs) await new Promise((r) => setTimeout(r, waitMs));
      lastStart = Date.now();
      return fn();
    });

    // The queue must survive a failing task: without this, one rejection would
    // leave `chain` rejected and every later task would reject without running.
    chain = run.catch(() => {});

    return run;
  };
}
/**
 * Fetch `url` as text, retrying only on HTTP 429.
 *
 * @param {string} url - Address to fetch.
 * @param {object} opts
 * @param {Function} opts.fetchFn - fetch-like function called as `fetchFn(url, { headers })`.
 * @param {object} opts.headers - Request headers forwarded to `fetchFn`.
 * @param {number} [opts.maxRetries=2] - Number of retries allowed after a 429.
 * @returns {Promise<string>} The response body text.
 * @throws {Error} `HTTP <status> fetching <url>` for a non-OK, non-429 response,
 *   or for a 429 once the retries are used up.
 */
async function fetchWith429Backoff(url, { fetchFn, headers, maxRetries = 2 }) {
  let attempt = 0;

  while (true) {
    const res = await fetchFn(url, { headers });

    if (res.status !== 429) {
      if (!res.ok) throw new Error(`HTTP ${res.status} fetching ${url}`);
      return await res.text();
    }

    if (attempt >= maxRetries) throw new Error(`HTTP 429 fetching ${url}`);

    // Respect Retry-After if present; otherwise progressive backoff.
    // Only the integer-seconds form is honoured; anything else falls back
    // to 15s, 30s, ... per attempt.
    const ra = res.headers && typeof res.headers.get === "function" ? res.headers.get("retry-after") : null;
    const waitSec = ra && /^\d+$/.test(ra) ? Number.parseInt(ra, 10) : 15 * (attempt + 1);
    await new Promise((r) => setTimeout(r, waitSec * 1000));
    attempt++;
  }
}
/**
 * Replace generated `u:...` SKUs on Gull items with the SKU found on the
 * product page. Items are updated in place.
 *
 * NEW: accepts prevDb so we can skip fetch if URL already has a good SKU cached.
 *
 * @param {Array<object>} items - Items carrying `url` and `sku`; entries that
 *   are falsy or have no `url` are skipped.
 * @param {object} [opts]
 * @param {Function} opts.fetchFn - fetch-like function (required).
 * @param {string} [opts.ua] - User-Agent header; defaults to "Mozilla/5.0".
 * @param {number} [opts.minIntervalMs=12000] - Minimum gap between product page fetches.
 * @param {number} [opts.maxRetries=2] - Retries allowed on HTTP 429.
 * @param {object} [opts.prevDb] - Previous DB; `prevDb.byUrl.get(url)` is consulted when present.
 * @returns {Promise<Array<object>>} The same `items` value that was passed in.
 * @throws {Error} When `opts.fetchFn` is missing, or when a product page fetch fails.
 */
async function hydrateGullSkus(items, { fetchFn, ua, minIntervalMs = 12000, maxRetries = 2, prevDb } = {}) {
  if (!fetchFn) throw new Error("hydrateGullSkus requires opts.fetchFn");

  const schedule = createMinIntervalLimiter(minIntervalMs);
  const headers = {
    "user-agent": ua || "Mozilla/5.0",
    accept: "text/html,application/xhtml+xml",
  };

  for (const it of items || []) {
    if (!it || !it.url) continue;

    // NEW: if DB already has a good SKU, reuse it and skip fetch
    const prev = prevDb?.byUrl?.get(it.url) || null;
    if (prev?.sku && !needsSkuDetail(prev.sku)) {
      it.sku = pickBetterSku(it.sku, prev.sku);
      continue;
    }

    if (!isGeneratedUrlSku(it.sku)) continue; // only where required

    const html = await schedule(() => fetchWith429Backoff(it.url, { fetchFn, headers, maxRetries }));
    const realSku = extractGullSkuFromProductPage(html);
    if (realSku) it.sku = pickBetterSku(realSku, it.sku);
  }

  return items;
}
/**
 * Parse a Gull category listing page into product items.
 *
 * @param {*} html - Listing page markup; falsy values are treated as "".
 * @param {object} [ctx] - Scan context; `ctx.store.host` overrides the default host
 *   used to resolve relative links.
 * @returns {Array<{name: string, price: *, url: string, sku: *, img: *}>}
 *   One item per distinct URL; when a URL repeats, the last block wins.
 */
function parseProductsGull(html, ctx) {
  const s = String(html || "");
  const items = [];

  // split on <li class="product ...">
  const parts = s.split(/<li\b[^>]*class=["'][^"']*\bproduct\b[^"']*["'][^>]*>/i);
  if (parts.length <= 1) return items;

  const base = `https://${(ctx && ctx.store && ctx.store.host) || "gullliquorstore.com"}/`;

  // parts[0] is whatever precedes the first product tile.
  for (let i = 1; i < parts.length; i++) {
    const block = '<li class="product"' + parts[i];

    if (!looksInStock(block)) continue;

    const hrefM = block.match(
      /<a\b[^>]*href=["']([^"']+)["'][^>]*class=["'][^"']*\bwoocommerce-LoopProduct-link\b/i,
    );
    if (!hrefM || !hrefM[1]) continue;

    let url;
    try {
      url = new URL(decodeHtml(hrefM[1]), base).toString();
    } catch {
      // Unparseable href: drop the tile.
      continue;
    }

    const titleM = block.match(
      /<h2\b[^>]*class=["'][^"']*\bwoocommerce-loop-product__title\b[^"']*["'][^>]*>([\s\S]*?)<\/h2>/i,
    );
    const name = cleanText(decodeHtml(titleM ? titleM[1] : ""));
    if (!name) continue;

    const price = extractGullPriceFromBlock(block);

    const skuRaw =
      block.match(/\bdata-product_sku=["']([^"']+)["']/i)?.[1] ||
      block.match(/\bSKU\b[^0-9]{0,30}(\d{3,10})\b/i)?.[1] ||
      url; // OK fallback; hydrateGullSkus will only re-fetch when this becomes u:...

    const sku = normalizeGullSku(skuRaw);
    const img = extractFirstImgUrl(block, base);

    items.push({ name, price, url, sku, img });
  }

  const uniq = new Map();
  for (const it of items) uniq.set(it.url, it);
  return [...uniq.values()];
}
/**
 * Build the Gull Liquor store definition.
 *
 * @param {string} defaultUa - User-Agent stored on the definition as `ua`.
 * @returns {object} Store definition with parser, SKU hydration hook, paging
 *   helper and category list.
 */
function createStore(defaultUa) {
  return {
    key: "gull",
    name: "Gull Liquor",
    host: "gullliquorstore.com",
    ua: defaultUa,
    parseProducts: parseProductsGull,

    // Optional hook callers can use to post-process items:
    // only hits product pages when sku is u:...
    hydrateSkus: hydrateGullSkus,
    productPageMinIntervalMs: 12000, // slow by default; Gull is strict

    makePageUrl, // enables /page/N/ paging
    categories: [
      {
        key: "whisky",
        label: "Whisky",
        startUrl: "https://gullliquorstore.com/product-category/spirits/?spirit_type=whisky",
        discoveryStartPage: 3,
        discoveryStep: 2,
        pageConcurrency: 1,
        pageStaggerMs: 10000,
        discoveryDelayMs: 10000,
      },
      {
        key: "rum",
        label: "Rum",
        startUrl: "https://gullliquorstore.com/product-category/spirits/?spirit_type=rum",
        discoveryStartPage: 3,
        discoveryStep: 2,
        pageConcurrency: 1,
        pageStaggerMs: 10000,
        discoveryDelayMs: 10000,
      },
    ],
  };
}
module.exports = {
createStore,
parseProductsGull,
hydrateGullSkus,
extractGullSkuFromProductPage,
isGeneratedUrlSku,
normalizeGullSku,
createStore,
parseProductsGull,
hydrateGullSkus,
extractGullSkuFromProductPage,
isGeneratedUrlSku,
normalizeGullSku,
};

View file

@ -18,24 +18,24 @@ const { createStore: createWillowPark } = require("./willowpark");
const { createStore: createArc } = require("./arc");
/**
 * Build every store definition, in a fixed order.
 *
 * @param {object} [opts]
 * @param {string} [opts.defaultUa] - User-Agent handed to each store factory.
 * @returns {Array<object>} One store definition per factory.
 */
function createStores({ defaultUa } = {}) {
  return [
    createKWM(defaultUa),
    createCraftCellars(defaultUa),
    createSierra(defaultUa),
    createGull(defaultUa),
    createCoop(defaultUa),
    createStrath(defaultUa),
    createBCL(defaultUa),
    createBSW(defaultUa),
    createWillowPark(defaultUa),
    createVessel(defaultUa),
    createMaltsAndGrains(defaultUa),
    createKegNCork(defaultUa),
    createTudor(defaultUa),
    createVintage(defaultUa),
    createLegacy(defaultUa),
    createArc(defaultUa),
  ];
}
// Public surface of the stores index.
// NOTE(review): the import of parseProductsSierra is not visible in this hunk — confirm it is in scope.
module.exports = { createStores, parseProductsSierra };

View file

@ -4,75 +4,74 @@ const { decodeHtml, cleanText, stripTags, extractFirstImgUrl } = require("../uti
const { makePageUrlQueryParam } = require("../utils/url");
/**
 * Build the URL of listing page `pageNum` using the `page` query parameter.
 *
 * @param {string} baseUrl - Category start URL.
 * @param {number} pageNum - Page number to address.
 * @returns {*} Whatever makePageUrlQueryParam returns for the `page` parameter.
 */
function makePageUrlKegNCork(baseUrl, pageNum) {
  return makePageUrlQueryParam(baseUrl, "page", pageNum);
}
/**
 * Parse a Keg N Cork listing page into product items.
 *
 * @param {*} html - Listing page markup; falsy values are treated as "".
 * @param {object} [ctx] - Scan context; `ctx.store.host` overrides the default
 *   host and `ctx.logger.dbg` receives a debug line when present.
 * @returns {Array<{name: string, price: string, url: string, img: *}>}
 *   One item per distinct URL; when a URL repeats, the last block wins.
 */
function parseProductsKegNCork(html, ctx) {
  const s = String(html || "");
  const items = [];

  const base = `https://${(ctx && ctx.store && ctx.store.host) || "kegncork.com"}/`;

  const blocks = s.split(/<li\b[^>]*class=["'][^"']*\bproduct\b[^"']*["'][^>]*>/i);
  // ctx is optional (see `base` above), so guard it here as well.
  ctx?.logger?.dbg?.(`parseProductsKegNCork: li.product blocks=${Math.max(0, blocks.length - 1)} bytes=${s.length}`);

  // blocks[0] is whatever precedes the first product tile.
  for (let i = 1; i < blocks.length; i++) {
    const block = "<li" + blocks[i];

    const mTitle = block.match(
      /<h4\b[^>]*class=["'][^"']*\bcard-title\b[^"']*["'][^>]*>[\s\S]*?<a\b[^>]*href=["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>/i,
    );
    if (!mTitle) continue;

    const url = decodeHtml(mTitle[1]).trim();
    const name = cleanText(decodeHtml(mTitle[2]));
    // Only absolute http(s) links are kept; relative hrefs are dropped.
    if (!url || !/^https?:\/\//i.test(url) || !name) continue;

    let price = "";
    const mPrice = block.match(/data-product-price-without-tax[^>]*>\s*([^<]+)\s*</i);
    if (mPrice && mPrice[1]) {
      const p = cleanText(decodeHtml(mPrice[1])).replace(/\s+/g, "");
      if (p) price = p.startsWith("$") ? p : `$${p}`;
    } else {
      // No tagged price: take the first dollar amount in the price section.
      const priceSection = block.match(/data-test-info-type=["']price["'][\s\S]*?<\/div>\s*<\/div>/i)?.[0] || "";
      const mDollar = cleanText(decodeHtml(stripTags(priceSection))).match(/\$\s*\d+(?:\.\d{2})?/);
      if (mDollar) price = mDollar[0].replace(/\s+/g, "");
    }

    const img = extractFirstImgUrl(block, base);

    items.push({ name, price, url, img });
  }

  const uniq = new Map();
  for (const it of items) uniq.set(it.url, it);
  return [...uniq.values()];
}
/**
 * Build the Keg N Cork store definition.
 *
 * @param {string} defaultUa - User-Agent stored on the definition as `ua`.
 * @returns {object} Store definition with parser, paging helper and categories.
 */
function createStore(defaultUa) {
  return {
    key: "kegncork",
    name: "Keg N Cork",
    host: "kegncork.com",
    ua: defaultUa,
    parseProducts: parseProductsKegNCork,
    makePageUrl: makePageUrlKegNCork,
    categories: [
      {
        key: "whisky",
        label: "Whisky",
        startUrl: "https://kegncork.com/whisky/?page=1",
        discoveryStartPage: 5,
      },
      {
        key: "rum",
        label: "Rum",
        startUrl: "https://kegncork.com/rum/?page=1",
        discoveryStartPage: 1,
      },
    ],
  };
}
// Only the store factory is exported from this module.
module.exports = { createStore };

View file

@ -6,184 +6,186 @@ const { normalizeCspc } = require("../utils/sku");
const { normalizeBaseUrl } = require("../utils/url");
/**
 * Build the URL of listing page `pageNum` for Kensington Wine Market.
 *
 * @param {string} baseUrl - Category start URL; passed through normalizeBaseUrl first.
 * @param {number} pageNum - Page number. Values <= 1 drop the `page` parameter.
 * @returns {string} URL without a hash, with `page` set for pages above 1.
 */
function makePageUrlKWM(baseUrl, pageNum) {
  const u = new URL(normalizeBaseUrl(baseUrl));
  u.hash = "";

  if (pageNum <= 1) {
    // The first page is addressed without a `page` parameter.
    u.searchParams.delete("page");
    u.search = u.searchParams.toString() ? `?${u.searchParams.toString()}` : "";
    return u.toString();
  }

  u.searchParams.set("page", String(pageNum));
  u.search = `?${u.searchParams.toString()}`;
  return u.toString();
}
/**
 * Collect `<div>` elements whose class list contains `className`, each
 * returned from its opening tag through its matching `</div>`.
 *
 * Nesting is tracked by counting `<div` and `</div>` occurrences. A div with
 * no balancing close is not emitted.
 *
 * @param {*} html - Markup to scan; falsy values are treated as "".
 * @param {string} className - Class token to look for (escaped via escapeRe).
 * @param {number} maxBlocks - Stop once this many blocks have been collected.
 * @returns {string[]} Outer HTML of each matched div, in document order.
 */
function extractDivBlocksByExactClass(html, className, maxBlocks) {
  const out = [];
  const s = String(html || "");

  const re = new RegExp(`<div\\b[^>]*class=["'][^"']*\\b${escapeRe(className)}\\b[^"']*["'][^>]*>`, "gi");

  let m;
  while ((m = re.exec(s))) {
    if (out.length >= maxBlocks) break;

    const startTagEnd = m.index + m[0].length;
    let i = startTagEnd;
    let depth = 1;

    while (i < s.length) {
      const nextOpen = s.indexOf("<div", i);
      const nextClose = s.indexOf("</div>", i);
      if (nextClose === -1) break;

      if (nextOpen !== -1 && nextOpen < nextClose) {
        // A nested div opens before the next close.
        depth++;
        i = nextOpen + 4;
        continue;
      }

      depth--;
      if (depth === 0) {
        out.push(s.slice(m.index, nextClose + 6));
        // Resume the search after this block so divs inside it are not matched again.
        re.lastIndex = nextClose + 6;
        break;
      }
      i = nextClose + 6;
    }
  }

  return out;
}
/**
 * Find the href of the `product-link` anchor inside a product block.
 *
 * Empty anchors (`<a ...></a>`) are tried first, then any anchor carrying the
 * class; both attribute orders (class before href, href before class) are accepted.
 *
 * @param {string} block - HTML of one product block.
 * @returns {string} Trimmed href, or "" when no such anchor is found.
 */
function kwmExtractProductLinkHref(block) {
  let m =
    block.match(/<a\b[^>]*class=["'][^"']*\bproduct-link\b[^"']*["'][^>]*href=["']([^"']+)["'][^>]*>\s*<\/a>/i) ||
    block.match(/<a\b[^>]*href=["']([^"']+)["'][^>]*class=["'][^"']*\bproduct-link\b[^"']*["'][^>]*>\s*<\/a>/i);
  if (m && m[1]) return m[1].trim();

  m =
    block.match(/<a\b[^>]*class=["'][^"']*\bproduct-link\b[^"']*["'][^>]*href=["']([^"']+)["']/i) ||
    block.match(/<a\b[^>]*href=["']([^"']+)["'][^>]*class=["'][^"']*\bproduct-link\b[^"']*["']/i);

  return m && m[1] ? m[1].trim() : "";
}
/**
 * Extract the product name from a product block.
 *
 * @param {string} block - HTML of one product block.
 * @returns {*} The sanitized `data-item` attribute when present, otherwise the
 *   sanitized, tag-stripped content of the first `<h6>`, otherwise "".
 */
function kwmExtractName(block) {
  const dataItem = extractHtmlAttr(block, "data-item");
  if (dataItem) return sanitizeName(dataItem);

  const m = block.match(/<h6\b[^>]*>\s*([\s\S]*?)\s*<\/h6>/i);
  if (m && m[1]) return sanitizeName(stripTags(m[1]));

  return "";
}
/**
 * Return the inner HTML of the first `<div>` whose class list contains `className`.
 *
 * Nesting is tracked by counting `<div` and `</div>` occurrences.
 *
 * @param {string} html - Markup to scan.
 * @param {string} className - Class token to look for (escaped via escapeRe).
 * @returns {string} Content between the opening tag and its matching close, or
 *   "" when the div is missing or never closed.
 */
function kwmExtractFirstDivByClass(html, className) {
  const re = new RegExp(`<div\\b[^>]*class=["'][^"']*\\b${escapeRe(className)}\\b[^"']*["'][^>]*>`, "i");
  const m = re.exec(html);
  if (!m) return "";
  const start = m.index + m[0].length;

  let i = start;
  let depth = 1;
  while (i < html.length) {
    const nextOpen = html.indexOf("<div", i);
    const nextClose = html.indexOf("</div>", i);
    if (nextClose === -1) break;

    if (nextOpen !== -1 && nextOpen < nextClose) {
      // A nested div opens before the next close.
      depth++;
      i = nextOpen + 4;
      continue;
    }
    depth--;
    if (depth === 0) return html.slice(start, nextClose);
    i = nextClose + 6;
  }
  return "";
}
/**
 * Extract the displayed price from a product block.
 *
 * @param {string} block - HTML of one product block.
 * @returns {string} `$<amount>` with two decimals from `data-price`, otherwise
 *   the first dollar amount in the `product-price` div (struck-through spans
 *   removed first), otherwise "".
 */
function kwmExtractPrice(block) {
  const m = block.match(/\bdata-price=["']([^"']+)["']/i);
  if (m && m[1]) {
    const raw = String(m[1]).trim();
    const n = raw.replace(/[^0-9.]/g, "");
    const value = Number(n);
    // Values such as "1.2.3" or "." are not numbers; fall through to the
    // visible price instead of emitting "$NaN".
    if (n && Number.isFinite(value)) return `$${value.toFixed(2)}`;
  }

  const priceDiv = kwmExtractFirstDivByClass(block, "product-price");
  if (!priceDiv) return "";

  // Drop struck-through prices so they are not picked up as the price.
  const cleaned = String(priceDiv).replace(
    /<span\b[^>]*class=["'][^"']*\bstrike\b[^"']*["'][^>]*>[\s\S]*?<\/span>/gi,
    " ",
  );

  const txt = cleanText(decodeHtml(stripTags(cleaned)));
  const dollars = [...txt.matchAll(/\$\s*\d+(?:\.\d{2})?/g)];
  if (dollars.length) return dollars[0][0].replace(/\s+/g, "");

  return "";
}
/**
 * Parse a Kensington Wine Market listing page into product items.
 *
 * @param {*} html - Listing page markup; falsy values are treated as "".
 * @param {object} [ctx] - Scan context; `ctx.store.host` overrides the default
 *   host and `ctx.logger.dbg` receives a debug line when present.
 * @returns {Array<{name: *, price: string, url: string, sku: *, img: *}>}
 *   One item per distinct URL; when a URL repeats, the last block wins.
 */
function parseProductsKWM(html, ctx) {
  const s = String(html || "");
  const base = `https://${(ctx && ctx.store && ctx.store.host) || "kensingtonwinemarket.com"}/`;

  const blocks = extractDivBlocksByExactClass(s, "product-wrap", 5000);
  // ctx is optional (see `base` above), so guard it here as well.
  ctx?.logger?.dbg?.(`parseProductsKWM: productWrapBlocks=${blocks.length} bytes=${s.length}`);

  const items = [];
  for (const block of blocks) {
    if (/OUT OF STOCK/i.test(block)) continue;

    const href = kwmExtractProductLinkHref(block);
    if (!href) continue;

    let url;
    try {
      url = new URL(decodeHtml(href), base).toString();
    } catch {
      // Unparseable href: drop the block.
      continue;
    }

    const name = kwmExtractName(block);
    if (!name) continue;

    const price = kwmExtractPrice(block);
    const sku = normalizeCspc(url);
    const img = extractFirstImgUrl(block, base);

    items.push({ name, price, url, sku, img });
  }

  const uniq = new Map();
  for (const it of items) uniq.set(it.url, it);
  return [...uniq.values()];
}
/**
 * Build the Kensington Wine Market store definition.
 *
 * @param {string} defaultUa - User-Agent stored on the definition as `ua`.
 * @returns {object} Store definition with parser, paging helper and categories.
 */
function createStore(defaultUa) {
  return {
    key: "kwm",
    name: "Kensington Wine Market",
    host: "kensingtonwinemarket.com",
    ua: defaultUa,
    parseProducts: parseProductsKWM,
    makePageUrl: makePageUrlKWM,
    categories: [
      {
        key: "scotch",
        label: "Scotch",
        startUrl: "https://kensingtonwinemarket.com/products/scotch/",
        discoveryStartPage: 200,
      },
      {
        key: "rum",
        label: "Rum",
        startUrl: "https://kensingtonwinemarket.com/products/liqu/rum/",
        discoveryStartPage: 20,
      },
    ],
  };
}
// Only the store factory is exported from this module.
module.exports = { createStore };

View file

@ -9,57 +9,57 @@ const { buildDbObject, writeJsonAtomic } = require("../tracker/db");
const { addCategoryResultToReport } = require("../tracker/report");
/**
 * Format a byte count for the aligned progress log.
 *
 * @param {*} bytes - Value handed to humanBytes.
 * @returns {string} humanBytes output, left-padded with spaces to 8 columns.
 */
function kbStr(bytes) {
  return humanBytes(bytes).padStart(8, " ");
}
/**
 * Normalize a Legacy Liquor SKU into the tracker's key format.
 *
 * Resolution order: normalizeCspc result, then `id:<digits>` for the first
 * standalone run of 1-11 digits, then normalizeSkuKey.
 *
 * @param {*} rawSku - SKU as delivered by the API; null/undefined are treated as "".
 * @param {object} opts
 * @param {string} opts.storeLabel - Forwarded to normalizeSkuKey.
 * @param {string} opts.url - Forwarded to normalizeSkuKey.
 * @returns {*} "" for blank input, otherwise the first key the order above produces.
 */
function normalizeLegacySku(rawSku, { storeLabel, url }) {
  const raw = String(rawSku ?? "").trim();
  if (!raw) return "";

  const cspc = normalizeCspc(raw);
  if (cspc) return cspc;

  const m = raw.match(/\b(\d{1,11})\b/);
  if (m && m[1]) return `id:${m[1]}`;

  return normalizeSkuKey(raw, { storeLabel, url });
}
/**
 * Format a duration in milliseconds as seconds for the aligned progress log.
 *
 * @param {number} ms - Duration in milliseconds; non-finite values count as 0.
 * @returns {string} One decimal below 10s (`"1.2s"`), whole seconds otherwise
 *   (`"12s"`), left-padded with spaces to 7 columns.
 */
function secStr(ms) {
  const s = Number.isFinite(ms) ? ms / 1000 : 0;
  // Decide on the rounded tenths so 9.96s prints as "10s", not "10.0s".
  const tenths = Math.round(s * 10) / 10;
  const out = tenths < 10 ? `${tenths.toFixed(1)}s` : `${Math.round(s)}s`;
  return out.padStart(7, " ");
}
/**
 * Format a page counter as `<i>/<total>`.
 *
 * @param {number} i - Current page number.
 * @param {number} total - Total page count.
 * @returns {string} `i` passed through padLeft with the width of `total`, then `/total`.
 */
function pageStr(i, total) {
  const leftW = String(total).length;
  return `${padLeft(i, leftW)}/${total}`;
}
/**
 * Format progress as a whole-number percentage.
 *
 * @param {number} done - Completed units.
 * @param {number} total - Total units; a falsy total yields 0%.
 * @returns {string} Floored percentage passed through padLeft with width 3, then `%`.
 */
function pctStr(done, total) {
  const pct = total ? Math.floor((done / total) * 100) : 0;
  return `${padLeft(pct, 3)}%`;
}
/**
 * Format a numeric amount as a dollar string with two decimals.
 *
 * @param {*} n - Amount; numbers and numeric strings are accepted.
 * @returns {string} `$<amount>` (e.g. `"$12.50"`), or "" when the value is
 *   missing or not a finite number.
 */
function cad(n) {
  // null and blank strings coerce to 0 under Number(); treat them as missing
  // so callers chaining `cad(a) || cad(b)` reach the next candidate.
  if (n == null || (typeof n === "string" && n.trim() === "")) return "";
  const x = Number(n);
  if (!Number.isFinite(x)) return "";
  return `$${x.toFixed(2)}`;
}
/**
 * Turn a possibly relative URL into an absolute one.
 *
 * @param {*} raw - URL candidate; falsy values are treated as "".
 * @returns {string} "" for blank input; protocol-relative URLs get `https:`;
 *   http(s) URLs are returned unchanged (trimmed); anything else is resolved
 *   against https://www.legacyliquorstore.com/, or returned trimmed if that fails.
 */
function normalizeAbsUrl(raw) {
  const s = String(raw || "").trim();
  if (!s) return "";
  if (s.startsWith("//")) return `https:${s}`;
  if (/^https?:\/\//i.test(s)) return s;
  try {
    return new URL(s, "https://www.legacyliquorstore.com/").toString();
  } catch {
    // Best effort: hand back the input when it cannot be resolved.
    return s;
  }
}
// GraphQL endpoint that legacyFetchPage POSTs the product query to.
const LEGACY_GQL_URL = "https://production-storefront-api-hagnfhf3sq-uc.a.run.app/graphql";
@ -140,184 +140,201 @@ query(
`;
/**
 * Pick the first variant of a product that has stock.
 *
 * @param {object} p - Product; `p.variants` is used when it is an array.
 * @returns {object|null} The first variant whose `quantity` converts to a
 *   finite number above 0, or null when there is none.
 */
function pickInStockVariant(p) {
  const vars = Array.isArray(p?.variants) ? p.variants : [];
  for (const v of vars) {
    const q = Number(v?.quantity);
    if (Number.isFinite(q) && q > 0) return v;
  }
  return null;
}
/**
 * Convert one Legacy Liquor API product into a tracker item.
 *
 * @param {object} p - Product record from the GraphQL response.
 * @param {object} ctx - Scan context; reads `ctx.cat.key` and `ctx.store.name`.
 * @returns {{name: string, price: string, url: string, sku: *, img: string}|null}
 *   The item, or null when the product has no in-stock variant, no slug or no name.
 */
function legacyProductToItem(p, ctx) {
  const v = pickInStockVariant(p);
  if (!v) return null;

  const slug = String(p?.slug || "").trim();
  if (!slug) return null;

  const base = "https://www.legacyliquorstore.com";
  // Matches observed pattern: /LL/product/spirits/<category>/<slug>
  const url = new URL(
    `/LL/product/spirits/${encodeURIComponent(ctx.cat.key)}/${encodeURIComponent(slug)}`,
    base,
  ).toString();

  // Prefer the variant's full name; otherwise join product name and variant short name.
  const nameRaw =
    String(v?.fullName || "").trim() ||
    [String(p?.name || "").trim(), String(v?.shortName || "").trim()].filter(Boolean).join(" | ");
  const name = String(nameRaw || "").trim();
  if (!name) return null;

  const price = cad(v?.price) || cad(p?.priceFrom) || cad(p?.priceTo) || "";

  // Fall back to a URL-derived key when the variant SKU yields nothing.
  const sku =
    normalizeLegacySku(v?.sku, { storeLabel: ctx.store.name, url }) ||
    normalizeLegacySku(url, { storeLabel: ctx.store.name, url }) ||
    "";
  const img = normalizeAbsUrl(v?.image || "");

  return { name, price, url, sku, img };
}
/**
 * Fetch one page of products from the Legacy Liquor GraphQL API.
 *
 * @param {object} ctx - Scan context; reads `ctx.cat.allTags`, `ctx.cat.key`,
 *   `ctx.store.ua` and calls `ctx.http.fetchJsonWithRetry`.
 * @param {string|null} pageCursor - Cursor of the page to fetch; falsy for the first page.
 * @param {number} pageLimit - Page size sent to the API.
 * @returns {Promise<*>} Whatever `ctx.http.fetchJsonWithRetry` resolves to.
 */
async function legacyFetchPage(ctx, pageCursor, pageLimit) {
  const body = {
    query: PRODUCTS_QUERY,
    variables: {
      allTags: ctx.cat.allTags || null,
      anyTags: null,
      collectionSlug: null,
      countries: null,
      isBestSeller: null,
      isNewArrival: null,
      isFeatured: null,
      isFeaturedOnHomepage: null,
      isOnSale: null,
      isStaffPick: null,
      pageCursor: pageCursor || null,
      pageLimit: pageLimit,
      pointsMin: null,
      priceMin: null,
      priceMax: null,
      quantityMin: null,
      regions: null,
      brandValue: null,
      searchValue: null,
      sortOrder: "asc",
      sortBy: "name",
      storeId: "LL",
    },
  };

  return await ctx.http.fetchJsonWithRetry(
    LEGACY_GQL_URL,
    `legacy:${ctx.cat.key}:${pageCursor || "first"}`,
    ctx.store.ua,
    {
      method: "POST",
      headers: {
        Accept: "application/json",
        "content-type": "application/json",
        Origin: "https://www.legacyliquorstore.com",
        Referer: "https://www.legacyliquorstore.com/",
      },
      body: JSON.stringify(body),
    },
  );
}
/**
 * Scan one Legacy Liquor category through the GraphQL API, merge the result
 * into the category DB, write it to disk and record the outcome in `report`.
 *
 * @param {object} ctx - Scan context; reads `ctx.config.maxPages`, `ctx.logger`,
 *   `ctx.catPrefixOut`, `ctx.store`, `ctx.cat`, `ctx.http` and `ctx.dbFile`.
 * @param {*} prevDb - Previous DB state handed to mergeDiscoveredIntoDb.
 * @param {object} report - Run report; `report.categories` and `report.totals` are updated.
 * @returns {Promise<void>}
 */
async function scanCategoryLegacyLiquor(ctx, prevDb, report) {
  const t0 = Date.now();
  const pageLimit = 100;

  const discovered = new Map();

  let cursor = null;
  let page = 0;
  let done = 0;

  // A null maxPages means "no configured limit"; cap it at 5000 pages.
  const maxPagesCap = ctx.config.maxPages === null ? 5000 : ctx.config.maxPages;

  while (page < maxPagesCap) {
    page++;

    let r;
    try {
      r = await legacyFetchPage(ctx, cursor, pageLimit);
    } catch (e) {
      // NOTE(review): after a failed fetch the loop stops and the partial
      // `discovered` map is still merged below — confirm this is intended.
      ctx.logger.warn(`${ctx.catPrefixOut} | LegacyLiquor fetch failed p${page}: ${e?.message || e}`);
      break;
    }

    const items = r?.json?.data?.products?.items;
    const next = r?.json?.data?.products?.nextPageCursor;

    const arr = Array.isArray(items) ? items : [];
    let kept = 0;

    for (const p of arr) {
      const it = legacyProductToItem(p, ctx);
      if (!it) continue;
      discovered.set(it.url, it);
      kept++;
    }

    done++;
    // The total page count is unknown up front, so progress is logged as done/done.
    ctx.logger.ok(
      `${ctx.catPrefixOut} | Page ${pageStr(done, done)} | ${String(r.status || "").padEnd(3)} | ${pctStr(done, done)} | kept=${padLeft(kept, 3)} | bytes=${kbStr(r.bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}`,
    );

    if (!next || !arr.length) break;
    if (next === cursor) break; // safety
    cursor = next;
  }

  const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, {
    storeLabel: ctx.store.name,
  });

  const dbObj = buildDbObject(ctx, merged);
  writeJsonAtomic(ctx.dbFile, dbObj);

  const elapsed = Date.now() - t0;
  ctx.logger.ok(
    `${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`,
  );

  report.categories.push({
    store: ctx.store.name,
    label: ctx.cat.label,
    key: ctx.cat.key,
    dbFile: ctx.dbFile,
    scannedPages: Math.max(1, page),
    discoveredUnique: discovered.size,
    newCount: newItems.length,
    updatedCount: updatedItems.length,
    removedCount: removedItems.length,
    restoredCount: restoredItems.length,
    elapsedMs: elapsed,
  });
  report.totals.newCount += newItems.length;
  report.totals.updatedCount += updatedItems.length;
  report.totals.removedCount += removedItems.length;
  report.totals.restoredCount += restoredItems.length;

  addCategoryResultToReport(
    report,
    ctx.store.name,
    ctx.cat.label,
    newItems,
    updatedItems,
    removedItems,
    restoredItems,
  );
}
/**
 * Build the store descriptor for Legacy Liquor.
 *
 * @param {string} defaultUa - User-Agent value stored on the descriptor as `ua`.
 * @returns {object} Descriptor with the store identity (`key`, `name`, `host`), the
 *   store-specific `scanCategory` implementation, and the list of categories to scan.
 */
function createStore(defaultUa) {
    return {
        key: "legacyliquor",
        name: "Legacy Liquor",
        host: "www.legacyliquorstore.com",
        ua: defaultUa,
        // This store supplies its own category scanner instead of a `parseProducts` hook.
        scanCategory: scanCategoryLegacyLiquor,
        categories: [
            {
                key: "whisky",
                label: "Whisky",
                startUrl: "https://www.legacyliquorstore.com/LL/category/spirits/whisky",
                allTags: ["spirits", "whisky"],
            },
            {
                key: "rum",
                label: "Rum",
                startUrl: "https://www.legacyliquorstore.com/LL/category/spirits/rum",
                allTags: ["spirits", "rum"],
            },
        ],
    };
}
// Public surface of this store module: only the store factory is exported.
module.exports = { createStore };

View file

@ -5,103 +5,104 @@ const { normalizeCspc } = require("../utils/sku");
const { extractPriceFromTmbBlock } = require("../utils/woocommerce");
/**
 * Item filter for the Malts & Grains "All Spirits" category.
 *
 * Rejects items explicitly marked out of stock (`inStock === false`) and items whose
 * category slugs contain `gin`, `tequila` or `mezcal` as a whole word. Everything else
 * (including a missing item or an item without a `cats` array) is accepted.
 *
 * @param {{ inStock?: boolean, cats?: unknown[] } | null | undefined} item
 * @returns {boolean} `true` when the item should be kept.
 */
function allowMaltsExcludeGinTequilaMezcal(item) {
    if (item && item.inStock === false) return false;

    const cats = Array.isArray(item?.cats) ? item.cats : [];
    // Word boundaries keep slugs such as "virgin-..." from matching "gin".
    const has = (re) => cats.some((c) => re.test(String(c || "")));

    if (has(/\bgin\b/i)) return false;
    if (has(/\btequila\b/i) || has(/\bmezcal\b/i)) return false;

    return true;
}
/**
 * Parse a Malts & Grains shop listing page into product items.
 *
 * Each `<li class="... product ...">` block is inspected; blocks flagged as out of stock,
 * or lacking a usable product link or title, are skipped. Category slugs are collected
 * from `product_cat-<slug>` class names so a category filter can inspect `item.cats`.
 *
 * @param {string} html - Raw HTML of the listing page (falsy values are treated as "").
 * @param {object} [ctx] - Scan context; `ctx.store.host` (optional) sets the base used to
 *   resolve relative links, `ctx.logger.dbg` (optional) receives a debug line.
 * @returns {Array<{name: string, price: *, url: string, sku: *, img: *, cats: string[], inStock: boolean}>}
 *   Items de-duplicated by `url` (the last occurrence of a URL wins).
 */
function parseProductsMaltsAndGrains(html, ctx) {
    const s = String(html || "");
    const items = [];

    const re = /<li\b[^>]*class=["'][^"']*\bproduct\b[^"']*["'][^>]*>[\s\S]*?<\/li>/gi;
    const blocks = [...s.matchAll(re)].map((m) => m[0] || "");
    // `ctx` is treated as optional below, so guard the logger access the same way.
    ctx?.logger?.dbg?.(`parseProductsMaltsAndGrains: li.product blocks=${blocks.length} bytes=${s.length}`);

    const base = `https://${ctx?.store?.host || "maltsandgrains.store"}/`;

    for (const block of blocks) {
        const classAttr = extractHtmlAttr(block, "class");

        const isOut =
            /\boutofstock\b/i.test(classAttr) ||
            /ast-shop-product-out-of-stock/i.test(block) ||
            />\s*out of stock\s*</i.test(block);
        if (isOut) continue;

        const cats = [];
        for (const m of String(classAttr || "").matchAll(/\bproduct_cat-([a-z0-9_-]+)\b/gi)) {
            const v = String(m[1] || "")
                .trim()
                .toLowerCase();
            if (v) cats.push(v);
        }

        // Prefer the loop-product anchor (href before or after class), then any /product/ link.
        const href =
            block.match(
                /<a\b[^>]*href=["']([^"']+)["'][^>]*class=["'][^"']*\b(woocommerce-LoopProduct-link|woocommerce-loop-product__link|ast-loop-product__link)\b/i,
            )?.[1] ||
            block.match(
                /<a\b[^>]*class=["'][^"']*\b(woocommerce-LoopProduct-link|woocommerce-loop-product__link|ast-loop-product__link)\b[^"']*["'][^>]*href=["']([^"']+)["']/i,
            )?.[2] ||
            block.match(/<a\b[^>]*href=["']([^"']*\/product\/[^"']+)["']/i)?.[1];

        if (!href) continue;

        let url = "";
        try {
            url = new URL(decodeHtml(href), base).toString();
        } catch {
            // An unparsable href only invalidates this one product.
            continue;
        }
        if (!/^https?:\/\//i.test(url)) continue;

        const mTitle = block.match(
            /<h2\b[^>]*class=["'][^"']*\bwoocommerce-loop-product__title\b[^"']*["'][^>]*>([\s\S]*?)<\/h2>/i,
        );
        const name = mTitle && mTitle[1] ? cleanText(decodeHtml(stripTags(mTitle[1]))) : "";
        if (!name) continue;

        const price = extractPriceFromTmbBlock(block);

        const sku = normalizeCspc(
            block.match(/\bdata-product_sku=["']([^"']+)["']/i)?.[1] ||
                block.match(/\bSKU[:\s]*([0-9]{6})\b/i)?.[1] ||
                "",
        );

        const img = extractFirstImgUrl(block, base);

        // Out-of-stock blocks were skipped above, so everything emitted here is in stock.
        items.push({ name, price, url, sku, img, cats, inStock: true });
    }

    const uniq = new Map();
    for (const it of items) uniq.set(it.url, it);
    return [...uniq.values()];
}
/**
 * Build the store descriptor for Malts & Grains.
 *
 * @param {string} defaultUa - User-Agent value stored on the descriptor as `ua`.
 * @returns {object} Descriptor with the store identity, the page parser, and a single
 *   category covering the whole shop, filtered by `allowMaltsExcludeGinTequilaMezcal`.
 */
function createStore(defaultUa) {
    return {
        key: "maltsandgrains",
        name: "Malts & Grains",
        host: "maltsandgrains.store",
        ua: defaultUa,
        parseProducts: parseProductsMaltsAndGrains,
        categories: [
            {
                key: "all-minus-gin-tequila-mezcal",
                label: "All Spirits",
                startUrl: "https://maltsandgrains.store/shop/page/1/",
                discoveryStartPage: 15,
                allowUrl: allowMaltsExcludeGinTequilaMezcal,
            },
        ],
    };
}
// Public surface of this store module: only the store factory is exported.
module.exports = { createStore };

View file

@ -10,226 +10,222 @@ const { mergeDiscoveredIntoDb } = require("../tracker/merge");
const { addCategoryResultToReport } = require("../tracker/report");
/**
 * Item filter that keeps only Sierra Springs products whose URL mentions rum or whisky.
 *
 * The URL is lower-cased, must start with `http(s)://sierraspringsliquor.ca/`, and must
 * contain `rum`, `whisky` or `whiskey` as a whole word.
 *
 * @param {{ url?: string } | null | undefined} item
 * @returns {boolean} `true` when the item should be kept.
 */
function allowSierraUrlRumWhisky(item) {
    const url = item && item.url ? String(item.url) : "";
    const lowered = url.toLowerCase();
    if (!/^https?:\/\/sierraspringsliquor\.ca\//.test(lowered)) return false;
    return /\b(rum|whisk(?:e)?y)\b/.test(lowered);
}
// Backward-compatible alias: the "spirits-liquor" category config in this file still refers
// to the filter by its older, longer name, so keep that name bound to the same function.
const allowSierraSpiritsLiquorUrlRumWhisky = allowSierraUrlRumWhisky;
/**
 * Format a `prices` object (amounts expressed in minor currency units) as a display string.
 *
 * The first non-nullish of `price`, `regular_price`, `sale_price` is used. All non-digit
 * characters are stripped from it, the result is divided by 10^minorUnit, and the
 * currency prefix/suffix are attached.
 *
 * @param {object | null | undefined} prices - Object with optional `price`, `regular_price`,
 *   `sale_price`, `currency_minor_unit`, `currency_prefix`, `currency_symbol`, `currency_suffix`.
 * @returns {string | null} e.g. `"$19.99"`, or `null` when there is no usable amount.
 */
function formatWooStorePrice(prices) {
    if (!prices) return null;

    // toFixed() throws a RangeError outside 0..100, so fall back to 2 for anything unusual.
    const declaredMinor = prices.currency_minor_unit;
    const minor =
        Number.isInteger(declaredMinor) && declaredMinor >= 0 && declaredMinor <= 100 ? declaredMinor : 2;

    const raw = prices.price ?? prices.regular_price ?? prices.sale_price;
    if (raw == null) return null;

    const digits = String(raw).replace(/[^\d]/g, "");
    // Number("") is 0, which would otherwise be reported as a real "$0.00" price.
    if (!digits) return null;

    const n = Number(digits);
    if (!Number.isFinite(n)) return null;

    const value = (n / Math.pow(10, minor)).toFixed(minor);
    const prefix = prices.currency_prefix ?? prices.currency_symbol ?? "$";
    const suffix = prices.currency_suffix ?? "";
    return `${prefix}${value}${suffix}`;
}
/**
 * Parse a JSON product-list payload (an array of product objects) into items.
 *
 * Products without a `permalink` or a non-empty cleaned `name` are skipped. The SKU is the
 * trimmed `sku` string when present, otherwise the product `id` (when truthy); purely
 * numeric values of up to 11 digits are tagged with an `id:` prefix before being handed
 * to `normalizeSkuKey`.
 *
 * @param {string} payload - JSON text. Invalid JSON or a non-array value yields `[]`.
 * @param {object} [ctx] - Scan context; `ctx.cat.allowUrl` (optional) filters items and
 *   `ctx.store.name` is passed to `normalizeSkuKey` as `storeLabel`.
 * @returns {Array<{name: string, price: string|null, url: string, sku: *, img: string|null}>}
 *   Items de-duplicated by `url` (the last occurrence wins).
 */
function parseWooStoreProductsJson(payload, ctx) {
    const items = [];

    let data = null;
    try {
        data = JSON.parse(payload);
    } catch {
        // Deliberate best effort: a payload that is not JSON simply contributes no items.
        return items;
    }

    if (!Array.isArray(data)) return items;

    const allowUrl = ctx?.cat?.allowUrl;

    for (const p of data) {
        const url = p && p.permalink ? String(p.permalink) : "";
        if (!url) continue;

        const name = p.name ? cleanText(decodeHtml(String(p.name))) : "";
        if (!name) continue;

        const price = formatWooStorePrice(p.prices);

        let rawSku = "";
        if (typeof p.sku === "string" && p.sku.trim()) rawSku = p.sku.trim();
        else if (p.id) rawSku = String(p.id);

        const taggedSku = /^\d{1,11}$/.test(rawSku) ? `id:${rawSku}` : rawSku;
        const sku = normalizeSkuKey(taggedSku, { storeLabel: ctx?.store?.name, url });

        const img = Array.isArray(p.images) && p.images[0]?.src ? String(p.images[0].src) : null;

        const item = { name, price, url, sku, img };

        if (typeof allowUrl === "function" && !allowUrl(item)) continue;

        items.push(item);
    }

    const uniq = new Map();
    for (const it of items) uniq.set(it.url, it);
    return [...uniq.values()];
}
/**
 * Parse a WooCommerce-style HTML product grid (`<li class="product ...">`) into items.
 *
 * The page is split on `<li`; chunks whose class list contains `product` but not
 * `product-category` are treated as products. The first link that is not a cart,
 * checkout or add-to-cart link becomes the product URL.
 *
 * @param {string} html - Raw page HTML (falsy values are treated as "").
 * @param {object} [ctx] - Scan context; `ctx.store.host` (optional) is the base for relative
 *   links, `ctx.cat.allowUrl` (optional) filters items.
 * @returns {Array<{name: string, price: *, url: string, sku: *, img: *}>}
 *   Items de-duplicated by `url` (the last occurrence wins).
 */
function parseWooProductsHtml(html, ctx) {
    const s = String(html || "");
    const items = [];

    const base = `https://${ctx?.store?.host || "sierraspringsliquor.ca"}/`;
    const parts = s.split(/<li\b/i);
    const allowUrl = ctx?.cat?.allowUrl;

    // parts[0] is whatever precedes the first <li>, so start at 1.
    for (let i = 1; i < parts.length; i++) {
        const chunk = "<li" + parts[i];

        if (!/class=["'][^"']*\bproduct\b/i.test(chunk)) continue;
        if (/class=["'][^"']*\bproduct-category\b/i.test(chunk)) continue;

        const endIdx = chunk.search(/<\/li>/i);
        // "+ 5" keeps the closing "</li>" itself inside the block.
        const block = endIdx >= 0 ? chunk.slice(0, endIdx + 5) : chunk;

        const hrefs = [...block.matchAll(/<a\b[^>]*href=["']([^"']+)["']/gi)].map((m) => m[1]);
        const href = hrefs.find((h) => !/add-to-cart=|\/cart\/|\/checkout\//i.test(h)) || "";
        if (!href) continue;

        let url;
        try {
            url = new URL(decodeHtml(href), base).toString();
        } catch {
            // A malformed href should cost one product, not the whole page.
            continue;
        }

        const nameHtml =
            block.match(
                /<h2\b[^>]*class=["'][^"']*woocommerce-loop-product__title[^"']*["'][^>]*>([\s\S]*?)<\/h2>/i,
            )?.[1] ||
            block.match(/<h3\b[^>]*>([\s\S]*?)<\/h3>/i)?.[1] ||
            "";
        const name = cleanText(decodeHtml(nameHtml));
        if (!name) continue;

        const price = extractPriceFromTmbBlock(block);

        const rawSku =
            block.match(/\bdata-product_sku=["']([^"']+)["']/i)?.[1] ||
            block.match(/\bdata-product_id=["']([^"']+)["']/i)?.[1] ||
            "";

        const trimmedSku = String(rawSku || "").trim();
        // Purely numeric values are tagged with an "id:" prefix before normalisation.
        const taggedSku = /^\d{1,11}$/.test(trimmedSku) ? `id:${trimmedSku}` : trimmedSku;

        const sku = normalizeSkuKey(taggedSku, { storeLabel: ctx?.store?.name, url });
        const img = extractFirstImgUrl(block, base);

        const item = { name, price, url, sku, img };

        if (typeof allowUrl === "function" && !allowUrl(item)) continue;

        items.push(item);
    }

    const uniq = new Map();
    for (const it of items) uniq.set(it.url, it);
    return [...uniq.values()];
}
/**
 * Parse a Sierra Springs response body, whichever of three shapes it has:
 *
 * 1. JSON (body starts with `[` or `{`) is delegated to `parseWooStoreProductsJson`.
 * 2. HTML containing `<div class="tmb ...">` tiles is parsed here, tile by tile.
 * 3. Any other HTML is delegated to `parseWooProductsHtml`.
 *
 * @param {string} body - Response text (falsy values are treated as "").
 * @param {object} [ctx] - Scan context; `ctx.store.host`, `ctx.store.name`,
 *   `ctx.cat.allowUrl` and `ctx.logger.dbg` are all optional.
 * @returns {Array<{name: string, price: *, url: string, sku: *, img: *}>}
 *   Items de-duplicated by `url` (the last occurrence wins).
 */
function parseProductsSierra(body, ctx) {
    const s = String(body || "");
    const t = s.trimStart();

    if (t.startsWith("[") || t.startsWith("{")) {
        const jsonItems = parseWooStoreProductsJson(s, ctx);
        ctx?.logger?.dbg?.(`parseProductsSierra: storeApiItems=${jsonItems.length} bytes=${s.length}`);
        return jsonItems;
    }

    const blocks = s.split(/<div class="tmb\b/i);
    ctx?.logger?.dbg?.(`parseProductsSierra: tmbBlocks=${Math.max(0, blocks.length - 1)} bytes=${s.length}`);

    const base = `https://${ctx?.store?.host || "sierraspringsliquor.ca"}/`;

    if (blocks.length > 1) {
        const items = [];
        const allowUrl = ctx?.cat?.allowUrl;

        // blocks[0] is the markup before the first tile.
        for (let i = 1; i < blocks.length; i++) {
            const block = '<div class="tmb' + blocks[i];

            const titleMatch = block.match(
                /<h3\b[^>]*class=["'][^"']*t-entry-title[^"']*["'][^>]*>\s*<a\b[^>]*href=["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>\s*<\/h3>/i,
            );
            if (!titleMatch) continue;

            let url;
            try {
                url = new URL(decodeHtml(titleMatch[1]), base).toString();
            } catch {
                // A malformed href should cost one tile, not the whole page.
                continue;
            }

            const name = cleanText(decodeHtml(titleMatch[2]));
            if (!name) continue;

            const price = extractPriceFromTmbBlock(block);

            const rawSku =
                block.match(/\bdata-product_sku=["']([^"']+)["']/i)?.[1] ||
                block.match(/\bSKU[:\s]*([0-9]{6})\b/i)?.[1] ||
                "";

            // Purely numeric values are tagged with an "id:" prefix before normalisation.
            const taggedSku = /^\d{1,11}$/.test(String(rawSku).trim()) ? `id:${String(rawSku).trim()}` : rawSku;

            const sku = normalizeSkuKey(taggedSku, { storeLabel: ctx?.store?.name, url });
            const img = extractFirstImgUrl(block, base);

            const item = { name, price, url, sku, img };

            if (typeof allowUrl === "function" && !allowUrl(item)) continue;

            items.push(item);
        }

        const uniq = new Map();
        for (const it of items) uniq.set(it.url, it);
        return [...uniq.values()];
    }

    const woo = parseWooProductsHtml(s, ctx);
    ctx?.logger?.dbg?.(`parseProductsSierra: wooItems=${woo.length} bytes=${s.length}`);
    return woo;
}
/**
 * Extract the numeric `term-<id>` class from a category page's HTML.
 *
 * A `term-<digits>` that follows `tax-product_cat` within the same quoted attribute
 * (up to 400 characters) is preferred; otherwise the first `term-<digits>` anywhere
 * in the document is used.
 *
 * @param {string} html - Page HTML (falsy values are treated as "").
 * @returns {number | null} The term id, or `null` when none is found.
 */
function extractProductCatTermId(html) {
    const s = String(html || "");
    // Typical body classes contain: "tax-product_cat term-<slug> term-1131 ..."
    const m = s.match(/tax-product_cat[^"']{0,400}\bterm-(\d{1,10})\b/i) || s.match(/\bterm-(\d{1,10})\b/i);
    if (!m) return null;
    const n = Number(m[1]);
    return Number.isFinite(n) ? n : null;
}
/**
 * Resolve the numeric product-category id for `ctx.cat`.
 *
 * Resolution order:
 * 1. `ctx.cat.wooCategoryId`, a manual override set in the category config.
 * 2. `ctx.cat._wooCategoryId`, a value cached on the category object by an earlier call.
 * 3. Fetch `ctx.cat.startUrl` and read the id from the page via `extractProductCatTermId`.
 *
 * The result of step 3 is stored on `ctx.cat._wooCategoryId` (`null` when nothing was found).
 *
 * @param {object} ctx - Scan context; step 3 uses `ctx.http.fetchTextWithRetry`,
 *   `ctx.store.ua`, `ctx.logger` and `ctx.catPrefixOut`.
 * @returns {Promise<number | null>} The category id, or `null` if it could not be inferred.
 */
async function getWooCategoryIdForCat(ctx) {
    // allow manual override if you ever want it
    if (Number.isFinite(ctx?.cat?.wooCategoryId)) return ctx.cat.wooCategoryId;

    // cache per category object
    if (Number.isFinite(ctx?.cat?._wooCategoryId)) return ctx.cat._wooCategoryId;

    // infer from the HTML category page so startUrl stays stable (DB filenames stay stable)
    const { text, finalUrl } = await ctx.http.fetchTextWithRetry(ctx.cat.startUrl, "discover", ctx.store.ua);
    const id = extractProductCatTermId(text);

    if (!id) {
        ctx.logger.warn(
            `${ctx.catPrefixOut} | Could not infer product_cat term id from category page; falling back to HTML parsing only.`,
        );
        ctx.cat._wooCategoryId = null;
        return null;
    }

    ctx.logger.ok(`${ctx.catPrefixOut} | Woo category id: ${id} (${finalUrl || ctx.cat.startUrl})`);
    ctx.cat._wooCategoryId = id;
    return id;
}
/**
 * Scan one category through the store's `/wp-json/wc/store/v1/products` JSON endpoint
 * while keeping original startUrl (so DB hashes and "source" stay unchanged).
 *
 * Pages are requested sequentially (`per_page` = `ctx.cat.perPage` or 100, at most 500
 * pages) until a page comes back empty or shorter than `per_page`. Discovered items are
 * merged into the previous DB, the DB file is rewritten, and the per-category and total
 * counters on `report` are updated.
 *
 * @param {object} ctx - Scan context (`store`, `cat`, `http`, `logger`, `config`, `dbFile`,
 *   `catPrefixOut`).
 * @param {*} prevDb - Previous DB state, passed through to `mergeDiscoveredIntoDb`.
 * @param {object} report - Mutable run report; `report.categories` and `report.totals`
 *   are updated in place.
 * @returns {Promise<void>} Resolves early, leaving `report` untouched, when no category id
 *   can be resolved.
 */
async function scanCategoryWooStoreApi(ctx, prevDb, report) {
    const { logger } = ctx;
    const t0 = Date.now();

    const perPage = Number.isFinite(ctx.cat.perPage) ? ctx.cat.perPage : 100;
    const discovered = new Map();

    const catId = await getWooCategoryIdForCat(ctx);
    if (!catId) return;

    const apiBase = new URL(`https://${ctx.store.host}/wp-json/wc/store/v1/products`);
    apiBase.searchParams.set("per_page", String(perPage));
    apiBase.searchParams.set("category", String(catId));

    const hardCap = 500;
    let page = 1;

    const allow = ctx?.cat?.allowUrl;

    // IMPORTANT:
    // Parse WITHOUT allowUrl so pagination is based on real API page size
    const ctxNoFilter = typeof allow === "function" ? { ...ctx, cat: { ...ctx.cat, allowUrl: null } } : ctx;

    while (page <= hardCap) {
        apiBase.searchParams.set("page", String(page));
        const pageUrl = apiBase.toString();

        const { text, status, bytes, ms, finalUrl } = await ctx.http.fetchTextWithRetry(
            pageUrl,
            `page:${ctx.store.key}:${ctx.cat.key}:${page}`,
            ctx.store.ua,
        );

        const itemsAll = (ctx.store.parseProducts || ctx.config.defaultParseProducts)(text, ctxNoFilter, finalUrl);

        const rawCount = itemsAll.length;

        // Now apply allowUrl AFTER pagination logic
        const items = [];
        for (const it of itemsAll) {
            if (typeof allow === "function" && !allow(it)) continue;
            items.push(it);
        }

        logger.ok(
            `${ctx.catPrefixOut} | Page ${String(page).padStart(3, " ")} | ${String(status).padStart(3, " ")} | raw=${String(rawCount).padStart(3, " ")} kept=${String(items.length).padStart(3, " ")} | bytes=${String(bytes || 0).padStart(8, " ")} | ${(ms / 1000).toFixed(1).padStart(6, " ")}s`,
        );

        // Stop only when the API page itself is empty
        if (!rawCount) break;

        for (const it of items) discovered.set(it.url, it);

        // Last page if API returned fewer than perPage
        if (rawCount < perPage) break;

        page++;
    }

    logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}`);

    const { merged, newItems, updatedItems, removedItems, restoredItems, metaChangedItems } = mergeDiscoveredIntoDb(
        prevDb,
        discovered,
        { storeLabel: ctx.store.name },
    );

    const dbObj = buildDbObject(ctx, merged);
    writeJsonAtomic(ctx.dbFile, dbObj);

    logger.ok(`${ctx.catPrefixOut} | DB saved: ${logger.dim(ctx.dbFile)} (${dbObj.count} items)`);

    const elapsedMs = Date.now() - t0;

    report.categories.push({
        store: ctx.store.name,
        label: ctx.cat.label,
        key: ctx.cat.key,
        dbFile: ctx.dbFile,
        scannedPages: Math.max(0, page),
        discoveredUnique: discovered.size,
        newCount: newItems.length,
        updatedCount: updatedItems.length,
        removedCount: removedItems.length,
        restoredCount: restoredItems.length,
        metaChangedCount: metaChangedItems.length,
        elapsedMs,
    });

    report.totals.newCount += newItems.length;
    report.totals.updatedCount += updatedItems.length;
    report.totals.removedCount += removedItems.length;
    report.totals.restoredCount += restoredItems.length;
    report.totals.metaChangedCount += metaChangedItems.length;

    addCategoryResultToReport(
        report,
        ctx.store.name,
        ctx.cat.label,
        newItems,
        updatedItems,
        removedItems,
        restoredItems,
    );
}
/**
 * Build the store descriptor for Sierra Springs.
 *
 * @param {string} defaultUa - User-Agent value stored on the descriptor as `ua`.
 * @returns {object} Descriptor with the store identity, the body parser, the store-specific
 *   `scanCategory` override, and four categories.
 */
function createStore(defaultUa) {
    const ua = defaultUa;

    return {
        key: "sierrasprings",
        name: "Sierra Springs",
        host: "sierraspringsliquor.ca",
        ua,
        parseProducts: parseProductsSierra,

        // store-only override (no changes outside this file)
        scanCategory: scanCategoryWooStoreApi,

        // RESTORED: original 4 categories, unchanged startUrl so DB hashes match
        categories: [
            {
                key: "whisky",
                label: "Whisky",
                startUrl: "https://sierraspringsliquor.ca/product-category/whisky-2/",
                discoveryStartPage: 1,
                perPage: 100,
            },
            {
                key: "fine-rare",
                label: "Fine & Rare",
                startUrl: "https://sierraspringsliquor.ca/product-category/fine-rare/",
                discoveryStartPage: 1,
                perPage: 100,
            },
            {
                key: "spirits-liquor",
                label: "Spirits / Liquor",
                startUrl: "https://sierraspringsliquor.ca/product-category/spirits-liquor/",
                discoveryStartPage: 1,
                perPage: 100,
                // Only this category is narrowed to rum/whisky product URLs.
                allowUrl: allowSierraSpiritsLiquorUrlRumWhisky,
            },
            {
                key: "spirits",
                label: "Spirits",
                startUrl: "https://sierraspringsliquor.ca/product-category/spirits/",
                discoveryStartPage: 1,
                perPage: 100,
            },
        ],
    };
}
// Public surface of this store module: the store factory plus the body parser.
module.exports = { createStore, parseProductsSierra };

View file

@ -10,502 +10,508 @@ const { buildDbObject, writeJsonAtomic } = require("../tracker/db");
const { addCategoryResultToReport } = require("../tracker/report");
/**
 * Format a byte count with `humanBytes` and left-pad it to 8 characters for log columns.
 *
 * @param {number} bytes
 * @returns {string}
 */
function kbStr(bytes) {
    return humanBytes(bytes).padStart(8, " ");
}
/**
 * Format a duration in milliseconds as seconds, left-padded to 7 characters.
 *
 * Durations that round to less than 10.0s keep one decimal (`"1.2s"`); longer ones are
 * rounded to whole seconds (`"13s"`). Non-finite input is treated as 0.
 *
 * @param {number} ms
 * @returns {string}
 */
function secStr(ms) {
    const seconds = Number.isFinite(ms) ? ms / 1000 : 0;
    const tenths = Math.round(seconds * 10) / 10;
    const text = tenths < 10 ? `${tenths.toFixed(1)}s` : `${Math.round(seconds)}s`;
    return text.padStart(7, " ");
}
/**
 * Render a page counter as `"<i>/<total>"`, padding `i` (via `padLeft`) to the width
 * of `total`.
 *
 * @param {number} i - Current page number.
 * @param {number} total - Total number of pages.
 * @returns {string}
 */
function pageStr(i, total) {
    const leftW = String(total).length;
    return `${padLeft(i, leftW)}/${total}`;
}
/**
 * Render progress as a whole-number percentage (rounded down), padded to 3 characters
 * via `padLeft` and followed by `%`. A falsy `total` yields 0%.
 *
 * @param {number} done
 * @param {number} total
 * @returns {string}
 */
function pctStr(done, total) {
    const pct = total ? Math.floor((done / total) * 100) : 0;
    return `${padLeft(pct, 3)}%`;
}
/**
 * Split HTML into chunks that each start at an `<article` tag.
 *
 * Each chunk runs to the next `<article` tag (or the end of the input), so the last
 * chunk also contains any markup that trails the final article. The opening tag is
 * re-emitted in lower case regardless of how it was written.
 *
 * @param {string} html - Page HTML (falsy values are treated as "").
 * @returns {string[]} One string per `<article` occurrence; `[]` when there is none.
 */
function extractArticles(html) {
    const parts = String(html || "").split(/<article\b/i);
    // parts[0] is whatever precedes the first <article>.
    return parts.slice(1).map((rest) => "<article" + rest);
}
/**
 * Find the first dollar amount in a string and return it in compact form.
 *
 * Whitespace after the `$` and thousands separators are removed, so `"$ 1,234.56"`
 * becomes `"$1234.56"`. Cents are kept only when exactly two digits follow the dot.
 *
 * @param {string} str - Text to search (falsy values are treated as "").
 * @returns {string} The normalised price, or `""` when no dollar amount is present.
 */
function normalizePrice(str) {
    const s = String(str || "");
    // The comma-grouped form must contain at least one ",ddd" group. If that group were
    // optional, "$1234.56" would match as "$123" and the plain-digits form would never run.
    const m = s.match(/\$\s*(?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d{2})?/);
    if (!m) return "";
    return m[0].replace(/\s+/g, "").replace(/,/g, "");
}
/**
 * Pick the price to record for one product article.
 *
 * Any `<div>` whose class contains `whiskyfolk-price` is removed first (the local name
 * `noMember` suggests this is member-only pricing; confirm against the site markup).
 * The remaining markup is then searched in this order:
 * 1. a dollar amount inside `<ins>`,
 * 2. the text of an element with class `regular-price-card`,
 * 3. the content of a `product-price` div, or the whole article if there is none.
 *
 * @param {string} articleHtml - HTML of a single article (falsy values are treated as "").
 * @returns {string} Price as produced by `normalizePrice` (`""` when none is found).
 */
function pickPriceFromArticle(articleHtml) {
    const a = String(articleHtml || "");
    const noMember = a.replace(/<div\b[^>]*class=["'][^"']*\bwhiskyfolk-price\b[^"']*["'][^>]*>[\s\S]*?<\/div>/gi, " ");

    const ins = noMember.match(/<ins\b[^>]*>[\s\S]*?(\$[\s\S]{0,32}?)<\/ins>/i);
    if (ins && ins[1]) return normalizePrice(ins[1]);

    const reg = noMember.match(/class=["'][^"']*\bregular-price-card\b[^"']*["'][^>]*>\s*([^<]+)/i);
    if (reg && reg[1]) return normalizePrice(reg[1]);

    const priceDiv = noMember.match(/<div\b[^>]*class=["'][^"']*\bproduct-price\b[^"']*["'][^>]*>([\s\S]*?)<\/div>/i);
    const scope = priceDiv && priceDiv[1] ? priceDiv[1] : noMember;

    return normalizePrice(scope);
}
/**
 * Extracts a numeric product id from one product `<article>` chunk.
 *
 * Sources are tried in order: the `id` attribute of the `<article>` tag, a
 * `post-<n>` token anywhere in the chunk, then a `data-product_id` attribute.
 * Only ids of 1-10 digits are accepted.
 *
 * @param {string} articleHtml - HTML of a single article.
 * @returns {number} The id, or 0 when none of the patterns match.
 */
function extractProductIdFromArticle(articleHtml) {
  const a = String(articleHtml || "");

  const patterns = [
    /<article\b[^>]*\bid=["'](\d{1,10})["']/i,
    /\bpost-(\d{1,10})\b/i,
    /\bdata-product_id=["'](\d{1,10})["']/i,
  ];
  for (const re of patterns) {
    const m = a.match(re);
    if (m && m[1]) return Number(m[1]);
  }

  return 0;
}
/**
 * Extracts a six-digit SKU from one product `<article>` chunk.
 *
 * Tries the `data-product_sku` attribute first, then the literal word "SKU"
 * followed by up to 20 non-digit characters and exactly six digits.
 *
 * @param {string} articleHtml - HTML of a single article.
 * @returns {string} The six-digit SKU, or "" when none is found.
 */
function extractSkuFromArticle(articleHtml) {
  const a = String(articleHtml || "");

  const fromAttr = a.match(/\bdata-product_sku=["'](\d{6})["']/i);
  if (fromAttr && fromAttr[1]) return fromAttr[1];

  const fromText = a.match(/\bSKU\b[^0-9]{0,20}(\d{6})\b/i);
  if (fromText && fromText[1]) return fromText[1];

  return "";
}
/**
 * Derives a synthetic `id:<n>` key from an image URL.
 *
 * Uses the first "/<digits>" run (1-11 digits) that is immediately followed
 * by "-" or "_", e.g. "/1487-1_..." or "/1487_..." => "id:1487".
 *
 * @param {string} imgUrl - Image URL (nullish is treated as "").
 * @returns {string} "id:<digits>", or "" when the URL has no such segment.
 */
function idFromImageUrl(imgUrl) {
  const s = String(imgUrl || "");
  const m = s.match(/\/(\d{1,11})(?=[-_])/);
  return m && m[1] ? `id:${m[1]}` : "";
}
/**
 * Heuristically decides whether a product `<article>` chunk is in stock.
 *
 * Negative markers are checked first and always win: the token "outofstock"
 * or the phrase "Currently Unavailable". Otherwise any positive marker makes
 * the item in stock: the token "instock", "Bottles Remaining/Available",
 * "Only N Bottle Left", or "10+ Bottles Available".
 *
 * @param {string} articleHtml - HTML of a single article.
 * @returns {boolean} true when a positive marker is present and no negative one is.
 */
function looksInStock(articleHtml) {
  const a = String(articleHtml || "");

  if (/\boutofstock\b/i.test(a)) return false;
  if (/Currently\s+Unavailable/i.test(a)) return false;

  if (/\binstock\b/i.test(a)) return true;
  if (/\bBottles\s+(?:Remaining|Available)\b/i.test(a)) return true;
  if (/Only\s+\d+\s+Bottle\s+Left/i.test(a)) return true;
  if (/10\+\s*Bottles\s+Available/i.test(a)) return true;

  // No marker either way: treat as not in stock.
  return false;
}
/**
 * Parses one listing `<article>` chunk into a product record.
 *
 * Returns null when the article does not look in stock (`looksInStock`), has
 * no `<a href>`, has an href that cannot be resolved against the store
 * origin, or yields an empty name.
 *
 * The name is the `h2.product-title` text, joined with the
 * `h3.product-subtitle` text by " - " when both exist. The SKU falls back
 * from the article HTML, to an `id:<n>` derived from the image URL, to
 * `normalizeCspc(url)`.
 *
 * @param {string} articleHtml - HTML of a single article.
 * @returns {{name: string, price: string, url: string, sku: string, productId: number, img: string}|null}
 */
function parseProductFromArticle(articleHtml) {
  const a = String(articleHtml || "");

  if (!looksInStock(a)) return null;

  // The first anchor in the article is taken as the product link.
  const hrefM = a.match(/<a\b[^>]*href=["']([^"']+)["']/i);
  if (!hrefM || !hrefM[1]) return null;

  let url;
  try {
    url = new URL(decodeHtml(hrefM[1]), "https://www.strathliquor.com/").toString();
  } catch {
    return null;
  }

  const t2 = a.match(/<h2\b[^>]*class=["'][^"']*\bproduct-title\b[^"']*["'][^>]*>([\s\S]*?)<\/h2>/i);
  const t3 = a.match(/<h3\b[^>]*class=["'][^"']*\bproduct-subtitle\b[^"']*["'][^>]*>([\s\S]*?)<\/h3>/i);
  const title = cleanText(decodeHtml(stripTags((t2 && t2[1]) || "")));
  const sub = cleanText(decodeHtml(stripTags((t3 && t3[1]) || "")));
  const name = cleanText([title, sub].filter(Boolean).join(" - "));
  if (!name) return null;

  const price = pickPriceFromArticle(a);
  const productId = extractProductIdFromArticle(a);

  const img = extractFirstImgUrl(a, "https://www.strathliquor.com/");

  const skuFromHtml = extractSkuFromArticle(a);
  const skuFromImg = idFromImageUrl(img);
  const fallbackSku = normalizeCspc(url) || "";

  return {
    name,
    price,
    url,
    sku: skuFromHtml || skuFromImg || fallbackSku,
    productId,
    img,
  };
}
/* ---------------- Store API paging ---------------- */
/**
 * Builds the Store API products URL (`/wp-json/wc/store/v1/products`) on the
 * same host as a category listing URL, carrying over the listing's filters.
 *
 * Always sets `order=desc` and `orderby=date`. Sets `stock_status=instock`
 * when the listing's `_sfm__stock_status` contains "instock". Sets
 * `min_price`/`max_price` when `_sfm__regular_price` decodes to
 * "<digits>+<digits>".
 *
 * NOTE(review): `URLSearchParams.get` decodes a raw "+" in the query string
 * as a space, so a listing URL written as `_sfm__regular_price=0+6000`
 * decodes to "0 6000" and does NOT match the pattern below; only an encoded
 * "%2B" does. The price filter is therefore inactive for such URLs. Before
 * widening the pattern, confirm which units the Store API expects for
 * min_price/max_price (presumably minor units - verify).
 *
 * @param {string} startUrl - Category listing URL.
 * @returns {URL} Store API URL without paging parameters.
 * @throws {TypeError} When `startUrl` is not a valid absolute URL.
 */
function buildStoreApiBaseUrlFromCategoryUrl(startUrl) {
  const u = new URL(startUrl);
  const api = new URL(`https://${u.hostname}/wp-json/wc/store/v1/products`);

  api.searchParams.set("order", "desc");
  api.searchParams.set("orderby", "date");

  const stock = u.searchParams.get("_sfm__stock_status");
  if (stock && /instock/i.test(stock)) api.searchParams.set("stock_status", "instock");

  const pr = u.searchParams.get("_sfm__regular_price");
  if (pr) {
    const m = String(pr).match(/^\s*([0-9]+)\s*\+\s*([0-9]+)\s*$/);
    if (m) {
      api.searchParams.set("min_price", m[1]);
      api.searchParams.set("max_price", m[2]);
    }
  }

  return api;
}
/**
 * Tells whether a Store API product belongs to a category slug.
 *
 * Comparison is on trimmed, lowercased slugs. An empty `wanted` means "no
 * filter" and always matches.
 *
 * @param {{categories?: Array<{slug?: string}>}} p - Store API product.
 * @param {string} wanted - Category slug to look for.
 * @returns {boolean} true when `wanted` is empty or one of `p.categories` has that slug.
 */
function hasCategorySlug(p, wanted) {
  const normalize = (value) =>
    String(value || "")
      .trim()
      .toLowerCase();

  const w = normalize(wanted);
  if (!w) return true;

  const cats = Array.isArray(p?.categories) ? p.categories : [];
  return cats.some((c) => normalize(c?.slug) === w);
}
/**
 * Returns the product's page URL from a Store API product.
 *
 * Reads `permalink`, falling back to `link`, and accepts the value only when
 * it starts with "http".
 *
 * @param {{permalink?: string, link?: string}} p - Store API product.
 * @returns {string} The trimmed URL, or "" when missing or not http(s).
 */
function normalizeProductUrl(p) {
  const u = String(p?.permalink || p?.link || "").trim();
  return u.startsWith("http") ? u : "";
}
/**
 * Returns a plain-text product name from a Store API product.
 *
 * The Store API "name" can contain HTML entities like &#8211; and sometimes
 * markup like <em>, so the raw value is run through `stripTags`, then
 * `decodeHtml`, then `cleanText`.
 *
 * @param {{name?: string}} p - Store API product.
 * @returns {string} The cleaned name as returned by `cleanText`.
 */
function normalizeProductName(p) {
  const raw = String(p?.name || "");
  return cleanText(decodeHtml(stripTags(raw)));
}
/**
 * Picks an image URL from a Store API product.
 *
 * Walks `p.images` and returns the first non-empty candidate. Each entry may
 * be a plain string or an object, whose `src`, `thumbnail` and `url` fields
 * are tried in that order. When the array yields nothing, falls back to
 * `p.image`, `p.image_url`, then `p.imageUrl`. Protocol-relative URLs ("//...")
 * are prefixed with "https:".
 *
 * @param {object} p - Store API product.
 * @returns {string} The image URL, or "" when none is available.
 */
function normalizeProductImage(p) {
  const withScheme = (s) => (s.startsWith("//") ? `https:${s}` : s);

  const imgs = Array.isArray(p?.images) ? p.images : [];
  for (const im of imgs) {
    if (!im) continue;

    const raw =
      (typeof im === "string" ? im : "") ||
      (typeof im?.src === "string" ? im.src : "") ||
      (typeof im?.thumbnail === "string" ? im.thumbnail : "") ||
      (typeof im?.url === "string" ? im.url : "");
    const s = String(raw || "").trim();
    if (s) return withScheme(s);
  }

  const direct = String(p?.image || p?.image_url || p?.imageUrl || "").trim();
  return direct ? withScheme(direct) : "";
}
/**
 * Converts an integer amount in minor currency units to a decimal string,
 * e.g. ("11035", 2) -> "110.35" and ("5", 2) -> "0.05".
 *
 * Works on the digit string rather than on floats, to avoid rounding issues.
 *
 * @param {string|number} valueStr - Amount in minor units; digits only.
 * @param {string|number} minorUnit - Number of fraction digits; must be an
 *   integer from 0 to 6.
 * @returns {string} The decimal string, or "" when either argument is invalid.
 */
function toMoneyStringFromMinorUnits(valueStr, minorUnit) {
  const mu = Number(minorUnit);
  // Fractional minor units would make the slicing below produce garbage.
  if (!Number.isInteger(mu) || mu < 0 || mu > 6) return "";

  const v = String(valueStr || "").trim();
  if (!/^\d+$/.test(v)) return "";

  // Left-pad so there is always at least one digit before the decimal point.
  const digits = v.padStart(mu + 1, "0");
  const cut = digits.length - mu;
  const whole = digits.slice(0, cut);
  return mu === 0 ? whole : `${whole}.${digits.slice(cut)}`;
}
/**
 * Returns the product's price as "$<amount with 2 decimals>".
 *
 * Prefers `p.prices.sale_price`, then `p.prices.regular_price`. The Store API
 * commonly returns minor units (e.g. "11035" with minor_unit=2 => 110.35), so
 * an all-digit value is converted with `toMoneyStringFromMinorUnits` when
 * `currency_minor_unit` is present. If no usable number results, falls back
 * to running `normalizePrice` over `p.price` or `p.price_html`.
 *
 * @param {object} p - Store API product.
 * @returns {string} Formatted price, or whatever `normalizePrice` returns for the fallback.
 */
function normalizeProductPrice(p) {
  const prices = p?.prices;

  if (prices && typeof prices === "object") {
    const minor = prices.currency_minor_unit;
    const sale = String(prices.sale_price || "").trim();
    const regular = String(prices.regular_price || "").trim();
    const chosen = sale || regular;

    if (chosen) {
      let numeric = chosen;

      if (/^\d+$/.test(chosen) && minor !== undefined && minor !== null) {
        const converted = toMoneyStringFromMinorUnits(chosen, minor);
        // Keep the raw value when the conversion rejects its input.
        if (converted) numeric = converted;
      }

      const num = Number(numeric);
      if (Number.isFinite(num) && num >= 0) return `$${num.toFixed(2)}`;
    }
  }

  const raw = String(p?.price || p?.price_html || "").trim();
  return normalizePrice(raw);
}
/**
 * Returns the product's SKU only when it is exactly six digits.
 *
 * @param {{sku?: string|number}} p - Store API product.
 * @returns {string} The trimmed six-digit SKU, or "" for any other value.
 */
function normalizeProductSku(p) {
  const sku = String(p?.sku || "").trim();
  return /^\d{6}$/.test(sku) ? sku : "";
}
/**
 * Returns the product's numeric id.
 *
 * @param {{id?: number|string}} p - Store API product.
 * @returns {number} `Number(p.id)` when finite, otherwise 0.
 */
function normalizeProductId(p) {
  const id = Number(p?.id);
  return Number.isFinite(id) ? id : 0;
}
/**
 * Fetches one page of Store API products.
 *
 * Copies `apiBaseUrl` (so the caller's URL is not mutated), sets `page` and
 * `per_page`, and issues a GET through `ctx.http.fetchJsonWithRetry` with the
 * category start URL as Referer.
 *
 * @param {object} ctx - Scan context; reads `ctx.http`, `ctx.cat.key`,
 *   `ctx.cat.startUrl` and `ctx.store.ua`.
 * @param {URL|string} apiBaseUrl - Store API URL without paging parameters.
 * @param {number} page - 1-based page number.
 * @param {number} perPage - Page size.
 * @returns {Promise<*>} Whatever `ctx.http.fetchJsonWithRetry` resolves to.
 */
async function fetchStoreApiPage(ctx, apiBaseUrl, page, perPage) {
  const u = new URL(apiBaseUrl.toString());
  u.searchParams.set("page", String(page));
  u.searchParams.set("per_page", String(perPage));

  return await ctx.http.fetchJsonWithRetry(u.toString(), `strath:storeapi:${ctx.cat.key}:p${page}`, ctx.store.ua, {
    method: "GET",
    headers: {
      Accept: "application/json",
      Referer: ctx.cat.startUrl,
    },
  });
}
/**
 * Guards against a partial scan wiping most of the previous DB.
 *
 * When this run discovered fewer than 60% as many items as the previous DB
 * holds, a warning is logged and every previous entry missing from
 * `discovered` is copied into it (mutating `discovered`), so the later merge
 * does not see those items as removed. Nothing happens when either side is
 * empty.
 *
 * @param {Map} prevDb - Previous DB, keyed the same way as `discovered`.
 * @param {Map} discovered - Items found this run; may be mutated.
 * @param {object} ctx - Scan context; reads `ctx.logger.warn` and `ctx.catPrefixOut`.
 * @param {string} reason - Free-text detail appended to the warning.
 * @returns {boolean} true when previous entries were copied into `discovered`.
 */
function avoidMassRemoval(prevDb, discovered, ctx, reason) {
  const prevSize = prevDb && typeof prevDb.size === "number" ? prevDb.size : 0;
  const discSize = discovered && typeof discovered.size === "number" ? discovered.size : 0;

  if (prevSize <= 0 || discSize <= 0) return false;

  const ratio = discSize / Math.max(1, prevSize);
  if (ratio >= 0.6) return false;

  ctx.logger.warn?.(
    `${ctx.catPrefixOut} | Strath partial scan (${discSize}/${prevSize}); preserving DB to avoid removals (${reason}).`,
  );

  if (!prevDb || typeof prevDb.entries !== "function") return false;

  for (const [k, v] of prevDb.entries()) {
    // Fresh data wins; only fill in what this run did not see.
    if (!discovered.has(k)) discovered.set(k, v);
  }
  return true;
}
/**
 * Scans one Strath category and persists the result.
 *
 * Steps, in order:
 *   1. Fetch the category listing HTML and parse its `<article>` products as
 *      a seed (a fetch failure is logged and the scan continues with no seed).
 *   2. Page through the Store API (100 items per page, up to
 *      `ctx.config.maxPages`, or 5000 when that is null), keeping in-stock
 *      products that match `ctx.cat.apiCategorySlug` and merging them over
 *      the seed by URL.
 *   3. Stop paging on a fetch error, an empty page, a short page, or - when a
 *      slug filter is active - two consecutive pages with nothing kept.
 *   4. Apply `avoidMassRemoval`, merge into the previous DB, write the DB
 *      file, and append the category result to `report`.
 *
 * @param {object} ctx - Scan context (http, logger, config, cat, store, dbFile, catPrefixOut).
 * @param {Map} prevDb - Previous DB for this category.
 * @param {object} report - Run report; `categories` and `totals` are mutated.
 * @returns {Promise<void>}
 */
async function scanCategoryStrath(ctx, prevDb, report) {
  const t0 = Date.now();

  // Listing HTML (seed + sanity)
  let html = "";
  let listingFinalUrl = ctx.cat.startUrl;
  let listingStatus = 0;
  let listingBytes = 0;
  let listingMs = 0;

  try {
    const r = await ctx.http.fetchTextWithRetry(ctx.cat.startUrl, `strath:html:${ctx.cat.key}`, ctx.store.ua);
    html = r.text || "";
    listingFinalUrl = r.finalUrl || ctx.cat.startUrl;
    listingStatus = r.status || 0;
    listingBytes = r.bytes || 0;
    listingMs = r.ms || 0;
  } catch (e) {
    ctx.logger.warn(`${ctx.catPrefixOut} | Strath listing HTML fetch failed: ${e?.message || e}`);
  }

  // Keyed by product URL so API results can overwrite listing seeds.
  const discovered = new Map();

  const listingArticles = extractArticles(html);
  let listingItems = 0;
  for (const art of listingArticles) {
    const it = parseProductFromArticle(art);
    if (it) {
      discovered.set(it.url, it);
      listingItems++;
    }
  }

  ctx.logger.ok(
    `${ctx.catPrefixOut} | Page ${pageStr(1, 1)} | ${String(listingStatus || "").padEnd(3)} | ${pctStr(1, 1)} | items=${padLeft(
      listingItems,
      3,
    )} | bytes=${kbStr(listingBytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(listingMs)}`,
  );

  const apiBase = buildStoreApiBaseUrlFromCategoryUrl(listingFinalUrl || ctx.cat.startUrl);

  const perPage = 100;
  const maxPagesCap = ctx.config.maxPages === null ? 5000 : ctx.config.maxPages;

  const wantedSlug = String(ctx.cat.apiCategorySlug || "")
    .trim()
    .toLowerCase();

  let donePages = 0;
  let emptyMatchPages = 0;

  for (let page = 1; page <= maxPagesCap; page++) {
    let r;
    try {
      r = await fetchStoreApiPage(ctx, apiBase, page, perPage);
    } catch (e) {
      ctx.logger.warn?.(`${ctx.catPrefixOut} | Strath Store API page ${page} failed: ${e?.message || e}`);
      break;
    }

    const arr = Array.isArray(r?.json) ? r.json : [];
    donePages++;

    if (!arr.length) break;

    let kept = 0;

    for (const p of arr) {
      // A missing stock_status is treated as in stock; any other value is skipped.
      const stock = String(p?.stock_status || "").toLowerCase();
      if (stock && stock !== "instock") continue;

      if (wantedSlug && !hasCategorySlug(p, wantedSlug)) continue;

      const url = normalizeProductUrl(p);
      if (!url) continue;

      const name = normalizeProductName(p);
      if (!name) continue;

      const price = normalizeProductPrice(p);
      const sku = normalizeProductSku(p);
      const productId = normalizeProductId(p);

      const prev = discovered.get(url) || null;

      const apiImg = normalizeProductImage(p) || "";
      const img = apiImg || (prev && prev.img) || "";

      // SKU preference: API SKU, then image-derived id, then URL-derived key.
      const skuFromApiImg = idFromImageUrl(apiImg);
      const newSku = sku || skuFromApiImg || normalizeCspc(url) || "";
      const mergedSku = pickBetterSku(newSku, prev && prev.sku);

      discovered.set(url, {
        name,
        price,
        url,
        sku: mergedSku,
        productId,
        img,
      });
      kept++;
    }

    ctx.logger.ok(
      `${ctx.catPrefixOut} | API Page ${pageStr(donePages, donePages)} | ${(r?.status || "").toString().padEnd(3)} | kept=${padLeft(
        kept,
        3,
      )} | bytes=${kbStr(r.bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}`,
    );

    if (wantedSlug) {
      if (kept === 0) emptyMatchPages++;
      else emptyMatchPages = 0;

      // If filter is tight (rum), stop after 2 empty pages in a row.
      if (emptyMatchPages >= 2) break;
    }

    // A short page is the last page.
    if (arr.length < perPage) break;
  }

  if (prevDb && typeof prevDb.size === "number") {
    avoidMassRemoval(prevDb, discovered, ctx, `storeapi pages=${donePages} slug=${wantedSlug || "none"}`);
  }

  ctx.logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}`);

  const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, {
    storeLabel: ctx.store.name,
  });

  const dbObj = buildDbObject(ctx, merged);
  writeJsonAtomic(ctx.dbFile, dbObj);

  ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`);

  const elapsed = Date.now() - t0;
  ctx.logger.ok(
    `${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`,
  );

  report.categories.push({
    store: ctx.store.name,
    label: ctx.cat.label,
    key: ctx.cat.key,
    dbFile: ctx.dbFile,
    // The listing HTML fetch counts as one page on top of the API pages.
    scannedPages: 1 + Math.max(0, donePages),
    discoveredUnique: discovered.size,
    newCount: newItems.length,
    updatedCount: updatedItems.length,
    removedCount: removedItems.length,
    restoredCount: restoredItems.length,
    elapsedMs: elapsed,
  });
  report.totals.newCount += newItems.length;
  report.totals.updatedCount += updatedItems.length;
  report.totals.removedCount += removedItems.length;
  report.totals.restoredCount += restoredItems.length;

  addCategoryResultToReport(
    report,
    ctx.store.name,
    ctx.cat.label,
    newItems,
    updatedItems,
    removedItems,
    restoredItems,
  );
}
/**
 * Builds the store descriptor for Strath Liquor.
 *
 * Each category carries the listing `startUrl` that is fetched first and the
 * `apiCategorySlug` used to filter Store API results.
 *
 * @param {string} defaultUa - User-Agent string stored on the descriptor as `ua`.
 * @returns {object} Store descriptor with `key`, `name`, `host`, `ua`,
 *   `scanCategory` and `categories`.
 */
function createStore(defaultUa) {
  return {
    key: "strath",
    name: "Strath Liquor",
    host: "www.strathliquor.com",
    ua: defaultUa,
    scanCategory: scanCategoryStrath,
    categories: [
      {
        key: "whisky",
        label: "Whisky",
        apiCategorySlug: "whisky",
        startUrl:
          "https://www.strathliquor.com/whisky/?_sfm__stock_status=instock&_sfm__regular_price=0+6000&_sfm_product_abv=20+75&orderby=date",
      },
      {
        key: "spirits-rum",
        label: "Spirits - Rum",
        apiCategorySlug: "rum",
        startUrl:
          "https://www.strathliquor.com/spirits/?_sfm__stock_status=instock&_sfm__regular_price=0+600&_sfm_product_type=Rum&_sfm_product_abv=10+75&orderby=date",
      },
    ],
  };
}
module.exports = { createStore };

View file

@ -19,192 +19,191 @@ const GQL_URL = "https://production-storefront-api-mlwv4nj3rq-uc.a.run.app/graph
/* ---------------- formatting ---------------- */
/**
 * Formats a byte count for log columns: the `humanBytes` rendering,
 * left-padded with spaces to 8 characters.
 *
 * @param {number} bytes - Byte count.
 * @returns {string} Padded size label.
 */
function kbStr(bytes) {
  return humanBytes(bytes).padStart(8, " ");
}
/**
 * Formats a duration in milliseconds as seconds for log columns.
 *
 * Below 10 seconds (after rounding to tenths) one decimal is shown ("1.2s");
 * otherwise whole seconds ("12s"). A non-finite input is shown as 0. The
 * result is left-padded with spaces to 7 characters.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Padded seconds label.
 */
function secStr(ms) {
  const seconds = Number.isFinite(ms) ? ms / 1000 : 0;
  const tenths = Math.round(seconds * 10) / 10;
  const label = tenths < 10 ? `${tenths.toFixed(1)}s` : `${Math.round(seconds)}s`;
  return label.padStart(7, " ");
}
/**
 * Formats a page counter as "i/total" for log lines, passing `i` through
 * `padLeft` with the character width of `total`.
 *
 * @param {number|string} i - Current page index.
 * @param {number|string} total - Total page count.
 * @returns {string} The padded "i/total" label.
 */
function pageStr(i, total) {
  const w = String(total).length;
  return `${padLeft(i, w)}/${total}`;
}
/* ---------------- helpers ---------------- */
/**
 * Formats a numeric value as "$<amount with 2 decimals>".
 *
 * @param {number|string} n - Value to format; coerced with `Number()`.
 * @returns {string} The formatted amount, or "" when the value is not a finite number.
 */
function money(n) {
  const x = Number(n);
  return Number.isFinite(x) ? `$${x.toFixed(2)}` : "";
}
/**
 * Returns the first non-empty string found among the arguments, trimmed.
 *
 * Each argument is examined in order:
 *   - a string counts when it is non-empty after trimming;
 *   - an array is searched element by element, where a string element counts
 *     as above and an object element contributes its `url`, `src` or `image`
 *     field (first truthy one), trimmed.
 * Anything else is skipped.
 *
 * @param {...*} vals - Candidate values.
 * @returns {string} The first match, or "" when there is none.
 */
function firstNonEmptyStr(...vals) {
  for (const v of vals) {
    if (typeof v === "string") {
      const s = v.trim();
      if (s) return s;
      continue;
    }
    if (!Array.isArray(v)) continue;

    for (const a of v) {
      if (typeof a === "string") {
        const s = a.trim();
        if (s) return s;
      } else if (a && typeof a === "object") {
        const u = String(a.url || a.src || a.image || "").trim();
        if (u) return u;
      }
    }
  }
  return "";
}
/**
 * Turns a possibly relative URL into an absolute one.
 *
 * Protocol-relative values ("//host/...") get "https:" prepended, values that
 * already start with http:// or https:// are returned unchanged, and anything
 * else is resolved against `BASE`. If resolution throws, the trimmed input is
 * returned as is.
 *
 * @param {string} raw - URL or path (nullish is treated as "").
 * @returns {string} Absolute URL, the trimmed input on failure, or "" for empty input.
 */
function normalizeAbsUrl(raw) {
  const s = String(raw || "").trim();
  if (!s) return "";
  if (s.startsWith("//")) return `https:${s}`;
  if (/^https?:\/\//i.test(s)) return s;
  try {
    return new URL(s, `${BASE}/`).toString();
  } catch {
    return s;
  }
}
/**
 * Tells whether a SKU is synthetic and therefore eligible for repair.
 *
 * Treats "u:*" values (URL-hash fallback) and empty values as synthetic.
 *
 * @param {string} sku - SKU to test (nullish is treated as "").
 * @returns {boolean} true when the trimmed SKU is empty or starts with "u:" (case-insensitive).
 */
function isSyntheticSku(sku) {
  const s = String(sku || "").trim();
  return !s || /^u:/i.test(s);
}
/**
 * Normalizes a raw Tudor SKU into the key format used by the DB.
 *
 * Rules, in order:
 *   - empty -> "";
 *   - already namespaced ("id:..." or "u:...") -> unchanged;
 *   - all digits -> "id:<sku>" when shorter than 6 digits, otherwise unchanged.
 *     Numeric SKUs are deliberately NOT run through `normalizeCspc` (some
 *     normalizers hash arbitrary strings);
 *   - any other SKU shorter than 6 characters -> "id:<sku>", to reduce collisions;
 *   - everything else -> `normalizeCspc(sku)`, or the raw string when that returns empty.
 *
 * @param {string} rawSku - SKU as received from the API.
 * @returns {string} Normalized SKU key.
 */
function normalizeTudorSku(rawSku) {
  const s = String(rawSku || "").trim();
  if (!s) return "";

  if (/^id:/i.test(s)) return s;
  if (/^u:/i.test(s)) return s;

  // numeric SKU like 67433
  if (/^\d+$/.test(s)) {
    return s.length < 6 ? `id:${s}` : s;
  }

  // short alnum SKU -> namespace
  if (s.length < 6) return `id:${s}`;

  return normalizeCspc(s) || s;
}
/**
 * Builds the product page URL for a product slug.
 *
 * Site URLs look like: /TUDOR_HOUSE_0/product/spirits/<subcat>/<slug>. The
 * root segment comes from `ctx.cat.tudorRootSlug` (default "spirits") and the
 * sub-category from `ctx.cat.tudorSubSlug` (default ""); each segment is
 * URI-encoded and the path is resolved against `BASE`.
 *
 * @param {object} ctx - Scan context; reads `ctx.cat`.
 * @param {string} slug - Product slug.
 * @returns {string} Absolute product URL.
 */
function tudorProductUrl(ctx, slug) {
  const root = ctx?.cat?.tudorRootSlug || "spirits";
  const sub = ctx?.cat?.tudorSubSlug || "";
  const path = `/${STORE_ID}/product/${encodeURIComponent(root)}/${encodeURIComponent(sub)}/${encodeURIComponent(slug)}`;
  return new URL(path, BASE).toString();
}
/**
 * Parses a variant's volume into millilitres.
 *
 * Reads the first truthy of `v.volume`, `v.shortName`, `v.fullName`,
 * uppercases it, and matches "1.75L", "1L", "750ML", etc. Litres are
 * converted to millilitres; the result is rounded to a whole number.
 *
 * @param {{volume?: string, shortName?: string, fullName?: string}} v - Product variant.
 * @returns {number|null} Volume in ml, or null when no "<number>ML" / "<number>L" is found.
 */
function parseVolumeMl(v) {
  const raw = String(v?.volume || v?.shortName || v?.fullName || "").toUpperCase();

  const m = raw.match(/(\d+(?:\.\d+)?)\s*(ML|L)\b/);
  if (!m) return null;

  const n = Number(m[1]);
  if (!Number.isFinite(n)) return null;

  return m[2] === "L" ? Math.round(n * 1000) : Math.round(n);
}
/**
 * Picks the representative variant of a product.
 *
 * Only in-stock variants (quantity > 0) are considered when there are any;
 * otherwise all variants are. Among the candidates the largest volume wins
 * (per `parseVolumeMl`; an unparseable volume ranks lowest), and ties are
 * broken by the higher price (a non-finite price ranks lowest). On a full
 * tie the earlier variant is kept.
 *
 * @param {{variants?: object[]}} p - Product with a `variants` array.
 * @returns {object|null} The chosen variant, or null when the product has none.
 */
function tudorPickVariant(p) {
  const vs = Array.isArray(p?.variants) ? p.variants : [];
  const inStock = vs.filter((v) => Number(v?.quantity) > 0);
  const pool = inStock.length ? inStock : vs;
  if (!pool.length) return null;
  if (pool.length === 1) return pool[0];

  let best = pool[0];
  let bestVol = parseVolumeMl(best);
  let bestPrice = Number(best?.price);

  for (let i = 1; i < pool.length; i++) {
    const v = pool[i];
    const vol = parseVolumeMl(v);
    const price = Number(v?.price);

    // Unknown volumes sort below every real volume.
    const volA = bestVol == null ? -1 : bestVol;
    const volB = vol == null ? -1 : vol;

    // 1) largest volume wins
    if (volB > volA) {
      best = v;
      bestVol = vol;
      bestPrice = price;
      continue;
    }
    if (volB < volA) continue;

    // 2) tie-break: higher price wins
    const priceA = Number.isFinite(bestPrice) ? bestPrice : -1;
    const priceB = Number.isFinite(price) ? price : -1;
    if (priceB > priceA) {
      best = v;
      bestVol = vol;
      bestPrice = price;
    }
  }

  return best;
}
/**
 * Parses the live display price out of product page HTML.
 *
 * The search is narrowed to the first `price-container` div (or, failing
 * that, the first div whose class contains the word "price"), limited to 800
 * characters of content; when neither exists the whole input is searched.
 * `retail-price` spans are removed first so the live price is picked rather
 * than the retail one. Thousands separators ("$1,234.56") are accepted.
 *
 * @param {string} html - Product page HTML.
 * @returns {number|null} The first dollar amount found, or null when there is none.
 */
function parseDisplayPriceFromHtml(html) {
  const s = String(html || "");

  const block =
    s.match(/<div[^>]*class=["'][^"']*price-container[^"']*["'][^>]*>([\s\S]{0,800})<\/div>/i) ||
    s.match(/<div[^>]*class=["'][^"']*\bprice\b[^"']*["'][^>]*>([\s\S]{0,800})<\/div>/i);

  const hay = block ? block[1] : s;

  const cleaned = hay.replace(/<span[^>]*class=["'][^"']*retail-price[^"']*["'][^>]*>[\s\S]*?<\/span>/gi, " ");

  // Comma-grouped digits must be matched as a unit; stopping at the first
  // comma would read "$1,234.56" as 1.
  const m = cleaned.match(/\$\s*((?:[0-9]{1,3}(?:,[0-9]{3})+|[0-9]+)(?:\.[0-9]{2})?)/);
  if (!m) return null;

  const n = Number(m[1].replace(/,/g, ""));
  return Number.isFinite(n) ? n : null;
}
/**
 * Return the first non-blank variant SKU of a product.
 *
 * @param {object} p - Product object; `p.variants` is used when it is an array.
 * @returns {string} The trimmed SKU, or "" when no variant carries one.
 */
function pickAnySkuFromProduct(p) {
  const vs = Array.isArray(p?.variants) ? p.variants : [];
  for (const v of vs) {
    const s = String(v?.sku || "").trim();
    if (s) return s;
  }
  return "";
}
/**
 * Pick the first variant whose numeric `quantity` is positive; when none is
 * in stock, fall back to the first variant.
 *
 * @param {object} p - Product object; `p.variants` is used when it is an array.
 * @returns {object|null} The chosen variant, or null when there are none.
 */
function pickInStockVariantWithFallback(p) {
  const vs = Array.isArray(p?.variants) ? p.variants : [];
  const inStock = vs.find((v) => Number(v?.quantity) > 0);
  return inStock || vs[0] || null;
}
/* ---------------- GraphQL ---------------- */
/**
 * POST a GraphQL request to GQL_URL through the shared HTTP wrapper.
 *
 * @param {object} ctx - Scan context; uses `ctx.http.fetchJsonWithRetry` and `ctx.store.ua`.
 * @param {string} label - Label passed through to the HTTP wrapper.
 * @param {string} query - GraphQL query text.
 * @param {object} variables - GraphQL variables.
 * @returns {Promise<object>} Whatever `fetchJsonWithRetry` resolves to
 *   (callers in this file read `status` and `json` from it).
 */
async function tudorGql(ctx, label, query, variables) {
  return await ctx.http.fetchJsonWithRetry(GQL_URL, label, ctx.store.ua, {
    method: "POST",
    headers: {
      Accept: "application/json",
      "content-type": "application/json",
      Origin: BASE,
      Referer: `${BASE}/`,
    },
    body: JSON.stringify({ query, variables }),
  });
}
/* ---------------- GQL queries ---------------- */
@ -300,63 +299,63 @@ const PRODUCTS_BY_SKU_QUERY = `
`;
/**
 * Fetch one page (up to 100 items, sorted by name ascending) of products for
 * the current category via the PRODUCTS_QUERY GraphQL query.
 *
 * @param {object} ctx - Scan context; reads `ctx.cat.tudorAllTags`,
 *   `ctx.cat.tudorSubSlug` and `ctx.cat.key`.
 * @param {string|null} cursor - Page cursor from the previous page, or a falsy
 *   value for the first page.
 * @returns {Promise<object>} The `data.products` payload of the response.
 * @throws {Error} When the status is not 200 or `data.products` is missing;
 *   the message carries the GraphQL error messages or the HTTP status.
 */
async function fetchProductsPage(ctx, cursor) {
  const vars = {
    storeId: STORE_ID,
    allTags: ctx.cat.tudorAllTags || ["spirits", ctx.cat.tudorSubSlug],
    anyTags: null,
    pageCursor: cursor || null,
    pageLimit: 100,
    sortBy: "name",
    sortOrder: "asc",
    priceMin: null,
    priceMax: null,
    quantityMin: null,
  };

  const r = await tudorGql(ctx, `tudor:gql:products:${ctx.cat.key}`, PRODUCTS_QUERY, vars);

  if (r?.status !== 200 || !r?.json?.data?.products) {
    const errs = Array.isArray(r?.json?.errors) ? r.json.errors : [];
    const msg = errs.length ? errs.map((e) => e?.message || String(e)).join(" | ") : `HTTP ${r?.status}`;
    throw new Error(`Tudor products query failed: ${msg}`);
  }

  return r.json.data.products;
}
/* ---------------- GQL bySku helper (image-only within budget) ---------------- */
/**
 * Look up a product by SKU via PRODUCTS_BY_SKU_QUERY, memoising the result
 * (including misses) on `ctx._tudorSkuCache` so each SKU is queried at most
 * once per context.
 *
 * @param {object} ctx - Scan context; `_tudorSkuCache` is created on it lazily.
 * @param {string} sku - SKU to look up; trimmed before use.
 * @returns {Promise<object|null>} The first matching item, or null when the
 *   SKU is blank, the response is not a 200, or nothing matched.
 */
async function fetchProductBySku(ctx, sku) {
  const s = String(sku || "").trim();
  if (!s) return null;

  if (!ctx._tudorSkuCache) ctx._tudorSkuCache = new Map();
  if (ctx._tudorSkuCache.has(s)) return ctx._tudorSkuCache.get(s);

  const r = await tudorGql(ctx, `tudor:gql:bySku:${ctx.cat.key}:${s}`, PRODUCTS_BY_SKU_QUERY, {
    sku: s,
    storeId: STORE_ID,
  });

  let out = null;
  if (r?.status === 200 && r?.json?.data?.productsBySku?.items?.length) {
    out = r.json.data.productsBySku.items[0] || null;
  }

  // Cache misses too, so a SKU that resolves to nothing is not re-queried.
  ctx._tudorSkuCache.set(s, out);
  return out;
}
/**
 * Try to find an image for a product by looking it up by SKU.
 *
 * @param {object} ctx - Scan context, forwarded to `fetchProductBySku`.
 * @param {string} skuProbe - SKU used for the lookup.
 * @returns {Promise<{img: string}|null>} `{ img }` when an image URL was
 *   found, otherwise null.
 */
async function supplementImageFromSku(ctx, skuProbe) {
  const prod = await fetchProductBySku(ctx, skuProbe);
  if (!prod) return null;

  // Variant image is preferred; the product-level image fields are fallbacks.
  const v = pickInStockVariantWithFallback(prod);
  const img = normalizeAbsUrl(
    firstNonEmptyStr(v?.image, prod?.gulpImages, prod?.posImages, prod?.customImages, prod?.imageIds),
  );

  return img ? { img } : null;
}
/* ---------------- HTML product page fallback (SKU + optional image) ---------------- */
@ -366,333 +365,335 @@ const DETAIL_HTML_BUDGET_DEFAULT = 200;
const DETAIL_GQL_BUDGET_DEFAULT = 10;
/**
 * Extract a SKU from product-page HTML.
 *
 * @param {string} html - Raw page HTML (any falsy value is treated as "").
 * @returns {string} The trimmed SKU, or "" when none is found.
 */
function parseSkuFromHtml(html) {
  const s = String(html || "");

  // 1) Visible block: <div class="sku ...">SKU: 67433</div>
  const m1 = s.match(/>\s*SKU:\s*([A-Za-z0-9._-]+)\s*</i) || s.match(/\bSKU:\s*([A-Za-z0-9._-]+)\b/i);
  if (m1 && m1[1]) return String(m1[1]).trim();

  // 2) Embedded SAPPER preloaded JSON has variants with `"sku":"67433"`
  const m2 = s.match(/"sku"\s*:\s*"([^"]+)"/i);
  return m2 && m2[1] ? String(m2[1]).trim() : "";
}
/**
 * Extract the social-preview image URL from page HTML: the `og:image` meta
 * tag first, then `twitter:image`. Only tags whose `content` attribute comes
 * after the `property`/`name` attribute are matched.
 *
 * @param {string} html - Raw page HTML (any falsy value is treated as "").
 * @returns {string} The trimmed image URL, or "" when neither tag is found.
 */
function parseOgImageFromHtml(html) {
  const s = String(html || "");
  const m =
    s.match(/property=["']og:image["'][^>]*content=["']([^"']+)["']/i) ||
    s.match(/name=["']twitter:image["'][^>]*content=["']([^"']+)["']/i);
  return m ? String(m[1] || "").trim() : "";
}
/**
 * Fetch a page as HTML text through the context's HTTP wrapper.
 *
 * Uses `ctx.http.fetchTextWithRetry` when present; otherwise falls back to a
 * generic `ctx.http.fetchWithRetry` and coerces its body to a string.
 *
 * @param {object} ctx - Scan context; uses `ctx.http` and `ctx.store.ua`.
 * @param {string} label - Label passed through to the HTTP wrapper.
 * @param {string} url - Page URL to fetch.
 * @returns {Promise<object>} The wrapper's own result for the text path, or
 *   `{ status, text, bytes, ms }` for the generic path.
 * @throws {Error} When `ctx.http` offers neither fetch method.
 */
async function tudorFetchHtml(ctx, label, url) {
  // Use ctx.http so pacing/throttle is respected.
  if (ctx?.http?.fetchTextWithRetry) {
    return await ctx.http.fetchTextWithRetry(url, label, ctx.store.ua, {
      method: "GET",
      headers: {
        Accept: "text/html,application/xhtml+xml",
        Referer: `${BASE}/`,
      },
    });
  }

  // Best-effort fallback if your wrapper has a generic fetchWithRetry.
  if (ctx?.http?.fetchWithRetry) {
    const r = await ctx.http.fetchWithRetry(url, label, ctx.store.ua, {
      method: "GET",
      headers: {
        Accept: "text/html,application/xhtml+xml",
        Referer: `${BASE}/`,
      },
    });

    // The generic wrapper's body field name is not known here, so probe the
    // likely ones and coerce whatever is found into a string.
    const body = r?.text ?? r?.body ?? r?.data ?? "";
    const text =
      typeof body === "string"
        ? body
        : Buffer.isBuffer(body)
          ? body.toString("utf8")
          : body && typeof body === "object" && typeof body.toString === "function"
            ? body.toString()
            : "";

    return { status: r?.status, text, bytes: r?.bytes, ms: r?.ms };
  }

  throw new Error("No HTML fetch method available on ctx.http (need fetchTextWithRetry or fetchWithRetry).");
}
/**
 * Fetch a product page and pull SKU, image and displayed price out of its
 * HTML. Results (including failures, stored as null) are memoised per URL on
 * `ctx._tudorHtmlCache`.
 *
 * @param {object} ctx - Scan context; `_tudorHtmlCache` is created on it lazily.
 * @param {string} url - Product page URL.
 * @returns {Promise<{sku: string, img: string, priceNum: (number|null)}|null>}
 *   Parsed details, or null when the fetch failed, threw, or returned no text.
 */
async function tudorDetailFromProductPage(ctx, url) {
  if (!ctx._tudorHtmlCache) ctx._tudorHtmlCache = new Map();
  if (ctx._tudorHtmlCache.has(url)) return ctx._tudorHtmlCache.get(url);

  let out = null;
  try {
    const r = await tudorFetchHtml(ctx, `tudor:html:${ctx.cat.key}`, url);
    if (r?.status === 200 && typeof r?.text === "string" && r.text.length) {
      const rawSku = parseSkuFromHtml(r.text);
      const sku = normalizeTudorSku(rawSku);
      const img = normalizeAbsUrl(parseOgImageFromHtml(r.text));
      const priceNum = parseDisplayPriceFromHtml(r.text);

      out = { sku, img, priceNum };
    }
  } catch {
    // Deliberate best-effort: a failed detail fetch leaves the item unrepaired
    // rather than aborting the scan.
    out = null;
  }

  ctx._tudorHtmlCache.set(url, out);
  return out;
}
/* ---------------- item builder (fast, no extra calls) ---------------- */
/**
 * Build a tracker item from a GraphQL product without any extra network
 * calls.
 *
 * @param {object} p - Product from the products query.
 * @param {object} ctx - Scan context, forwarded to `tudorProductUrl`.
 * @returns {object|null} `{ name, price, url, sku, img, _skuProbe, _variants }`,
 *   or null when the product is missing, has no name/slug, or the picked
 *   variant has a non-positive quantity.
 */
function tudorItemFromProductFast(p, ctx) {
  if (!p) return null;

  const name = cleanText(p?.name || "");
  const slug = String(p?.slug || "").trim();
  if (!name || !slug) return null;

  const v = tudorPickVariant(p);
  if (v && Number(v?.quantity) <= 0) return null; // only keep in-stock

  const url = tudorProductUrl(ctx, slug);

  // NOTE: fast-path price is a best-effort; may be overridden in repair pass for multi-variant products
  const price = money(v?.price ?? p?.priceFrom ?? p?.priceTo);

  const skuRaw = String(v?.sku || "").trim() || pickAnySkuFromProduct(p);
  const sku = normalizeTudorSku(skuRaw);

  const img = normalizeAbsUrl(firstNonEmptyStr(v?.image, p?.gulpImages, p?.posImages, p?.customImages, p?.imageIds));

  // Keep lightweight variant snapshot so repair can match HTML SKU -> exact GQL variant price
  const variants = Array.isArray(p?.variants)
    ? p.variants.map((x) => ({
        sku: String(x?.sku || "").trim(),
        price: x?.price,
        retailPrice: x?.retailPrice,
        quantity: x?.quantity,
      }))
    : [];

  return { name, price, url, sku, img, _skuProbe: skuRaw, _variants: variants };
}
/* ---------------- repair (second pass, budgeted) ---------------- */
/**
 * Second-pass repair of a fast-path item: fills in SKU, image and price from
 * the product page HTML and, if still needed, an image from a by-SKU lookup.
 * The item is mutated in place.
 *
 * @param {object} ctx - Scan context, forwarded to the detail helpers.
 * @param {object} it - Item from `tudorItemFromProductFast`.
 * @returns {Promise<object>} The same `it` object, after repair.
 */
async function tudorRepairItem(ctx, it) {
  // Determine if we need HTML for precision:
  // - Missing/synthetic SKU (existing behavior)
  // - OR multi-variant product where fast-path may choose the wrong variant for this URL
  const inStockVariants = Array.isArray(it._variants) ? it._variants.filter((v) => Number(v?.quantity) > 0) : [];
  const hasMultiInStock = inStockVariants.length >= 2;

  // 1) HTML: fix SKU if missing/synthetic, AND fix price for multi-variant URLs
  if (isSyntheticSku(it.sku) || hasMultiInStock) {
    const d = await tudorDetailFromProductPage(ctx, it.url);

    // Prefer real SKU from HTML
    if (d?.sku && !isSyntheticSku(d.sku)) {
      it.sku = d.sku;
    }

    // Fill image if missing
    if (!it.img && d?.img) it.img = d.img;

    // Price precision:
    // - Best: match HTML SKU to a GQL variant sku => exact numeric variant price
    // - Fallback: use displayed HTML price
    const htmlSkuDigits = String(d?.sku || "")
      .replace(/^id:/i, "")
      .trim();

    if (htmlSkuDigits && inStockVariants.length) {
      const match = inStockVariants.find((v) => String(v?.sku || "").trim() === htmlSkuDigits);
      if (match && Number.isFinite(Number(match.price))) {
        it.price = money(match.price);
      } else if (Number.isFinite(d?.priceNum)) {
        it.price = money(d.priceNum);
      }
    } else if (Number.isFinite(d?.priceNum)) {
      it.price = money(d.priceNum);
    }
  }

  // 2) Missing image -> limited productsBySku (existing behavior)
  if (!it.img) {
    const skuProbe = String(it._skuProbe || "").trim();
    if (skuProbe) {
      const supp = await supplementImageFromSku(ctx, skuProbe);
      if (supp?.img) it.img = supp.img;
    }
  }

  // Final fallback ONLY after repair attempts (stability)
  if (isSyntheticSku(it.sku)) it.sku = normalizeCspc(it.url) || "";

  return it;
}
/* ---------------- scanner ---------------- */
/**
 * Scan one Tudor House category: page through the products query, build
 * items, repair a budgeted subset, merge into the previous DB, write the DB
 * file and record the results on the report.
 *
 * @param {object} ctx - Scan context (config, cat, store, http, logger, dbFile).
 * @param {object} prevDb - Previous DB; `prevDb.byUrl` is consulted when present.
 * @param {object} report - Report object; `categories` and `totals` are updated.
 * @returns {Promise<void>}
 */
async function scanCategoryTudor(ctx, prevDb, report) {
  const t0 = Date.now();
  const discovered = new Map();

  // Treat an unset (null OR undefined) maxPages as "no limit" up to the hard
  // cap. A strict `=== null` check would turn undefined into NaN via Math.min,
  // the loop would never run, and every known item would look removed.
  const cfgMaxPages = ctx.config.maxPages;
  const maxPages = cfgMaxPages == null ? 500 : Math.min(cfgMaxPages, 500);
  let cursor = null;
  let done = 0;

  const needsDetail = [];

  for (let page = 1; page <= maxPages; page++) {
    const tPage = Date.now();

    const prod = await fetchProductsPage(ctx, cursor);
    const arr = Array.isArray(prod?.items) ? prod.items : [];

    let kept = 0;
    for (const p of arr) {
      const it = tudorItemFromProductFast(p, ctx);
      if (!it) continue;

      // Seed from cached DB to avoid repeating detail HTML
      const prev = prevDb?.byUrl?.get(it.url) || null;
      if (prev) {
        it.sku = pickBetterSku(it.sku, prev.sku);
        if (!it.img && prev.img) it.img = prev.img;
      }

      // queue only; do not do detail calls inline
      if (isSyntheticSku(it.sku) || !it.img) needsDetail.push(it);

      discovered.set(it.url, it);
      kept++;
    }

    done++;

    const ms = Date.now() - tPage;
    ctx.logger.ok(
      `${ctx.catPrefixOut} | Page ${pageStr(page, maxPages)} | 200 | items=${padLeft(
        kept,
        3,
      )} | bytes=${kbStr(0)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(ms)}`,
    );

    cursor = prod?.nextPageCursor || null;
    if (!cursor || !arr.length) break;
  }

  // second pass: repair with budgets
  const htmlBudget = Number.isFinite(ctx.config.tudorHtmlBudget)
    ? ctx.config.tudorHtmlBudget
    : DETAIL_HTML_BUDGET_DEFAULT;

  const gqlBudget = Number.isFinite(ctx.config.tudorGqlBudget)
    ? ctx.config.tudorGqlBudget
    : DETAIL_GQL_BUDGET_DEFAULT;

  let htmlUsed = 0;
  let gqlUsed = 0;

  for (const it of needsDetail) {
    const wantsHtml = isSyntheticSku(it.sku);
    const wantsGql = !it.img && String(it._skuProbe || "").trim();

    // enforce caps
    if (wantsHtml && htmlUsed >= htmlBudget && (!wantsGql || gqlUsed >= gqlBudget)) continue;
    if (wantsGql && gqlUsed >= gqlBudget && (!wantsHtml || htmlUsed >= htmlBudget)) continue;

    // count budgets pessimistically
    if (wantsHtml) htmlUsed++;
    if (wantsGql) gqlUsed++;

    await tudorRepairItem(ctx, it);
    discovered.set(it.url, it);
  }

  ctx.logger.ok(
    `${ctx.catPrefixOut} | Unique products: ${discovered.size} | detail(html=${htmlUsed}/${htmlBudget}, gql=${gqlUsed}/${gqlBudget})`,
  );

  const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, {
    storeLabel: ctx.store.name,
  });

  const dbObj = buildDbObject(ctx, merged);
  writeJsonAtomic(ctx.dbFile, dbObj);

  const elapsed = Date.now() - t0;

  report.categories.push({
    store: ctx.store.name,
    label: ctx.cat.label,
    key: ctx.cat.key,
    dbFile: ctx.dbFile,
    scannedPages: done,
    discoveredUnique: discovered.size,
    newCount: newItems.length,
    updatedCount: updatedItems.length,
    removedCount: removedItems.length,
    restoredCount: restoredItems.length,
    elapsedMs: elapsed,
  });

  report.totals.newCount += newItems.length;
  report.totals.updatedCount += updatedItems.length;
  report.totals.removedCount += removedItems.length;
  report.totals.restoredCount += restoredItems.length;

  addCategoryResultToReport(
    report,
    ctx.store.name,
    ctx.cat.label,
    newItems,
    updatedItems,
    removedItems,
    restoredItems,
  );
}
/* ---------------- store ---------------- */
/**
 * Build the Tudor House store descriptor.
 *
 * @param {string} defaultUa - User-Agent string stored on the descriptor as `ua`.
 * @returns {object} Store definition with its category list and the
 *   `scanCategoryTudor` scanner.
 */
function createStore(defaultUa) {
  return {
    key: "tudor",
    name: "Tudor House",
    host: HOST,
    ua: defaultUa,

    scanCategory: scanCategoryTudor,

    categories: [
      {
        key: "rum",
        label: "Rum",
        startUrl: `${BASE}/${STORE_ID}/category/spirits/rum`,
        tudorRootSlug: "spirits",
        tudorSubSlug: "rum",
        tudorAllTags: ["spirits", "rum"],
      },
      {
        key: "whiskey-scotch",
        label: "Whiskey / Scotch",
        startUrl: `${BASE}/${STORE_ID}/category/spirits/whiskey-scotch`,
        tudorRootSlug: "spirits",
        tudorSubSlug: "whiskey-scotch",
        tudorAllTags: ["spirits", "whiskey-scotch"],
      },
      {
        key: "scotch-selections",
        label: "Scotch Selections",
        startUrl: `${BASE}/${STORE_ID}/category/spirits/scotch-selections`,
        tudorRootSlug: "spirits",
        tudorSubSlug: "scotch-selections",
        tudorAllTags: ["spirits", "scotch-selections"],
      },
    ],
  };
}
module.exports = { createStore };

View file

@ -5,169 +5,170 @@ const { normalizeCspc } = require("../utils/sku");
const { normalizeBaseUrl } = require("../utils/url");
/**
 * Turn a possibly relative or protocol-relative URL into an absolute one.
 *
 * @param {string} raw - URL as found in markup (any falsy value is treated as "").
 * @returns {string} "" for blank input; `https:`-prefixed for `//host/...`;
 *   unchanged for http(s) URLs; otherwise resolved against
 *   https://vesselliquor.com/ (or returned as-is if that resolution throws).
 */
function normalizeAbsUrl(raw) {
  const s = String(raw || "").trim();
  if (!s) return "";
  if (s.startsWith("//")) return `https:${s}`;
  if (/^https?:\/\//i.test(s)) return s;
  try {
    return new URL(s, "https://vesselliquor.com/").toString();
  } catch {
    return s;
  }
}
/**
 * Strip noisy Shopify/tracking params so URLs stay stable.
 * Keeps only "variant" since it can represent a distinct product
 * configuration. Also drops the fragment and any trailing slashes on the path.
 *
 * @param {string} rawUrl - Absolute product URL.
 * @returns {string} The normalised URL, or the input as a string when it
 *   cannot be parsed as a URL.
 */
function normalizeShopifyProductUrl(rawUrl) {
  try {
    const u = new URL(String(rawUrl || ""));
    u.hash = "";

    const keep = new Set(["variant"]);
    // Snapshot the keys first: deleting while iterating the live view skips entries.
    for (const k of [...u.searchParams.keys()]) {
      if (!keep.has(k)) u.searchParams.delete(k);
    }

    if ([...u.searchParams.keys()].length === 0) u.search = "";
    if (u.pathname.length > 1) u.pathname = u.pathname.replace(/\/+$/, "");

    return u.toString();
  } catch {
    return String(rawUrl || "");
  }
}
/**
 * Build the URL of page N of a collection listing. Page 1 (or lower) has no
 * `page` parameter; other query parameters on the base URL are preserved.
 *
 * @param {string} baseUrl - Collection start URL (passed through `normalizeBaseUrl`).
 * @param {number} pageNum - 1-based page number.
 * @returns {string} The page URL without a fragment.
 */
function makeVesselPageUrl(baseUrl, pageNum) {
  const u = new URL(normalizeBaseUrl(baseUrl));
  u.hash = "";
  if (pageNum <= 1) u.searchParams.delete("page");
  else u.searchParams.set("page", String(pageNum));
  u.search = u.searchParams.toString() ? `?${u.searchParams.toString()}` : "";
  return u.toString();
}
/**
 * Heuristic stock check on a product-card HTML fragment.
 *
 * @param {string} block - Card HTML (any falsy value is treated as "").
 * @returns {boolean} false when the lower-cased markup mentions "sold out",
 *   "sold-out" or "out of stock", or carries `data-available="false"`;
 *   true otherwise.
 */
function vesselLooksInStock(block) {
  const s = String(block || "").toLowerCase();
  if (s.includes("sold out") || s.includes("sold-out") || s.includes("out of stock")) return false;
  if (/\bdata-available=["']false["']/.test(s)) return false;
  return true;
}
/**
 * Extract the current price string (e.g. "$39.99") from a product-card
 * fragment.
 *
 * @param {string} block - Card HTML (any falsy value is treated as "").
 * @returns {string} The price with internal whitespace removed, or "" when
 *   none is found.
 */
function vesselExtractPrice(block) {
  const s = String(block || "");

  // Prefer <sale-price> elements, scanning from the last one backwards.
  const saleTags = [...s.matchAll(/<sale-price\b[^>]*>([\s\S]*?)<\/sale-price>/gi)];
  for (let i = saleTags.length - 1; i >= 0; i--) {
    const txt = cleanText(decodeHtml(saleTags[i][1] || ""));
    const m = txt.match(/\$\s*\d+(?:\.\d{2})?/);
    if (m) return m[0].replace(/\s+/g, "");
  }

  // Fallback: read price-list but ignore compare-at (crossed-out)
  const withoutCompare = s.replace(/<compare-at-price\b[^>]*>[\s\S]*?<\/compare-at-price>/gi, "");
  const pl = withoutCompare.match(/<price-list\b[^>]*>([\s\S]*?)<\/price-list>/i);
  if (pl) {
    const txt = cleanText(decodeHtml(pl[1] || ""));
    const all = [...txt.matchAll(/\$\s*\d+(?:\.\d{2})?/g)];
    if (all.length) return all[all.length - 1][0].replace(/\s+/g, "");
  }

  return "";
}
/**
 * Derive a SKU for a product card from its image URL or markup.
 *
 * @param {string} imgUrl - Absolute image URL.
 * @param {string} block - Card HTML, searched when the image URL yields nothing.
 * @returns {string} Whatever `normalizeCspc(imgUrl)` returns when truthy;
 *   otherwise `id:<digits>` taken from a numeric image filename (1-11 digits);
 *   otherwise "".
 */
function vesselExtractSkuFromImgOrBlock(imgUrl, block) {
  const cspc = normalizeCspc(imgUrl) || "";
  if (cspc) return cspc;

  try {
    const u = new URL(String(imgUrl || ""));
    const m = u.pathname.match(/\/(\d{1,11})\.(?:jpe?g|png|webp)$/i);
    if (m && m[1]) return `id:${m[1]}`;
  } catch {
    // Unparseable image URL: fall through to scanning the card markup.
  }

  const s = String(block || "");
  const m2 = s.match(/\/cdn\/shop\/(?:products|files)\/(\d{1,11})\.(?:jpe?g|png|webp)/i);
  if (m2 && m2[1]) return `id:${m2[1]}`;

  return "";
}
/**
 * Convert one product-card HTML fragment into a tracker item.
 *
 * @param {string} block - Card HTML.
 * @param {string} base - Base URL used to resolve relative links and images.
 * @returns {{name: string, price: string, url: string, sku: string, img: string}|null}
 *   The item, or null when the card looks out of stock, has no usable link,
 *   or has no name.
 */
function vesselCardToItem(block, base) {
  if (!vesselLooksInStock(block)) return null;

  const hrefM = block.match(/<a\b[^>]*href=["']([^"']+)["'][^>]*>/i);
  if (!hrefM || !hrefM[1]) return null;

  let url = "";
  try {
    url = new URL(decodeHtml(hrefM[1]), base).toString();
    url = normalizeShopifyProductUrl(url);
  } catch {
    return null;
  }

  // Title link text is preferred; the first image's alt text is the fallback.
  const titleM =
    block.match(/product-card__title[\s\S]*?<a\b[^>]*>([\s\S]*?)<\/a>/i) ||
    block.match(/<img\b[^>]*\balt=["']([^"']+)["']/i);

  const name = cleanText(decodeHtml(titleM ? titleM[1] : ""));
  if (!name) return null;

  const img = normalizeAbsUrl(extractFirstImgUrl(block, base));
  const price = vesselExtractPrice(block);

  // Prefer numeric filename SKU like 67424.jpg (works for 5-digit too)
  const sku = vesselExtractSkuFromImgOrBlock(img, block);

  return { name, price, url, sku, img };
}
/**
 * Parse a collection page into tracker items, one per `<product-card>`
 * element, de-duplicated by URL (the last card for a URL wins).
 *
 * @param {string} html - Collection page HTML (any falsy value is treated as "").
 * @param {object} ctx - Scan context; `ctx.store.host` sets the base host,
 *   defaulting to vesselliquor.com.
 * @returns {object[]} Items from `vesselCardToItem`; [] when the page has no cards.
 */
function parseProductsVessel(html, ctx) {
  const s = String(html || "");
  const base = `https://${(ctx && ctx.store && ctx.store.host) || "vesselliquor.com"}/`;

  const parts = s.split(/<product-card\b/i);
  if (parts.length <= 1) return [];

  const items = [];
  for (let i = 1; i < parts.length; i++) {
    // split() consumed the opening tag name; put it back for the card parser.
    const block = "<product-card" + parts[i];
    const it = vesselCardToItem(block, base);
    if (it) items.push(it);
  }

  const uniq = new Map();
  for (const it of items) uniq.set(it.url, it);
  return [...uniq.values()];
}
/**
 * Build the Vessel Liquor store descriptor.
 *
 * @param {string} defaultUa - User-Agent string stored on the descriptor as `ua`.
 * @returns {object} Store definition with its parser, page-URL builder and
 *   category list.
 */
function createStore(defaultUa) {
  return {
    key: "vessel",
    name: "Vessel Liquor",
    host: "vesselliquor.com",
    ua: defaultUa,

    parseProducts: parseProductsVessel,
    makePageUrl: makeVesselPageUrl, // Shopify ?page=N (preserves filter/sort params)

    categories: [
      {
        key: "whisky",
        label: "Whisky",
        startUrl: "https://vesselliquor.com/collections/whisky?sort_by=title-ascending&filter.v.availability=1",
        discoveryStartPage: 20,
        discoveryStep: 10,
      },
      {
        key: "rum-cane-spirit",
        label: "Rum / Cane Spirit",
        startUrl:
          "https://vesselliquor.com/collections/rum-cane-spirit?sort_by=title-ascending&filter.v.availability=1",
        discoveryStartPage: 20,
        discoveryStep: 10,
      },
    ],
  };
}
module.exports = { createStore, parseProductsVessel };

View file

@ -9,20 +9,20 @@ const { buildDbObject, writeJsonAtomic } = require("../tracker/db");
const { addCategoryResultToReport } = require("../tracker/report");
/**
 * Format a byte count via `humanBytes`, left-padded with spaces to 8 characters.
 *
 * @param {number} bytes - Byte count.
 * @returns {string} Padded string for column-aligned log lines.
 */
function kbStr(bytes) {
  return humanBytes(bytes).padStart(8, " ");
}
/**
 * Format a duration in milliseconds as seconds, left-padded to 7 characters.
 * Durations that round below 10s show one decimal ("1.5s"); longer ones are
 * rounded to whole seconds ("12s"). Non-finite input is shown as 0.0s.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Padded duration string.
 */
function secStr(ms) {
  const s = Number.isFinite(ms) ? ms / 1000 : 0;
  const t = Math.round(s * 10) / 10;
  return (t < 10 ? `${t.toFixed(1)}s` : `${Math.round(s)}s`).padStart(7, " ");
}
/**
 * Format a "current/total" page counter.
 *
 * @param {number} i - Current page position; padded via `padLeft` to the width of `total`.
 * @param {number} total - Total number of pages.
 * @returns {string} `<padded i>/<total>`.
 */
function pageStr(i, total) {
  const w = String(total).length;
  return `${padLeft(i, w)}/${total}`;
}
/**
 * Format completion progress as a percentage cell.
 *
 * @param {number} done - Number of completed units.
 * @param {number} total - Total number of units; a falsy total yields 0%.
 * @returns {string} The floored percentage, padded via `padLeft(pct, 3)`, followed by "%".
 */
function pctStr(done, total) {
  const pct = total ? Math.floor((done / total) * 100) : 0;
  return `${padLeft(pct, 3)}%`;
}
const BASE = "https://shop.vintagespirits.ca";
@ -30,228 +30,250 @@ const SHOP_ID = "679-320"; // from your curl; can be made dynamic later
const IMG_BASE = "https://s.barnetnetwork.com/img/m/";
/**
 * Pick a display price from an API item.
 *
 * Candidates are tried in order — `sale_price`, `net_price`, `regular_price` —
 * and the first one that converts to a finite number greater than zero wins.
 *
 * @param {object} [it] - Raw API item.
 * @returns {string} The price formatted as "$N.NN", or "" when no candidate is usable.
 */
function asMoneyFromApi(it) {
  // Prefer the explicit sale price when present, then net, then regular.
  const candidates = [it?.sale_price, it?.net_price, it?.regular_price];

  for (const raw of candidates) {
    const n = Number(raw);
    if (Number.isFinite(n) && n > 0) return `$${n.toFixed(2)}`;
  }

  return "";
}
/**
 * Resolve an API item's `image` field to an absolute image URL.
 *
 * @param {object} [it] - Raw API item.
 * @returns {string} "" when there is no image; the value unchanged when it is already
 *   an http(s) URL; "https:" + value for protocol-relative URLs; otherwise the path
 *   (leading slashes removed) appended to `IMG_BASE`.
 */
function imgUrlFromApi(it) {
  const p = String(it?.image || "").trim();
  if (!p) return "";
  if (/^https?:\/\//i.test(p)) return p;
  if (p.startsWith("//")) return `https:${p}`;
  // API provides "custom/goods/..."
  return `${IMG_BASE}${p.replace(/^\/+/, "")}`;
}
/**
 * Convert a raw API item into a tracker item, or reject it.
 *
 * @param {object} [it] - Raw API item.
 * @returns {{name: string, price: string, url: string, sku: *, img: string} | null}
 *   The normalized item, or null when the item is missing, not available for sale,
 *   has a numeric `on_hand` of zero or less, or lacks a url/description.
 */
function vintageItemFromApi(it) {
  if (!it) return null;

  // Stock gate. `on_hand` only rejects when it coerces to a finite number <= 0:
  // undefined becomes NaN and passes, while null coerces to 0 and is rejected.
  if (!it.available_for_sale) return null;
  const onHand = Number(it.on_hand);
  if (Number.isFinite(onHand) && onHand <= 0) return null;

  const url = String(it.url || "").trim();
  const name = String(it.description || "").trim();
  if (!url || !name) return null;

  const sku = normalizeCspc(it.cspcid || "");
  const price = asMoneyFromApi(it);
  const img = imgUrlFromApi(it);

  return { name, price, url, sku, img };
}
/**
 * Build the products API URL for one page of a category.
 *
 * @param {{vsCategory: string, vsSubCategory: string}} cat - Category definition supplying
 *   the `category` and `sub_category` query values.
 * @param {number} page - Page number, sent as the `p` query parameter.
 * @returns {string} The absolute URL under `${BASE}/api/shop/${SHOP_ID}/products`.
 */
function makeApiUrl(cat, page) {
  const u = new URL(`${BASE}/api/shop/${SHOP_ID}/products`);
  u.searchParams.set("p", String(page));
  u.searchParams.set("show_on_web", "true");
  u.searchParams.set("sort_by", "desc");
  u.searchParams.set("category", cat.vsCategory); // e.g. "40 SPIRITS"
  u.searchParams.set("sub_category", cat.vsSubCategory); // e.g. "RUM"
  u.searchParams.set("varital_name", "");
  u.searchParams.set("no_item_found", "No item found.");
  u.searchParams.set("avail_for_sale", "false");
  // Random value per request — presumably a cache-buster; confirm against the site's own requests.
  u.searchParams.set("_dc", String(Math.floor(Math.random() * 1e10)));
  return u.toString();
}
/**
 * Fetch one page of the products API for the context's category.
 *
 * @param {object} ctx - Scan context; reads `ctx.cat`, `ctx.http` and `ctx.store.ua`.
 * @param {number} page - Page number to request.
 * @returns {Promise<*>} Whatever `ctx.http.fetchJsonWithRetry` resolves to.
 */
async function fetchVintagePage(ctx, page) {
  const url = makeApiUrl(ctx.cat, page);
  return await ctx.http.fetchJsonWithRetry(url, `vintage:api:${ctx.cat.key}:p${page}`, ctx.store.ua, {
    method: "GET",
    headers: {
      Accept: "*/*",
      Referer: ctx.cat.startUrl,
      Origin: BASE,
    },
    // Cookies were reportedly not required in testing; they are enabled here anyway
    // (the original note suggests enabling them if the API returns 403/empty).
    cookies: true,
  });
}
/**
 * Merge one run's discovered items into the previous DB, write the DB file and
 * record the category outcome on the report.
 *
 * @param {object} ctx - Scan context (`store`, `cat`, `dbFile`, `logger`, `catPrefixOut`).
 * @param {object} prevDb - Previously stored DB passed through to `mergeDiscoveredIntoDb`.
 * @param {object} report - Run report; `categories` and `totals` are mutated.
 * @param {Map<string, object>} discovered - Items found this run, keyed by URL.
 * @param {number} scannedPages - Page count to record for this category.
 * @param {number} t0 - `Date.now()` timestamp taken when the scan started.
 * @param {boolean} logSummary - Whether to emit the "Done in ..." summary line.
 */
function finalizeVintageScan(ctx, prevDb, report, discovered, scannedPages, t0, logSummary) {
  const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, {
    storeLabel: ctx.store.name,
  });

  const dbObj = buildDbObject(ctx, merged);
  writeJsonAtomic(ctx.dbFile, dbObj);

  const elapsed = Date.now() - t0;
  if (logSummary) {
    ctx.logger.ok(
      `${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`,
    );
  }

  report.categories.push({
    store: ctx.store.name,
    label: ctx.cat.label,
    key: ctx.cat.key,
    dbFile: ctx.dbFile,
    scannedPages,
    discoveredUnique: discovered.size,
    newCount: newItems.length,
    updatedCount: updatedItems.length,
    removedCount: removedItems.length,
    restoredCount: restoredItems.length,
    elapsedMs: elapsed,
  });
  report.totals.newCount += newItems.length;
  report.totals.updatedCount += updatedItems.length;
  report.totals.removedCount += removedItems.length;
  report.totals.restoredCount += restoredItems.length;

  addCategoryResultToReport(
    report,
    ctx.store.name,
    ctx.cat.label,
    newItems,
    updatedItems,
    removedItems,
    restoredItems,
  );
}

/**
 * Scan one category through the JSON products API and update its DB and the report.
 *
 * Page 1 is fetched first to learn the page count (`paginator.pages`); the pages
 * (capped by `ctx.config.maxPages` unless it is null) are then processed through
 * `parallelMapStaggered`, reusing the already-fetched first page.
 *
 * @param {object} ctx - Scan context (`cat`, `store`, `http`, `logger`, `config`, `dbFile`, `catPrefixOut`).
 * @param {object} prevDb - Previously stored DB for this category.
 * @param {object} report - Run report; mutated with this category's results.
 * @returns {Promise<void>}
 */
async function scanCategoryVintageApi(ctx, prevDb, report) {
  const t0 = Date.now();

  let first;
  try {
    first = await fetchVintagePage(ctx, 1);
  } catch (e) {
    ctx.logger.warn(`${ctx.catPrefixOut} | Vintage API fetch failed: ${e?.message || e}`);

    // NOTE(review): a failed first fetch is merged as an empty result set. Confirm that
    // mergeDiscoveredIntoDb does not treat this as every previously known item being removed.
    finalizeVintageScan(ctx, prevDb, report, new Map(), 1, t0, false);
    return;
  }

  const totalPages = Math.max(1, Number(first?.json?.paginator?.pages) || 1);
  const scanPages = ctx.config.maxPages === null ? totalPages : Math.min(ctx.config.maxPages, totalPages);

  ctx.logger.ok(
    `${ctx.catPrefixOut} | Pages: ${scanPages}${scanPages !== totalPages ? ` (cap from ${totalPages})` : ""}`,
  );

  const pages = [];
  for (let p = 1; p <= scanPages; p++) pages.push(p);

  let donePages = 0;

  const perPageItems = await require("../utils/async").parallelMapStaggered(
    pages,
    ctx.config.concurrency,
    ctx.config.staggerMs,
    async (page, idx) => {
      // Page 1 was already fetched above; do not request it twice.
      const r = page === 1 ? first : await fetchVintagePage(ctx, page);
      const arr = Array.isArray(r?.json?.items) ? r.json.items : [];

      const items = [];
      for (const raw of arr) {
        const it = vintageItemFromApi(raw);
        if (it) items.push(it);
      }

      donePages++;
      ctx.logger.ok(
        `${ctx.catPrefixOut} | Page ${pageStr(idx + 1, pages.length)} | ${String(r.status || "").padEnd(
          3,
        )} | ${pctStr(donePages, pages.length)} | items=${padLeft(items.length, 3)} | bytes=${kbStr(
          r.bytes,
        )} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}`,
      );

      return items;
    },
  );

  // De-duplicate by URL; a later occurrence overwrites an earlier one.
  const discovered = new Map();
  let dups = 0;
  for (const arr of perPageItems) {
    for (const it of arr) {
      if (discovered.has(it.url)) dups++;
      discovered.set(it.url, it);
    }
  }

  ctx.logger.ok(
    `${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}${dups ? ` (${dups} dups)` : ""}`,
  );

  finalizeVintageScan(ctx, prevDb, report, discovered, scanPages, t0, true);
}
/**
 * Build the store definition for Vintage Spirits.
 *
 * This store supplies its own `scanCategory` implementation (the JSON API scanner).
 * Each category carries `vsCategory` / `vsSubCategory`, which `makeApiUrl` sends as
 * the `category` / `sub_category` query values.
 *
 * @param {*} defaultUa - Value stored verbatim on the definition as `ua`
 *   (presumably the default User-Agent string — confirm against the caller).
 * @returns {object} Store descriptor with identity fields, `scanCategory` and `categories`.
 */
function createStore(defaultUa) {
  return {
    key: "vintage",
    name: "Vintage Spirits",
    host: "shop.vintagespirits.ca",
    ua: defaultUa,
    scanCategory: scanCategoryVintageApi,
    categories: [
      {
        key: "whisky-whiskey",
        label: "Whisky & Whiskey",
        startUrl: "https://shop.vintagespirits.ca/products?category=40+SPIRITS&sub_category=WHISKY+%26+WHISKEY",
        vsCategory: "40 SPIRITS",
        vsSubCategory: "WHISKY & WHISKEY",
      },
      {
        key: "single-malt-whisky",
        label: "Single Malt Whisky",
        startUrl: "https://shop.vintagespirits.ca/products?category=40+SPIRITS&sub_category=SINGLE+MALT+WHISKY",
        vsCategory: "40 SPIRITS",
        vsSubCategory: "SINGLE MALT WHISKY",
      },
      {
        key: "rum",
        label: "Rum",
        startUrl: "https://shop.vintagespirits.ca/products?category=40+SPIRITS&sub_category=RUM",
        vsCategory: "40 SPIRITS",
        vsSubCategory: "RUM",
      },
    ],
  };
}
module.exports = { createStore };

View file

@ -6,120 +6,116 @@ const { makePageUrlShopifyQueryPage } = require("../utils/url");
const { needsSkuDetail, pickBetterSku, normalizeCspc } = require("../utils/sku");
/**
 * Extract a trailing 6-digit SKU from a product URL or href.
 *
 * The digits must follow a hyphen at the very end of the path; an optional trailing
 * slash and an optional query string or fragment are tolerated.
 *
 * @param {string} hrefOrUrl - Product href or absolute URL.
 * @returns {string} The 6-digit SKU, or "" when the pattern is absent.
 */
function extractSkuFromUrlOrHref(hrefOrUrl) {
  const s = String(hrefOrUrl || "");
  // /products/<handle>-123456 or /collections/.../products/<handle>-123456
  const m = s.match(/-(\d{6})(?:\/)?(?:[?#].*)?$/);
  return m ? m[1] : "";
}
/**
 * Extract a 6-digit SKU from a product card's HTML by looking at filename-like patterns.
 *
 * @param {string} block - HTML of a single product card.
 * @returns {string} The 6-digit SKU, or "" when neither pattern matches.
 */
function extractSkuFromWillowBlock(block) {
  const b = String(block || "");

  // Image filename pattern:
  //   /products/710296-Zaya-Gran-Reserva-16-Year_160x.png
  const m1 = b.match(/\/products\/(\d{6})[-_]/i);
  if (m1) return m1[1];

  // Generic fallback: six digits at a word boundary, then "-" or "_" and a letter.
  const m2 = b.match(/\b(\d{6})[-_][A-Za-z]/);
  if (m2) return m2[1];

  return "";
}
/**
 * Canonicalize a product URL so the same product always maps to the same key.
 *
 * The query string and fragment are dropped, and a collection-scoped path
 * (`/collections/<c>/products/<handle>`) is rewritten to `/products/<handle>`.
 *
 * @param {string} raw - Absolute product URL.
 * @returns {string} The canonical URL; when `raw` cannot be parsed as a URL,
 *   `String(raw || "")` is returned unchanged.
 */
function canonicalizeWillowUrl(raw) {
  try {
    const u = new URL(String(raw));
    u.search = "";
    u.hash = "";
    const m = u.pathname.match(/^\/collections\/[^/]+\/products\/([^/]+)\/?$/i);
    if (m) u.pathname = `/products/${m[1]}`;
    return u.toString();
  } catch {
    // Not a parseable absolute URL: hand the input back as a string.
    return String(raw || "");
  }
}
/**
 * Extract the price shown on a product card.
 *
 * Preference order:
 *   1. The exact decimal inside a `visually-hidden` span following
 *      `grid-product__price--current`, else the first `visually-hidden` price span.
 *   2. A price reconstructed from the `$39<sup>99</sup>` markup.
 *   3. The first `$`-prefixed amount anywhere in the block.
 *
 * @param {string} block - HTML of a single product card.
 * @returns {string} The price with whitespace removed (e.g. "$39.99"), or "" when none is found.
 */
function extractWillowCardPrice(block) {
  const b = String(block || "");

  const current =
    b.match(
      /grid-product__price--current[\s\S]*?<span\b[^>]*class=["']visually-hidden["'][^>]*>\s*(\$\s*[\d,]+\.\d{2})\s*<\/span>/i,
    )?.[1] || b.match(/<span\b[^>]*class=["']visually-hidden["'][^>]*>\s*(\$\s*[\d,]+\.\d{2})\s*<\/span>/i)?.[1];
  if (current) return current.replace(/\s+/g, "");

  // Dollars and cents are split across a <sup>; thousands separators are dropped here.
  const sup = b.match(/\$\s*([\d,]+)\s*<sup>\s*(\d{2})\s*<\/sup>/i);
  if (sup) return `$${sup[1].replace(/,/g, "")}.${sup[2]}`;

  const any = b.match(/\$\s*[\d,]+(?:\.\d{2})?/);
  return any ? any[0].replace(/\s+/g, "") : "";
}
/**
 * Parse the product cards out of a Willow Park listing page.
 *
 * The HTML is cut into blocks at every `<div>` whose class contains `grid-item`
 * followed by `grid-product`; each block runs up to the next such `<div>` (or the
 * end of the document). Blocks without a product link or a title are skipped.
 *
 * @param {string} html - Listing page HTML.
 * @param {object} [ctx] - Scan context; `ctx.store.host` is used as the base host,
 *   defaulting to "www.willowpark.net".
 * @param {string} [finalUrl] - Not used by this parser.
 * @returns {Array<{name: string, price: string, url: string, sku: string, img: *, pid: string}>}
 *   Items de-duplicated by canonical URL (the last occurrence wins).
 */
function parseProductsWillowPark(html, ctx, finalUrl) {
  const s = String(html || "");
  const items = [];

  const base = `https://${ctx?.store?.host || "www.willowpark.net"}/`;

  const starts = [...s.matchAll(/<div\b[^>]*class=["'][^"']*\bgrid-item\b[^"']*\bgrid-product\b[^"']*["'][^>]*>/gi)]
    .map((m) => m.index)
    .filter((i) => typeof i === "number");

  const blocks = [];
  for (let i = 0; i < starts.length; i++) {
    const a = starts[i];
    const b = i + 1 < starts.length ? starts[i + 1] : s.length;
    blocks.push(s.slice(a, b));
  }

  for (const block of blocks) {
    // Prefer the collection-scoped product link, then any product link.
    const href =
      block.match(/<a\b[^>]*href=["']([^"']*\/collections\/[^"']*\/products\/[^"']+)["']/i)?.[1] ||
      block.match(/<a\b[^>]*href=["']([^"']*\/products\/[^"']+)["']/i)?.[1];
    if (!href) continue;

    let url;
    try {
      url = new URL(decodeHtml(href), base).toString();
    } catch {
      // Unresolvable href: skip this card.
      continue;
    }
    url = canonicalizeWillowUrl(url);

    const titleHtml =
      block.match(/<div\b[^>]*class=["'][^"']*\bgrid-product__title\b[^"']*["'][^>]*>([\s\S]*?)<\/div>/i)?.[1] ||
      "";
    const name = cleanText(decodeHtml(stripTags(titleHtml)));
    if (!name) continue;

    const price = extractWillowCardPrice(block);
    const img = extractFirstImgUrl(block, base);
    const pid = block.match(/\bdata-product-id=["'](\d+)["']/i)?.[1] || "";

    const sku = extractSkuFromUrlOrHref(href) || extractSkuFromUrlOrHref(url) || extractSkuFromWillowBlock(block);

    items.push({ name, price, url, sku, img, pid });
  }

  const uniq = new Map();
  for (const it of items) uniq.set(it.url, it);
  return [...uniq.values()];
}
/**
 * Detect a listing page that reports having no products.
 *
 * @param {string} html - Listing page HTML.
 * @returns {boolean} True when the page contains one of the known "empty collection" markers.
 */
function willowIsEmptyListingPage(html) {
  const s = String(html || "");
  if (/Sorry,\s+there are no products in this collection\./i.test(s)) return true;
  if (/No products found/i.test(s)) return true;
  if (/collection--empty\b/i.test(s)) return true;
  return false;
}
/* ---------------- Storefront GraphQL (token extracted from HTML) ---------------- */
@ -137,102 +133,99 @@ query ($id: ID!) @inContext(country: CA) {
`;
/**
 * Choose the most useful variant SKU from a product returned by the GraphQL API.
 *
 * Only variants with a non-blank `sku` are considered. Preference order: a variant
 * with `quantityAvailable > 0`, then one with `availableForSale`, then any.
 *
 * @param {object} [product] - Product object; reads `product.variants.nodes`.
 * @returns {string} The trimmed SKU, or "" when no variant has one.
 */
function pickBestVariantSku(product) {
  const vs = Array.isArray(product?.variants?.nodes) ? product.variants.nodes : [];
  if (!vs.length) return "";

  const inStock = vs.find((v) => Number(v?.quantityAvailable) > 0 && String(v?.sku || "").trim());
  if (inStock) return String(inStock.sku).trim();

  const forSale = vs.find((v) => Boolean(v?.availableForSale) && String(v?.sku || "").trim());
  if (forSale) return String(forSale.sku).trim();

  const any = vs.find((v) => String(v?.sku || "").trim());
  return any ? String(any.sku).trim() : "";
}
/**
 * Find the storefront access token embedded in a page's HTML.
 *
 * @param {string} html - Page HTML.
 * @returns {string} The token from the `shopify-features` script JSON (`accessToken`) when
 *   present, otherwise the `shopify-checkout-api-token` meta content, otherwise "".
 */
function extractStorefrontTokenFromHtml(html) {
  const s = String(html || "");

  // 1) script#shopify-features JSON: {"accessToken":"..."}
  const j = s.match(/<script[^>]+id=["']shopify-features["'][^>]*>([\s\S]*?)<\/script>/i)?.[1];
  if (j) {
    try {
      const obj = JSON.parse(j);
      const t = String(obj?.accessToken || "").trim();
      if (t) return t;
    } catch {
      // Malformed JSON is not fatal: fall through to the meta-tag lookup below.
    }
  }

  // 2) meta name="shopify-checkout-api-token"
  const m = s.match(/<meta[^>]+name=["']shopify-checkout-api-token["'][^>]+content=["']([^"']+)["']/i)?.[1];
  return String(m || "").trim();
}
/**
 * Return the storefront token, fetching the homepage to extract it on first use.
 *
 * The token is memoized on `ctx._willowStorefrontToken`.
 *
 * @param {object} ctx - Scan context; reads `ctx.http` and `ctx.store.ua`.
 * @returns {Promise<string>} The storefront token.
 * @throws {Error} When no token can be found in the homepage HTML.
 */
async function willowGetStorefrontToken(ctx) {
  if (ctx._willowStorefrontToken) return ctx._willowStorefrontToken;

  const r = await ctx.http.fetchTextWithRetry("https://www.willowpark.net/", "willow:token", ctx.store.ua);
  const t = extractStorefrontTokenFromHtml(r?.text || "");
  if (!t) throw new Error("Willow Park: could not find storefront token in homepage HTML");

  ctx._willowStorefrontToken = t;
  return t;
}
/**
 * POST a GraphQL query to the storefront endpoint using the cached storefront token.
 *
 * @param {object} ctx - Scan context; reads `ctx.http` and `ctx.store.ua`.
 * @param {string} label - Label passed through to `fetchJsonWithRetry`.
 * @param {string} query - GraphQL query text.
 * @param {object} variables - GraphQL variables.
 * @returns {Promise<*>} Whatever `ctx.http.fetchJsonWithRetry` resolves to.
 */
async function willowGql(ctx, label, query, variables) {
  const token = await willowGetStorefrontToken(ctx);

  const r = await ctx.http.fetchJsonWithRetry(WILLOW_STOREFRONT_GQL_URL, label, ctx.store.ua, {
    method: "POST",
    headers: {
      Accept: "application/json",
      "content-type": "application/json",
      Origin: "https://www.willowpark.net",
      Referer: "https://www.willowpark.net/",
      "x-shopify-storefront-access-token": token,
    },
    body: JSON.stringify({ query, variables }),
  });

  // If token is rejected, clear so a future attempt re-fetches it once.
  if (r?.status === 401 || r?.status === 403) ctx._willowStorefrontToken = "";
  return r;
}
/**
 * Normalize a SKU returned by the GraphQL API.
 *
 * - A value `normalizeCspc` accepts (6-digit CSPC) is returned in that normalized form.
 * - Values already namespaced with `id:`, `upc:` or `u:` are returned as-is.
 * - Any other all-digit value is namespaced as `id:<NUM>`.
 * - Everything else is returned as-is (trimmed).
 *
 * @param {string} rawSku - SKU as returned by the API.
 * @returns {string} The normalized SKU, or "" for blank input.
 */
function normalizeWillowGqlSku(rawSku) {
  const s = String(rawSku || "").trim();
  if (!s) return "";

  const cspc = normalizeCspc(s);
  if (cspc) return cspc; // 6-digit wins

  if (/^id:/i.test(s) || /^upc:/i.test(s) || /^u:/i.test(s)) return s;
  if (/^\d+$/.test(s)) return `id:${s}`;
  return s;
}
/**
 * Look up a product's SKU by its numeric product id via the GraphQL API.
 *
 * Results are memoized per product id on `ctx._willowSkuCacheByPid`. Failed lookups
 * (non-200 response or a thrown error) are cached as "" too, so they are not retried
 * for the lifetime of that cache.
 *
 * @param {object} ctx - Scan context; carries the per-context SKU cache.
 * @param {string|number} pid - Product id.
 * @returns {Promise<string>} The normalized SKU, or "" when the id is blank or the lookup fails.
 */
async function willowFetchSkuByPid(ctx, pid) {
  const id = String(pid || "").trim();
  if (!id) return "";

  if (!ctx._willowSkuCacheByPid) ctx._willowSkuCacheByPid = new Map();
  if (ctx._willowSkuCacheByPid.has(id)) return ctx._willowSkuCacheByPid.get(id);

  const gid = `gid://shopify/Product/${id}`;
  let sku = "";

  try {
    const r = await willowGql(ctx, `willow:gql:pid:${id}`, PRODUCT_BY_ID_QUERY, { id: gid });
    if (r?.status === 200) sku = normalizeWillowGqlSku(pickBestVariantSku(r?.json?.data?.product));
  } catch {
    // Best effort: a failed lookup simply leaves the SKU empty.
    sku = "";
  }

  ctx._willowSkuCacheByPid.set(id, sku);
  return sku;
}
/**
@ -240,58 +233,58 @@ async function willowFetchSkuByPid(ctx, pid) {
* Budgeted to avoid exploding requests.
*/
/**
 * Fill in missing or weak SKUs on discovered items, using the previous DB first
 * and the GraphQL API second.
 *
 * The number of repair attempts is capped by `ctx.config.willowparkGqlBudget`
 * (default 200 when that is not a finite number).
 *
 * @param {object} ctx - Scan context; reads `config`, `logger` and `catPrefixOut`.
 * @param {Map<string, object>} discovered - Items found this run, keyed by URL; mutated in place.
 * @param {object} [prevDb] - Previous DB; `prevDb.byUrl` is consulted per URL.
 * @returns {Promise<void>}
 */
async function willowRepairDiscoveredItems(ctx, discovered, prevDb) {
  const budget = Number.isFinite(ctx?.config?.willowparkGqlBudget) ? ctx.config.willowparkGqlBudget : 200;
  let used = 0;

  for (const [url, it] of discovered.entries()) {
    if (!it) continue;

    // Seed from prev DB so we don't repair repeatedly if we already learned a good SKU.
    const prev = prevDb?.byUrl?.get(url);
    if (prev) it.sku = pickBetterSku(it.sku, prev.sku);

    if (!needsSkuDetail(it.sku)) continue;
    // NOTE(review): `break` also stops the prev-DB seeding above for every remaining
    // item once the budget is spent; confirm that is intended (vs. `continue`).
    if (used >= budget) break;

    const repaired = await willowFetchSkuByPid(ctx, it.pid);
    if (repaired) it.sku = pickBetterSku(repaired, it.sku);

    discovered.set(url, it);
    // Counts attempts, including ones answered from the per-pid cache or skipped for a blank pid.
    used++;
  }

  ctx.logger.ok(`${ctx.catPrefixOut} | Willow SKU repair (GQL): used=${used}/${budget}`);
}
/**
 * Build the store definition for Willow Park.
 *
 * @param {*} defaultUa - Value stored verbatim on the definition as `ua`
 *   (presumably the default User-Agent string — confirm against the caller).
 * @returns {object} Store descriptor with identity fields, the listing parser, the page-URL
 *   builder, the empty-page detector, the SKU repair hook and the categories to scan.
 */
function createStore(defaultUa) {
  return {
    key: "willowpark",
    name: "Willow Park",
    host: "www.willowpark.net",
    ua: defaultUa,

    parseProducts: parseProductsWillowPark,
    makePageUrl: makePageUrlShopifyQueryPage,
    isEmptyListingPage: willowIsEmptyListingPage,

    // Hook called by scanner (add 1-line call in scanner before merge)
    repairDiscoveredItems: willowRepairDiscoveredItems,

    categories: [
      {
        key: "scotch",
        label: "Scotch",
        startUrl: "https://www.willowpark.net/collections/scotch?filter.v.availability=1",
        discoveryStartPage: 5,
      },
      {
        key: "rum",
        label: "Rum",
        startUrl: "https://www.willowpark.net/collections/rum?filter.v.availability=1",
        discoveryStartPage: 3,
      },
    ],
  };
}
module.exports = { createStore, parseProductsWillowPark };

View file

@ -16,84 +16,86 @@ const STATUS_W = 4;
const PROG_W = 4;
/**
 * Format a byte count as a right-aligned cell.
 *
 * @param {number} bytes - Byte count handed to `humanBytes`.
 * @returns {string} `humanBytes(bytes)` left-padded with spaces to at least 8 characters.
 */
function kbStr(bytes) {
  return humanBytes(bytes).padStart(8, " ");
}
/**
 * Format a millisecond duration as a right-aligned seconds cell.
 *
 * Durations that round to under 10 s keep one decimal ("1.2s"); anything else is
 * shown as whole seconds ("12s"). Non-finite input is treated as 0.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} The formatted duration, left-padded with spaces to at least 7 characters.
 */
function secStr(ms) {
  const s = Number.isFinite(ms) ? ms / 1000 : 0;
  // Round to tenths first so that e.g. 9.96 s is classified as "10s", not "10.0s".
  const tenths = Math.round(s * 10) / 10;
  const out = tenths < 10 ? `${tenths.toFixed(1)}s` : `${Math.round(s)}s`;
  return out.padStart(7, " ");
}
/**
 * Format completion progress as a percentage cell.
 *
 * @param {number} done - Number of completed units.
 * @param {number} total - Total number of units; a falsy total yields 0%.
 * @returns {string} The floored percentage, padded via `padLeft(pct, 3)`, followed by "%".
 */
function pctStr(done, total) {
  const pct = total ? Math.floor((done / total) * 100) : 0;
  return `${padLeft(pct, 3)}%`;
}
/**
 * Format a "current/total" page counter.
 *
 * @param {number} i - Current page position; padded via `padLeft` to the width of `total`.
 * @param {number} total - Total number of pages.
 * @returns {string} `<padded i>/<total>`.
 */
function pageStr(i, total) {
  const leftW = String(total).length;
  return `${padLeft(i, leftW)}/${total}`;
}
/**
 * Format the "action" column of a progress line.
 *
 * @param {*} s - Action text; stringified before padding.
 * @returns {string} The text padded via `padRightV` to `ACTION_W`.
 */
function actionCell(s) {
  return padRightV(String(s), ACTION_W);
}
/**
 * Format the "status" column of a progress line.
 *
 * @param {object} logger - Logger providing `color(text, code)` and the `C` colour table.
 * @param {*} statusRaw - Status value; a falsy value yields an uncoloured blank cell.
 * @param {boolean} okBool - Selects `logger.C.green` when truthy, `logger.C.yellow` otherwise.
 * @returns {string} The status padded via `padRightV` to `STATUS_W`, coloured when non-empty.
 */
function statusCell(logger, statusRaw, okBool) {
  const cell = padRightV(String(statusRaw || ""), STATUS_W);
  if (!statusRaw) return cell;
  return logger.color(cell, okBool ? logger.C.green : logger.C.yellow);
}
/**
 * Format the "progress" column of a progress line.
 *
 * @param {*} v - Progress value; null/undefined is shown as "----".
 * @returns {string} The value padded via `padLeftV` to `PROG_W`.
 */
function progCell(v) {
  const raw = String(v ?? "----");
  return padLeftV(raw, PROG_W);
}
/**
 * Emit one column-aligned progress line through `logger.ok`.
 *
 * @param {object} logger - Logger used for output and for colouring the status cell.
 * @param {object} ctx - Category context; `ctx.catPrefixOut` starts the line.
 * @param {*} action - Value for the action column.
 * @param {*} statusRaw - Value for the status column.
 * @param {boolean} statusOk - Whether the status is rendered as OK (see `statusCell`).
 * @param {*} progVal - Value for the progress column.
 * @param {string} rest - Free-form remainder of the line.
 */
function logProgressLine(logger, ctx, action, statusRaw, statusOk, progVal, rest) {
  logger.ok(
    `${ctx.catPrefixOut} | ${actionCell(action)} | ${statusCell(logger, statusRaw, statusOk)} | ${progCell(progVal)} | ${rest}`,
  );
}
/**
 * Build the "store | category" prefix formatters, sized to the longest names in use.
 *
 * @param {Array<object>} stores - Store definitions; `name` and `categories[].label` set the column widths.
 * @param {object} logger - Logger providing `bold(text)`.
 * @returns {{catPrefixRaw: Function, catPrefixOut: Function, width: number, catW: number}}
 *   `catPrefixRaw(store, cat)` gives the padded plain prefix, `catPrefixOut(store, cat)` the same
 *   prefix passed through `logger.bold`; `width` is the store column width and `catW` the
 *   category column width (both at least 1).
 */
function makeCatPrefixers(stores, logger) {
  const storeW = Math.max(...stores.map((s) => String(s.name || "").length), 1);
  const catW = Math.max(...stores.flatMap((s) => (s.categories || []).map((c) => String(c.label || "").length)), 1);

  function catPrefixRaw(store, cat) {
    return `${padRight(String(store.name || ""), storeW)} | ${padRight(String(cat.label || ""), catW)}`;
  }

  function catPrefixOut(store, cat) {
    return logger.bold(catPrefixRaw(store, cat));
  }

  return { catPrefixRaw, catPrefixOut, width: storeW, catW };
}
/**
 * Assemble the per-category context object used while scanning.
 *
 * @param {object} store - Store definition; `store.key` forms part of the DB name.
 * @param {object} cat - Category definition; reads `cat.startUrl` and `cat.key`.
 * @param {Function} catPrefixOutFn - Called as `catPrefixOutFn(store, cat)` to produce the log prefix.
 * @param {object} config - Run configuration; `config.dbDir` is passed to `dbPathFor`.
 * @returns {{store: object, cat: object, baseUrl: *, dbFile: *, catPrefixOut: *}} The context.
 */
function buildCategoryContext(store, cat, catPrefixOutFn, config) {
  const baseUrl = normalizeBaseUrl(cat.startUrl);
  const dbFile = dbPathFor(`${store.key}__${cat.key}`, baseUrl, config.dbDir);
  return {
    store,
    cat,
    baseUrl,
    dbFile,
    catPrefixOut: catPrefixOutFn(store, cat),
  };
}
/**
 * Load the previous DB for a category and log how many rows it holds.
 *
 * @param {object} logger - Logger exposing `ok` and `dim`.
 * @param {object} ctx - Category context; reads `ctx.dbFile` and `ctx.catPrefixOut`.
 * @returns {{byUrl: Map}} The value returned by `readDb(ctx.dbFile)`.
 */
function loadCategoryDb(logger, ctx) {
    const prevDb = readDb(ctx.dbFile);
    logger.ok(`${ctx.catPrefixOut} | DB loaded: ${padLeft(prevDb.byUrl.size, 5)} | ${logger.dim(ctx.dbFile)}`);
    return prevDb;
}
/**
 * Decide whether a parsed item belongs in this category's DB.
 *
 * @param {object} ctx - Category context; an optional `ctx.cat.allowUrl` callback acts as the filter.
 * @param {string} finalUrl - URL of the listing page the item was parsed from.
 * @param {object} item - The parsed item.
 * @returns {*} `true` when no `allowUrl` function is configured, otherwise whatever
 *   `allowUrl(item, ctx, finalUrl)` returns.
 */
function shouldTrackItem(ctx, finalUrl, item) {
    const allow = ctx?.cat?.allowUrl;
    if (typeof allow !== "function") return true;
    return allow(item, ctx, finalUrl);
}
/**
 * Best-effort extraction of the total page count from pagination links in listing HTML.
 *
 * Scans every `href` attribute for three URL shapes and keeps the largest page number:
 *   - `/page/23/`
 *   - `?paged=23`
 *   - `?page=23` (Shopify)
 * The numbers usually sit inside links that often have a "page-numbers" class, but
 * the scan works even without it.
 *
 * @param {*} html - Listing page HTML; non-strings are coerced, `null`/`undefined` become "".
 * @returns {number} The largest page number found when it is greater than 1, otherwise 0
 *   (meaning "could not tell").
 */
function extractTotalPagesFromPaginationHtml(html) {
    const s = String(html || "");

    // `matchAll` works on a clone of each regex, so the global flag leaves no shared state.
    const patterns = [
        /href=["'][^"']*\/page\/(\d+)\/[^"']*["']/gi, // /page/23/
        /href=["'][^"']*[?&]paged=(\d+)[^"']*["']/gi, // ?paged=23
        /href=["'][^"']*[?&]page=(\d+)[^"']*["']/gi, // Shopify: ?page=23
    ];

    let max = 0;
    for (const re of patterns) {
        for (const m of s.matchAll(re)) {
            const n = Number(m[1]);
            if (Number.isFinite(n) && n > max) max = n;
        }
    }

    // Keep it conservative: only report a count when a link pointing past page 1 was found.
    return max > 1 ? max : 0;
}
/**
 * Fetch a listing URL and report whether it contains any products.
 *
 * Any failure (fetch error, parser exception) is deliberately treated as a miss,
 * because callers only use this to probe for the last existing page.
 *
 * @param {object} ctx - Category context; reads `ctx.http`, `ctx.config` and `ctx.store`.
 * @param {string} url - Listing page URL to probe.
 * @returns {Promise<{ok: boolean, items: number}>} `ok` is true when at least one item was parsed.
 */
async function pageHasProducts(ctx, url) {
    const { http, config } = ctx;
    try {
        const { text, finalUrl } = await http.fetchTextWithRetry(url, "discover", ctx.store.ua);

        // A store can flag "empty listing" pages explicitly before any parsing happens.
        if (typeof ctx.store.isEmptyListingPage === "function") {
            if (ctx.store.isEmptyListingPage(text, ctx, url)) return { ok: false, items: 0 };
        }

        const parser = ctx.store.parseProducts || config.defaultParseProducts;
        // Hand the parser the final URL as well, matching the other parser call sites.
        const items = parser(text, ctx, finalUrl).length;
        return { ok: items > 0, items };
    } catch {
        return { ok: false, items: 0 };
    }
}
/**
 * Probe one page number during discovery and log the outcome.
 *
 * @param {object} ctx - Category context; an optional finite `ctx.cat.discoveryDelayMs`
 *   is slept before the request.
 * @param {string} baseUrl - Category base URL.
 * @param {number} pageNum - Page number to probe.
 * @param {object} state - Discovery state, only used to render the progress column.
 * @returns {Promise<{ok: boolean, items: number}>} The result of `pageHasProducts`.
 */
async function probePage(ctx, baseUrl, pageNum, state) {
    const url = makePageUrlForCtx(ctx, baseUrl, pageNum);

    const delay = Number.isFinite(ctx?.cat?.discoveryDelayMs) ? ctx.cat.discoveryDelayMs : 0;
    if (delay > 0) await sleep(delay);

    // The timing covers only the probe itself, not the optional delay above.
    const t0 = Date.now();
    const r = await pageHasProducts(ctx, url);
    const ms = Date.now() - t0;

    const prog = discoverProg(state);

    logProgressLine(
        ctx.logger,
        ctx,
        `Discover probe page=${padLeftV(pageNum, 4)}`,
        r.ok ? "OK" : "MISS",
        Boolean(r.ok),
        prog,
        `items=${padLeftV(r.items, 3)} | bytes=${padLeftV("", 8)} | ${padRightV(ctx.http.inflightStr(), 11)} | ${secStr(ms)}`,
    );

    return r;
}
/**
 * Format the progress column for discovery probes.
 *
 * Progress is only meaningful during the binary-search phase, where it reflects how
 * far the `(loOk, hiMiss)` interval has shrunk relative to its initial span.
 *
 * @param {object} state - Discovery state (`phase`, `loOk`, `hiMiss`, `binInitialSpan`).
 * @returns {string} A percentage string; the zero placeholder outside the binary phase.
 */
function discoverProg(state) {
    if (!state || state.phase !== "binary") return " 0%";

    const span = Math.max(1, state.hiMiss - state.loOk);
    const initial = Math.max(1, state.binInitialSpan);
    // An initial span of 1 means there was nothing to search.
    if (initial <= 1) return "100%";

    // The search is finished when the span reaches 1, so measure against (span - 1).
    const remaining = Math.max(0, span - 1);
    const total = Math.max(1, initial - 1);
    const pct = Math.max(0, Math.min(100, Math.floor(((total - remaining) / total) * 100)));
    return `${padLeft(pct, 3)}%`;
}
/**
 * Binary-search the last page that still has products.
 *
 * Invariant: page `loOk` is known to have products and page `hiMiss` is known not to.
 * The interval is halved with `probePage` until the two are adjacent.
 *
 * @param {object} ctx - Category context passed through to `probePage`.
 * @param {string} baseUrl - Category base URL.
 * @param {number} loOk - Highest page known to contain products.
 * @param {number} hiMiss - Lowest page known to be empty.
 * @param {object} state - Discovery state, mutated in place (`phase`, `loOk`, `hiMiss`,
 *   `binInitialSpan`) so progress logging can follow the search.
 * @returns {Promise<number>} The last page number that has products.
 */
async function binaryFindLastOk(ctx, baseUrl, loOk, hiMiss, state) {
    state.phase = "binary";
    state.loOk = loOk;
    state.hiMiss = hiMiss;
    state.binInitialSpan = Math.max(1, hiMiss - loOk);

    while (hiMiss - loOk > 1) {
        const mid = loOk + Math.floor((hiMiss - loOk) / 2);
        // Publish the bounds before probing so the logged progress matches this step.
        state.loOk = loOk;
        state.hiMiss = hiMiss;

        const pm = await probePage(ctx, baseUrl, mid, state);
        if (pm.ok) loOk = mid;
        else hiMiss = mid;
    }

    state.loOk = loOk;
    state.hiMiss = hiMiss;
    return loOk;
}
/**
 * Work out how many listing pages a category has, using as few requests as possible.
 *
 * Strategy:
 *   1. Fetch page 1 once. If it is flagged empty or yields no items, return 1.
 *   2. Read the page count from the pagination links on page 1. The count is trusted
 *      unless it is tiny (<= 2) while page 1 looks full (>= 40 items).
 *   3. Otherwise probe page `guess`; on a miss, binary-search between 1 and `guess`.
 *      On a hit, keep stepping forward by `step` until a miss, then binary-search the
 *      last gap. Stepping stops once the last good page exceeds 5000.
 *
 * @param {object} ctx - Category context (`http`, `store`, `cat`, `config`, `logger`, `catPrefixOut`).
 * @param {string} baseUrl - Category base URL.
 * @param {number} guess - First page to probe when pagination cannot be trusted (minimum 2).
 * @param {number} step - Page increment while expanding; non-finite or non-positive values fall back to 1.
 * @returns {Promise<number>} The discovered total number of pages (at least 1).
 */
async function discoverTotalPagesFast(ctx, baseUrl, guess, step) {
    const state = { phase: "pre", loOk: 1, hiMiss: 2, binInitialSpan: 0 };

    // Fetch page 1 ONCE and try to extract total pages from pagination.
    const url1 = makePageUrlForCtx(ctx, baseUrl, 1);
    const t0 = Date.now();
    const { text: html1, ms, finalUrl } = await ctx.http.fetchTextWithRetry(url1, "discover", ctx.store.ua);
    const pMs = Date.now() - t0;

    if (typeof ctx.store.isEmptyListingPage === "function") {
        if (ctx.store.isEmptyListingPage(html1, ctx, url1)) {
            ctx.logger.warn(
                `${ctx.store.name} | ${ctx.cat.label} | Page 1 did not look like a listing. Defaulting to 1.`,
            );
            return 1;
        }
    }

    const parser = ctx.store.parseProducts || ctx.config.defaultParseProducts;
    const items1 = parser(html1, ctx, finalUrl).length;

    logProgressLine(
        ctx.logger,
        ctx,
        `Discover probe page=${padLeftV(1, 4)}`,
        items1 > 0 ? "OK" : "MISS",
        items1 > 0,
        discoverProg(state),
        `items=${padLeftV(items1, 3)} | bytes=${padLeftV("", 8)} | ${padRightV(ctx.http.inflightStr(), 11)} | ${secStr(ms || pMs)}`,
    );

    if (items1 <= 0) {
        ctx.logger.warn(`${ctx.store.name} | ${ctx.cat.label} | Page 1 did not look like a listing. Defaulting to 1.`);
        return 1;
    }

    const extracted = extractTotalPagesFromPaginationHtml(html1);

    // Shopify collections with filters often lie about pagination.
    // If page 1 looks full, don't trust a tiny extracted count.
    if (extracted && extracted >= 1) {
        const looksTruncated = extracted <= 2 && items1 >= 40; // Shopify default page size ≈ 48

        if (!looksTruncated) {
            ctx.logger.ok(`${ctx.catPrefixOut} | Total pages (from pagination): ${extracted}`);
            return extracted;
        }

        ctx.logger.warn(
            `${ctx.catPrefixOut} | Pagination says ${extracted} but page looks full; falling back to probe`,
        );
    }

    // Fallback to probing if pagination parse fails.
    // A non-finite guess would otherwise turn into a NaN page number.
    const g = Number.isFinite(guess) ? Math.max(2, guess) : 2;
    const pg = await probePage(ctx, baseUrl, g, state);
    if (!pg.ok) return await binaryFindLastOk(ctx, baseUrl, 1, g, state);

    // A zero, negative or non-finite step would never advance; fall back to 1.
    const stride = Number.isFinite(step) && step > 0 ? step : 1;

    let lastOk = g;
    while (true) {
        const probe = lastOk + stride;
        const pr = await probePage(ctx, baseUrl, probe, state);
        if (!pr.ok) return await binaryFindLastOk(ctx, baseUrl, lastOk, probe, state);
        lastOk = probe;
        if (lastOk > 5000) {
            ctx.logger.warn(
                `${ctx.store.name} | ${ctx.cat.label} | Discovery hit safety cap at ${lastOk}. Using that as total pages.`,
            );
            return lastOk;
        }
    }
}
/**
 * Scan one category end to end: discover the page count, fetch every page, merge the
 * results into the previous DB, persist the DB and record the outcome in `report`.
 *
 * When the store defines `scanCategory`, that function does all of the work instead.
 *
 * @param {object} ctx - Category context (`store`, `cat`, `baseUrl`, `dbFile`, `http`,
 *   `logger`, `config`, `catPrefixOut`).
 * @param {{byUrl: Map}} prevDb - Previously stored items for this category.
 * @param {object} report - Run report, mutated in place: an entry is pushed to
 *   `report.categories` and the counters in `report.totals` are incremented.
 * @returns {Promise<void>}
 */
async function discoverAndScanCategory(ctx, prevDb, report) {
    const { logger, config } = ctx;

    // Stores with their own scanner bypass the generic paginated scan.
    if (typeof ctx.store.scanCategory === "function") {
        await ctx.store.scanCategory(ctx, prevDb, report);
        return;
    }

    const t0 = Date.now();

    // Per-category settings win over the global configuration.
    const guess = Number.isFinite(ctx.cat.discoveryStartPage) ? ctx.cat.discoveryStartPage : config.discoveryGuess;
    const step = Number.isFinite(ctx.cat.discoveryStep) ? ctx.cat.discoveryStep : config.discoveryStep;

    const totalPages = await discoverTotalPagesFast(ctx, ctx.baseUrl, guess, step);
    const scanPages = config.maxPages === null ? totalPages : Math.min(config.maxPages, totalPages);

    logger.ok(
        `${ctx.catPrefixOut} | Pages: ${scanPages}${scanPages !== totalPages ? ` (cap from ${totalPages})` : ""}`,
    );

    const pages = [];
    for (let p = 1; p <= scanPages; p++) pages.push(makePageUrlForCtx(ctx, ctx.baseUrl, p));

    let donePages = 0;

    const pageConc = Number.isFinite(ctx.cat.pageConcurrency) ? ctx.cat.pageConcurrency : config.concurrency;
    const pageStagger = Number.isFinite(ctx.cat.pageStaggerMs) ? ctx.cat.pageStaggerMs : config.staggerMs;

    const perPageItems = await parallelMapStaggered(pages, pageConc, pageStagger, async (pageUrl, idx) => {
        const pnum = idx + 1;

        const {
            text: html,
            ms,
            bytes,
            status,
            finalUrl,
        } = await ctx.http.fetchTextWithRetry(pageUrl, `page:${ctx.store.key}:${ctx.cat.key}:${pnum}`, ctx.store.ua);

        const parser = ctx.store.parseProducts || config.defaultParseProducts;
        const itemsRaw = parser(html, ctx, finalUrl);

        // Drop items the category filter rejects.
        const items = [];
        for (const it of itemsRaw) {
            if (shouldTrackItem(ctx, finalUrl, it)) items.push(it);
        }

        donePages++;
        logProgressLine(
            logger,
            ctx,
            `Page ${pageStr(pnum, pages.length)}`,
            status ? String(status) : "",
            status >= 200 && status < 400,
            pctStr(donePages, pages.length),
            `items=${padLeft(items.length, 3)} | bytes=${kbStr(bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(ms)}`,
        );

        return items;
    });

    // De-duplicate by URL; when a URL repeats, the later occurrence wins.
    const discovered = new Map();
    let dups = 0;
    for (const arr of perPageItems) {
        for (const it of arr) {
            if (discovered.has(it.url)) dups++;
            discovered.set(it.url, it);
        }
    }

    // Optional store hook, run on the discovered set before merging.
    if (typeof ctx.store.repairDiscoveredItems === "function") {
        await ctx.store.repairDiscoveredItems(ctx, discovered, prevDb);
    }

    logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}${dups ? ` (${dups} dups)` : ""}`);

    const { merged, newItems, updatedItems, removedItems, restoredItems, metaChangedItems } = mergeDiscoveredIntoDb(
        prevDb,
        discovered,
        { storeLabel: ctx.store.name },
    );

    const dbObj = buildDbObject(ctx, merged);
    writeJsonAtomic(ctx.dbFile, dbObj);

    logger.ok(`${ctx.catPrefixOut} | DB saved: ${logger.dim(ctx.dbFile)} (${dbObj.count} items)`);

    const elapsed = Date.now() - t0;
    logger.ok(
        `${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`,
    );

    report.categories.push({
        store: ctx.store.name,
        label: ctx.cat.label,
        key: ctx.cat.key,
        dbFile: ctx.dbFile,
        scannedPages: scanPages,
        discoveredUnique: discovered.size,
        newCount: newItems.length,
        updatedCount: updatedItems.length,
        removedCount: removedItems.length,
        restoredCount: restoredItems.length,
        metaChangedCount: metaChangedItems.length,
        elapsedMs: elapsed,
    });
    report.totals.newCount += newItems.length;
    report.totals.updatedCount += updatedItems.length;
    report.totals.removedCount += removedItems.length;
    report.totals.restoredCount += restoredItems.length;
    report.totals.metaChangedCount += metaChangedItems.length;

    addCategoryResultToReport(
        report,
        ctx.store.name,
        ctx.cat.label,
        newItems,
        updatedItems,
        removedItems,
        restoredItems,
    );
}
// Public API of this module: prefix formatting, context construction, DB loading and the category scan entry point.
module.exports = { makeCatPrefixers, buildCategoryContext, loadCategoryDb, discoverAndScanCategory };

View file

@ -8,87 +8,87 @@ const { normalizeSkuKey } = require("../utils/sku");
const { priceToNumber } = require("../utils/price");
/**
 * Create a directory, including missing parents. Does nothing if it already exists.
 *
 * @param {string} dir - Directory path to create.
 */
function ensureDir(dir) {
    fs.mkdirSync(dir, { recursive: true });
}
/**
 * Compute the DB file path for a category, creating `dbDir` if needed.
 *
 * The file name is `<safeKey>__<hash>.json`: `safeKey` is `key` with every run of
 * characters outside `[a-zA-Z0-9_-]` replaced by "-", and `hash` is the first 8 hex
 * characters of the SHA-1 of `baseUrl`.
 *
 * @param {string} key - Logical key for the DB.
 * @param {string} baseUrl - Source URL hashed into the file name.
 * @param {string} dbDir - Directory that holds the DB files.
 * @returns {string} Path of the JSON DB file inside `dbDir`.
 */
function dbPathFor(key, baseUrl, dbDir) {
    ensureDir(dbDir);
    const hash = crypto.createHash("sha1").update(String(baseUrl)).digest("hex").slice(0, 8);
    const safeKey = String(key).replace(/[^a-zA-Z0-9_-]+/g, "-");
    return path.join(dbDir, `${safeKey}__${hash}.json`);
}
/**
 * Load a category DB file into a URL-keyed map.
 *
 * Only entries whose `url` is a string starting with "http" are kept. Each kept row
 * is normalized to `{ name, price, sku, url, img, removed }`, with `img` falling back
 * to the `image` or `thumb` field.
 *
 * @param {string} file - Path of the JSON DB file.
 * @returns {{byUrl: Map<string, object>}} The map is empty when the file is missing or
 *   unparseable; both cases are deliberately tolerated so a first run starts clean.
 */
function readDb(file) {
    const byUrl = new Map();
    try {
        const txt = fs.readFileSync(file, "utf8");
        const obj = JSON.parse(txt);
        if (obj && Array.isArray(obj.items)) {
            for (const it of obj.items) {
                if (it && typeof it.url === "string" && it.url.startsWith("http")) {
                    byUrl.set(it.url, {
                        name: String(it.name || ""),
                        price: String(it.price || ""),
                        sku: String(it.sku || ""),
                        url: it.url,
                        img: String(it.img || it.image || it.thumb || "").trim(),
                        removed: Boolean(it.removed),
                    });
                }
            }
        }
    } catch {
        // ignore missing or parse errors
    }
    return { byUrl };
}
/**
 * Write an object as pretty-printed JSON by writing `<file>.tmp` and renaming it over `file`.
 *
 * Readers therefore never see a partially written file. The parent directory is
 * created when missing.
 *
 * @param {string} file - Destination path.
 * @param {*} obj - Value to serialize (2-space indent, trailing newline).
 */
function writeJsonAtomic(file, obj) {
    ensureDir(path.dirname(file));
    const tmp = `${file}.tmp`;
    fs.writeFileSync(tmp, JSON.stringify(obj, null, 2) + "\n", "utf8");
    fs.renameSync(tmp, file);
}
/**
 * Build the serializable DB document for a category from the merged item map.
 *
 * Items are sorted by name (`localeCompare`) and reduced to the persisted fields.
 *
 * @param {object} ctx - Category context; reads `store.host`, `store.name`, `cat.key`,
 *   `cat.label` and `baseUrl`.
 * @param {Map<string, object>} merged - URL -> item map to persist.
 * @returns {object} Document with `version`, store/category metadata, `updatedAt`
 *   (ISO timestamp of this call), `count` and `items`.
 */
function buildDbObject(ctx, merged) {
    // Label used for SKU normalization; falls back to the host when the store has no name.
    const storeLabel = ctx?.store?.name || ctx?.store?.host || "";

    return {
        version: 6,
        store: ctx.store.host,
        storeLabel: ctx.store.name,
        category: ctx.cat.key,
        categoryLabel: ctx.cat.label,
        source: ctx.baseUrl,
        updatedAt: new Date().toISOString(),
        count: merged.size,
        items: [...merged.values()]
            .sort((a, b) => (a.name || "").localeCompare(b.name || ""))
            .map((it) => ({
                name: it.name,
                price: it.price || "",
                // IMPORTANT: keep real 6-digit when present; otherwise store stable u:hash(store|url)
                sku: normalizeSkuKey(it.sku, { storeLabel, url: it.url }) || "",
                url: it.url,
                img: String(it.img || "").trim(),
                removed: Boolean(it.removed),
            })),
    };
}
/**
 * List the `.json` files that sit directly inside a DB directory.
 *
 * @param {string} dbDir - Directory to scan (not recursive).
 * @returns {string[]} Paths of regular files ending in ".json", each joined with `dbDir`;
 *   empty when the directory cannot be read.
 */
function listDbFiles(dbDir) {
    const out = [];
    try {
        for (const ent of fs.readdirSync(dbDir, { withFileTypes: true })) {
            if (!ent.isFile()) continue;
            const name = ent.name || "";
            if (!name.endsWith(".json")) continue;
            out.push(path.join(dbDir, name));
        }
    } catch {
        // ignore
    }
    return out;
}
/**
 * Build an index of the cheapest active offer per SKU across every DB file in `dbDir`.
 *
 * Each row's SKU is normalized with `normalizeSkuKey`; when `skuMap.canonicalSku` is
 * available the index is keyed by the canonical SKU it returns, but DB rows remain
 * raw/mined skuKey. Rows that are removed, lack a SKU key or have no positive finite
 * price are skipped, and unreadable or unparseable files are ignored.
 *
 * @param {string} dbDir - Directory containing the DB JSON files.
 * @param {{skuMap?: {canonicalSku: Function}}} [options] - Optional SKU canonicalizer.
 * @returns {Map<string, {storeLabel: string, priceNum: number}>} Canonical SKU -> cheapest
 *   offer. On equal prices the first one encountered is kept.
 */
function buildCheapestSkuIndexFromAllDbs(dbDir, { skuMap } = {}) {
    const cheapest = new Map(); // canonSku -> { storeLabel, priceNum }

    for (const file of listDbFiles(dbDir)) {
        try {
            const obj = JSON.parse(fs.readFileSync(file, "utf8"));
            const storeLabel = String(obj?.storeLabel || obj?.store || "");
            const items = Array.isArray(obj?.items) ? obj.items : [];

            for (const it of items) {
                if (it?.removed) continue;

                const skuKey = normalizeSkuKey(it?.sku || "", { storeLabel, url: it?.url || "" });
                if (!skuKey) continue;

                const canon =
                    skuMap && typeof skuMap.canonicalSku === "function" ? skuMap.canonicalSku(skuKey) : skuKey;

                const p = priceToNumber(it?.price || "");
                if (!Number.isFinite(p) || p <= 0) continue;

                const prev = cheapest.get(canon);
                if (!prev || p < prev.priceNum) cheapest.set(canon, { storeLabel, priceNum: p });
            }
        } catch {
            // ignore parse errors
        }
    }

    return cheapest;
}
module.exports = {
ensureDir,
dbPathFor,
readDb,
writeJsonAtomic,
buildDbObject,
listDbFiles,
buildCheapestSkuIndexFromAllDbs,
ensureDir,
dbPathFor,
readDb,
writeJsonAtomic,
buildDbObject,
listDbFiles,
buildCheapestSkuIndexFromAllDbs,
};

View file

@ -5,207 +5,206 @@ const { normalizeSkuKey, normalizeCspc, pickBetterSku } = require("../utils/sku"
const { normPrice } = require("../utils/price");
/**
 * Normalize an image URL for storage.
 *
 * @param {*} v - Raw image value; coerced to a trimmed string.
 * @returns {string} The trimmed URL, or "" when it is empty, a `data:` URI, or a
 *   Shopify width-template URL (contains `{width}` or its percent-encoded form).
 */
function normImg(v) {
    const s = String(v || "").trim();
    if (!s) return "";
    if (/^data:/i.test(s)) return "";
    if (/%7Bwidth%7D|\{width\}/i.test(s)) return ""; // drop Shopify width-template URLs
    return s;
}
/**
 * Read the store label carried by a previously loaded DB object.
 *
 * @param {object} [prevDb] - DB object that may expose `storeLabel` or `store`.
 * @returns {string} Trimmed `storeLabel`, else trimmed `store`, else "".
 */
function dbStoreLabel(prevDb) {
    return String(prevDb?.storeLabel || prevDb?.store || "").trim();
}
function mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel } = {}) {
const effectiveStoreLabel = String(storeLabel || dbStoreLabel(prevDb)).trim();
if (!effectiveStoreLabel) {
throw new Error(
"mergeDiscoveredIntoDb: missing storeLabel; refusing to generate synthetic SKUs with fallback 'store'"
);
}
const effectiveStoreLabel = String(storeLabel || dbStoreLabel(prevDb)).trim();
if (!effectiveStoreLabel) {
throw new Error(
"mergeDiscoveredIntoDb: missing storeLabel; refusing to generate synthetic SKUs with fallback 'store'",
);
}
function normalizeSkuForDb(raw, url) {
return normalizeSkuKey(raw, { storeLabel: effectiveStoreLabel, url });
}
function normalizeSkuForDb(raw, url) {
return normalizeSkuKey(raw, { storeLabel: effectiveStoreLabel, url });
}
const merged = new Map(prevDb.byUrl);
const merged = new Map(prevDb.byUrl);
const newItems = [];
const updatedItems = [];
const removedItems = [];
const restoredItems = [];
const metaChangedItems = [];
const newItems = [];
const updatedItems = [];
const removedItems = [];
const restoredItems = [];
const metaChangedItems = [];
// Choose a deterministic "best" record among dup active SKU rows.
// Prefer: more complete fields, then lexicographically smallest URL.
function scoreItem(it) {
if (!it) return 0;
const name = String(it.name || "").trim();
const price = String(it.price || "").trim();
const url = String(it.url || "").trim();
const img = String(it.img || "").trim();
return (name ? 1 : 0) + (price ? 1 : 0) + (url ? 1 : 0) + (img ? 1 : 0);
}
// Choose a deterministic "best" record among dup active SKU rows.
// Prefer: more complete fields, then lexicographically smallest URL.
function scoreItem(it) {
if (!it) return 0;
const name = String(it.name || "").trim();
const price = String(it.price || "").trim();
const url = String(it.url || "").trim();
const img = String(it.img || "").trim();
return (name ? 1 : 0) + (price ? 1 : 0) + (url ? 1 : 0) + (img ? 1 : 0);
}
function pickBetter({ url: urlA, item: a }, { url: urlB, item: b }) {
const sa = scoreItem(a);
const sb = scoreItem(b);
if (sa !== sb) return sa > sb ? { url: urlA, item: a } : { url: urlB, item: b };
// tie-breaker: stable + deterministic
return String(urlA || "") <= String(urlB || "") ? { url: urlA, item: a } : { url: urlB, item: b };
}
function pickBetter({ url: urlA, item: a }, { url: urlB, item: b }) {
const sa = scoreItem(a);
const sb = scoreItem(b);
if (sa !== sb) return sa > sb ? { url: urlA, item: a } : { url: urlB, item: b };
// tie-breaker: stable + deterministic
return String(urlA || "") <= String(urlB || "") ? { url: urlA, item: a } : { url: urlB, item: b };
}
// Index active items by non-synthetic skuKey (CSPC / id:* / upc:* / etc).
// Also track *all* urls per skuKey to cleanup dupes.
const prevBySkuKey = new Map(); // skuKey -> { url, item } (best)
const prevUrlsBySkuKey = new Map(); // skuKey -> Set(urls)
// Index active items by non-synthetic skuKey (CSPC / id:* / upc:* / etc).
// Also track *all* urls per skuKey to cleanup dupes.
const prevBySkuKey = new Map(); // skuKey -> { url, item } (best)
const prevUrlsBySkuKey = new Map(); // skuKey -> Set(urls)
for (const [url, it] of prevDb.byUrl.entries()) {
if (!it || it.removed) continue;
for (const [url, it] of prevDb.byUrl.entries()) {
if (!it || it.removed) continue;
const skuKey = normalizeSkuForDb(it.sku, url);
if (!skuKey || /^u:/i.test(skuKey)) continue;
const skuKey = normalizeSkuForDb(it.sku, url);
if (!skuKey || /^u:/i.test(skuKey)) continue;
let set = prevUrlsBySkuKey.get(skuKey);
if (!set) prevUrlsBySkuKey.set(skuKey, (set = new Set()));
set.add(url);
let set = prevUrlsBySkuKey.get(skuKey);
if (!set) prevUrlsBySkuKey.set(skuKey, (set = new Set()));
set.add(url);
const cur = prevBySkuKey.get(skuKey);
const next = { url, item: it };
if (!cur) prevBySkuKey.set(skuKey, next);
else prevBySkuKey.set(skuKey, pickBetter(cur, next));
}
const cur = prevBySkuKey.get(skuKey);
const next = { url, item: it };
if (!cur) prevBySkuKey.set(skuKey, next);
else prevBySkuKey.set(skuKey, pickBetter(cur, next));
}
const matchedPrevUrls = new Set(); // old URLs we "found" via skuKey even if URL changed
const matchedPrevUrls = new Set(); // old URLs we "found" via skuKey even if URL changed
for (const [url, nowRaw] of discovered.entries()) {
let prev = prevDb.byUrl.get(url);
let prevUrlForThisItem = url;
for (const [url, nowRaw] of discovered.entries()) {
let prev = prevDb.byUrl.get(url);
let prevUrlForThisItem = url;
// URL not found in previous DB: try to match by non-synthetic skuKey.
if (!prev) {
const nowSkuKey = normalizeSkuForDb(nowRaw.sku, url);
if (nowSkuKey && !/^u:/i.test(nowSkuKey)) {
const hit = prevBySkuKey.get(nowSkuKey);
if (hit && hit.url && hit.url !== url) {
prev = hit.item;
prevUrlForThisItem = hit.url;
// URL not found in previous DB: try to match by non-synthetic skuKey.
if (!prev) {
const nowSkuKey = normalizeSkuForDb(nowRaw.sku, url);
if (nowSkuKey && !/^u:/i.test(nowSkuKey)) {
const hit = prevBySkuKey.get(nowSkuKey);
if (hit && hit.url && hit.url !== url) {
prev = hit.item;
prevUrlForThisItem = hit.url;
// Mark ALL prior URLs for this skuKey as matched, so we don't later "remove" them.
const allOld = prevUrlsBySkuKey.get(nowSkuKey);
if (allOld) {
for (const u of allOld) matchedPrevUrls.add(u);
} else {
matchedPrevUrls.add(hit.url);
}
// Mark ALL prior URLs for this skuKey as matched, so we don't later "remove" them.
const allOld = prevUrlsBySkuKey.get(nowSkuKey);
if (allOld) {
for (const u of allOld) matchedPrevUrls.add(u);
} else {
matchedPrevUrls.add(hit.url);
}
// Cleanup: remove any existing active duplicates for this skuKey from the merged map.
// We'll re-add the chosen record at the new URL below.
if (allOld) {
for (const u of allOld) {
if (u !== url && merged.has(u)) merged.delete(u);
}
} else {
if (merged.has(hit.url)) merged.delete(hit.url);
}
}
}
}
// Cleanup: remove any existing active duplicates for this skuKey from the merged map.
// We'll re-add the chosen record at the new URL below.
if (allOld) {
for (const u of allOld) {
if (u !== url && merged.has(u)) merged.delete(u);
}
} else {
if (merged.has(hit.url)) merged.delete(hit.url);
}
}
}
}
// Truly new (no URL match, no skuKey match)
if (!prev) {
const nowSku = normalizeSkuForDb(nowRaw.sku, url);
const now = {
...nowRaw,
sku: nowSku,
img: normImg(nowRaw.img),
removed: false,
};
newItems.push(now);
merged.set(url, now);
continue;
}
// Truly new (no URL match, no skuKey match)
if (!prev) {
const nowSku = normalizeSkuForDb(nowRaw.sku, url);
const now = {
...nowRaw,
sku: nowSku,
img: normImg(nowRaw.img),
removed: false,
};
newItems.push(now);
merged.set(url, now);
continue;
}
// If the previous record was removed and we found it by the SAME URL, keep current behavior (restored).
if (prevUrlForThisItem === url && prev.removed) {
const prevSku = normalizeSkuForDb(prev.sku, prev.url);
const rawNowSku = normalizeSkuForDb(nowRaw.sku, url);
const nowSku = pickBetterSku(rawNowSku, prevSku);
// If the previous record was removed and we found it by the SAME URL, keep current behavior (restored).
if (prevUrlForThisItem === url && prev.removed) {
const prevSku = normalizeSkuForDb(prev.sku, prev.url);
const rawNowSku = normalizeSkuForDb(nowRaw.sku, url);
const nowSku = pickBetterSku(rawNowSku, prevSku);
const now = {
...nowRaw,
sku: nowSku,
img: normImg(nowRaw.img) || normImg(prev.img),
removed: false,
};
const now = {
...nowRaw,
sku: nowSku,
img: normImg(nowRaw.img) || normImg(prev.img),
removed: false,
};
restoredItems.push({
url,
name: now.name || prev.name || "",
price: now.price || prev.price || "",
sku: now.sku || "",
});
restoredItems.push({
url,
name: now.name || prev.name || "",
price: now.price || prev.price || "",
sku: now.sku || "",
});
merged.set(url, now);
continue;
}
merged.set(url, now);
continue;
}
// Update-in-place (or URL-move-with-skuKey): update DB, report price changes normally.
const prevPrice = normPrice(prev.price);
const nowPrice = normPrice(nowRaw.price);
// Update-in-place (or URL-move-with-skuKey): update DB, report price changes normally.
const prevPrice = normPrice(prev.price);
const nowPrice = normPrice(nowRaw.price);
const prevSku = normalizeSkuForDb(prev.sku, prev.url);
const rawNowSku = normalizeSkuForDb(nowRaw.sku, url);
const nowSku = pickBetterSku(rawNowSku, prevSku);
const prevSku = normalizeSkuForDb(prev.sku, prev.url);
const rawNowSku = normalizeSkuForDb(nowRaw.sku, url);
const nowSku = pickBetterSku(rawNowSku, prevSku);
const prevImg = normImg(prev.img);
let nowImg = normImg(nowRaw.img);
if (!nowImg) nowImg = prevImg;
const prevImg = normImg(prev.img);
let nowImg = normImg(nowRaw.img);
if (!nowImg) nowImg = prevImg;
const nameChanged = String(prev.name || "") !== String(nowRaw.name || "");
const priceChanged = prevPrice !== nowPrice;
const skuChanged = prevSku !== nowSku;
const imgChanged = prevImg !== nowImg;
const nameChanged = String(prev.name || "") !== String(nowRaw.name || "");
const priceChanged = prevPrice !== nowPrice;
const skuChanged = prevSku !== nowSku;
const imgChanged = prevImg !== nowImg;
if (nameChanged || priceChanged || skuChanged || imgChanged || prevUrlForThisItem !== url) {
merged.set(url, { ...nowRaw, sku: nowSku, img: nowImg, removed: false });
}
if (nameChanged || priceChanged || skuChanged || imgChanged || prevUrlForThisItem !== url) {
merged.set(url, { ...nowRaw, sku: nowSku, img: nowImg, removed: false });
}
if (priceChanged) {
updatedItems.push({
url,
name: nowRaw.name || prev.name || "",
sku: nowSku || "",
oldPrice: prev.price || "",
newPrice: nowRaw.price || "",
});
} else if (nameChanged || skuChanged || imgChanged || prevUrlForThisItem !== url) {
// Count non-price changes (SKU upgrades, name/img changes, or URL moves) as meaningful.
metaChangedItems.push({
url,
name: nowRaw.name || prev.name || "",
sku: nowSku || "",
});
}
}
if (priceChanged) {
updatedItems.push({
url,
name: nowRaw.name || prev.name || "",
sku: nowSku || "",
oldPrice: prev.price || "",
newPrice: nowRaw.price || "",
});
} else if (nameChanged || skuChanged || imgChanged || prevUrlForThisItem !== url) {
// Count non-price changes (SKU upgrades, name/img changes, or URL moves) as meaningful.
metaChangedItems.push({
url,
name: nowRaw.name || prev.name || "",
sku: nowSku || "",
});
}
}
for (const [url, prev] of prevDb.byUrl.entries()) {
if (discovered.has(url)) continue;
if (matchedPrevUrls.has(url)) continue; // de-dupe URL changes for skuKey items (and cleanup dupes)
if (!prev.removed) {
const removed = { ...prev, removed: true };
merged.set(url, removed);
removedItems.push({
url,
name: prev.name || "",
price: prev.price || "",
sku: normalizeCspc(prev.sku) || "",
});
}
}
for (const [url, prev] of prevDb.byUrl.entries()) {
if (discovered.has(url)) continue;
if (matchedPrevUrls.has(url)) continue; // de-dupe URL changes for skuKey items (and cleanup dupes)
if (!prev.removed) {
const removed = { ...prev, removed: true };
merged.set(url, removed);
removedItems.push({
url,
name: prev.name || "",
price: prev.price || "",
sku: normalizeCspc(prev.sku) || "",
});
}
}
return { merged, newItems, updatedItems, removedItems, restoredItems, metaChangedItems };
return { merged, newItems, updatedItems, removedItems, restoredItems, metaChangedItems };
}
module.exports = { mergeDiscoveredIntoDb };

View file

@ -8,247 +8,279 @@ const { buildCheapestSkuIndexFromAllDbs } = require("./db");
const { loadSkuMap } = require("../utils/sku_map");
/**
 * Format a millisecond duration as a right-aligned seconds string.
 *
 * Durations that round to less than 10.0s keep one decimal ("1.5s");
 * anything longer is rounded to whole seconds ("12s"). A non-finite input
 * is rendered as zero.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} The formatted duration, left-padded with spaces to 7 chars.
 */
function secStr(ms) {
	const seconds = Number.isFinite(ms) ? ms / 1000 : 0;
	const tenths = Math.round(seconds * 10) / 10;
	const text = tenths < 10 ? `${tenths.toFixed(1)}s` : `${Math.round(seconds)}s`;
	return text.padStart(7, " ");
}
/**
 * Create an empty run report.
 *
 * `startedAt` is stamped with the current time; the counters in `totals`
 * start at zero and every item list starts empty. Each call returns fresh
 * objects, so reports never share state.
 *
 * @returns {{startedAt: Date, categories: Array, totals: Object, newItems: Array, updatedItems: Array, removedItems: Array, restoredItems: Array}}
 */
function createReport() {
	return {
		startedAt: new Date(),
		categories: [],
		totals: { newCount: 0, updatedCount: 0, removedCount: 0, restoredCount: 0, metaChangedCount: 0 },
		newItems: [],
		updatedItems: [],
		removedItems: [],
		restoredItems: [],
	};
}
/**
 * Append one category's scan results to the run report.
 *
 * Every appended row is tagged with a `"<storeName> | <catLabel>"` label.
 * Missing `price` / `sku` values on listing rows are stored as "". Each
 * input item is appended exactly once.
 *
 * @param {Object} report - Report whose `newItems`, `restoredItems`,
 *   `updatedItems` and `removedItems` arrays are appended to (mutated).
 * @param {string} storeName - Store name used in the row label.
 * @param {string} catLabel - Category label used in the row label.
 * @param {Array<Object>} newItems - Rows with `name`, `price`, `sku`, `url`.
 * @param {Array<Object>} updatedItems - Rows with `name`, `sku`, `oldPrice`, `newPrice`, `url`.
 * @param {Array<Object>} removedItems - Rows with `name`, `price`, `sku`, `url`.
 * @param {Array<Object>} restoredItems - Rows with `name`, `price`, `sku`, `url`.
 * @returns {void}
 */
function addCategoryResultToReport(report, storeName, catLabel, newItems, updatedItems, removedItems, restoredItems) {
	const reportCatLabel = `${storeName} | ${catLabel}`;

	// New, restored and removed rows all share this shape.
	const listingRow = (it) => ({
		catLabel: reportCatLabel,
		name: it.name,
		price: it.price || "",
		sku: it.sku || "",
		url: it.url,
	});

	for (const it of newItems) report.newItems.push(listingRow(it));
	for (const it of restoredItems) report.restoredItems.push(listingRow(it));

	for (const u of updatedItems) {
		report.updatedItems.push({
			catLabel: reportCatLabel,
			name: u.name,
			sku: u.sku || "",
			oldPrice: u.oldPrice,
			newPrice: u.newPrice,
			url: u.url,
		});
	}

	for (const it of removedItems) report.removedItems.push(listingRow(it));
}
/**
 * Render the end-of-run report as a single printable string.
 *
 * The output contains, in order: a totals line, a per-category table, and
 * the NEW LISTINGS, RESTORED, REMOVED and PRICE CHANGES sections. Listing
 * rows may carry a "(Cheaper at …)" or "(Available at …)" hint looked up in
 * the cheapest-SKU index built from `dbDir`.
 *
 * Note: the item arrays on `report` are sorted in place (Array.prototype.sort)
 * while rendering.
 *
 * @param {Object} report - Report as produced by createReport() and filled in
 *   during the run (`startedAt`, `categories`, `totals`, item arrays).
 * @param {Object} [options]
 * @param {string} [options.dbDir] - Passed to loadSkuMap and
 *   buildCheapestSkuIndexFromAllDbs.
 * @param {boolean} [options.colorize] - Emit colour codes; defaults to
 *   whether stdout is a TTY.
 * @returns {string} The rendered report, newline-terminated.
 */
function renderFinalReport(report, { dbDir, colorize = Boolean(process.stdout && process.stdout.isTTY) } = {}) {
	const paint = (s, code) => color(s, code, colorize);

	// Load mapping for comparisons only
	const skuMap = loadSkuMap({ dbDir });

	// Cheapest index is keyed by canonical sku (mapped)
	const cheapestSku = buildCheapestSkuIndexFromAllDbs(dbDir, { skuMap });

	const endedAt = new Date();
	const durMs = endedAt - report.startedAt;

	const storesSet = new Set(report.categories.map((c) => c.store));
	const totalUnique = report.categories.reduce(
		(acc, c) => acc + (Number.isFinite(c.discoveredUnique) ? c.discoveredUnique : 0),
		0,
	);

	let out = "";
	const ln = (s = "") => {
		out += String(s) + "\n";
	};

	ln("");
	ln(paint("========== REPORT ==========", C.bold));
	ln(
		paint("[OK] ", C.green) +
			`Totals | Stores=${storesSet.size} | Categories=${report.categories.length} | Unique=${totalUnique} | New=${report.totals.newCount} | Restored=${report.totals.restoredCount} | Removed=${report.totals.removedCount} | PriceChanges=${report.totals.updatedCount} | Runtime=${secStr(durMs)}`,
	);
	ln("");

	ln(paint("Per-category summary:", C.bold));
	const rows = report.categories.map((c) => ({
		cat: `${c.store} | ${c.label}`,
		pages: c.scannedPages,
		uniq: c.discoveredUnique,
		newC: c.newCount,
		resC: c.restoredCount,
		remC: c.removedCount,
		updC: c.updatedCount,
		ms: c.elapsedMs,
	}));

	// Column width: longest label, at least 8, capped at 48. reduce() avoids
	// spreading an arbitrarily long array into Math.max's argument list.
	const catW = Math.min(
		48,
		rows.reduce((w, r) => Math.max(w, r.cat.length), 8),
	);
	ln(
		`${padRight("Store | Category", catW)} ${padLeft("Pages", 5)} ${padLeft("Unique", 6)} ${padLeft("New", 4)} ${padLeft("Res", 4)} ${padLeft("Rem", 4)} ${padLeft("Upd", 4)} ${padLeft("Sec", 7)}`,
	);
	ln(`${"-".repeat(catW)} ----- ------ ---- ---- ---- ---- -------`);
	for (const r of rows) {
		ln(
			`${padRight(r.cat, catW)} ${padLeft(r.pages, 5)} ${padLeft(r.uniq, 6)} ${padLeft(r.newC, 4)} ${padLeft(r.resC, 4)} ${padLeft(r.remC, 4)} ${padLeft(r.updC, 4)} ${secStr(r.ms)}`,
		);
	}
	ln("");

	// Label column for the item sections: longest catLabel, at least 16.
	const reportLabelW = [report.newItems, report.restoredItems, report.updatedItems, report.removedItems]
		.flat()
		.reduce((w, x) => Math.max(w, x.catLabel.length), 16);

	const byLabelThenName = (a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name);

	function storeFromCatLabel(catLabel) {
		return String(catLabel || "").split(" | ")[0] || "";
	}

	function skuInline(sku) {
		const s = normalizeCspc(sku);
		return s ? paint(` ${s}`, C.gray) : "";
	}

	function canonicalKeyForReportItem(catLabel, skuRaw, url) {
		const storeLabel = storeFromCatLabel(catLabel);
		const skuKey = normalizeSkuKey(skuRaw, { storeLabel, url });
		if (!skuKey) return "";
		return skuMap && typeof skuMap.canonicalSku === "function" ? skuMap.canonicalSku(skuKey) : skuKey;
	}

	// Hint shown when a *different* store has a strictly lower indexed price.
	function cheaperAtInline(catLabel, skuRaw, url, currentPriceStr) {
		const canon = canonicalKeyForReportItem(catLabel, skuRaw, url);
		if (!canon) return "";

		const best = cheapestSku.get(canon);
		if (!best || !best.storeLabel) return "";

		const curStore = storeFromCatLabel(catLabel);
		if (!curStore || best.storeLabel === curStore) return "";

		const curP = priceToNumber(currentPriceStr);
		if (!Number.isFinite(curP)) return "";
		if (best.priceNum >= curP) return "";

		return paint(` (Cheaper at ${best.storeLabel})`, C.gray);
	}

	// Hint shown when the index entry for this SKU belongs to another store.
	function availableAtInline(catLabel, skuRaw, url) {
		const canon = canonicalKeyForReportItem(catLabel, skuRaw, url);
		if (!canon) return "";

		const best = cheapestSku.get(canon);
		if (!best || !best.storeLabel) return "";

		const curStore = storeFromCatLabel(catLabel);
		if (curStore && best.storeLabel === curStore) return "";

		return paint(` (Available at ${best.storeLabel})`, C.gray);
	}

	// Shared renderer for the NEW / RESTORED / REMOVED sections, which differ
	// only in title, colours, row marker and the trailing hint.
	function listingSection(title, items, headColor, marker, tagFor) {
		if (!items.length) {
			ln(paint(`${title} (0)`, C.bold));
			ln("");
			return;
		}

		ln(paint(`${title} (${items.length})`, C.bold + headColor));
		for (const it of items.sort(byLabelThenName)) {
			const price = it.price ? paint(it.price, C.cyan) : paint("(no price)", C.gray);
			const sku = String(it.sku || "");
			const tag = tagFor(it, sku);
			ln(
				`${paint(marker, headColor)} ${padRight(it.catLabel, reportLabelW)} | ${paint(it.name, C.bold)}${skuInline(sku)} ${price}${tag}`,
			);
			ln(` ${paint(it.url, C.dim)}`);
		}
		ln("");
	}

	const cheaperTag = (it, sku) => cheaperAtInline(it.catLabel, sku, it.url, it.price || "");
	const availableTag = (it, sku) => availableAtInline(it.catLabel, sku, it.url);

	listingSection("NEW LISTINGS", report.newItems, C.green, "+", cheaperTag);
	listingSection("RESTORED", report.restoredItems, C.green, "R", cheaperTag);
	listingSection("REMOVED", report.removedItems, C.yellow, "-", availableTag);

	if (report.updatedItems.length) {
		ln(paint(`PRICE CHANGES (${report.updatedItems.length})`, C.bold + C.cyan));

		for (const u of report.updatedItems.sort(byLabelThenName)) {
			const oldRaw = u.oldPrice || "";
			const newRaw = u.newPrice || "";

			const oldN = priceToNumber(oldRaw);
			const newN = priceToNumber(newRaw);

			const oldP = oldRaw ? paint(oldRaw, C.yellow) : paint("(no price)", C.gray);

			// Red = increase, green = decrease (with % off), cyan = equal or
			// not comparable because a side is not numeric.
			let newColor = C.cyan;
			let offTag = "";
			if (Number.isFinite(oldN) && Number.isFinite(newN)) {
				if (newN > oldN) {
					newColor = C.red;
				} else if (newN < oldN) {
					newColor = C.green;
					const pct = salePctOff(oldRaw, newRaw);
					if (pct !== null) offTag = " " + paint(`[${pct}% Off]`, C.green);
				}
			}
			const newP = paint(newRaw ? newRaw : "(no price)", newColor);

			const sku = String(u.sku || "");
			const cheapTag = cheaperAtInline(u.catLabel, sku, u.url, newRaw || "");

			ln(
				`${paint("~", C.cyan)} ${padRight(u.catLabel, reportLabelW)} | ${paint(u.name, C.bold)}${skuInline(sku)} ${oldP} ${paint("->", C.gray)} ${newP}${offTag}${cheapTag}`,
			);
			ln(` ${paint(u.url, C.dim)}`);
		}

		ln("");
	} else {
		ln(paint("PRICE CHANGES (0)", C.bold));
		ln("");
	}

	ln(paint("======== END REPORT ========", C.bold));

	return out;
}
module.exports = { createReport, addCategoryResultToReport, renderFinalReport };

View file

@ -3,96 +3,89 @@
const { createReport } = require("./report");
const { setTimeout: sleep } = require("timers/promises");
const {
makeCatPrefixers,
buildCategoryContext,
loadCategoryDb,
discoverAndScanCategory,
} = require("./category_scan");
const { makeCatPrefixers, buildCategoryContext, loadCategoryDb, discoverAndScanCategory } = require("./category_scan");
// Some sites will intermittently 403/429. We don't want a single category/store
// to abort the entire run. Log and continue.
/**
 * Turn a caught value into loggable text.
 *
 * @param {*} e - Whatever was thrown.
 * @returns {string} "Unknown error" for a falsy value, the value itself when
 *   it is a string, its `stack` when it has one, otherwise `String(e)`.
 */
function formatErr(e) {
	if (!e) return "Unknown error";
	if (typeof e === "string") return e;
	if (e.stack) return e.stack;
	return String(e);
}
/**
 * Scan every category of every store and collect the results in one report.
 *
 * Work is spread over up to `config.categoryConcurrency` workers. Two
 * categories with the same host (falling back to the store key) never run at
 * the same time. A category that throws is logged with `logger.warn` and
 * skipped; it does not abort the run.
 *
 * @param {Array<Object>} stores - Stores, each with a `categories` array.
 * @param {Object} deps
 * @param {Object} deps.config - Run configuration (concurrency, retries, …).
 * @param {Object} deps.logger - Logger providing `info` and `warn`.
 * @param {Object} deps.http - HTTP client placed on each category context.
 * @returns {Promise<Object>} The report created by createReport().
 */
async function runAllStores(stores, { config, logger, http }) {
	const report = createReport();
	const { catPrefixOut } = makeCatPrefixers(stores, logger);

	logger.info(`Debug=on`);
	logger.info(
		`Concurrency=${config.concurrency} StaggerMs=${config.staggerMs} Retries=${config.maxRetries} TimeoutMs=${config.timeoutMs}`,
	);
	logger.info(`DiscoveryGuess=${config.discoveryGuess} DiscoveryStep=${config.discoveryStep}`);
	logger.info(`MaxPages=${config.maxPages === null ? "none" : config.maxPages}`);
	logger.info(`CategoryConcurrency=${config.categoryConcurrency}`);

	const workItems = [];
	for (const store of stores) {
		for (const cat of store.categories) {
			const baseCtx = buildCategoryContext(store, cat, catPrefixOut, config);
			const ctx = { ...baseCtx, config, logger, http };
			const prevDb = loadCategoryDb(logger, ctx);
			workItems.push({ ctx, prevDb });
		}
	}

	// Host-level serialization: never run two categories from the same host concurrently.
	const maxWorkers = Math.min(config.categoryConcurrency, workItems.length);
	const queue = workItems.slice();
	const inflightHosts = new Set();

	// Serialization key for a work item; "" when the store has neither host nor key.
	const hostOf = (w) => String(w?.ctx?.store?.host || w?.ctx?.store?.key || "");

	async function runOne(w) {
		try {
			await discoverAndScanCategory(w.ctx, w.prevDb, report);
		} catch (e) {
			const storeName = w?.ctx?.store?.name || w?.ctx?.store?.host || "unknown-store";
			const catLabel = w?.ctx?.cat?.label || w?.ctx?.cat?.key || "unknown-category";

			// Keep it loud in logs, but do not fail the entire run.
			logger.warn(`Category failed (continuing): ${storeName} | ${catLabel}\n${formatErr(e)}`);
		}
	}

	async function worker() {
		while (queue.length > 0) {
			// Pick next item whose host isn't currently running. An item with
			// no host key cannot conflict with anything, so it is always
			// eligible; otherwise it would sit in the queue forever and the
			// workers would poll without ever finishing.
			const idx = queue.findIndex((w) => {
				const host = hostOf(w);
				return !host || !inflightHosts.has(host);
			});

			if (idx === -1) {
				// Nothing available right now; wait a bit.
				await sleep(50);
				continue;
			}

			const [w] = queue.splice(idx, 1);
			const host = hostOf(w);

			if (host) inflightHosts.add(host);
			try {
				await runOne(w);
			} finally {
				if (host) inflightHosts.delete(host);
			}
		}
	}

	const workers = [];
	for (let i = 0; i < maxWorkers; i++) workers.push(worker());
	await Promise.all(workers);

	return report;
}
module.exports = { runAllStores };

View file

@ -1,19 +1,19 @@
"use strict";
// ANSI SGR escape sequences used to style terminal output.
const C = {
	reset: "\x1b[0m",
	dim: "\x1b[2m",
	bold: "\x1b[1m",
	red: "\x1b[31m",
	green: "\x1b[32m",
	yellow: "\x1b[33m",
	cyan: "\x1b[36m",
	gray: "\x1b[90m",
};
/**
 * Wrap text in an escape code followed by the reset sequence.
 *
 * @param {*} s - Value to print; converted with String().
 * @param {string} code - Escape code(s) to prefix; a falsy value adds no prefix.
 * @param {boolean} enabled - When falsy the text is returned unstyled.
 * @returns {string} The styled text, or the plain text when styling is disabled.
 */
function color(s, code, enabled) {
	const text = String(s);
	if (!enabled) return text;
	return String(code || "") + text + C.reset;
}
module.exports = { C, color };

View file

@ -1,86 +1,86 @@
"use strict";
/**
 * Parse a value as a base-10 integer and clamp it into [min, max].
 *
 * @param {*} v - Raw value (typically a CLI string).
 * @param {number|null} def - Returned when `v` does not parse to a number.
 *   With `def === null`, a null/undefined `v` also returns null.
 * @param {number} min - Lower bound (inclusive).
 * @param {number} max - Upper bound (inclusive).
 * @returns {number|null} The clamped integer, or `def`.
 */
function clampInt(v, def, min, max) {
	if (def === null && (v === null || v === undefined)) return null;
	const parsed = Number.parseInt(v ?? "", 10);
	if (!Number.isFinite(parsed)) return def;
	return Math.max(min, Math.min(max, parsed));
}
/**
 * Parse command-line arguments.
 *
 * Recognised flags:
 *   --debug | -d                     set `debug`
 *   --max-pages <n>                  integer, clamped to 1..5000
 *   --concurrency <n>                integer, clamped to 1..64
 *   --stagger-ms | --staggerMs <n>   integer, clamped to 0..5000
 *   --guess <n>                      integer, clamped to 1..5000
 *   --step <n>                       integer, clamped to 1..500
 *   --data-dir | --dataDir <path>
 *   --report-dir | --reportDir <path>
 *
 * A value flag only consumes the next argument when one exists and does not
 * start with "-"; otherwise the flag is ignored. If `maxPages` is still unset
 * afterwards, the first all-digit positional argument greater than zero is
 * used for it (capped at 5000).
 *
 * @param {string[]} argv - Arguments to parse, without the node/script prefix.
 * @returns {{maxPages: ?number, debug: boolean, concurrency: ?number, staggerMs: ?number, guess: ?number, step: ?number, dataDir: ?string, reportDir: ?string}}
 */
function parseArgs(argv) {
	const parsed = {
		maxPages: null,
		debug: false,
		concurrency: null,
		staggerMs: null,
		guess: null,
		step: null,
		dataDir: null,
		reportDir: null,
	};

	// flag -> [option name, converter for the raw value]
	const intIn = (min, max) => (raw) => clampInt(raw, null, min, max);
	const valueFlags = new Map([
		["--max-pages", ["maxPages", intIn(1, 5000)]],
		["--concurrency", ["concurrency", intIn(1, 64)]],
		["--stagger-ms", ["staggerMs", intIn(0, 5000)]],
		["--staggerMs", ["staggerMs", intIn(0, 5000)]],
		["--guess", ["guess", intIn(1, 5000)]],
		["--step", ["step", intIn(1, 500)]],
		["--data-dir", ["dataDir", String]],
		["--dataDir", ["dataDir", String]],
		["--report-dir", ["reportDir", String]],
		["--reportDir", ["reportDir", String]],
	]);

	const positional = [];

	for (let i = 0; i < argv.length; i++) {
		const a = argv[i];

		if (a === "--debug" || a === "-d") {
			parsed.debug = true;
			continue;
		}

		const spec = valueFlags.get(a);
		const value = argv[i + 1];
		if (spec && value && !value.startsWith("-")) {
			const [key, convert] = spec;
			parsed[key] = convert(value);
			i++; // the value has been consumed
			continue;
		}

		if (!String(a).startsWith("-")) positional.push(a);
	}

	if (parsed.maxPages === null) {
		const cand = positional.find((x) => /^\d+$/.test(String(x)));
		if (cand) {
			const n = Number.parseInt(cand, 10);
			if (Number.isFinite(n) && n > 0) parsed.maxPages = Math.min(n, 5000);
		}
	}

	return parsed;
}
module.exports = { clampInt, parseArgs };

View file

@ -3,24 +3,24 @@
const { setTimeout: sleep } = require("timers/promises");
/**
 * Map `fn` over `arr` with bounded concurrency and staggered starts.
 *
 * Up to `concurrency` workers pull indexes from a shared counter. Worker k
 * (1-based) first waits `staggerMs * (k - 1)`; in addition every item except
 * index 0 is preceded by a `staggerMs` pause. Results are stored at the
 * index of their input. A rejection from `fn` rejects the returned promise.
 *
 * @param {Array} arr - Input items.
 * @param {number} concurrency - Maximum number of workers; a value below 1
 *   (or not a number) is treated as 1 so that every item is still processed.
 * @param {number} staggerMs - Pause in milliseconds; 0 disables staggering.
 * @param {function(*, number): (Promise<*>|*)} fn - Called as fn(item, index).
 * @returns {Promise<Array>} Results in input order.
 */
async function parallelMapStaggered(arr, concurrency, staggerMs, fn) {
	const out = new Array(arr.length);
	let next = 0;

	async function worker(workerId) {
		if (staggerMs > 0 && workerId > 1) await sleep(staggerMs * (workerId - 1));
		while (true) {
			const i = next++;
			if (i >= arr.length) return;
			if (staggerMs > 0 && i > 0) await sleep(staggerMs);
			out[i] = await fn(arr[i], i);
		}
	}

	// With zero workers the result would silently be an array of holes.
	const limit = concurrency >= 1 ? concurrency : 1;
	const w = Math.min(limit, arr.length);
	const workers = [];
	for (let i = 0; i < w; i++) workers.push(worker(i + 1));
	await Promise.all(workers);
	return out;
}
module.exports = { parallelMapStaggered };

View file

@ -1,12 +1,12 @@
"use strict";
/**
 * Format a byte count for display.
 *
 * @param {number} n - Number of bytes.
 * @returns {string} "0B" for non-finite or non-positive input, "<n>B" below
 *   1024, otherwise KB or MB with one decimal (MB is the largest unit).
 */
function humanBytes(n) {
	if (!Number.isFinite(n) || n <= 0) return "0B";
	if (n < 1024) return `${n}B`;

	const kb = n / 1024;
	if (kb < 1024) return `${kb.toFixed(1)}KB`;

	return `${(kb / 1024).toFixed(1)}MB`;
}
module.exports = { humanBytes };

View file

@ -1,141 +1,137 @@
"use strict";
/**
 * Remove every `<...>` tag from a string; nothing is inserted in its place.
 *
 * @param {*} s - Input; converted with String().
 * @returns {string} The text with tags removed.
 */
function stripTags(s) {
	return String(s).replace(/<[^>]*>/g, "");
}
/**
 * Reduce an HTML fragment to single-spaced plain text.
 *
 * Each tag is replaced by a space (so adjacent elements do not run
 * together), whitespace runs are collapsed to one space, and the result is
 * trimmed.
 *
 * @param {*} s - Input; converted with String().
 * @returns {string} The cleaned text.
 */
function cleanText(s) {
	return String(s)
		.replace(/<[^>]+>/g, " ")
		.replace(/\s+/g, " ")
		.trim();
}
/**
 * Decode the HTML entities this project cares about: decimal (`&#NN;`) and
 * hex (`&#xHH;`) character references plus amp, quot, apos, lt, gt, nbsp,
 * laquo and raquo.
 *
 * Decoding happens in a single pass, so already-escaped text such as
 * `&amp;lt;` yields `&lt;` rather than being decoded twice into `<`.
 * Numeric references use code points, so characters above U+FFFF (emoji
 * etc.) decode correctly; references beyond U+10FFFF are left untouched.
 *
 * @param {*} s - Value to decode (coerced with String()).
 * @returns {string} Decoded text.
 */
function decodeHtml(s) {
  const named = {
    amp: "&",
    quot: '"',
    apos: "'",
    lt: "<",
    gt: ">",
    nbsp: " ",
    laquo: "«",
    raquo: "»",
  };
  const entity = /&(?:#(\d+)|#x([0-9a-fA-F]+)|(amp|quot|apos|lt|gt|nbsp|laquo|raquo));/g;

  return String(s).replace(entity, (whole, dec, hex, name) => {
    if (name !== undefined) return named[name];
    const codePoint = dec !== undefined ? parseInt(dec, 10) : parseInt(hex, 16);
    // String.fromCodePoint throws on out-of-range values; keep the raw text.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
  });
}
/**
 * Escape regex metacharacters so `s` can be embedded in a RegExp literally.
 */
function escapeRe(s) {
  return String(s).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Read the value of the first `attrName=...` occurrence in an HTML snippet.
 * Handles double-quoted, single-quoted and unquoted values; the name match
 * is case-insensitive.
 *
 * The name must not be preceded by a word character, `-`, `:` or `.`, so
 * asking for `src` no longer picks up the value of `data-src` (a plain `\b`
 * treats the hyphen as a boundary and matched inside the longer name).
 *
 * @param {string} html - Tag or HTML fragment to search.
 * @param {string} attrName - Attribute name to look for.
 * @returns {string} The attribute value, or "" when not present.
 */
function extractHtmlAttr(html, attrName) {
  const re = new RegExp(
    `(?<![\\w:.-])${escapeRe(attrName)}\\s*=\\s*(?:"([^"]*)"|'([^']*)'|([^\\s>]+))`,
    "i"
  );
  const m = re.exec(html);
  if (!m) return "";
  return m[1] ?? m[2] ?? m[3] ?? "";
}
/**
 * Return the URL of the first candidate in a `srcset` value.
 * The value is split on "," and the first whitespace-delimited token of the
 * first candidate is returned with any wrapping quote removed.
 *
 * @param {string} srcset - Raw srcset attribute value (may be empty/nullish).
 * @returns {string} First URL, or "" when there is none.
 */
function pickFirstUrlFromSrcset(srcset) {
  const s = String(srcset || "").trim();
  if (!s) return "";
  const first = (s.split(",")[0] || "").trim();
  const url = (first.split(/\s+/)[0] || "").trim();
  return url.replace(/^["']|["']$/g, "");
}
/**
 * Resolve a possibly relative or protocol-relative URL to an absolute one.
 *
 * @param {string} raw - URL text as found in markup.
 * @param {string} [baseUrl] - Base used to resolve relative URLs.
 * @returns {string} Absolute URL; "" for empty input; the trimmed input
 *   (with `//` upgraded to `https://`) when it cannot be parsed.
 */
function normalizeMaybeRelativeUrl(raw, baseUrl) {
  const r = String(raw || "").trim();
  if (!r) return "";
  // Protocol-relative URLs are assumed to be reachable over https.
  const u = r.startsWith("//") ? `https:${r}` : r;
  try {
    return baseUrl ? new URL(u, baseUrl).toString() : new URL(u).toString();
  } catch {
    return u;
  }
}
/**
 * Replace a `{width}` / `%7Bwidth%7D` placeholder in an image URL with a
 * concrete width.
 *
 * The width defaults to 400. When the tag carries a JSON array in
 * `data-widths`, 400 is used if listed, else 360 if listed, else the first
 * entry. URLs without a placeholder are returned unchanged.
 *
 * @param {string} url - Image URL that may contain the placeholder.
 * @param {string} tag - The `<img ...>` tag the URL came from.
 * @returns {string} URL with the placeholder substituted.
 */
function resolveShopifyWidthPlaceholder(url, tag) {
  const s = String(url || "");
  if (!/%7Bwidth%7D|\{width\}/i.test(s)) return s;

  // Pick a reasonable width from data-widths if available
  let w = 400;
  const dw = extractHtmlAttr(tag, "data-widths");
  if (dw) {
    try {
      const arr = JSON.parse(dw);
      if (Array.isArray(arr) && arr.length) {
        if (arr.includes(400)) w = 400;
        else if (arr.includes(360)) w = 360;
        else w = arr[0];
      }
    } catch {
      // Malformed data-widths: keep the default width.
    }
  }

  // The `_{width}x` forms are handled first so the trailing "x" is preserved.
  return s
    .replace(/_%7Bwidth%7D(x)/gi, `_${w}$1`)
    .replace(/_\{width\}(x)/gi, `_${w}$1`)
    .replace(/%7Bwidth%7D/gi, String(w))
    .replace(/\{width\}/gi, String(w));
}
/**
 * Find the first `<img>` tag in `html` and return a usable absolute image URL.
 *
 * Attributes are tried in order: data-src, data-lazy-src, data-original,
 * data-srcset, srcset, src. `data:` URIs are skipped, and on the first pass
 * non-srcset values containing a `{width}` template are skipped too so a
 * srcset candidate is preferred. If nothing is found, data-src/src are
 * retried with template URLs allowed (the width placeholder is filled in).
 *
 * @param {string} html - HTML fragment to search.
 * @param {string} [baseUrl] - Base for resolving relative URLs.
 * @returns {string} Absolute image URL, or "" when none is found.
 */
function extractFirstImgUrl(html, baseUrl) {
  const s = String(html || "");
  const m = s.match(/<img\b[^>]*>/i);
  if (!m) return "";

  const tag = m[0];
  const templateRe = /%7Bwidth%7D|\{width\}/i;

  const attrs = ["data-src", "data-lazy-src", "data-original", "data-srcset", "srcset", "src"];
  for (const a of attrs) {
    let v = extractHtmlAttr(tag, a);
    if (!v) continue;

    v = decodeHtml(String(v)).trim();
    if (!v) continue;

    const isSrcset = a.toLowerCase().includes("srcset");
    if (isSrcset) v = pickFirstUrlFromSrcset(v);
    v = String(v || "").trim();
    if (!v) continue;

    if (/^data:/i.test(v)) continue;

    // If this attr is a template URL, prefer trying srcset next
    if (!isSrcset && templateRe.test(v)) continue;

    const abs = resolveShopifyWidthPlaceholder(normalizeMaybeRelativeUrl(v, baseUrl), tag);
    if (abs) return abs;
  }

  // Fallback: accept template URLs but force a width
  for (const a of ["data-src", "src"]) {
    let v = extractHtmlAttr(tag, a);
    if (!v) continue;
    v = decodeHtml(String(v)).trim();
    if (!v || /^data:/i.test(v)) continue;
    const abs = resolveShopifyWidthPlaceholder(normalizeMaybeRelativeUrl(v, baseUrl), tag);
    if (abs) return abs;
  }

  return "";
}
module.exports = {
stripTags,
cleanText,
decodeHtml,
escapeRe,
extractHtmlAttr,
extractFirstImgUrl,
stripTags,
cleanText,
decodeHtml,
escapeRe,
extractHtmlAttr,
extractFirstImgUrl,
};

View file

@ -1,21 +1,23 @@
"use strict";
/**
 * Normalize a price string by removing all whitespace; falsy input gives "".
 */
function normPrice(p) {
  const text = String(p || "").trim();
  return text.replace(/\s+/g, "");
}
/**
 * Parse a displayed price into a number by keeping only digits and dots.
 *
 * @param {*} p - Price text such as "$1,234.50".
 * @returns {number} The numeric value, or NaN when the text holds no digits
 *   or is not a valid number (e.g. "1.2.3"). Previously a digit-less string
 *   parsed as 0, which made a missing price look like a free item.
 */
function priceToNumber(p) {
  const digits = String(p ?? "").replace(/[^0-9.]/g, "");
  // Number("") is 0, so an explicit digit check is needed to reject blanks.
  if (!/\d/.test(digits)) return NaN;
  const n = Number(digits);
  return Number.isFinite(n) ? n : NaN;
}
/**
 * Percentage discount between an old and a new price, rounded to an integer.
 *
 * @param {string} oldPriceStr - Previous price text.
 * @param {string} newPriceStr - Current price text.
 * @returns {number|null} Rounded percent off, or null when either price is
 *   not a finite number, the old price is not positive, or the new price is
 *   not lower than the old one.
 */
function salePctOff(oldPriceStr, newPriceStr) {
  const oldN = priceToNumber(oldPriceStr);
  const newN = priceToNumber(newPriceStr);
  if (!Number.isFinite(oldN) || !Number.isFinite(newN) || oldN <= 0) return null;
  if (newN >= oldN) return null;
  return Math.round(((oldN - newN) / oldN) * 100);
}
module.exports = { normPrice, priceToNumber, salePctOff };

View file

@ -2,72 +2,71 @@
"use strict";
/**
 * 32-bit FNV-1a hash over the UTF-16 code units of `str`.
 *
 * @param {string} str - Input text.
 * @returns {string} Hash as 8 lowercase hex digits.
 */
function fnv1a32(str) {
  let h = 0x811c9dc5; // FNV offset basis
  for (let i = 0; i < str.length; i++) {
    h ^= str.charCodeAt(i);
    h = Math.imul(h, 0x01000193); // FNV prime, 32-bit wrapping multiply
  }
  return (h >>> 0).toString(16).padStart(8, "0");
}
/**
 * Left-pad a 1-6 digit id with zeros to six digits.
 *
 * @param {*} idDigits - Digits of the id.
 * @returns {string} Six-digit string, or "" when the trimmed input is not
 *   one to six digits.
 */
function idToCspc6(idDigits) {
  const s = String(idDigits || "").trim();
  if (!/^\d{1,6}$/.test(s)) return "";
  return s.padStart(6, "0");
}
/**
 * Extract the first standalone run of exactly six digits from `v`.
 *
 * @param {*} v - Raw SKU value.
 * @returns {string} The six digits, or "" when none is found.
 */
function normalizeCspc(v) {
  const m = String(v ?? "").match(/\b(\d{6})\b/);
  return m ? m[1] : "";
}
/**
 * Extract the first standalone run of 12 to 14 digits from `v`.
 *
 * @param {*} v - Raw value, e.g. "upc:012345678905".
 * @returns {string} The digits, or "" when none is found.
 */
function normalizeUpcDigits(v) {
  const m = String(v ?? "").match(/\b(\d{12,14})\b/);
  return m ? m[1] : "";
}
// CHANGE: allow 1-11 digits so BCL 3-digit ids like id:141 are preserved
/**
 * Extract the first standalone run of 1 to 11 digits from `v`.
 *
 * @param {*} v - Raw value, e.g. "id:141".
 * @returns {string} The digits, or "" when none is found.
 */
function normalizeIdDigits(v) {
  const m = String(v ?? "").match(/\b(\d{1,11})\b/);
  return m ? m[1] : "";
}
// IMPORTANT: keep old behavior exactly (no lowercasing, no url canonicalization)
/**
 * Build a synthetic `u:<hash>` SKU key from the store label and product URL.
 * The inputs are hashed exactly as given (no lowercasing, no URL
 * canonicalization) so previously generated keys stay stable.
 *
 * @param {{storeLabel?: string, url?: string}} param0 - Store label
 *   (defaults to "store" when falsy) and product URL.
 * @returns {string} `u:` followed by the 8-hex-digit hash, or "" without a URL.
 */
function makeSyntheticSkuKey({ storeLabel, url }) {
  const store = String(storeLabel || "store");
  const u = String(url || "");
  if (!u) return "";
  return `u:${fnv1a32(`${store}|${u}`)}`;
}
/* ---------------- NEW: SKU quality helpers ---------------- */
/**
 * Score how trustworthy a SKU value is.
 *
 * @param {*} v - SKU value.
 * @returns {number} 0 for missing or synthetic (`u:`) keys, 3 when a 6-digit
 *   CSPC can be extracted, 2 for `upc:` / `id:` keys, 1 for any other text.
 */
function skuQuality(v) {
  const s = String(v ?? "").trim();
  if (!s) return 0; // missing
  if (/^u:/i.test(s)) return 0; // synthetic
  if (normalizeCspc(s)) return 3; // best (6-digit CSPC)
  if (/^upc:/i.test(s)) return 2;
  if (/^id:/i.test(s)) return 2;
  return 1; // explicit non-synthetic string
}
// Prefer higher quality; on ties keep existing (stable) value
/**
 * Choose between a newly scraped SKU and the stored one.
 * The higher-quality value wins; on a tie the existing value is kept so keys
 * stay stable, falling back to the new value only if the old one is empty.
 *
 * @param {*} newSku - Freshly observed SKU.
 * @param {*} oldSku - Previously stored SKU.
 * @returns {string} The preferred (trimmed) SKU.
 */
function pickBetterSku(newSku, oldSku) {
  const a = String(newSku ?? "").trim();
  const b = String(oldSku ?? "").trim();
  const qa = skuQuality(a);
  const qb = skuQuality(b);
  if (qa > qb) return a;
  if (qb > qa) return b;
  return b || a;
}
// Only fetch product pages when missing/synthetic
/**
 * Tell whether a SKU is missing or synthetic (`u:` prefix, any case).
 *
 * @param {*} sku - Current SKU value.
 * @returns {boolean} True when the SKU is empty or starts with `u:`.
 */
function needsSkuDetail(sku) {
  const s = String(sku ?? "").trim();
  return !s || /^u:/i.test(s);
}
/**
@ -79,34 +78,34 @@ function needsSkuDetail(sku) {
* - else => u:<fnv(store|url)> (old recipe)
*/
/**
 * Normalize a raw SKU into the key format used across the project.
 *
 * Resolution order:
 *  - a standalone 6-digit run => that CSPC
 *  - `upc:`-tagged value => `upc:<12-14 digits>` (or "" if no digits match)
 *  - `id:`-tagged value => zero-padded 6-digit CSPC when the id has at most
 *    six digits, otherwise `id:<digits>` (or "" if no digits match)
 *  - value already starting with `u:` => returned unchanged
 *  - anything else => synthetic `u:<hash>` built from store label and URL
 *
 * @param {*} v - Raw SKU value.
 * @param {{storeLabel?: string, url?: string}} [ctx] - Used for the synthetic key.
 * @returns {string} Normalized key, or "" when nothing can be produced.
 */
function normalizeSkuKey(v, { storeLabel, url } = {}) {
  const raw = String(v ?? "").trim();

  const cspc = normalizeCspc(raw);
  if (cspc) return cspc;

  // NEW: only if explicitly tagged, so legacy behavior doesn't change
  if (/^upc:/i.test(raw)) {
    const upc = normalizeUpcDigits(raw);
    return upc ? `upc:${upc}` : "";
  }
  if (/^id:/i.test(raw)) {
    const id = normalizeIdDigits(raw);
    if (!id) return "";
    const padded = idToCspc6(id);
    return padded ? padded : `id:${id}`;
  }

  if (raw.startsWith("u:")) return raw;

  const syn = makeSyntheticSkuKey({ storeLabel, url });
  return syn || "";
}
module.exports = {
normalizeCspc,
normalizeSkuKey,
makeSyntheticSkuKey,
skuQuality,
pickBetterSku,
needsSkuDetail,
normalizeCspc,
normalizeSkuKey,
makeSyntheticSkuKey,
skuQuality,
pickBetterSku,
needsSkuDetail,
};

View file

@ -6,203 +6,200 @@ const path = require("path");
/* ---------------- Union-Find (undirected grouping) ---------------- */
/**
 * Disjoint-set (union-find) over string keys with union by rank and path
 * compression. Keys are trimmed strings; empty keys are ignored.
 */
class DSU {
  constructor() {
    this.parent = new Map(); // key -> parent key (roots point to themselves)
    this.rank = new Map(); // key -> rank used to keep trees shallow
  }

  /** Register `x` as its own singleton set if it is not known yet. */
  _add(x) {
    if (!this.parent.has(x)) {
      this.parent.set(x, x);
      this.rank.set(x, 0);
    }
  }

  /**
   * Return the representative of the set containing `x`, adding `x` first
   * if needed. Returns "" for empty input.
   */
  find(x) {
    x = String(x || "").trim();
    if (!x) return "";
    this._add(x);

    // Walk up to the root iteratively, then point every visited node at it.
    let root = x;
    for (let p = this.parent.get(root); p !== root; p = this.parent.get(root)) {
      root = p;
    }
    let cur = x;
    while (cur !== root) {
      const up = this.parent.get(cur);
      this.parent.set(cur, root);
      cur = up;
    }
    return root;
  }

  /** Merge the sets containing `a` and `b`; no-op for empty or equal keys. */
  union(a, b) {
    a = String(a || "").trim();
    b = String(b || "").trim();
    if (!a || !b || a === b) return;
    const ra = this.find(a);
    const rb = this.find(b);
    if (!ra || !rb || ra === rb) return;

    const rka = this.rank.get(ra) || 0;
    const rkb = this.rank.get(rb) || 0;

    // Attach the lower-rank root under the higher-rank one.
    if (rka < rkb) this.parent.set(ra, rb);
    else if (rkb < rka) this.parent.set(rb, ra);
    else {
      this.parent.set(rb, ra);
      this.rank.set(ra, rka + 1);
    }
  }
}
/** True when the key carries the `u:` prefix (case-sensitive, no trimming). */
function isUnknownSkuKey(k) {
  const key = String(k || "");
  return key.startsWith("u:");
}
/** True when the trimmed key consists solely of digits. */
function isNumericSku(k) {
  const key = String(k || "").trim();
  return /^\d+$/.test(key);
}
/**
 * True for `upc:`-prefixed keys and for bare 12-14 digit keys (legacy form).
 */
function isUpcSku(k) {
  const s = String(k || "").trim();
  if (s.startsWith("upc:")) return true;
  return /^\d{12,14}$/.test(s); // keep legacy support
}
/**
 * Comparator that orders SKU keys by preference for use as a canonical id.
 *
 * Order: real keys before synthetic `u:` keys; non-UPC keys before UPC keys;
 * two all-digit keys by numeric value; otherwise plain string comparison.
 *
 * @param {*} a - First key.
 * @param {*} b - Second key.
 * @returns {number} -1, 0 or 1.
 */
function compareSku(a, b) {
  a = String(a || "").trim();
  b = String(b || "").trim();
  if (a === b) return 0;

  const au = isUnknownSkuKey(a);
  const bu = isUnknownSkuKey(b);
  if (au !== bu) return au ? 1 : -1; // real first

  const aUpc = isUpcSku(a);
  const bUpc = isUpcSku(b);
  if (aUpc !== bUpc) return aUpc ? 1 : -1; // UPCs after other "real" keys

  if (isNumericSku(a) && isNumericSku(b)) {
    const na = Number(a);
    const nb = Number(b);
    if (Number.isFinite(na) && Number.isFinite(nb) && na !== nb) return na < nb ? -1 : 1;
  }

  return a < b ? -1 : 1;
}
/* ---------------- File discovery ---------------- */
/**
 * Read and parse a UTF-8 JSON file, yielding null on any read or parse error.
 */
function tryReadJson(file) {
  let parsed = null;
  try {
    parsed = JSON.parse(fs.readFileSync(file, "utf8"));
  } catch {
    parsed = null;
  }
  return parsed;
}
/**
 * List the default locations to look for `sku_links.json`, in priority order.
 *
 * @param {string} [dbDir] - Database directory; when set, its parent
 *   directory is checked first.
 * @returns {string[]} Candidate file paths.
 */
function defaultSkuLinksCandidates(dbDir) {
  const out = [];

  // 1) next to db dir: <dbDir>/../sku_links.json (common when dbDir is .../data/db)
  if (dbDir) {
    out.push(path.join(dbDir, "..", "sku_links.json"));
  }

  // 2) repo root conventional location
  out.push(path.join(process.cwd(), "data", "sku_links.json"));

  // 3) common worktree location
  out.push(path.join(process.cwd(), ".worktrees", "data", "data", "sku_links.json"));

  return out;
}
/**
 * Decide which SKU links file to use.
 *
 * Precedence: the SPIRIT_TRACKER_SKU_LINKS environment variable, then the
 * explicit `mappingFile`, then the first default candidate that exists.
 * The env value and `mappingFile` are returned without an existence check.
 *
 * @param {{dbDir?: string, mappingFile?: string}} [opts]
 * @returns {string} Path to the links file, or "" when none is found.
 */
function findSkuLinksFile({ dbDir, mappingFile } = {}) {
  // env override
  const env = String(process.env.SPIRIT_TRACKER_SKU_LINKS || "").trim();
  if (env) return env;

  if (mappingFile) return mappingFile;

  for (const f of defaultSkuLinksCandidates(dbDir)) {
    if (!f) continue;
    try {
      if (fs.existsSync(f)) return f;
    } catch {
      // ignore
    }
  }

  return "";
}
/**
 * Convert an `id:<1-6 digits>` key to its zero-padded 6-digit form; any
 * other key is returned trimmed but otherwise unchanged.
 */
function normalizeImplicitSkuKey(k) {
  const s = String(k || "").trim();
  const m = s.match(/^id:(\d{1,6})$/i);
  if (m) return String(m[1]).padStart(6, "0");
  return s;
}
/* ---------------- Public API ---------------- */
/**
 * Build a SKU canonicalizer from a list of `{ fromSku, toSku }` links.
 *
 * Links are treated as undirected: all keys connected through any chain of
 * links form one group, and each group's canonical key is the member that
 * sorts first under `compareSku`.
 *
 * @param {Array<{fromSku?: string, toSku?: string}>} links - Link records;
 *   non-arrays and records missing either side are ignored.
 * @returns {{canonicalSku: (sku: *) => string, _canonBySku: Map<string, string>}}
 *   `canonicalSku` maps a key to its group's canonical key (or to its own
 *   normalized form when it is not linked).
 */
function buildSkuMapFromLinksArray(links) {
  const dsu = new DSU();
  const all = new Set();

  for (const x of Array.isArray(links) ? links : []) {
    const a = normalizeImplicitSkuKey(x?.fromSku);
    const b = normalizeImplicitSkuKey(x?.toSku);
    if (!a || !b) continue;

    all.add(a);
    all.add(b);

    // undirected union => hardened vs A->B->C and cycles
    dsu.union(a, b);
  }

  // root -> Set(members)
  const byRoot = new Map();
  for (const s of all) {
    const r = dsu.find(s);
    if (!r) continue;
    let set = byRoot.get(r);
    if (!set) byRoot.set(r, (set = new Set()));
    set.add(s);
  }

  // sku -> canonical rep (the best-sorting member of each group)
  const canonBySku = new Map();
  for (const [root, members] of byRoot.entries()) {
    const sorted = Array.from(members).sort(compareSku);
    const rep = sorted[0] || root;
    for (const s of members) canonBySku.set(s, rep);
  }

  function canonicalSku(sku) {
    const s = normalizeImplicitSkuKey(sku);
    if (!s) return s;
    return canonBySku.get(s) || s;
  }

  return { canonicalSku, _canonBySku: canonBySku };
}
/**
 * Locate the SKU links file and build the SKU map from its `links` array.
 * A missing, unreadable or malformed file results in an empty map.
 *
 * @param {{dbDir?: string, mappingFile?: string}} [opts]
 * @returns {{canonicalSku: Function, _canonBySku: Map}} SKU map.
 */
function loadSkuMap({ dbDir, mappingFile } = {}) {
  const file = findSkuLinksFile({ dbDir, mappingFile });
  if (!file) {
    return buildSkuMapFromLinksArray([]);
  }

  const obj = tryReadJson(file);
  const links = Array.isArray(obj?.links) ? obj.links : [];
  return buildSkuMapFromLinksArray(links);
}
module.exports = { loadSkuMap };

View file

@ -1,29 +1,29 @@
"use strict";
/** Append spaces to the string form of `s` until it is at least `n` long. */
function padRight(s, n) {
  const text = String(s);
  if (text.length >= n) return text;
  return text + " ".repeat(n - text.length);
}
/** Prepend spaces to the string form of `s` until it is at least `n` long. */
function padLeft(s, n) {
  const text = String(s);
  if (text.length >= n) return text;
  return " ".repeat(n - text.length) + text;
}
/** Remove ANSI SGR sequences (ESC `[` ... `m`) from the string form of `s`. */
function stripAnsi(s) {
  const sgr = /\x1b\[[0-9;]*m/g;
  return String(s).replace(sgr, "");
}
/**
 * Right-pad `s` to a visible width of `n`, measuring the width with ANSI
 * color sequences stripped so colored text aligns with plain text.
 */
function padRightV(s, n) {
  s = String(s);
  const w = stripAnsi(s).length;
  return w >= n ? s : s + " ".repeat(n - w);
}
/**
 * Left-pad `s` to a visible width of `n`, measuring the width with ANSI
 * color sequences stripped so colored text aligns with plain text.
 */
function padLeftV(s, n) {
  s = String(s);
  const w = stripAnsi(s).length;
  return w >= n ? s : " ".repeat(n - w) + s;
}
module.exports = { padRight, padLeft, stripAnsi, padRightV, padLeftV };

View file

@ -3,11 +3,11 @@
const { cleanText, decodeHtml } = require("./html");
/**
 * Produce a plain display name: decode entities, strip markup, drop quote
 * characters, remove everything except letters, digits, whitespace and
 * `- & ( ) . , /`, then collapse whitespace.
 */
function sanitizeName(s) {
  const plain = cleanText(decodeHtml(String(s || "")));
  const unquoted = plain.replace(/['"’“”`´]/g, "");
  const allowedOnly = unquoted.replace(/[^\p{L}\p{N}\s\-&().,/]/gu, "");
  return allowedOnly.replace(/\s+/g, " ").trim();
}
module.exports = { sanitizeName };

View file

@ -1,16 +1,19 @@
"use strict";
/**
 * Format the local time of `d` as `HH:MM:SS.mmm`.
 *
 * @param {Date} [d] - Date to format; defaults to now.
 * @returns {string} Zero-padded local time with milliseconds.
 */
function ts(d = new Date()) {
  const h = String(d.getHours()).padStart(2, "0");
  const m = String(d.getMinutes()).padStart(2, "0");
  const s = String(d.getSeconds()).padStart(2, "0");
  const ms = String(d.getMilliseconds()).padStart(3, "0");
  return `${h}:${m}:${s}.${ms}`;
}
/**
 * UTC ISO timestamp usable in file names: colons become dashes and the
 * milliseconds are dropped, e.g. 2026-01-16T21-27-01Z.
 */
function isoTimestampFileSafe(d = new Date()) {
  const iso = d.toISOString();
  const noColons = iso.replace(/:/g, "-");
  return noColons.replace(/\.\d{3}Z$/, "Z");
}
module.exports = { ts, isoTimestampFileSafe };

View file

@ -1,50 +1,56 @@
"use strict";
/**
 * Reduce a listing URL to its page-1 base form: drops the hash, the `page`
 * query parameter and a trailing `/page/<n>/` path segment, and ensures the
 * path ends with "/".
 *
 * @param {string} startUrl - URL to normalize.
 * @returns {string} Normalized URL, or `startUrl` unchanged if it cannot be parsed.
 */
function normalizeBaseUrl(startUrl) {
  try {
    const u = new URL(startUrl);
    u.hash = "";
    if (u.searchParams && u.searchParams.has("page")) u.searchParams.delete("page");
    // Re-serialize so an empty query does not leave a dangling "?".
    u.search = u.searchParams.toString() ? `?${u.searchParams.toString()}` : "";

    if (!u.pathname.endsWith("/")) u.pathname += "/";
    u.pathname = u.pathname.replace(/\/page\/\d+\/?$/, "/");
    return u.toString();
  } catch {
    return startUrl;
  }
}
/**
 * Build a path-style pagination URL (`.../page/<n>/`).
 *
 * @param {string} baseUrl - Listing URL (an existing `/page/<n>/` suffix is replaced).
 * @param {number} pageNum - Page number; values <= 1 yield the normalized base URL.
 * @returns {string} URL for the requested page.
 * @throws {TypeError} If `baseUrl` is not a valid URL and `pageNum` > 1.
 */
function makePageUrl(baseUrl, pageNum) {
  if (pageNum <= 1) return normalizeBaseUrl(baseUrl);
  const u = new URL(baseUrl);
  if (!u.pathname.endsWith("/")) u.pathname += "/";
  u.pathname = u.pathname.replace(/\/page\/\d+\/?$/, "/");
  u.pathname = `${u.pathname}page/${pageNum}/`;
  u.hash = "";
  return u.toString();
}
/**
 * Build a page URL using the store's own `makePageUrl` when `ctx.store`
 * provides one, otherwise the default path-style `makePageUrl`.
 */
function makePageUrlForCtx(ctx, baseUrl, pageNum) {
  const fn = ctx?.store?.makePageUrl;
  return typeof fn === "function" ? fn(baseUrl, pageNum) : makePageUrl(baseUrl, pageNum);
}
/**
 * Build a query-style pagination URL where the page number is carried in
 * the `paramName` query parameter.
 *
 * @param {string} baseUrl - Listing URL (normalized first).
 * @param {string} paramName - Name of the page query parameter.
 * @param {number} pageNum - Page number; values <= 1 are sent as "1".
 * @returns {string} URL for the requested page.
 */
function makePageUrlQueryParam(baseUrl, paramName, pageNum) {
  const u = new URL(normalizeBaseUrl(baseUrl));
  u.hash = "";
  u.searchParams.set(paramName, pageNum <= 1 ? "1" : String(pageNum));
  u.search = `?${u.searchParams.toString()}`;
  return u.toString();
}
/**
 * Build a pagination URL that carries the page number in the `page` query
 * parameter, with a minimum page of 1.
 *
 * @param {string} baseUrl - Listing URL (normalized first).
 * @param {number} pageNum - Requested page.
 * @returns {string} URL for the requested page.
 */
function makePageUrlShopifyQueryPage(baseUrl, pageNum) {
  const u = new URL(normalizeBaseUrl(baseUrl));
  u.hash = "";
  u.searchParams.set("page", String(Math.max(1, pageNum)));
  u.search = `?${u.searchParams.toString()}`;
  return u.toString();
}
module.exports = { normalizeBaseUrl, makePageUrl, makePageUrlForCtx, makePageUrlQueryParam, makePageUrlShopifyQueryPage };
module.exports = {
normalizeBaseUrl,
makePageUrl,
makePageUrlForCtx,
makePageUrlQueryParam,
makePageUrlShopifyQueryPage,
};

View file

@ -8,50 +8,50 @@ const { decodeHtml, stripTags, cleanText } = require("./html");
* - Else uses the normal price bdi/span content.
*/
/**
 * Extract a price string from a product block's first `price` span.
 *
 * If the span contains `<ins>` elements, the last one is used as the scope
 * (otherwise the whole span). Within the scope, the last `<bdi>` content is
 * preferred; failing that, a currency symbol element plus the first number;
 * failing that, the first `$<number>` in the text.
 *
 * @param {string} block - HTML of one product block.
 * @returns {string} Price such as "$12.99", or "" when none is found.
 */
function extractPriceFromTmbBlock(block) {
  const span = matchFirstPriceSpan(block);
  if (!span) return "";

  const insMatches = [...span.matchAll(/<ins\b[^>]*>([\s\S]*?)<\/ins>/gi)];
  const scope = insMatches.length ? insMatches[insMatches.length - 1][1] : span;

  const bdis = [...scope.matchAll(/<bdi\b[^>]*>([\s\S]*?)<\/bdi>/gi)];
  if (bdis.length) {
    const raw = cleanText(decodeHtml(stripTags(bdis[bdis.length - 1][1]))).replace(/\s+/g, "");
    if (raw) return raw.startsWith("$") ? raw : `$${raw}`;
  }

  const sym = scope.match(/woocommerce-Price-currencySymbol[^>]*>\s*([^<\s]+)/i);
  const text = cleanText(decodeHtml(stripTags(scope)));
  const num = text.match(/(\d+(?:\.\d{2})?)/);
  if (sym && num) return `${sym[1].trim()}${num[1]}`;

  // Last resort: a literal dollar amount anywhere in the scope text.
  const m = text.match(/\$\s*\d+(?:\.\d{2})?/);
  return m ? m[0].replace(/\s+/g, "") : "";
}
/**
 * Return the inner HTML of the first `<span>` whose class list contains
 * `price`, tracking nested `<span>` elements so the matching close tag is
 * found.
 *
 * @param {string} html - HTML to search.
 * @returns {string} Inner HTML of the span, or "" when there is no such
 *   span or it is never closed.
 */
function matchFirstPriceSpan(html) {
  const re = /<span\b[^>]*class=["'][^"']*\bprice\b[^"']*["'][^>]*>/i;
  const m = re.exec(html);
  if (!m) return "";
  const start = m.index + m[0].length;

  let i = start;
  let depth = 1; // already inside the price span
  while (i < html.length) {
    const nextOpen = html.indexOf("<span", i);
    const nextClose = html.indexOf("</span>", i);
    if (nextClose === -1) break;

    if (nextOpen !== -1 && nextOpen < nextClose) {
      depth++;
      i = nextOpen + 5; // skip past "<span"
      continue;
    }
    depth--;
    if (depth === 0) return html.slice(start, nextClose);
    i = nextClose + 7; // skip past "</span>"
  }
  return "";
}
module.exports = { extractPriceFromTmbBlock };

View file

@ -22,320 +22,310 @@ const path = require("path");
/* ---------------- helpers ---------------- */
/** Create `dir` and any missing parent directories. */
function ensureDir(dir) {
  const options = { recursive: true };
  fs.mkdirSync(dir, options);
}
/** Parse the UTF-8 JSON file at `p`; any read or parse failure gives null. */
function readJson(p) {
  let data = null;
  try {
    const text = fs.readFileSync(p, "utf8");
    data = JSON.parse(text);
  } catch {
    data = null;
  }
  return data;
}
/**
 * List the `.json` files directly inside `<cwd>/data/db`.
 *
 * @returns {string[]} Absolute file paths; empty when the directory cannot be read.
 */
function listDbFiles() {
  const dir = path.join(process.cwd(), "data", "db");
  try {
    return fs
      .readdirSync(dir, { withFileTypes: true })
      .filter((e) => e.isFile() && e.name.endsWith(".json"))
      .map((e) => path.join(dir, e.name));
  } catch {
    // Missing or unreadable directory: treat as "no DB files".
    return [];
  }
}
/**
 * Parse a price value into a number by keeping only digits and dots.
 *
 * @param {*} v - Price text such as "$12.50".
 * @returns {number|null} The numeric value, or null when the value holds no
 *   digits or is not a valid number. Previously a digit-less value parsed
 *   as 0, which is indistinguishable from a real zero price.
 */
function priceToNumber(v) {
  const s = String(v ?? "").replace(/[^0-9.]/g, "");
  // Number("") is 0, so reject digit-less input explicitly.
  if (!/\d/.test(s)) return null;
  const n = Number(s);
  return Number.isFinite(n) ? n : null;
}
/** True when the value contains a standalone run of exactly six digits. */
function hasRealSku6(s) {
  const sixDigits = /\b\d{6}\b/;
  return sixDigits.test(String(s || ""));
}
/** True when the key starts with the synthetic `u:` prefix (case-sensitive). */
function isSyntheticSkuKey(k) {
  const key = String(k || "");
  return key.startsWith("u:");
}
/**
 * Derive the lowercase store key from a DB file path.
 * For names shaped `<store>__<rest>.json` the part before `__` is used;
 * otherwise the file name without `.json`.
 *
 * @param {string} abs - Path to a DB file.
 * @returns {string} Lowercase store key.
 */
function storeKeyFromDbPath(abs) {
  const base = path.basename(abs);
  const m = base.match(/^([^_]+)__.+\.json$/i);
  const k = m ? m[1] : base.replace(/\.json$/i, "");
  return String(k || "").toLowerCase();
}
/* ---------------- sku helpers ---------------- */
/**
 * Load the SKU map through `src/utils/sku_map` (resolved from the current
 * working directory); returns null if the module or the map cannot be loaded.
 */
function loadSkuMapOrNull() {
  const root = process.cwd();
  try {
    // eslint-disable-next-line node/no-missing-require
    const { loadSkuMap } = require(path.join(root, "src/utils/sku_map"));
    const dbDir = path.join(root, "data/db");
    return loadSkuMap({ dbDir });
  } catch {
    return null;
  }
}
/**
 * Normalize a raw SKU via `src/utils/sku` (resolved from the current working
 * directory). If that fails for any reason, fall back to the first
 * standalone 6-digit run in `skuRaw`, then to `u:<storeLabel>:<url>` when a
 * URL is given, else "".
 */
function normalizeSkuKeyOrEmpty({ skuRaw, storeLabel, url }) {
  try {
    // eslint-disable-next-line node/no-missing-require
    const { normalizeSkuKey } = require(path.join(process.cwd(), "src/utils/sku"));
    const key = normalizeSkuKey(skuRaw, { storeLabel, url });
    if (!key) return "";
    return String(key);
  } catch {
    const sixDigits = /\b(\d{6})\b/.exec(String(skuRaw ?? ""));
    if (sixDigits) return sixDigits[1];
    return url ? `u:${storeLabel}:${url}` : "";
  }
}
/**
 * Map a SKU key to its canonical form using `skuMap.canonicalSku` when
 * available; falls back to the key itself. Empty keys give "".
 */
function canonicalize(k, skuMap) {
  if (!k) return "";
  const canMap = Boolean(skuMap) && typeof skuMap.canonicalSku === "function";
  if (!canMap) return k;
  return String(skuMap.canonicalSku(k) || k);
}
/* ---------------- grouping ---------------- */
// Store keys that belong to the "bc" group; every other store counts as "ab".
const BC_STORE_KEYS = new Set(["gull", "strath", "bcl", "legacy", "legacyliquor", "tudor", "vessel", "vintage", "arc"]);
/**
 * Decide whether a store belongs to the requested group.
 *
 * @param {string} group - "bc", "ab", or anything else for "all".
 * @param {string} storeKey - Store key (compared case-insensitively).
 * @returns {boolean} True when the store is part of the group.
 */
function groupAllowsStore(group, storeKey) {
  const k = String(storeKey || "").toLowerCase();
  if (group === "bc") return BC_STORE_KEYS.has(k);
  if (group === "ab") return !BC_STORE_KEYS.has(k);
  return true; // all
}
/* ---------------- args ---------------- */
/**
 * Parse the report script's command-line flags.
 *
 * Supported: `--top <n>` (default 50), `--min-stores <n>` (default 2),
 * `--require-all`, `--group <all|bc|ab>` (default "all"), `--out <path>`.
 * Non-numeric or zero numbers fall back to the default; an unknown group
 * falls back to "all". Unrecognized arguments are ignored.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{top: number, minStores: number, requireAll: boolean, group: string, out: string}}
 */
function parseArgs(argv) {
  const out = { top: 50, minStores: 2, requireAll: false, group: "all", out: "" };
  for (let i = 0; i < argv.length; i++) {
    const a = argv[i];
    if (a === "--top" && argv[i + 1]) out.top = Number(argv[++i]) || 50;
    else if (a === "--min-stores" && argv[i + 1]) out.minStores = Number(argv[++i]) || 2;
    else if (a === "--require-all") out.requireAll = true;
    else if (a === "--group" && argv[i + 1]) out.group = String(argv[++i] || "all").toLowerCase();
    else if (a === "--out" && argv[i + 1]) out.out = String(argv[++i] || "");
  }

  if (out.group !== "all" && out.group !== "bc" && out.group !== "ab") out.group = "all";
  return out;
}
/* ---------------- main ---------------- */
/**
 * Entry point: scans the DB files, aggregates live listings by canonical SKU
 * across stores, and writes a JSON report of the SKUs carried by the most
 * stores.
 *
 * Output goes to `--out` (joined onto the current working directory) or to
 * `reports/common_listings.json`. Sets `process.exitCode = 2` and returns
 * early when no DB files are found.
 */
function main() {
    const args = parseArgs(process.argv.slice(2));
    const repoRoot = process.cwd();
    const reportsDir = path.join(repoRoot, "reports");
    ensureDir(reportsDir);

    const outPath = args.out ? path.join(repoRoot, args.out) : path.join(reportsDir, "common_listings.json");
    ensureDir(path.dirname(outPath));

    const dbFiles = listDbFiles();
    if (!dbFiles.length) {
        console.error("No DB files found");
        process.exitCode = 2;
        return;
    }

    const skuMap = loadSkuMapOrNull();
    console.log(`[debug] skuMap: ${skuMap ? "loaded" : "missing"}`);
    console.log(`[debug] scanning ${dbFiles.length} db files`);

    const storeToCanon = new Map(); // storeKey -> Set(canonSku)
    const canonAgg = new Map(); // canonSku -> { stores:Set, listings:[], cheapest, storeMin:Map }

    let liveRows = 0;
    let removedRows = 0;

    for (const abs of dbFiles.sort()) {
        const obj = readJson(abs);
        if (!obj) continue;

        const storeLabel = String(obj.storeLabel || obj.store || "").trim();
        if (!storeLabel) continue;

        const storeKey = storeKeyFromDbPath(abs);
        if (!groupAllowsStore(args.group, storeKey)) continue;

        // Register the store even if none of its items end up with a usable SKU,
        // so it still counts toward storeCount / missingStores.
        if (!storeToCanon.has(storeKey)) {
            storeToCanon.set(storeKey, new Set());
        }

        const rel = path.relative(repoRoot, abs).replace(/\\/g, "/");
        const items = Array.isArray(obj.items) ? obj.items : [];

        console.log(`[debug] ${rel} storeKey="${storeKey}" storeLabel="${storeLabel}" items=${items.length}`);

        for (const it of items) {
            if (!it) continue;
            if (it.removed) {
                removedRows++;
                continue;
            }
            liveRows++;

            const skuKey = normalizeSkuKeyOrEmpty({
                skuRaw: it.sku,
                storeLabel,
                url: it.url,
            });
            if (!skuKey) continue;

            const canonSku = canonicalize(skuKey, skuMap);
            if (!canonSku) continue;

            storeToCanon.get(storeKey).add(canonSku);

            let agg = canonAgg.get(canonSku);
            if (!agg) {
                agg = { stores: new Set(), listings: [], cheapest: null, storeMin: new Map() };
                canonAgg.set(canonSku, agg);
            }

            agg.stores.add(storeKey);

            // Track the lowest live price seen for this SKU within each store.
            const priceNum = priceToNumber(it.price);
            if (priceNum !== null) {
                const prev = agg.storeMin.get(storeKey);
                if (prev === undefined || priceNum < prev) agg.storeMin.set(storeKey, priceNum);
            }

            const listing = {
                canonSku,
                skuKey,
                skuRaw: String(it.sku || ""),
                name: String(it.name || ""),
                price: String(it.price || ""),
                priceNum,
                url: String(it.url || ""),
                storeKey,
                storeLabel,
                categoryLabel: String(obj.categoryLabel || obj.category || ""),
                dbFile: rel,
                hasRealSku6: hasRealSku6(it.sku) && !isSyntheticSkuKey(skuKey),
            };

            agg.listings.push(listing);

            // Track the overall cheapest priced listing for this SKU.
            if (priceNum !== null) {
                if (!agg.cheapest || priceNum < agg.cheapest.priceNum) {
                    agg.cheapest = { priceNum, item: listing };
                }
            }
        }
    }

    const stores = [...storeToCanon.keys()].sort();
    const storeCount = stores.length;

    console.log(`[debug] group="${args.group}" stores(${storeCount}): ${stores.join(", ")}`);
    console.log(`[debug] liveRows=${liveRows} removedRows=${removedRows} canonSkus=${canonAgg.size}`);

    // Prefer the cheapest listing flagged hasRealSku6, then the overall
    // cheapest priced listing, then simply the first listing seen.
    function pickRepresentative(agg) {
        const preferred = agg.listings
            .filter((l) => l.hasRealSku6)
            .sort((a, b) => (a.priceNum ?? Infinity) - (b.priceNum ?? Infinity));

        if (preferred.length) return preferred[0];
        if (agg.cheapest) return agg.cheapest.item;
        return agg.listings[0] || null;
    }

    const rows = [];

    for (const [canonSku, agg] of canonAgg.entries()) {
        const rep = pickRepresentative(agg);
        const missingStores = stores.filter((s) => !agg.stores.has(s));

        const storePrices = {};
        for (const s of stores) {
            const p = agg.storeMin.get(s);
            if (Number.isFinite(p)) storePrices[s] = p;
        }

        rows.push({
            canonSku,
            storeCount: agg.stores.size,
            stores: [...agg.stores].sort(),
            missingStores,
            storePrices, // { [storeKey]: number } min live price per store
            representative: rep
                ? {
                      name: rep.name,
                      price: rep.price,
                      priceNum: rep.priceNum,
                      storeKey: rep.storeKey,
                      storeLabel: rep.storeLabel,
                      skuRaw: rep.skuRaw,
                      skuKey: rep.skuKey,
                      url: rep.url,
                      categoryLabel: rep.categoryLabel,
                      dbFile: rep.dbFile,
                  }
                : null,
            cheapest: agg.cheapest
                ? {
                      price: agg.cheapest.item.price,
                      priceNum: agg.cheapest.priceNum,
                      storeKey: agg.cheapest.item.storeKey,
                      url: agg.cheapest.item.url,
                  }
                : null,
        });
    }

    // Stable-ish sort: storeCount desc, then canonSku asc (stable diffs over time)
    rows.sort((a, b) => {
        if (b.storeCount !== a.storeCount) return b.storeCount - a.storeCount;
        return String(a.canonSku).localeCompare(String(b.canonSku));
    });

    const filtered = args.requireAll
        ? rows.filter((r) => r.storeCount === storeCount)
        : rows.filter((r) => r.storeCount >= args.minStores);

    const top = filtered.slice(0, args.top);

    const payload = {
        generatedAt: new Date().toISOString(),
        args: {
            top: args.top,
            minStores: args.minStores,
            requireAll: args.requireAll,
            group: args.group,
            out: path.relative(repoRoot, outPath).replace(/\\/g, "/"),
        },
        storeCount,
        stores,
        totals: {
            liveRows,
            removedRows,
            canonSkus: canonAgg.size,
            outputCount: top.length,
        },
        rows: top,
    };

    fs.writeFileSync(outPath, JSON.stringify(payload, null, 2) + "\n", "utf8");
    console.log(`Wrote ${path.relative(repoRoot, outPath)} (${top.length} rows)`);
}
main();

View file

@ -30,278 +30,278 @@ const fs = require("fs");
const path = require("path");
/**
 * Runs `git` synchronously with the given arguments.
 *
 * @param {string[]} args - Arguments passed to the git executable.
 * @returns {string} Captured stdout with trailing whitespace removed.
 * @throws {Error} Whatever `execFileSync` throws (e.g. non-zero exit status).
 */
function runGit(args) {
    return execFileSync("git", args, { encoding: "utf8" }).trimEnd();
}
/**
 * Reads a file as it exists at a given commit and parses it as JSON.
 *
 * @param {string} sha - Commit to read from.
 * @param {string} filePath - Path passed to `git show <sha>:<filePath>`.
 * @returns {any|null} The parsed JSON, or null when git fails or the content
 *   is not valid JSON.
 */
function gitShowJson(sha, filePath) {
    try {
        const txt = execFileSync("git", ["show", `${sha}:${filePath}`], {
            encoding: "utf8",
            // Capture stderr instead of inheriting it so a missing file stays quiet.
            stdio: ["ignore", "pipe", "pipe"],
        });
        return JSON.parse(txt);
    } catch {
        return null;
    }
}
/**
 * Checks whether a path exists in the tree of a given commit.
 *
 * @param {string} sha - Commit to inspect; a falsy value yields false.
 * @param {string} filePath - Path checked via `git cat-file -e <sha>:<filePath>`.
 * @returns {boolean} True when the git command succeeds, false otherwise.
 */
function gitFileExistsAtSha(sha, filePath) {
    if (!sha) return false;
    try {
        execFileSync("git", ["cat-file", "-e", `${sha}:${filePath}`], {
            stdio: ["ignore", "ignore", "ignore"],
        });
        return true;
    } catch {
        return false;
    }
}
/**
 * Reads and parses a JSON file from disk.
 *
 * @param {string} filePath - File to read (UTF-8).
 * @returns {any|null} The parsed value, or null when the file cannot be read
 *   or does not contain valid JSON.
 */
function readJson(filePath) {
    try {
        return JSON.parse(fs.readFileSync(filePath, "utf8"));
    } catch {
        return null;
    }
}
/**
 * Creates a directory, including missing parents; a no-op when it already exists.
 *
 * @param {string} dir - Directory path to create.
 */
function ensureDir(dir) {
    fs.mkdirSync(dir, { recursive: true });
}
/**
 * Extracts a numeric price from a display string such as "$1,299.99".
 *
 * @param {*} v - Raw price value (string, number, null, ...).
 * @returns {number|null} The parsed number, or null when the value contains
 *   no digits or does not form a valid number.
 */
function priceToNumber(v) {
    const s = String(v ?? "").replace(/[^0-9.]/g, "");
    // Number("") is 0, so a missing or non-numeric price would otherwise be
    // reported as a real price of 0.
    if (s === "") return null;
    const n = Number(s);
    return Number.isFinite(n) ? n : null;
}
/**
 * Computes the percentage discount between an old and a new price string.
 *
 * @param {*} oldStr - Previous price.
 * @param {*} newStr - Current price.
 * @returns {number|null} Rounded whole-percent discount; 0 when the new price
 *   is not lower; null when either price cannot be parsed or the old price is
 *   not positive.
 */
function pctOff(oldStr, newStr) {
    const a = priceToNumber(oldStr);
    const b = priceToNumber(newStr);
    if (a === null || b === null) return null;
    if (a <= 0) return null; // avoid dividing by zero / negative baselines
    if (b >= a) return 0;
    return Math.round(((a - b) / a) * 100);
}
/**
 * Escapes a value for safe inclusion in HTML text or attribute values.
 *
 * @param {*} s - Value to escape; null/undefined become "".
 * @returns {string} The string with `&`, `<`, `>`, `"` and `'` replaced by
 *   character references.
 */
function htmlEscape(s) {
    return String(s ?? "")
        .replace(/&/g, "&amp;") // must run first so later entities are not double-escaped
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/'/g, "&#39;");
}
/**
 * Normalizes a string into a compact lowercase token.
 *
 * @param {*} s - Input value; falsy values become "".
 * @returns {string} Lowercased input with every character outside
 *   `[A-Za-z0-9_:./-]` (including whitespace) removed.
 */
function normToken(s) {
    return String(s || "")
        .toLowerCase()
        .trim()
        .replace(/\s+/g, " ")
        .replace(/[^\w:./-]+/g, "");
}
/**
 * Looks up the first parent of a commit.
 *
 * @param {string} headSha - Commit whose parent is wanted.
 * @returns {string} The first parent's SHA, or "" when the commit has no
 *   parent or the git command fails.
 */
function getFirstParentSha(headSha) {
    try {
        // Output format: "<sha> <parent1> <parent2> ..."
        const out = runGit(["rev-list", "--parents", "-n", "1", headSha]);
        const parts = out.split(/\s+/).filter(Boolean);
        return parts.length >= 2 ? parts[1] : "";
    } catch {
        return "";
    }
}
/**
 * Lists the JSON files under `data/db` that differ between two commits.
 *
 * @param {string} fromSha - Base commit.
 * @param {string} toSha - Target commit.
 * @returns {string[]} Paths as printed by `git diff --name-only`, restricted
 *   to names ending in ".json"; an empty array when the git command fails.
 */
function listChangedDbFiles(fromSha, toSha) {
    try {
        const out = runGit(["diff", "--name-only", fromSha, toSha, "--", "data/db"]);
        return out
            .split(/\r?\n/)
            .map((s) => s.trim())
            .filter((s) => s && s.endsWith(".json"));
    } catch {
        return [];
    }
}
/**
 * Lists the JSON DB files directly inside `<cwd>/data/db`.
 *
 * @returns {string[]} Sorted repo-relative POSIX paths ("data/db/<name>.json");
 *   an empty array when the directory cannot be read.
 */
function listDbFilesOnDisk() {
    const dir = path.join(process.cwd(), "data", "db");
    try {
        return fs
            .readdirSync(dir, { withFileTypes: true })
            .filter((e) => e.isFile() && e.name.endsWith(".json"))
            .map((e) => path.posix.join("data/db", e.name))
            // Directory order is platform-dependent; sort so index building
            // (which keeps first/last-seen entries) is deterministic.
            .sort();
    } catch {
        return [];
    }
}
// We reuse your existing canonical SKU mapping logic.
/**
 * Loads the project's SKU map from `<cwd>/src/utils/sku_map`, best-effort.
 *
 * @returns {object|null} Whatever `loadSkuMap({ dbDir })` returns, or null
 *   when the module cannot be required or loading throws.
 */
function loadSkuMapOrNull() {
    try {
        // eslint-disable-next-line node/no-missing-require
        const { loadSkuMap } = require(path.join(process.cwd(), "src", "utils", "sku_map"));
        return loadSkuMap({ dbDir: path.join(process.cwd(), "data", "db") });
    } catch {
        return null;
    }
}
/**
 * Produces a normalized SKU key for a listing.
 *
 * Uses the project's `normalizeSkuKey` from `<cwd>/src/utils/sku` when it can
 * be loaded. If loading or calling it throws, falls back to the first
 * standalone 6-digit run in `skuRaw`, then to a synthetic
 * `u:<store>:<url>` key built from normalized tokens.
 *
 * @param {{ skuRaw: *, storeLabel: string, url: string }} item - Listing fields.
 * @returns {string} The key, or "" when none can be derived.
 */
function normalizeSkuKeyOrEmpty({ skuRaw, storeLabel, url }) {
    try {
        // eslint-disable-next-line node/no-missing-require
        const { normalizeSkuKey } = require(path.join(process.cwd(), "src", "utils", "sku"));
        const k = normalizeSkuKey(skuRaw, { storeLabel, url });
        return k ? String(k) : "";
    } catch {
        const m = String(skuRaw ?? "").match(/\b(\d{6})\b/);
        if (m) return m[1];
        if (url) return `u:${normToken(storeLabel)}:${normToken(url)}`;
        return "";
    }
}
/**
 * Maps a normalized SKU key to its canonical SKU.
 *
 * @param {string} skuKey - Normalized SKU key; a falsy value yields "".
 * @param {{ canonicalSku?: Function } | null} skuMap - Optional mapping object.
 * @returns {string} The canonical SKU, or `skuKey` unchanged when no usable
 *   map is supplied or the map returns a falsy value.
 */
function canonicalize(skuKey, skuMap) {
    if (!skuKey) return "";
    if (skuMap && typeof skuMap.canonicalSku === "function") return String(skuMap.canonicalSku(skuKey) || skuKey);
    return skuKey;
}
/**
 * Indexes the items of one DB object by canonical SKU.
 *
 * @param {object|null} obj - Parsed DB file (`storeLabel`/`store`,
 *   `categoryLabel`/`category`, `items`); null/undefined yields an empty map.
 * @param {object|null} skuMap - SKU map passed to `canonicalize`.
 * @param {{ includeRemoved: boolean }} options - Whether items flagged
 *   `removed` are kept.
 * @returns {Map<string, object>} canonSku -> normalized item record. When
 *   several items share a canonical SKU, the last one wins.
 */
function mapDbItems(obj, skuMap, { includeRemoved }) {
    const storeLabel = String(obj?.storeLabel || obj?.store || "");
    const categoryLabel = String(obj?.categoryLabel || obj?.category || "");
    const items = Array.isArray(obj?.items) ? obj.items : [];

    const m = new Map(); // canonSku -> item (for this store+category db)
    for (const it of items) {
        if (!it) continue;
        const removed = Boolean(it.removed);
        if (!includeRemoved && removed) continue;

        const skuKey = normalizeSkuKeyOrEmpty({ skuRaw: it.sku, storeLabel, url: it.url });
        const canon = canonicalize(skuKey, skuMap);
        if (!canon) continue;

        m.set(canon, {
            canonSku: canon,
            skuRaw: String(it.sku || ""),
            name: String(it.name || ""),
            price: String(it.price || ""),
            url: String(it.url || ""),
            img: String(it.img || it.image || it.thumb || ""),
            removed,
            storeLabel,
            categoryLabel,
        });
    }
    return m;
}
/**
 * Compares two versions of a DB object and reports new items and price drops.
 *
 * @param {object|null} prevObj - Previous version of the DB file.
 * @param {object|null} nextObj - Current version of the DB file.
 * @param {object|null} skuMap - SKU map used for canonicalization.
 * @returns {{ newItems: object[], priceDown: object[] }}
 *   `newItems`: live items whose canonical SKU was absent from the previous
 *   version (removed items included). `priceDown`: live items that were also
 *   live before and whose parsed price decreased, extended with `oldPrice`,
 *   `newPrice` and `pct`.
 */
function diffDb(prevObj, nextObj, skuMap) {
    const prevAll = mapDbItems(prevObj, skuMap, { includeRemoved: true });
    const prevLive = mapDbItems(prevObj, skuMap, { includeRemoved: false });
    const nextLive = mapDbItems(nextObj, skuMap, { includeRemoved: false });

    const newItems = [];
    const priceDown = [];

    // An item that was previously present but flagged removed is a re-listing,
    // not a new item, hence the lookup against prevAll rather than prevLive.
    for (const [canon, now] of nextLive.entries()) {
        if (!prevAll.has(canon)) newItems.push(now);
    }

    for (const [canon, now] of nextLive.entries()) {
        const was = prevLive.get(canon);
        if (!was) continue;

        const a = String(was.price || "");
        const b = String(now.price || "");
        if (a === b) continue;

        const aN = priceToNumber(a);
        const bN = priceToNumber(b);
        if (aN === null || bN === null) continue;
        if (bN >= aN) continue;

        priceDown.push({
            ...now,
            oldPrice: a,
            newPrice: b,
            pct: pctOff(a, b),
        });
    }

    return { newItems, priceDown };
}
/**
 * Builds lookup indexes over the live items of every DB file on disk.
 *
 * @param {object|null} skuMap - SKU map used for canonicalization.
 * @returns {{ availability: Map, cheapest: Map, byStoreCanon: Map }}
 *   `availability`: canonSku -> Set(storeLabel) of stores listing it live.
 *   `cheapest`: canonSku -> { priceNum, stores:Set, example } for the lowest
 *   parsed price, where `stores` holds every store at that price.
 *   `byStoreCanon`: storeLabel -> Map(canonSku -> item).
 */
function buildCurrentIndexes(skuMap) {
    const files = listDbFilesOnDisk();
    const availability = new Map(); // canonSku -> Set(storeLabel)
    const cheapest = new Map(); // canonSku -> { priceNum, stores:Set, example:{name,url,img,categoryLabel} }
    const byStoreCanon = new Map(); // storeLabel -> Map(canonSku -> item)

    for (const file of files) {
        const obj = readJson(file);
        if (!obj) continue;
        const storeLabel = String(obj.storeLabel || obj.store || "");
        if (!storeLabel) continue;

        const live = mapDbItems(obj, skuMap, { includeRemoved: false });
        if (!byStoreCanon.has(storeLabel)) byStoreCanon.set(storeLabel, new Map());

        for (const it of live.values()) {
            if (!availability.has(it.canonSku)) availability.set(it.canonSku, new Set());
            availability.get(it.canonSku).add(storeLabel);

            byStoreCanon.get(storeLabel).set(it.canonSku, it);

            const p = priceToNumber(it.price);
            if (p === null) continue;

            const cur = cheapest.get(it.canonSku);
            if (!cur || p < cur.priceNum) {
                // First priced listing, or a strictly lower price: start a new record.
                cheapest.set(it.canonSku, {
                    priceNum: p,
                    stores: new Set([storeLabel]),
                    example: { name: it.name, url: it.url, img: it.img, categoryLabel: it.categoryLabel },
                });
            } else if (p === cur.priceNum) {
                // Tie: remember that this store matches the lowest price too.
                cur.stores.add(storeLabel);
            }
        }
    }

    return { availability, cheapest, byStoreCanon };
}
function renderHtml({ title, subtitle, uniqueNews, bigSales, commitUrl, pagesUrl }) {
const now = new Date().toISOString();
const now = new Date().toISOString();
function section(titleText, rowsHtml) {
return `
function section(titleText, rowsHtml) {
return `
<div style="margin:16px 0 6px 0;font-weight:700;font-size:16px">${htmlEscape(titleText)}</div>
${rowsHtml || `<div style="color:#666">None</div>`}
`;
}
}
function card(it, extraHtml) {
const img = it.img
? `<img src="${htmlEscape(it.img)}" width="84" height="84" style="object-fit:contain;border-radius:8px;border:1px solid #eee;background:#fff" />`
: "";
const name = htmlEscape(it.name || "");
const store = htmlEscape(it.storeLabel || "");
const cat = htmlEscape(it.categoryLabel || "");
const price = htmlEscape(it.price || "");
const url = htmlEscape(it.url || "");
return `
function card(it, extraHtml) {
const img = it.img
? `<img src="${htmlEscape(it.img)}" width="84" height="84" style="object-fit:contain;border-radius:8px;border:1px solid #eee;background:#fff" />`
: "";
const name = htmlEscape(it.name || "");
const store = htmlEscape(it.storeLabel || "");
const cat = htmlEscape(it.categoryLabel || "");
const price = htmlEscape(it.price || "");
const url = htmlEscape(it.url || "");
return `
<table role="presentation" width="100%" cellpadding="0" cellspacing="0" style="border:1px solid #eee;border-radius:12px;margin:10px 0">
<tr>
<td style="padding:12px;vertical-align:top;width:96px">${img || ""}</td>
@ -315,15 +315,15 @@ function renderHtml({ title, subtitle, uniqueNews, bigSales, commitUrl, pagesUrl
</tr>
</table>
`;
}
}
const uniqueHtml = uniqueNews.map((it) => card(it)).join("");
const salesHtml = bigSales
.map((it) => {
const pct = Number.isFinite(it.pct) ? it.pct : null;
const oldP = htmlEscape(it.oldPrice || "");
const newP = htmlEscape(it.newPrice || "");
const extra = `
const uniqueHtml = uniqueNews.map((it) => card(it)).join("");
const salesHtml = bigSales
.map((it) => {
const pct = Number.isFinite(it.pct) ? it.pct : null;
const oldP = htmlEscape(it.oldPrice || "");
const newP = htmlEscape(it.newPrice || "");
const extra = `
<div style="margin-top:6px;font-size:13px">
<span style="color:#b00020;text-decoration:line-through">${oldP}</span>
<span style="margin:0 6px;color:#666"></span>
@ -331,11 +331,11 @@ function renderHtml({ title, subtitle, uniqueNews, bigSales, commitUrl, pagesUrl
${pct !== null ? `<span style="margin-left:8px;color:#137333;font-weight:700">(${pct}% off)</span>` : ""}
</div>
`;
return card({ ...it, price: "" }, extra);
})
.join("");
return card({ ...it, price: "" }, extra);
})
.join("");
const links = `
const links = `
<div style="margin-top:10px;font-size:12px;color:#666">
${commitUrl ? `Commit: <a href="${htmlEscape(commitUrl)}" style="color:#0b57d0;text-decoration:none">${htmlEscape(commitUrl)}</a><br/>` : ""}
${pagesUrl ? `Visualizer: <a href="${htmlEscape(pagesUrl)}" style="color:#0b57d0;text-decoration:none">${htmlEscape(pagesUrl)}</a>` : ""}
@ -343,7 +343,7 @@ function renderHtml({ title, subtitle, uniqueNews, bigSales, commitUrl, pagesUrl
</div>
`;
return `<!doctype html>
return `<!doctype html>
<html>
<head>
<meta charset="utf-8" />
@ -365,137 +365,137 @@ function renderHtml({ title, subtitle, uniqueNews, bigSales, commitUrl, pagesUrl
}
/**
 * Appends `key=value` lines to the file named by the GITHUB_OUTPUT
 * environment variable; does nothing when the variable is unset or empty.
 *
 * @param {Object<string, *>} kv - Output names and values; values are
 *   stringified with `String()`.
 */
function writeGithubOutput(kv) {
    const outPath = process.env.GITHUB_OUTPUT;
    if (!outPath) return;
    const lines = Object.entries(kv).map(([k, v]) => `${k}=${String(v)}`);
    fs.appendFileSync(outPath, lines.join("\n") + "\n", "utf8");
}
/**
 * Entry point: compares the DB files changed between HEAD and its first
 * parent and prepares an e-mail alert.
 *
 * Two kinds of entries are collected:
 *  - "unique new": items new in a changed DB file that are currently live in
 *    exactly one store (the store of that file);
 *  - "big sales": price drops of at least 20% where the new price equals the
 *    current lowest price across stores and this store is among the cheapest.
 *
 * Writes `reports/alert.html`, `reports/alert_subject.txt` and
 * `reports/alert_should_send.txt`, and mirrors the result to GITHUB_OUTPUT.
 * When HEAD has no parent or no DB file changed, only the "0" flag is written.
 */
function main() {
    const repoRoot = process.cwd();
    const reportsDir = path.join(repoRoot, "reports");
    ensureDir(reportsDir);

    const headSha = runGit(["rev-parse", "HEAD"]);
    const parentSha = getFirstParentSha(headSha);
    if (!parentSha) {
        fs.writeFileSync(path.join(reportsDir, "alert_should_send.txt"), "0\n", "utf8");
        writeGithubOutput({ should_send: 0 });
        return;
    }

    const skuMap = loadSkuMapOrNull();

    const changed = listChangedDbFiles(parentSha, headSha);
    if (!changed.length) {
        fs.writeFileSync(path.join(reportsDir, "alert_should_send.txt"), "0\n", "utf8");
        writeGithubOutput({ should_send: 0 });
        return;
    }

    const { availability, cheapest, byStoreCanon } = buildCurrentIndexes(skuMap);

    const uniqueNews = [];
    const bigSales = [];

    for (const file of changed) {
        const existedBefore = gitFileExistsAtSha(parentSha, file);
        const existsNow = gitFileExistsAtSha(headSha, file);

        // NEW FEATURE: if this DB file is brand new, ignore its "new items" for alert.
        if (!existedBefore && existsNow) {
            continue;
        }

        const prevObj = gitShowJson(parentSha, file);
        const nextObj = gitShowJson(headSha, file);
        if (!prevObj && !nextObj) continue;

        const { newItems, priceDown } = diffDb(prevObj, nextObj, skuMap);

        for (const it of newItems) {
            // Only alert when this store is currently the sole live seller.
            const stores = availability.get(it.canonSku);
            const storeCount = stores ? stores.size : 0;
            if (storeCount !== 1) continue;
            if (!stores.has(it.storeLabel)) continue;

            // Prefer the on-disk record over the one read from the commit.
            const cur = (byStoreCanon.get(it.storeLabel) || new Map()).get(it.canonSku) || it;
            uniqueNews.push(cur);
        }

        for (const it of priceDown) {
            const pct = it.pct;
            if (!Number.isFinite(pct) || pct < 20) continue;

            const best = cheapest.get(it.canonSku);
            if (!best) continue;

            const newN = priceToNumber(it.newPrice);
            if (newN === null) continue;

            // The dropped price must be the current lowest price, at this store.
            if (best.priceNum !== newN) continue;
            if (!best.stores.has(it.storeLabel)) continue;

            const cur = (byStoreCanon.get(it.storeLabel) || new Map()).get(it.canonSku) || it;

            bigSales.push({
                ...cur,
                oldPrice: it.oldPrice,
                newPrice: it.newPrice,
                pct,
            });
        }
    }

    // Keeps the first entry per (canonSku, storeLabel) pair.
    function dedupe(arr) {
        const out = [];
        const seen = new Set();
        for (const it of arr) {
            const k = `${it.canonSku}|${it.storeLabel}`;
            if (seen.has(k)) continue;
            seen.add(k);
            out.push(it);
        }
        return out;
    }

    const uniqueFinal = dedupe(uniqueNews).sort((a, b) => (a.name || "").localeCompare(b.name || ""));
    const salesFinal = dedupe(bigSales).sort((a, b) => (b.pct || 0) - (a.pct || 0));

    const shouldSend = uniqueFinal.length > 0 || salesFinal.length > 0;

    const subject = shouldSend
        ? `Spirit Tracker: ${uniqueFinal.length} unique new · ${salesFinal.length} big sales`
        : `Spirit Tracker: (no alert)`;

    const ghRepo = process.env.GITHUB_REPOSITORY || "";
    const ghUrl = process.env.GITHUB_SERVER_URL || "https://github.com";
    const commitUrl = ghRepo ? `${ghUrl}/${ghRepo}/commit/${headSha}` : "";
    const pagesUrl = process.env.PAGES_URL || "";

    const html = renderHtml({
        title: "Spirit Tracker Alert",
        subtitle: subject,
        uniqueNews: uniqueFinal,
        bigSales: salesFinal,
        commitUrl,
        pagesUrl,
    });

    const htmlPath = path.join(reportsDir, "alert.html");
    const subjPath = path.join(reportsDir, "alert_subject.txt");
    const sendPath = path.join(reportsDir, "alert_should_send.txt");

    fs.writeFileSync(htmlPath, html, "utf8");
    fs.writeFileSync(subjPath, subject + "\n", "utf8");
    fs.writeFileSync(sendPath, shouldSend ? "1\n" : "0\n", "utf8");

    writeGithubOutput({
        should_send: shouldSend ? 1 : 0,
        subject,
        html_path: htmlPath,
    });
}
// Script entry point: run the alert build as soon as the file is executed.
main();

View file

@ -6,134 +6,134 @@ const fs = require("fs");
const path = require("path");
/**
 * Runs `git` synchronously with the given argument list.
 *
 * @param {string[]} args - Arguments passed to the git executable.
 * @returns {string} git's stdout decoded as UTF-8, with trailing whitespace removed.
 * @throws Whatever execFileSync throws (callers in this file wrap the call in try/catch).
 */
function runGit(args) {
  return execFileSync("git", args, { encoding: "utf8" }).trimEnd();
}
/**
 * Lists the `.json` files that sit directly inside `dbDir` (no recursion).
 *
 * @param {string} dbDir - Directory to scan.
 * @returns {string[]} Paths formed by joining `dbDir` with each file name;
 *   an empty array when the directory cannot be read.
 */
function listDbFiles(dbDir) {
  try {
    return fs
      .readdirSync(dbDir, { withFileTypes: true })
      .filter((e) => e.isFile() && e.name.endsWith(".json"))
      .map((e) => path.join(dbDir, e.name));
  } catch {
    // Best effort: a missing/unreadable directory simply means "no files".
    return [];
  }
}
/**
 * Lists report files directly inside `reportsDir` whose names match
 * `common_listings_<anything>_top<digits>.json` (case-insensitive).
 *
 * @param {string} reportsDir - Directory to scan (not recursive).
 * @returns {string[]} Paths formed by joining `reportsDir` with each matching name;
 *   an empty array when the directory cannot be read.
 */
function listCommonListingReportFiles(reportsDir) {
  try {
    return fs
      .readdirSync(reportsDir, { withFileTypes: true })
      .filter((e) => e.isFile() && e.name.endsWith(".json"))
      .map((e) => e.name)
      .filter((name) => /^common_listings_.*_top\d+\.json$/i.test(name))
      .map((name) => path.join(reportsDir, name));
  } catch {
    // Best effort: treat an unreadable directory as having no reports.
    return [];
  }
}
/**
 * Extracts the leading `YYYY-MM-DD` portion of a timestamp string.
 *
 * @param {*} iso - Value to read; null/undefined are treated as an empty string.
 * @returns {string} The date prefix, or "" when the value does not start with one.
 */
function dateOnly(iso) {
  const m = String(iso ?? "").match(/^(\d{4}-\d{2}-\d{2})/);
  return m ? m[1] : "";
}
/**
 * Builds a manifest mapping each file to one commit per calendar date
 * (the most recent commit touching the file on that date).
 *
 * @param {object} opts
 * @param {string} opts.repoRoot - Accepted for call-site symmetry; not read here.
 * @param {string[]} opts.relFiles - Paths handed to `git log -- <path>`.
 * @param {number} opts.maxRawPerFile - Max commits requested from git per file.
 * @param {number} opts.maxDaysPerFile - Max per-date entries kept per file (newest kept).
 * @returns {{generatedAt: string, branch: string, files: Object<string, Array<{sha: string, date: string, ts: string}>>}}
 *   `files[rel]` is ordered oldest -> newest. Files whose `git log` fails are omitted.
 */
function buildCommitPayloadForFiles({ repoRoot, relFiles, maxRawPerFile, maxDaysPerFile }) {
  const payload = {
    generatedAt: new Date().toISOString(),
    branch: "data",
    files: {},
  };

  // Sort a copy so the caller's array is not reordered as a side effect.
  for (const rel of [...relFiles].sort()) {
    let txt = "";
    try {
      // %H = sha, %cI = committer date strict ISO 8601 (includes time + tz)
      txt = runGit(["log", "--format=%H %cI", `-${maxRawPerFile}`, "--", rel]);
    } catch {
      continue;
    }

    const lines = txt
      .split(/\r?\n/)
      .map((s) => s.trim())
      .filter(Boolean);

    // git log is newest -> oldest.
    // Keep the FIRST commit we see for each date (that is the most recent commit for that date).
    const byDate = new Map(); // date -> { sha, date, ts }
    for (const line of lines) {
      const m = line.match(/^([0-9a-f]{7,40})\s+(.+)$/i);
      if (!m) continue;

      const sha = m[1];
      const ts = m[2];
      const d = dateOnly(ts);
      if (!d) continue;

      if (!byDate.has(d)) byDate.set(d, { sha, date: d, ts });
    }

    // Convert to oldest -> newest
    let arr = [...byDate.values()].reverse();

    // Keep only the newest maxDaysPerFile entries (still oldest -> newest)
    if (arr.length > maxDaysPerFile) {
      arr = arr.slice(arr.length - maxDaysPerFile);
    }

    payload.files[rel] = arr;
  }

  return payload;
}
/**
 * Generates the commit manifests consumed by the viz.
 *
 * Relative to process.cwd(), writes:
 *   - viz/data/db_commits.json              (history of data/db/*.json)
 *   - viz/data/common_listings_commits.json (history of reports/common_listings_*_top<N>.json)
 * and prints one "Wrote ..." line to stdout per file.
 */
function main() {
  const repoRoot = process.cwd();
  const dbDir = path.join(repoRoot, "data", "db");
  const reportsDir = path.join(repoRoot, "reports");
  const outDir = path.join(repoRoot, "viz", "data");

  fs.mkdirSync(outDir, { recursive: true });

  // ---- Existing output (UNCHANGED): db_commits.json ----
  const outFileDb = path.join(outDir, "db_commits.json");

  // Manifest keys are always posix-style repo-relative paths.
  const dbFiles = listDbFiles(dbDir).map((abs) => path.posix.join("data/db", path.basename(abs)));

  // We want the viz to show ONE point per day (the most recent run that day).
  // So we collapse multiple commits per day down to the newest commit for that date.
  //
  // With multiple runs/day, we also want to keep a long-ish daily history.
  // Raw commits per day could be ~4, so grab a larger raw window and then collapse.
  const MAX_RAW_PER_FILE = 2400; // ~600 days @ 4 runs/day
  const MAX_DAYS_PER_FILE = 600; // daily points kept after collapsing

  const payloadDb = buildCommitPayloadForFiles({
    repoRoot,
    relFiles: dbFiles,
    maxRawPerFile: MAX_RAW_PER_FILE,
    maxDaysPerFile: MAX_DAYS_PER_FILE,
  });

  fs.writeFileSync(outFileDb, JSON.stringify(payloadDb, null, 2) + "\n", "utf8");
  process.stdout.write(`Wrote ${outFileDb} (${Object.keys(payloadDb.files).length} files)\n`);

  // ---- New output: common listings report commits ----
  const outFileCommon = path.join(outDir, "common_listings_commits.json");

  const reportFilesAbs = listCommonListingReportFiles(reportsDir);
  const reportFilesRel = reportFilesAbs.map((abs) => path.posix.join("reports", path.basename(abs)));

  const payloadCommon = buildCommitPayloadForFiles({
    repoRoot,
    relFiles: reportFilesRel,
    maxRawPerFile: MAX_RAW_PER_FILE,
    maxDaysPerFile: MAX_DAYS_PER_FILE,
  });

  fs.writeFileSync(outFileCommon, JSON.stringify(payloadCommon, null, 2) + "\n", "utf8");
  process.stdout.write(`Wrote ${outFileCommon} (${Object.keys(payloadCommon.files).length} files)\n`);
}
// Script entry point: build both commit manifests when the file is executed.
main();

View file

@ -6,233 +6,224 @@ const path = require("path");
const { execFileSync } = require("child_process");
/**
 * Creates `dir` and any missing parent directories.
 * With `recursive: true` this does not fail when the directory already exists.
 *
 * @param {string} dir - Directory path to create.
 */
function ensureDir(dir) {
  fs.mkdirSync(dir, { recursive: true });
}
/**
 * Collects the `.json` files that sit directly inside `dir` (no recursion).
 *
 * @param {string} dir - Directory to scan.
 * @returns {string[]} Paths formed by joining `dir` with each file name.
 *   If reading the directory fails, whatever was collected so far (normally
 *   nothing) is returned instead of throwing.
 */
function listJsonFiles(dir) {
  const out = [];
  try {
    for (const ent of fs.readdirSync(dir, { withFileTypes: true })) {
      if (!ent.isFile()) continue;
      if (!String(ent.name || "").endsWith(".json")) continue;
      out.push(path.join(dir, ent.name));
    }
  } catch {
    // Deliberately ignored: a missing/unreadable directory yields an empty list.
  }
  return out;
}
/**
 * Reads and parses a UTF-8 JSON file.
 *
 * @param {string} file - Path of the file to read.
 * @returns {*} The parsed value, or null when the file cannot be read or parsed.
 */
function readJson(file) {
  try {
    return JSON.parse(fs.readFileSync(file, "utf8"));
  } catch {
    return null;
  }
}
/**
 * Loads `<repoRoot>/viz/data/db_commits.json`.
 *
 * @param {string} repoRoot - Repository root directory.
 * @returns {*} The parsed manifest, or null when the file is missing or not valid JSON.
 */
function readDbCommitsOrNull(repoRoot) {
  const p = path.join(repoRoot, "viz", "data", "db_commits.json");
  try {
    return JSON.parse(fs.readFileSync(p, "utf8"));
  } catch {
    return null;
  }
}
/**
 * Reads `filePath` as it existed at commit `sha` (via `git show <sha>:<path>`)
 * and parses it as JSON.
 *
 * @param {string} sha - Commit to read from.
 * @param {string} filePath - Path inside the repository.
 * @returns {*} The parsed JSON, or null when git fails or the content is not valid JSON.
 */
function gitShowJson(sha, filePath) {
  try {
    const txt = execFileSync("git", ["show", `${sha}:${filePath}`], {
      encoding: "utf8",
      stdio: ["ignore", "pipe", "pipe"], // silence git fatal spam
    });
    return JSON.parse(txt);
  } catch {
    return null;
  }
}
/**
 * Pulls the first standalone run of exactly six digits out of a value.
 *
 * @param {*} v - Value to inspect; null/undefined are treated as an empty string.
 * @returns {string} The six digits, or "" when no word-bounded 6-digit run exists.
 */
function normalizeCspc(v) {
  const m = String(v ?? "").match(/\b(\d{6})\b/);
  return m ? m[1] : "";
}
/**
 * 32-bit FNV-1a hash over the UTF-16 code units of `str`.
 *
 * @param {string} str - Text to hash.
 * @returns {string} The hash as 8 lowercase hex digits (zero-padded).
 */
function fnv1a32(str) {
  let h = 0x811c9dc5; // FNV offset basis
  for (let i = 0; i < str.length; i++) {
    h ^= str.charCodeAt(i);
    h = Math.imul(h, 0x01000193); // FNV prime, multiplied with 32-bit wraparound
  }
  // >>> 0 reinterprets the signed 32-bit result as unsigned before formatting.
  return (h >>> 0).toString(16).padStart(8, "0");
}
/**
 * Builds a stand-in SKU key for an item that has no usable real SKU.
 *
 * @param {string} storeLabel - Store label; falsy values fall back to "store".
 * @param {string} url - Item URL.
 * @returns {string} `u:` followed by the fnv1a32 hash of `<store>|<url>`,
 *   or "" when the URL is empty.
 */
function makeSyntheticSku(storeLabel, url) {
  const store = String(storeLabel || "store");
  const u = String(url || "");
  if (!u) return "";
  return `u:${fnv1a32(`${store}|${u}`)}`;
}
/**
 * Returns the key used to identify an item within a store.
 *
 * @param {object} it - Item record; `sku` and `url` are read.
 * @param {string} storeLabel - Store label used for the synthetic fallback.
 * @returns {string} The normalized 6-digit SKU when `it.sku` contains one,
 *   otherwise the synthetic key from makeSyntheticSku (which may be "").
 */
function keySkuForItem(it, storeLabel) {
  const real = normalizeCspc(it?.sku);
  if (real) return real;
  return makeSyntheticSku(storeLabel, it?.url);
}
/**
 * Returns Map(skuKey -> firstSeenAtISO) for this dbFile (store/category file).
 *
 * Walks the file's commit history and records, for every wanted key, the
 * timestamp of the first commit in which a non-removed item with that key appears.
 *
 * @param {object} opts
 * @param {string} opts.repoRoot - Accepted for call-site symmetry; not read here.
 * @param {string} opts.relDbFile - Path passed to gitShowJson for each commit.
 * @param {string} opts.storeLabel - Fallback label when a historical snapshot has none.
 * @param {Iterable<string>} opts.wantSkuKeys - Keys to resolve.
 * @param {Array<{sha: string, ts: string}>|null} opts.commitsArr - Commits, oldest -> newest.
 * @param {string} opts.nowIso - Timestamp used for keys with no history.
 * @returns {Map<string, string>} One entry per wanted key.
 */
function computeFirstSeenForDbFile({ repoRoot, relDbFile, storeLabel, wantSkuKeys, commitsArr, nowIso }) {
  const out = new Map();
  const want = new Set(wantSkuKeys);

  // No commit history available -> treat as new today
  if (!Array.isArray(commitsArr) || !commitsArr.length) {
    for (const k of want) out.set(k, nowIso);
    return out;
  }

  // commitsArr is oldest -> newest (from db_commits.json)
  for (const c of commitsArr) {
    const sha = String(c?.sha || "");
    const ts = String(c?.ts || "");
    if (!sha || !ts) continue;

    // A snapshot that cannot be read/parsed yields null and contributes no items.
    const obj = gitShowJson(sha, relDbFile);
    const items = Array.isArray(obj?.items) ? obj.items : [];
    const sLabel = String(obj?.storeLabel || obj?.store || storeLabel || "");

    for (const it of items) {
      if (!it) continue;
      if (Boolean(it.removed)) continue; // first time it existed LIVE in this file

      const k = keySkuForItem(it, sLabel);
      if (!k) continue;
      if (!want.has(k)) continue;
      if (out.has(k)) continue;

      out.set(k, ts);
      if (out.size >= want.size) break;
    }

    // Every wanted key is resolved; older-to-newer scan can stop early.
    if (out.size >= want.size) break;
  }

  // Anything never seen historically -> new today
  for (const k of want) if (!out.has(k)) out.set(k, nowIso);

  return out;
}
/**
 * Builds `viz/data/index.json`: one flat row per item found in `data/db/*.json`
 * (removed items included), each annotated with store/category metadata and a
 * `firstSeenAt` timestamp. Paths are resolved relative to this script's parent
 * directory. Prints a one-line summary to stdout.
 */
function main() {
  const repoRoot = path.resolve(__dirname, "..");
  const dbDir = path.join(repoRoot, "data", "db");
  const outDir = path.join(repoRoot, "viz", "data");
  const outFile = path.join(outDir, "index.json");

  ensureDir(outDir);

  const nowIso = new Date().toISOString();
  const commitsManifest = readDbCommitsOrNull(repoRoot);

  const items = [];
  let liveCount = 0;

  for (const file of listJsonFiles(dbDir)) {
    const obj = readJson(file);
    if (!obj) continue;

    const store = String(obj.store || "");
    const storeLabel = String(obj.storeLabel || store || "");
    const category = String(obj.category || "");
    const categoryLabel = String(obj.categoryLabel || "");
    const source = String(obj.source || "");
    const updatedAt = String(obj.updatedAt || "");

    const dbFile = path.relative(repoRoot, file).replace(/\\/g, "/"); // e.g. data/db/foo.json

    const arr = Array.isArray(obj.items) ? obj.items : [];

    // Build want keys from CURRENT file contents (includes removed rows too)
    const wantSkuKeys = [];
    for (const it of arr) {
      if (!it) continue;
      const k = keySkuForItem(it, storeLabel);
      if (k) wantSkuKeys.push(k);
    }

    const commitsArr = commitsManifest?.files?.[dbFile] || null;
    const firstSeenByKey = computeFirstSeenForDbFile({
      repoRoot,
      relDbFile: dbFile,
      storeLabel,
      wantSkuKeys,
      commitsArr,
      nowIso,
    });

    for (const it of arr) {
      if (!it) continue;

      const removed = Boolean(it.removed);
      if (!removed) liveCount++;

      const sku = String(it.sku || "").trim();
      const name = String(it.name || "").trim();
      const price = String(it.price || "").trim();
      const url = String(it.url || "").trim();
      const img = String(it.img || it.image || it.thumb || "").trim();

      // Items with no derivable key fall back to "now" as their first-seen time.
      const skuKey = keySkuForItem(it, storeLabel);
      const firstSeenAt = skuKey ? String(firstSeenByKey.get(skuKey) || nowIso) : nowIso;

      items.push({
        sku,
        name,
        price,
        url,
        img,
        removed, // NEW (additive): allows viz to show history / removed-only items
        store,
        storeLabel,
        category,
        categoryLabel,
        source,
        updatedAt,
        firstSeenAt, // NEW: first time this item appeared LIVE in this store/category db file (or now)
        dbFile,
      });
    }
  }

  // Sort on a composite key so the output order does not depend on directory listing order.
  items.sort((a, b) => {
    const ak = `${a.sku}|${a.storeLabel}|${a.removed ? 1 : 0}|${a.name}|${a.url}`;
    const bk = `${b.sku}|${b.storeLabel}|${b.removed ? 1 : 0}|${b.name}|${b.url}`;
    return ak.localeCompare(bk);
  });

  const outObj = {
    generatedAt: nowIso,
    // Additive metadata. Old readers can ignore.
    includesRemoved: true,
    count: items.length,
    countLive: liveCount,
    items,
  };

  fs.writeFileSync(outFile, JSON.stringify(outObj, null, 2) + "\n", "utf8");
  process.stdout.write(`Wrote ${path.relative(repoRoot, outFile)} (${items.length} rows)\n`);
}
// Expose main() so other scripts can trigger the index build programmatically.
module.exports = { main };

// Run exactly once, and only when this file is executed directly (not when required).
if (require.main === module) {
  main();
}

View file

@ -6,474 +6,467 @@ const fs = require("fs");
const path = require("path");
/**
 * Executes `git <args>` synchronously and returns its UTF-8 stdout with
 * trailing whitespace stripped. Any error raised by execFileSync is not caught here.
 *
 * @param {string[]} args - Arguments for the git executable.
 * @returns {string} Trimmed stdout.
 */
function runGit(args) {
  return execFileSync("git", args, { encoding: "utf8" }).trimEnd();
}
/**
 * Loads the JSON content of `filePath` at commit `sha` using `git show`.
 *
 * @param {string} sha - Commit identifier.
 * @param {string} filePath - Repository-relative path.
 * @returns {*} Parsed JSON, or null if git exits with an error or parsing fails.
 */
function gitShowJson(sha, filePath) {
  try {
    const txt = execFileSync("git", ["show", `${sha}:${filePath}`], {
      encoding: "utf8",
      stdio: ["ignore", "pipe", "pipe"], // silence git fatal spam
    });
    return JSON.parse(txt);
  } catch {
    return null;
  }
}
/**
 * Checks whether `filePath` exists in commit `sha` (via `git cat-file -e`).
 *
 * @param {string} sha - Commit identifier; an empty value short-circuits to false.
 * @param {string} filePath - Repository-relative path.
 * @returns {boolean} true when git reports the object exists, false otherwise
 *   (including when git itself fails).
 */
function gitFileExistsAtSha(sha, filePath) {
  if (!sha) return false;
  try {
    execFileSync("git", ["cat-file", "-e", `${sha}:${filePath}`], {
      stdio: ["ignore", "ignore", "ignore"],
    });
    return true;
  } catch {
    return false;
  }
}
/**
 * Lists file paths under `dirRel` in the tree of commit `sha`
 * (`git ls-tree -r --name-only`).
 *
 * @param {string} sha - Commit identifier.
 * @param {string} dirRel - Repository-relative directory.
 * @returns {string[]} Non-empty, trimmed path lines; [] when git fails.
 */
function gitListTreeFiles(sha, dirRel) {
  try {
    const out = runGit(["ls-tree", "-r", "--name-only", sha, dirRel]);
    return out
      .split(/\r?\n/)
      .map((s) => s.trim())
      .filter(Boolean);
  } catch {
    return [];
  }
}
/**
 * Reads a UTF-8 file from disk and parses it as JSON.
 *
 * @param {string} filePath - File to read.
 * @returns {*} Parsed value, or null when reading or parsing fails.
 */
function readJsonFileOrNull(filePath) {
  try {
    return JSON.parse(fs.readFileSync(filePath, "utf8"));
  } catch {
    return null;
  }
}
/**
 * Extracts a six-digit code from a value.
 * Only a run of exactly six digits delimited by word boundaries qualifies;
 * the first such run is returned.
 *
 * @param {*} v - Value to inspect (null/undefined become "").
 * @returns {string} The six digits, or "" if none is found.
 */
function normalizeCspc(v) {
  const m = String(v ?? "").match(/\b(\d{6})\b/);
  return m ? m[1] : "";
}
/**
 * Normalizes a price for string comparison: stringifies the value
 * (null/undefined become "") and trims surrounding whitespace.
 *
 * @param {*} p - Raw price value.
 * @returns {string} Trimmed price text.
 */
function normPriceStr(p) {
  return String(p ?? "").trim();
}
/**
 * Converts price text to a number by discarding every character except
 * digits and ".".
 *
 * @param {*} v - Raw price value (null/undefined become "").
 * @returns {number|null} The numeric value, or null when the remaining text
 *   is not a finite number (e.g. "1.2.3"). Note that text with no digits
 *   reduces to "" and therefore yields 0, because Number("") is 0.
 */
function priceToNumber(v) {
  const s = String(v ?? "").replace(/[^0-9.]/g, "");
  const n = Number(s);
  return Number.isFinite(n) ? n : null;
}
/**
 * Returns the `YYYY-MM-DD` prefix of a timestamp string.
 *
 * @param {*} iso - Timestamp text (null/undefined become "").
 * @returns {string} The leading date, or "" when the text does not begin with one.
 */
function dateOnly(iso) {
  const m = String(iso ?? "").match(/^(\d{4}-\d{2}-\d{2})/);
  return m ? m[1] : "";
}
/**
 * Computes the 32-bit FNV-1a hash of a string, one UTF-16 code unit at a time.
 *
 * @param {string} str - Input text.
 * @returns {string} Hash rendered as 8 zero-padded lowercase hex characters.
 */
function fnv1a32(str) {
  let h = 0x811c9dc5; // FNV offset basis
  for (let i = 0; i < str.length; i++) {
    h ^= str.charCodeAt(i);
    h = Math.imul(h, 0x01000193); // multiply by the FNV prime with 32-bit wraparound
  }
  // Convert the signed 32-bit value to unsigned before hex formatting.
  return (h >>> 0).toString(16).padStart(8, "0");
}
/**
 * Derives a substitute SKU key from the store label and item URL.
 *
 * @param {string} storeLabel - Store label; falsy values are replaced by "store".
 * @param {string} url - Item URL.
 * @returns {string} "" when there is no URL, otherwise `u:<fnv1a32 of "store|url">`.
 */
function makeSyntheticSku(storeLabel, url) {
  const store = String(storeLabel || "store");
  const u = String(url || "");
  if (!u) return "";
  return `u:${fnv1a32(`${store}|${u}`)}`;
}
/**
 * Chooses the identity key for an item: the six-digit code found in `it.sku`
 * if there is one, otherwise a synthetic key derived from the store label and
 * `it.url` (which is "" when the item has no URL).
 *
 * @param {object} it - Item record.
 * @param {string} storeLabel - Store label for the synthetic fallback.
 * @returns {string} Identity key, possibly "".
 */
function keySkuForItem(it, storeLabel) {
  const real = normalizeCspc(it?.sku);
  if (real) return real;
  return makeSyntheticSku(storeLabel, it?.url);
}
/**
 * Indexes a db snapshot's items by their identity key (see keySkuForItem).
 *
 * When several rows share a key, a live row beats a removed row; between rows
 * of the same removed-state, the one with more of name/price/url filled in
 * wins, and ties keep the earlier row.
 *
 * @param {object|null} obj - Snapshot with `items` and `storeLabel`/`store`.
 * @param {{includeRemoved: boolean}} [opts] - When `includeRemoved` is falsy,
 *   rows flagged `removed` are skipped entirely.
 * @returns {Map<string, {sku: string, name: string, price: string, url: string, removed: boolean}>}
 */
function mapBySku(obj, { includeRemoved } = { includeRemoved: false }) {
  const m = new Map();
  const items = Array.isArray(obj?.items) ? obj.items : [];
  const storeLabel = String(obj?.storeLabel || obj?.store || "");

  for (const it of items) {
    if (!it) continue;

    const sku = keySkuForItem(it, storeLabel);
    if (!sku) continue;

    const removed = Boolean(it.removed);
    if (!includeRemoved && removed) continue;

    const next = {
      sku,
      name: String(it.name || ""),
      price: String(it.price || ""),
      url: String(it.url || ""),
      removed,
    };

    const prev = m.get(sku);
    if (!prev) {
      m.set(sku, next);
      continue;
    }

    // Prefer the non-removed record if both exist.
    if (prev.removed && !next.removed) {
      m.set(sku, next);
      continue;
    }
    if (!prev.removed && next.removed) {
      continue; // keep the active one
    }

    // Otherwise keep the “better” one (more complete data), deterministic.
    const prevScore = (prev.name ? 1 : 0) + (prev.price ? 1 : 0) + (prev.url ? 1 : 0);
    const nextScore = (next.name ? 1 : 0) + (next.price ? 1 : 0) + (next.url ? 1 : 0);
    if (nextScore > prevScore) m.set(sku, next);
  }

  return m;
}
/**
 * Compares two snapshots of the same db file.
 *
 * @param {object|null} prevObj - Earlier snapshot.
 * @param {object|null} nextObj - Later snapshot.
 * @returns {{newItems: object[], restoredItems: object[], removedItems: object[], priceChanges: object[]}}
 *   - newItems: live now, key absent from the earlier snapshot (even as removed).
 *   - restoredItems: live now, present but flagged removed earlier.
 *   - removedItems: live earlier, now missing or flagged removed.
 *   - priceChanges: live in both with different trimmed price text; `kind` is
 *     "price_down"/"price_up" when both prices parse to numbers that differ,
 *     otherwise "price_change".
 */
function diffDb(prevObj, nextObj) {
  const prevAll = mapBySku(prevObj, { includeRemoved: true });
  const nextAll = mapBySku(nextObj, { includeRemoved: true });

  const prevLive = mapBySku(prevObj, { includeRemoved: false });
  const nextLive = mapBySku(nextObj, { includeRemoved: false });

  const newItems = [];
  const restoredItems = [];
  const removedItems = [];
  const priceChanges = [];

  // New vs restored: decided against everything the earlier snapshot knew about.
  for (const [sku, now] of nextLive.entries()) {
    const had = prevAll.get(sku);
    if (!had) {
      newItems.push({ ...now });
      continue;
    }
    if (had.removed) {
      restoredItems.push({ ...now });
      continue;
    }
  }

  // Removed: was live before, and is now gone or explicitly flagged removed.
  for (const [sku, was] of prevLive.entries()) {
    const nxt = nextAll.get(sku);
    if (!nxt || nxt.removed) {
      removedItems.push({ ...was });
    }
  }

  // Price changes: only for items live on both sides.
  for (const [sku, now] of nextLive.entries()) {
    const was = prevLive.get(sku);
    if (!was) continue;

    const a = normPriceStr(was.price);
    const b = normPriceStr(now.price);
    if (a === b) continue;

    const aN = priceToNumber(a);
    const bN = priceToNumber(b);

    let kind = "price_change";
    if (aN !== null && bN !== null) {
      if (bN < aN) kind = "price_down";
      else if (bN > aN) kind = "price_up";
      else kind = "price_change";
    }

    priceChanges.push({
      kind,
      sku,
      name: now.name || was.name || "",
      oldPrice: a,
      newPrice: b,
      url: now.url || was.url || "",
    });
  }

  return { newItems, restoredItems, removedItems, priceChanges };
}
/**
 * Resolves the current HEAD commit via `git rev-parse --verify HEAD`.
 *
 * @returns {string} The command's trimmed output, or "" when git fails.
 */
function getHeadShaOrEmpty() {
  try {
    return runGit(["rev-parse", "--verify", "HEAD"]);
  } catch {
    return "";
  }
}
/**
 * Returns the first parent of a commit.
 *
 * `git rev-list --parents -n 1 <sha>` prints the commit followed by its
 * parents on one line; the second token is the first parent.
 *
 * @param {string} sha - Commit identifier.
 * @returns {string} First parent sha, or "" when there is none or git fails.
 */
function firstParentSha(sha) {
  try {
    const out = runGit(["rev-list", "--parents", "-n", "1", sha]);
    const parts = out.split(/\s+/).filter(Boolean);
    return parts.length >= 2 ? parts[1] : "";
  } catch {
    return "";
  }
}
/**
 * Lists the `data/db` files to examine between two points in history.
 *
 * @param {string} fromSha - Starting commit, or "" when there is no baseline.
 * @param {string} toSha - Ending commit, or the literal "WORKTREE" for files on disk.
 * @returns {string[]} Repo-relative paths:
 *   - no baseline, commit target: every file under data/db in that commit's tree;
 *   - no baseline, WORKTREE: every `.json` file in `<cwd>/data/db`;
 *   - otherwise: paths reported by `git diff --name-only` limited to data/db.
 *   Returns [] when the directory read or the git command fails.
 */
function listChangedDbFiles(fromSha, toSha) {
  if (!fromSha && toSha && toSha !== "WORKTREE") {
    return gitListTreeFiles(toSha, "data/db");
  }

  if (!fromSha && toSha === "WORKTREE") {
    try {
      return fs
        .readdirSync(path.join(process.cwd(), "data", "db"), { withFileTypes: true })
        .filter((e) => e.isFile() && e.name.endsWith(".json"))
        .map((e) => path.posix.join("data/db", e.name));
    } catch {
      return [];
    }
  }

  try {
    if (toSha === "WORKTREE") {
      // Single-revision diff compares that commit against the working tree.
      const out = runGit(["diff", "--name-only", fromSha, "--", "data/db"]);
      return out
        .split(/\r?\n/)
        .map((s) => s.trim())
        .filter(Boolean);
    }
    const out = runGit(["diff", "--name-only", fromSha, toSha, "--", "data/db"]);
    return out
      .split(/\r?\n/)
      .map((s) => s.trim())
      .filter(Boolean);
  } catch {
    return [];
  }
}
/**
 * Lists commits touching `data/db` since the given time.
 *
 * @param {string} sinceIso - Value passed to `git log --since=`.
 * @returns {Array<{sha: string, ts: string, date: string}>} Commits ordered
 *   oldest -> newest (git's newest-first output is reversed); `ts` is the
 *   committer date and `date` its YYYY-MM-DD prefix. Returns [] when git fails.
 */
function logDbCommitsSince(sinceIso) {
  try {
    const out = runGit(["log", `--since=${sinceIso}`, "--format=%H %cI", "--", "data/db"]);
    const lines = out
      .split(/\r?\n/)
      .map((s) => s.trim())
      .filter(Boolean);
    const arr = [];
    for (const line of lines) {
      const m = line.match(/^([0-9a-f]{7,40})\s+(.+)$/i);
      if (!m) continue;
      const sha = m[1];
      const ts = m[2];
      const d = dateOnly(ts);
      arr.push({ sha, ts, date: d });
    }
    arr.reverse();
    return arr;
  } catch {
    return [];
  }
}
function main() {
const repoRoot = process.cwd();
const outDir = path.join(repoRoot, "viz", "data");
const outFile = path.join(outDir, "recent.json");
fs.mkdirSync(outDir, { recursive: true });
const repoRoot = process.cwd();
const outDir = path.join(repoRoot, "viz", "data");
const outFile = path.join(outDir, "recent.json");
fs.mkdirSync(outDir, { recursive: true });
const windowDays = Math.max(1, Number(process.env.RECENT_DAYS || 7));
const maxItems = Math.max(1, Number(process.env.RECENT_MAX_ITEMS || 5000));
const windowDays = Math.max(1, Number(process.env.RECENT_DAYS || 7));
const maxItems = Math.max(1, Number(process.env.RECENT_MAX_ITEMS || 5000));
const now = new Date();
const since = new Date(now.getTime() - windowDays * 24 * 3600 * 1000);
const sinceIso = since.toISOString();
const now = new Date();
const since = new Date(now.getTime() - windowDays * 24 * 3600 * 1000);
const sinceIso = since.toISOString();
const headSha = getHeadShaOrEmpty();
const items = [];
const headSha = getHeadShaOrEmpty();
const items = [];
const commits = headSha ? logDbCommitsSince(sinceIso) : [];
const pairs = [];
const commits = headSha ? logDbCommitsSince(sinceIso) : [];
const pairs = [];
if (commits.length) {
const first = commits[0];
const parent = firstParentSha(first.sha);
pairs.push({
fromSha: parent || "",
toSha: first.sha,
ts: first.ts,
date: first.date,
});
if (commits.length) {
const first = commits[0];
const parent = firstParentSha(first.sha);
pairs.push({
fromSha: parent || "",
toSha: first.sha,
ts: first.ts,
date: first.date,
});
for (let i = 1; i < commits.length; i++) {
pairs.push({
fromSha: commits[i - 1].sha,
toSha: commits[i].sha,
ts: commits[i].ts,
date: commits[i].date,
});
}
}
for (let i = 1; i < commits.length; i++) {
pairs.push({
fromSha: commits[i - 1].sha,
toSha: commits[i].sha,
ts: commits[i].ts,
date: commits[i].date,
});
}
}
if (headSha) {
pairs.push({
fromSha: headSha,
toSha: "WORKTREE",
ts: now.toISOString(),
date: dateOnly(now.toISOString()),
});
}
if (headSha) {
pairs.push({
fromSha: headSha,
toSha: "WORKTREE",
ts: now.toISOString(),
date: dateOnly(now.toISOString()),
});
}
// True when the store label, item name, or item URL mentions "smws" or
// "scotch malt whisky society" (case-insensitive substring match).
function isSmwsBottle(storeLabel, it) {
  const hay = [storeLabel, it?.name, it?.url]
    .map((x) => String(x || ""))
    .join(" | ")
    .toLowerCase();
  return hay.includes("smws") || hay.includes("scotch malt whisky society");
}
for (const p of pairs) {
const fromSha = p.fromSha;
const toSha = p.toSha;
const ts = p.ts;
const d = p.date;
for (const p of pairs) {
const fromSha = p.fromSha;
const toSha = p.toSha;
const ts = p.ts;
const d = p.date;
const files = listChangedDbFiles(fromSha, toSha);
if (!files.length) continue;
const files = listChangedDbFiles(fromSha, toSha);
if (!files.length) continue;
for (const file of files) {
let prevObj = null;
let nextObj = null;
for (const file of files) {
let prevObj = null;
let nextObj = null;
if (toSha === "WORKTREE") {
prevObj = fromSha ? gitShowJson(fromSha, file) : null;
nextObj = readJsonFileOrNull(path.join(repoRoot, file));
} else {
prevObj = fromSha ? gitShowJson(fromSha, file) : null;
nextObj = gitShowJson(toSha, file);
}
if (toSha === "WORKTREE") {
prevObj = fromSha ? gitShowJson(fromSha, file) : null;
nextObj = readJsonFileOrNull(path.join(repoRoot, file));
} else {
prevObj = fromSha ? gitShowJson(fromSha, file) : null;
nextObj = gitShowJson(toSha, file);
}
const nextExists =
toSha === "WORKTREE"
? fs.existsSync(path.join(repoRoot, file))
: gitFileExistsAtSha(toSha, file);
if (!nextExists) continue;
const nextExists =
toSha === "WORKTREE" ? fs.existsSync(path.join(repoRoot, file)) : gitFileExistsAtSha(toSha, file);
if (!nextExists) continue;
if (!prevObj && !nextObj) continue;
if (!prevObj && !nextObj) continue;
const storeLabel = String(
nextObj?.storeLabel ||
nextObj?.store ||
prevObj?.storeLabel ||
prevObj?.store ||
""
);
const categoryLabel = String(
nextObj?.categoryLabel ||
nextObj?.category ||
prevObj?.categoryLabel ||
prevObj?.category ||
""
);
const storeLabel = String(
nextObj?.storeLabel || nextObj?.store || prevObj?.storeLabel || prevObj?.store || "",
);
const categoryLabel = String(
nextObj?.categoryLabel || nextObj?.category || prevObj?.categoryLabel || prevObj?.category || "",
);
const isNewStoreFile =
Boolean(fromSha) &&
!gitFileExistsAtSha(fromSha, file) &&
(toSha === "WORKTREE"
? fs.existsSync(path.join(repoRoot, file))
: gitFileExistsAtSha(toSha, file));
const isNewStoreFile =
Boolean(fromSha) &&
!gitFileExistsAtSha(fromSha, file) &&
(toSha === "WORKTREE" ? fs.existsSync(path.join(repoRoot, file)) : gitFileExistsAtSha(toSha, file));
let { newItems, restoredItems, removedItems, priceChanges } = diffDb(
prevObj,
nextObj
);
let { newItems, restoredItems, removedItems, priceChanges } = diffDb(prevObj, nextObj);
if (isNewStoreFile) {
newItems = [];
restoredItems = [];
}
if (isNewStoreFile) {
newItems = [];
restoredItems = [];
}
for (const it of newItems) {
if (isSmwsBottle(storeLabel, it)) continue;
items.push({
ts,
date: d,
fromSha: fromSha || "",
toSha,
kind: "new",
sku: it.sku,
name: it.name,
storeLabel,
categoryLabel,
price: normPriceStr(it.price),
url: it.url,
dbFile: file,
});
}
for (const it of newItems) {
if (isSmwsBottle(storeLabel, it)) continue;
items.push({
ts,
date: d,
fromSha: fromSha || "",
toSha,
kind: "new",
sku: it.sku,
name: it.name,
storeLabel,
categoryLabel,
price: normPriceStr(it.price),
url: it.url,
dbFile: file,
});
}
for (const it of restoredItems) {
items.push({
ts,
date: d,
fromSha: fromSha || "",
toSha,
kind: "restored",
sku: it.sku,
name: it.name,
storeLabel,
categoryLabel,
price: normPriceStr(it.price),
url: it.url,
dbFile: file,
});
}
for (const it of restoredItems) {
items.push({
ts,
date: d,
fromSha: fromSha || "",
toSha,
kind: "restored",
sku: it.sku,
name: it.name,
storeLabel,
categoryLabel,
price: normPriceStr(it.price),
url: it.url,
dbFile: file,
});
}
for (const it of removedItems) {
items.push({
ts,
date: d,
fromSha: fromSha || "",
toSha,
kind: "removed",
sku: it.sku,
name: it.name,
storeLabel,
categoryLabel,
price: normPriceStr(it.price),
url: it.url,
dbFile: file,
});
}
for (const it of removedItems) {
items.push({
ts,
date: d,
fromSha: fromSha || "",
toSha,
kind: "removed",
sku: it.sku,
name: it.name,
storeLabel,
categoryLabel,
price: normPriceStr(it.price),
url: it.url,
dbFile: file,
});
}
for (const u of priceChanges) {
items.push({
ts,
date: d,
fromSha: fromSha || "",
toSha,
kind: u.kind,
sku: u.sku,
name: u.name,
storeLabel,
categoryLabel,
oldPrice: normPriceStr(u.oldPrice),
newPrice: normPriceStr(u.newPrice),
url: u.url,
dbFile: file,
});
}
}
}
for (const u of priceChanges) {
items.push({
ts,
date: d,
fromSha: fromSha || "",
toSha,
kind: u.kind,
sku: u.sku,
name: u.name,
storeLabel,
categoryLabel,
oldPrice: normPriceStr(u.oldPrice),
newPrice: normPriceStr(u.newPrice),
url: u.url,
dbFile: file,
});
}
}
}
items.sort((a, b) => String(b.ts).localeCompare(String(a.ts)));
items.sort((a, b) => String(b.ts).localeCompare(String(a.ts)));
const trimmed = items.slice(0, maxItems);
const trimmed = items.slice(0, maxItems);
const payload = {
generatedAt: now.toISOString(),
windowDays,
since: sinceIso,
headSha,
count: trimmed.length,
items: trimmed,
};
const payload = {
generatedAt: now.toISOString(),
windowDays,
since: sinceIso,
headSha,
count: trimmed.length,
items: trimmed,
};
fs.writeFileSync(outFile, JSON.stringify(payload, null, 2) + "\n", "utf8");
process.stdout.write(`Wrote ${outFile} (${trimmed.length} items)\n`);
fs.writeFileSync(outFile, JSON.stringify(payload, null, 2) + "\n", "utf8");
process.stdout.write(`Wrote ${outFile} (${trimmed.length} items)\n`);
}
main();

View file

@ -8,24 +8,24 @@ const DB_DIR = path.join(__dirname, "../data/db");
const LINKS_FILE = path.join(__dirname, "../data/sku_links.json");
/**
 * Normalizes a SKU key so that the implicit "id:<digits>" form and its
 * zero-padded 6-digit form compare equal.
 *
 * @param {*} k - Raw SKU key; falsy values become "".
 * @returns {string} The 6-digit zero-padded number for "id:N" keys
 *   (1-6 digits, case-insensitive prefix), otherwise the trimmed input.
 */
function normalizeImplicitSkuKey(k) {
    const s = String(k || "").trim();
    const m = s.match(/^id:(\d{1,6})$/i);
    if (m) return String(m[1]).padStart(6, "0");
    return s;
}
// Collect every SKU present in the db files, normalized so that implicit
// "id:1234"-style keys and their zero-padded form are treated as the same SKU.
const validSkus = new Set();
for (const file of fs.readdirSync(DB_DIR)) {
    if (!file.endsWith(".json")) continue;
    const data = JSON.parse(fs.readFileSync(path.join(DB_DIR, file), "utf8"));
    // `data?.` also skips a file whose top-level JSON value is `null`.
    if (!Array.isArray(data?.items)) continue;
    for (const item of data.items) {
        if (!item || !item.sku) continue;
        const k = normalizeImplicitSkuKey(item.sku);
        if (k) validSkus.add(k);
    }
}
// load links
@ -40,40 +40,40 @@ const seen = new Set(); // dedupe after normalization
// Rebuild the link list, normalizing both SKUs and dropping links that are
// empty, self-referential after normalization, unknown to the db, or duplicates.
const nextLinks = [];
for (const x of Array.isArray(linksData.links) ? linksData.links : []) {
    const a = normalizeImplicitSkuKey(x?.fromSku);
    const b = normalizeImplicitSkuKey(x?.toSku);

    if (!a || !b) {
        prunedMissing++;
        continue;
    }

    // drop links that are now implicit (id:1234 <-> 001234 etc)
    if (a === b) {
        prunedAuto++;
        continue;
    }

    // keep only links where BOTH normalized skus exist in db
    if (!validSkus.has(a) || !validSkus.has(b)) {
        prunedMissing++;
        continue;
    }

    // dedupe (undirected) after normalization
    const key = a < b ? `${a}|${b}` : `${b}|${a}`;
    if (seen.has(key)) {
        prunedDup++;
        continue;
    }
    seen.add(key);

    // preserve datestamps/metadata; just normalize the SKUs
    nextLinks.push({
        ...x,
        fromSku: a,
        toSku: b,
    });
}
linksData.links = nextLinks;

View file

@ -12,307 +12,340 @@ const { priceToNumber, salePctOff, normPrice } = require("../src/utils/price");
const { isoTimestampFileSafe } = require("../src/utils/time");
/**
 * Runs `git` synchronously with the given arguments.
 *
 * @param {string[]} args - Arguments passed to the git executable.
 * @returns {string} Git's stdout decoded as UTF-8, with trailing whitespace removed.
 * @throws {Error} Whatever `execFileSync` throws (non-zero exit, git missing, ...).
 */
function runGit(args) {
    return execFileSync("git", args, { encoding: "utf8" }).trimEnd();
}
/**
 * Reads the contents of a file as it exists at a given commit, via
 * `git show <sha>:<filePath>`.
 *
 * @param {string} sha - Commit-ish to read from.
 * @param {string} filePath - Path of the file, as used in the `<sha>:<path>` spec.
 * @returns {string|null} The file text, or null when the git command fails
 *   for any reason (this is a deliberate best-effort lookup).
 */
function gitShowText(sha, filePath) {
    try {
        return execFileSync("git", ["show", `${sha}:${filePath}`], { encoding: "utf8" });
    } catch {
        return null;
    }
}
/**
 * Lists the files tracked under a directory at a given commit, using
 * `git ls-tree -r --name-only`.
 *
 * @param {string} sha - Commit-ish to inspect.
 * @param {string} dbDirRel - Directory path passed to `git ls-tree`.
 * @returns {Set<string>} The non-empty, trimmed path lines git printed.
 * @throws {Error} Propagates any failure from `runGit`.
 */
function gitListDbFiles(sha, dbDirRel) {
    const out = runGit(["ls-tree", "-r", "--name-only", sha, dbDirRel]);
    const lines = out
        .split(/\r?\n/)
        .map((s) => s.trim())
        .filter(Boolean);
    return new Set(lines);
}
/**
 * Parses JSON text without throwing.
 *
 * @param {string|null|undefined} txt - JSON text, or a nullish value.
 * @returns {*} The parsed value, or null when `txt` is nullish or not valid JSON.
 */
function parseJsonOrNull(txt) {
    if (txt == null) return null;
    try {
        return JSON.parse(txt);
    } catch {
        return null;
    }
}
/**
 * Indexes a db object's items by URL.
 *
 * Items that are falsy, or whose `url` is not a string starting with "http",
 * are skipped. When several items share a URL, the last one wins.
 *
 * @param {{items?: Array<object>}|null|undefined} obj - Parsed db file.
 * @returns {Map<string, {name: string, price: string, sku: string, url: string, removed: boolean}>}
 *   Map from URL to a normalized copy of the item (string fields default to "").
 */
function mapItemsByUrl(obj) {
    const m = new Map();
    const items = Array.isArray(obj?.items) ? obj.items : [];
    for (const it of items) {
        if (!it || typeof it.url !== "string" || !it.url.startsWith("http")) continue;
        m.set(it.url, {
            name: String(it.name || ""),
            price: String(it.price || ""),
            sku: String(it.sku || ""),
            url: it.url,
            removed: Boolean(it.removed),
        });
    }
    return m;
}
/**
 * Computes the item-level differences between two versions of a db file,
 * matching items by URL.
 *
 * Classification per URL:
 *  - new:      absent before, present and not removed now
 *  - restored: present but flagged removed before, present and not removed now
 *  - removed:  present and not removed before, now absent or flagged removed
 *  - updated:  live in both versions, with a different normalized price
 *
 * @param {object|null} prevObj - Parsed previous db object (may be null).
 * @param {object|null} nextObj - Parsed next db object (may be null).
 * @returns {{newItems: object[], restoredItems: object[], removedItems: object[], updatedItems: object[]}}
 */
function buildDiffForDb(prevObj, nextObj) {
    const prev = mapItemsByUrl(prevObj);
    const next = mapItemsByUrl(nextObj);

    const urls = new Set([...prev.keys(), ...next.keys()]);

    const newItems = [];
    const restoredItems = [];
    const removedItems = [];
    const updatedItems = [];

    for (const url of urls) {
        const a = prev.get(url);
        const b = next.get(url);

        const aExists = Boolean(a);
        const bExists = Boolean(b);

        const aRemoved = Boolean(a?.removed);
        const bRemoved = Boolean(b?.removed);

        if (!aExists && bExists && !bRemoved) {
            newItems.push({ ...b });
            continue;
        }

        if (aExists && aRemoved && bExists && !bRemoved) {
            restoredItems.push({ ...b });
            continue;
        }

        if (aExists && !aRemoved && (!bExists || bRemoved)) {
            removedItems.push({ ...a });
            continue;
        }

        if (aExists && bExists && !aRemoved && !bRemoved) {
            // Compare normalized prices so formatting-only changes are ignored,
            // but report the raw strings.
            const aP = normPrice(a.price);
            const bP = normPrice(b.price);
            if (aP !== bP) {
                updatedItems.push({
                    name: b.name || a.name || "",
                    sku: normalizeCspc(b.sku || a.sku || ""),
                    oldPrice: a.price || "",
                    newPrice: b.price || "",
                    url,
                });
            }
        }
    }

    return { newItems, restoredItems, removedItems, updatedItems };
}
/**
 * Parses the command-line arguments of the diff-report script.
 *
 * - Arguments not starting with "-" are positional: the first two are the
 *   from/to SHAs.
 * - `--no-color` / `--color` are recorded in `flags` as "no-color" / "color".
 * - `--db-dir <v>` and `--out <v>` consume the next argument as their value,
 *   provided it exists and does not start with "-".
 * - Any other dash-prefixed argument (including a value-less `--db-dir` or
 *   `--out`) is recorded verbatim in `flags`.
 *
 * @param {string[]} argv - Arguments, typically `process.argv.slice(2)`.
 * @returns {{fromSha: string, toSha: string, dbDir: string, outFile: string, flags: Set<string>}}
 *   `dbDir` defaults to "data/db"; the other strings default to "".
 */
function parseArgs(argv) {
    const flags = new Set();
    const kv = new Map();
    const positional = [];

    for (let i = 0; i < argv.length; i++) {
        const a = argv[i];
        if (!a.startsWith("-")) {
            positional.push(a);
            continue;
        }
        if (a === "--no-color") {
            flags.add("no-color");
            continue;
        }
        if (a === "--color") {
            flags.add("color");
            continue;
        }
        if ((a === "--db-dir" || a === "--out") && argv[i + 1] && !argv[i + 1].startsWith("-")) {
            kv.set(a, argv[i + 1]);
            i++; // skip the value we just consumed
            continue;
        }
        flags.add(a);
    }

    const fromSha = positional[0] || "";
    const toSha = positional[1] || "";
    const dbDir = kv.get("--db-dir") || "data/db";
    const outFile = kv.get("--out") || "";

    return { fromSha, toSha, dbDir, outFile, flags };
}
/**
 * Renders a diff report as text: a totals line, a per-category summary table,
 * then NEW / RESTORED / REMOVED / PRICE CHANGES sections (each only when it
 * has entries), sorted by category label then name.
 *
 * The report's arrays are sorted on copies, so `diffReport` is not mutated.
 *
 * @param {object} diffReport - Report with `categories`, `totals`, `newItems`,
 *   `restoredItems`, `removedItems` and `updatedItems`.
 * @param {{fromSha: string, toSha: string, colorize: boolean}} opts - The two
 *   revisions shown in the header, and whether to apply color codes.
 * @returns {string} The full report text, newline-terminated.
 */
function renderDiffReport(diffReport, { fromSha, toSha, colorize }) {
    const paint = (s, code) => color(s, code, colorize);

    let out = "";
    const ln = (s = "") => {
        out += String(s) + "\n";
    };

    ln(paint("========== DIFF REPORT ==========", C.bold));
    ln(`${paint("From", C.bold)} ${fromSha} ${paint("to", C.bold)} ${toSha}`);
    ln(
        `${paint("Totals", C.bold)} | Categories=${diffReport.categories.length} | New=${diffReport.totals.newCount} | Restored=${diffReport.totals.restoredCount} | Removed=${diffReport.totals.removedCount} | PriceChanges=${diffReport.totals.updatedCount}`,
    );
    ln("");

    const rows = diffReport.categories;
    // Column width: longest label, at least 12 and at most 56 characters.
    const catW = Math.min(56, Math.max(...rows.map((r) => r.catLabel.length), 12));

    ln(paint("Per-category summary:", C.bold));
    ln(
        `${padRight("Store | Category", catW)} ${padLeft("New", 4)} ${padLeft("Res", 4)} ${padLeft("Rem", 4)} ${padLeft("Upd", 4)}`,
    );
    ln(`${"-".repeat(catW)} ---- ---- ---- ----`);
    for (const r of rows) {
        ln(
            `${padRight(r.catLabel, catW)} ${padLeft(r.newCount, 4)} ${padLeft(r.restoredCount, 4)} ${padLeft(r.removedCount, 4)} ${padLeft(r.updatedCount, 4)}`,
        );
    }
    ln("");

    const labelW = Math.max(
        16,
        ...diffReport.newItems.map((x) => x.catLabel.length),
        ...diffReport.restoredItems.map((x) => x.catLabel.length),
        ...diffReport.removedItems.map((x) => x.catLabel.length),
        ...diffReport.updatedItems.map((x) => x.catLabel.length),
    );

    const skuInline = (sku) => {
        const s = normalizeCspc(sku);
        return s ? paint(` ${s}`, C.gray) : "";
    };

    const byLabelThenName = (a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name);

    // Shared renderer for the NEW / RESTORED / REMOVED sections, which differ
    // only in title, marker character and color.
    const itemSection = (title, items, marker, markerColor) => {
        if (!items.length) return;
        ln(paint(`${title} (${items.length})`, C.bold + markerColor));
        for (const it of [...items].sort(byLabelThenName)) {
            const price = it.price ? paint(it.price, C.cyan) : paint("(no price)", C.gray);
            ln(
                `${paint(marker, markerColor)} ${padRight(it.catLabel, labelW)} | ${paint(it.name, C.bold)}${skuInline(it.sku)} ${price}`,
            );
            ln(` ${paint(it.url, C.dim)}`);
        }
        ln("");
    };

    itemSection("NEW", diffReport.newItems, "+", C.green);
    itemSection("RESTORED", diffReport.restoredItems, "R", C.green);
    itemSection("REMOVED", diffReport.removedItems, "-", C.yellow);

    if (diffReport.updatedItems.length) {
        ln(paint(`PRICE CHANGES (${diffReport.updatedItems.length})`, C.bold + C.cyan));

        for (const u of [...diffReport.updatedItems].sort(byLabelThenName)) {
            const oldRaw = u.oldPrice || "";
            const newRaw = u.newPrice || "";

            const oldN = priceToNumber(oldRaw);
            const newN = priceToNumber(newRaw);

            const oldP = oldRaw ? paint(oldRaw, C.yellow) : paint("(no price)", C.gray);

            let newP = newRaw ? newRaw : "(no price)";
            let offTag = "";

            // Red for an increase, green (plus a percent-off tag) for a
            // decrease, cyan when equal or when either price is not numeric.
            if (Number.isFinite(oldN) && Number.isFinite(newN)) {
                if (newN > oldN) newP = paint(newP, C.red);
                else if (newN < oldN) {
                    newP = paint(newP, C.green);
                    const pct = salePctOff(oldRaw, newRaw);
                    if (pct !== null) offTag = " " + paint(`[${pct}% Off]`, C.green);
                } else newP = paint(newP, C.cyan);
            } else newP = paint(newP, C.cyan);

            ln(
                `${paint("~", C.cyan)} ${padRight(u.catLabel, labelW)} | ${paint(u.name, C.bold)}${skuInline(u.sku)} ${oldP} ${paint("->", C.gray)} ${newP}${offTag}`,
            );
            ln(` ${paint(u.url, C.dim)}`);
        }

        ln("");
    }

    ln(paint("======== END DIFF REPORT ========", C.bold));

    return out;
}
/**
 * Entry point: diffs every db file between two commits, prints the report to
 * stdout and, when `--out` is given, also writes an uncolored copy to that file.
 *
 * Sets `process.exitCode = 2` and prints usage when either SHA is missing.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const { fromSha, toSha, dbDir, outFile, flags } = parseArgs(process.argv.slice(2));

    if (!fromSha || !toSha) {
        console.error(
            `Usage: ${path.basename(process.argv[1])} <fromSha> <toSha> [--db-dir data/db] [--out reports/<file>.txt] [--no-color]`,
        );
        process.exitCode = 2;
        return;
    }

    // If user provides short SHAs, git accepts them.
    // NOTE(review): parseArgs also records a "color" flag, but only "no-color"
    // is consulted here, so `--color` currently has no effect.
    const colorize = flags.has("no-color") ? false : Boolean(process.stdout && process.stdout.isTTY);

    // Union of the db files on both sides, so added and deleted files are covered.
    const filesA = gitListDbFiles(fromSha, dbDir);
    const filesB = gitListDbFiles(toSha, dbDir);
    const files = new Set([...filesA, ...filesB]);

    const diffReport = {
        categories: [],
        totals: { newCount: 0, updatedCount: 0, removedCount: 0, restoredCount: 0 },
        newItems: [],
        restoredItems: [],
        removedItems: [],
        updatedItems: [],
    };

    for (const file of [...files].sort()) {
        const prevObj = parseJsonOrNull(gitShowText(fromSha, file));
        const nextObj = parseJsonOrNull(gitShowText(toSha, file));

        const storeLabel = String(
            nextObj?.storeLabel || prevObj?.storeLabel || nextObj?.store || prevObj?.store || "?",
        );
        const catLabel = String(
            nextObj?.categoryLabel ||
                prevObj?.categoryLabel ||
                nextObj?.category ||
                prevObj?.category ||
                path.basename(file),
        );
        const catLabelFull = `${storeLabel} | ${catLabel}`;

        const { newItems, restoredItems, removedItems, updatedItems } = buildDiffForDb(prevObj, nextObj);

        diffReport.categories.push({
            catLabel: catLabelFull,
            newCount: newItems.length,
            restoredCount: restoredItems.length,
            removedCount: removedItems.length,
            updatedCount: updatedItems.length,
        });

        diffReport.totals.newCount += newItems.length;
        diffReport.totals.restoredCount += restoredItems.length;
        diffReport.totals.removedCount += removedItems.length;
        diffReport.totals.updatedCount += updatedItems.length;

        for (const it of newItems) diffReport.newItems.push({ catLabel: catLabelFull, ...it });
        for (const it of restoredItems) diffReport.restoredItems.push({ catLabel: catLabelFull, ...it });
        for (const it of removedItems) diffReport.removedItems.push({ catLabel: catLabelFull, ...it });
        for (const u of updatedItems) diffReport.updatedItems.push({ catLabel: catLabelFull, ...u });
    }

    const reportText = renderDiffReport(diffReport, { fromSha, toSha, colorize });
    process.stdout.write(reportText);

    const outPath = outFile ? (path.isAbsolute(outFile) ? outFile : path.join(process.cwd(), outFile)) : "";

    if (outPath) {
        fs.mkdirSync(path.dirname(outPath), { recursive: true });
        // The file copy is always rendered without color codes.
        fs.writeFileSync(outPath, renderDiffReport(diffReport, { fromSha, toSha, colorize: false }), "utf8");
    }
}
// Run the script; on failure print the stack (or the stringified error) and
// set a failing exit code without forcing an immediate exit.
main().catch((e) => {
    const msg = e && e.stack ? e.stack : String(e);
    console.error(msg);
    process.exitCode = 1;
});

View file

@ -13,31 +13,31 @@ const includeLinked = process.argv.includes("--include-linked");
// Load the SKUs that already take part in a link, so they can be excluded
// below. Skipped entirely when --include-linked was passed or the links file
// does not exist.
const linkedSkus = new Set();
if (!includeLinked && fs.existsSync(LINKS_FILE)) {
    const { links } = JSON.parse(fs.readFileSync(LINKS_FILE, "utf8"));
    // Tolerate a links file without a `links` array instead of throwing.
    for (const { fromSku, toSku } of Array.isArray(links) ? links : []) {
        linkedSkus.add(String(fromSku));
        linkedSkus.add(String(toSku));
    }
}
// Print the URL of every live item whose SKU starts with "u:" and, unless
// --include-linked was passed, is not already part of a link.
for (const file of fs.readdirSync(DB_DIR)) {
    if (!file.endsWith(".json")) continue;

    // These stores are skipped unless explicitly included.
    if (!includeKegNCork && file.startsWith("kegncork__")) continue;
    if (!includeCoop && file.startsWith("coop__")) continue;

    const data = JSON.parse(fs.readFileSync(path.join(DB_DIR, file), "utf8"));
    if (!Array.isArray(data?.items)) continue;

    for (const item of data.items) {
        // `|| {}` keeps a null/undefined entry from throwing on destructuring.
        const { sku, url, removed } = item || {};
        if (
            removed === false &&
            typeof sku === "string" &&
            sku.startsWith("u:") &&
            url &&
            (includeLinked || !linkedSkus.has(sku))
        ) {
            console.log(url);
        }
    }
}

File diff suppressed because it is too large Load diff

View file

@ -4,12 +4,12 @@ import path from "node:path";
import { execSync } from "node:child_process";
/**
 * Prints a message to stderr and terminates the process with exit code 1.
 *
 * @param {string} msg - Message to print.
 */
function die(msg) {
    console.error(msg);
    process.exit(1);
}
/**
 * Runs a shell command synchronously and returns its output.
 *
 * @param {string} cmd - Command line handed to the shell by `execSync`.
 * @returns {string} The command's stdout decoded as UTF-8 and trimmed.
 * @throws {Error} Whatever `execSync` throws (e.g. a non-zero exit status).
 */
function sh(cmd) {
    return execSync(cmd, { stdio: "pipe", encoding: "utf8" }).trim();
}
const ISSUE_BODY = process.env.ISSUE_BODY || "";
@ -20,16 +20,14 @@ const REPO = process.env.REPO || "";
if (!ISSUE_NUMBER) die("Missing ISSUE_NUMBER");
if (!REPO) die("Missing REPO");
// Extract the JSON payload embedded between the stviz BEGIN/END HTML comment
// markers in the issue body, then parse it. Either failure aborts the script.
const m = ISSUE_BODY.match(/<!--\s*stviz-sku-edits:BEGIN\s*-->\s*([\s\S]*?)\s*<!--\s*stviz-sku-edits:END\s*-->/);
if (!m) die("No stviz payload found in issue body.");

let payload;
try {
    payload = JSON.parse(m[1]);
} catch (e) {
    die(`Invalid JSON payload: ${e?.message || e}`);
}
if (payload?.schema !== "stviz-sku-edits-v1") die("Unsupported payload schema.");
@ -38,259 +36,246 @@ const linksIn = Array.isArray(payload?.links) ? payload.links : [];
const ignoresIn = Array.isArray(payload?.ignores) ? payload.ignores : [];
/**
 * Coerces a SKU to a trimmed string.
 *
 * @param {*} s - Raw SKU; falsy values become "".
 * @returns {string} The trimmed string form.
 */
function normSku(s) {
    return String(s || "").trim();
}
/**
 * Builds the directed key for a link from SKU `a` to SKU `b`.
 *
 * @param {*} a - Source SKU.
 * @param {*} b - Target SKU.
 * @returns {string} The concatenated normalized SKUs, or "" when either is
 *   empty or both are equal.
 */
function linkKeyFrom(a, b) {
    const x = normSku(a);
    const y = normSku(b);
    // NOTE(review): the two SKUs are joined with no separator, so distinct
    // pairs such as ("ab", "c") and ("a", "bc") produce the same key. Confirm
    // whether a separator character was intended here.
    return x && y && x !== y ? `${x}${y}` : "";
}
/**
 * Builds the directed link key for a link object.
 *
 * @param {{fromSku?: *, toSku?: *}|null|undefined} x - Link entry.
 * @returns {string} Result of `linkKeyFrom(x?.fromSku, x?.toSku)`.
 */
function linkKey(x) {
    return linkKeyFrom(x?.fromSku, x?.toSku);
}
/**
 * Builds an order-independent key for a pair of SKUs.
 *
 * @param {*} a - First SKU.
 * @param {*} b - Second SKU.
 * @returns {string} "<smaller>|<larger>" by string comparison, or "" when
 *   either SKU is empty or both are equal.
 */
function pairKey(a, b) {
    const x = normSku(a);
    const y = normSku(b);
    if (!x || !y || x === y) return "";
    return x < y ? `${x}|${y}` : `${y}|${x}`;
}
/* ---------------- Minimal, merge-friendly JSON array insertion ---------------- */
/**
 * Locates the `[ ... ]` span of `"propName": [ ... ]` in JSON source text,
 * without parsing the whole document.
 *
 * The property must start a line (optionally indented). Brackets inside
 * string literals are ignored and nested arrays are balanced.
 *
 * @param {string} src - JSON source text.
 * @param {string} propName - Property name to look for; matched literally.
 * @returns {{start: number, open: number, close: number, end: number, fieldIndent: string}|null}
 *   `start` is the index where the matched line's indentation begins, `open`
 *   and `close` are the indices of the array's brackets, `end` is `close + 1`
 *   and `fieldIndent` is the property's leading whitespace. Null when the
 *   property is not found or the array is never closed.
 */
function findJsonArraySpan(src, propName) {
    // Escape regex metacharacters so the name is matched literally.
    const escaped = String(propName).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const re = new RegExp(`(^[ \\t]*)"${escaped}"\\s*:\\s*\\[`, "m");
    const mm = src.match(re);
    if (!mm) return null;

    const fieldIndent = mm[1] || "";
    const at = mm.index || 0;
    const open = src.indexOf("[", at);
    if (open < 0) return null;

    // scan to matching ']'
    let i = open;
    let depth = 0;
    let inStr = false;
    let esc = false;

    for (; i < src.length; i++) {
        const ch = src[i];

        if (inStr) {
            // Inside a string only an unescaped quote matters.
            if (esc) {
                esc = false;
            } else if (ch === "\\") {
                esc = true;
            } else if (ch === '"') {
                inStr = false;
            }
            continue;
        }

        if (ch === '"') {
            inStr = true;
            continue;
        }

        if (ch === "[") depth++;
        else if (ch === "]") {
            depth--;
            if (depth === 0) {
                const close = i;
                return { start: at, open, close, end: close + 1, fieldIndent };
            }
        }
    }

    return null;
}
/**
 * Splits the inner text of a JSON array of objects into the raw text of each
 * top-level object, preserving each object's original formatting.
 *
 * Whitespace and commas between objects are skipped, as is any unexpected
 * character found outside an object. Braces inside string literals are
 * ignored. An unterminated trailing object is dropped.
 *
 * @param {string} arrayInnerText - Text between the array's '[' and ']'.
 * @returns {string[]} Exact source text of each `{ ... }` block, in order.
 */
function splitArrayObjectBlocks(arrayInnerText) {
    const blocks = [];

    let i = 0;
    const s = arrayInnerText;

    function skipWsAndCommas() {
        while (i < s.length) {
            const ch = s[i];
            if (ch === "," || ch === " " || ch === "\t" || ch === "\n" || ch === "\r") i++;
            else break;
        }
    }

    skipWsAndCommas();

    while (i < s.length) {
        if (s[i] !== "{") {
            // if something unexpected, advance a bit
            i++;
            skipWsAndCommas();
            continue;
        }

        const start = i;
        let depth = 0;
        let inStr = false;
        let esc = false;

        for (; i < s.length; i++) {
            const ch = s[i];

            if (inStr) {
                // Inside a string only an unescaped quote matters.
                if (esc) {
                    esc = false;
                } else if (ch === "\\") {
                    esc = true;
                } else if (ch === '"') {
                    inStr = false;
                }
                continue;
            }

            if (ch === '"') {
                inStr = true;
                continue;
            }

            if (ch === "{") depth++;
            else if (ch === "}") {
                depth--;
                if (depth === 0) {
                    i++; // include '}'
                    const raw = s.slice(start, i);
                    blocks.push(raw);
                    break;
                }
            }
        }

        skipWsAndCommas();
    }

    return blocks;
}
/**
 * Infer the indentation used for the '{' lines inside an array.
 *
 * @param {string} arrayInnerText - Text between the array's '[' and ']'.
 * @param {string} fieldIndent - Indentation of the line holding the property name.
 * @returns {string} The whitespace preceding the first '{' that starts a line,
 *   or `fieldIndent` plus two spaces when no such line exists (e.g. an empty array).
 */
function detectItemIndent(arrayInnerText, fieldIndent) {
  const m = arrayInnerText.match(/\n([ \t]*)\{/);
  if (m) return m[1];
  // Default: one 2-space level deeper than the field itself.
  return fieldIndent + "  ";
}
/**
 * Render one link/ignore object as multi-line text, matching the layout
 * JSON.stringify(..., 2) uses for objects inside arrays.
 *
 * SKU values are passed through `normSku` first. A `fromSku`/`toSku` pair takes
 * precedence over a `skuA`/`skuB` pair; when neither pair is complete the
 * result is an empty object literal.
 *
 * @param {string} objIndent - Indentation for the object's braces.
 * @param {object} obj - Source object; may be null/undefined.
 * @returns {string} Formatted block without a trailing newline or comma.
 */
function makePrettyObjBlock(objIndent, obj) {
  const braceIndent = objIndent;
  const fieldIndent = objIndent + "  "; // one 2-space level inside the braces

  const twoFieldBlock = (keyA, valueA, keyB, valueB) =>
    `${braceIndent}{\n` +
    `${fieldIndent}"${keyA}": ${JSON.stringify(valueA)},\n` +
    `${fieldIndent}"${keyB}": ${JSON.stringify(valueB)}\n` +
    `${braceIndent}}`;

  const fromSku = normSku(obj?.fromSku);
  const toSku = normSku(obj?.toSku);
  if (fromSku && toSku) return twoFieldBlock("fromSku", fromSku, "toSku", toSku);

  const skuA = normSku(obj?.skuA);
  const skuB = normSku(obj?.skuB);
  if (skuA && skuB) return twoFieldBlock("skuA", skuA, "skuB", skuB);

  return `${braceIndent}{}`;
}
/**
 * Insert new entries into one JSON array of `src` by editing the text in place,
 * leaving every existing byte of the array untouched.
 *
 * Existing objects are parsed only to build a de-duplication set. Incoming
 * items are normalised, filtered against that set (and against each other),
 * sorted by key, and appended after the last existing element.
 *
 * @param {object} args
 * @param {string} args.src - Full JSON text.
 * @param {string} args.propName - Name of the array property to extend.
 * @param {Iterable<object>} args.incoming - Candidate items to add.
 * @param {(o: object) => string} args.keyFn - De-dup/sort key; a falsy key means "skip".
 * @param {(o: object) => object} args.normalizeFn - Normalises an incoming item.
 * @returns {string} Updated text, or `src` itself when there is nothing to add.
 */
function applyInsertionsToArrayText({ src, propName, incoming, keyFn, normalizeFn }) {
  const span = findJsonArraySpan(src, propName);
  if (!span) die(`Could not find "${propName}" array in ${filePath}`);

  const before = src.slice(0, span.open + 1); // includes '['
  const inner = src.slice(span.open + 1, span.close); // between [ and ]
  const after = src.slice(span.close); // starts with ']'

  const itemIndent = detectItemIndent(inner, span.fieldIndent);

  // Parse existing objects to build a dedupe set (does NOT modify inner text)
  const seen = new Set();
  for (const raw of splitArrayObjectBlocks(inner)) {
    try {
      const k = keyFn(JSON.parse(raw));
      if (k) seen.add(k);
    } catch {
      // ignore unparsable blocks for dedupe purposes
    }
  }

  const toAdd = [];
  for (const item of incoming) {
    const normalized = normalizeFn(item);
    const k = keyFn(normalized);
    if (!k || seen.has(k)) continue;
    seen.add(k);
    toAdd.push(normalized);
  }

  if (!toAdd.length) return src;

  // Deterministic order for new items only (doesn't reorder existing)
  const addBlocks = toAdd
    .map((obj) => ({ obj, key: keyFn(obj) }))
    .sort((a, b) => String(a.key).localeCompare(String(b.key)))
    .map((entry) => makePrettyObjBlock(itemIndent, entry.obj));
  const addedText = addBlocks.join(",\n");

  // Everything up to the last non-whitespace character is kept verbatim.
  const body = inner.trimEnd();

  let newInner;
  if (body === "") {
    // "links": [] -> pretty multiline
    newInner = "\n" + addedText + "\n" + span.fieldIndent;
  } else {
    // Keep existing whitespace EXACTLY; append before trailing whitespace
    const tail = inner.slice(body.length);
    newInner = body + ",\n" + addedText + tail;
  }

  return before + newInner + after;
}
/* ---------------- Apply edits ---------------- */
const filePath = path.join("data", "sku_links.json");
/**
 * Create the file at `filePath` with an empty seed document when it is missing.
 * Parent directories are created as needed; an existing file is left untouched.
 */
function ensureFileExists() {
  if (fs.existsSync(filePath)) return;
  fs.mkdirSync(path.dirname(filePath), { recursive: true });
  // Create with stable formatting; generatedAt intentionally blank (we do not mutate it later)
  const seed = { generatedAt: "", links: [], ignores: [] };
  fs.writeFileSync(filePath, JSON.stringify(seed, null, 2) + "\n", "utf8");
}
ensureFileExists();
@ -301,42 +286,42 @@ let text = fs.readFileSync(filePath, "utf8");
// Also: do NOT re-stringify entire JSON; we only surgically insert into arrays.
// Incoming link edits reduced to normalised { fromSku, toSku } objects.
const normLinksIn = linksIn.map((x) => ({
  fromSku: normSku(x?.fromSku),
  toSku: normSku(x?.toSku),
}));

// Incoming ignore edits rewritten from their pair key so each pair is stored
// in the order pairKey produces; pairs without a key become empty placeholders
// (they yield no key later and are therefore never inserted).
const normIgnoresIn = ignoresIn.map((x) => {
  const k = pairKey(normSku(x?.skuA), normSku(x?.skuB));
  if (!k) return { skuA: "", skuB: "" };
  const [p, q] = k.split("|");
  return { skuA: p, skuB: q };
});
// Insert links (sorted by from→to)
text = applyInsertionsToArrayText({
  src: text,
  propName: "links",
  incoming: normLinksIn,
  keyFn: (o) => linkKeyFrom(o?.fromSku, o?.toSku),
  normalizeFn: (o) => ({ fromSku: normSku(o?.fromSku), toSku: normSku(o?.toSku) }),
});

// Insert ignores (sorted by canonical pair)
text = applyInsertionsToArrayText({
  src: text,
  propName: "ignores",
  incoming: normIgnoresIn,
  keyFn: (o) => pairKey(o?.skuA, o?.skuB),
  // Re-derive both SKUs from the pair key so the stored order matches the key.
  normalizeFn: (o) => {
    const k = pairKey(normSku(o?.skuA), normSku(o?.skuB));
    if (!k) return { skuA: "", skuB: "" };
    const [p, q] = k.split("|");
    return { skuA: p, skuB: q };
  },
});
fs.writeFileSync(filePath, text, "utf8");
@ -345,10 +330,10 @@ fs.writeFileSync(filePath, text, "utf8");
// Ensure git identity is set for commit (Actions runners often lack it)
try {
  sh(`git config user.name "github-actions[bot]"`);
  sh(`git config user.email "41898282+github-actions[bot]@users.noreply.github.com"`);
} catch {
  // Best effort only: a failure here is deliberately ignored.
}
const ts = new Date().toISOString().replace(/[:.]/g, "-");
@ -360,8 +345,8 @@ sh(`git add "${filePath}"`);
// If no diffs (all edits were duplicates), don't create PR or close issue.
const diff = sh(`git status --porcelain "${filePath}"`);
if (!diff) {
  console.log("No changes to commit (all edits already present). Leaving issue open.");
  process.exit(0);
}
sh(`git commit -m "stviz: apply sku edits (issue #${ISSUE_NUMBER})"`);
@ -371,21 +356,20 @@ const prTitle = `STVIZ: SKU link updates (issue #${ISSUE_NUMBER})`;
const prBody = `Automated PR created from issue #${ISSUE_NUMBER}: ${ISSUE_TITLE}`;
/**
 * Pull the pull-request URL out of `gh pr create` output.
 *
 * @param {string} out - Text printed by the command; may contain extra lines.
 * @returns {string} The first `http(s)://…/pull/<number>…` token found.
 *   Calls `die` when no such URL is present.
 */
function extractPrUrl(out) {
  // gh pr create usually prints the PR URL to stdout; be robust in case extra text appears.
  const m = String(out || "").match(/https?:\/\/\S+\/pull\/\d+\S*/);
  if (!m) die(`Could not find PR URL in gh output:\n${out}`);
  return m[0];
}
// Create PR and capture URL/number without relying on unsupported flags.
//
// Every interpolated value is single-quoted for the shell: the PR body carries
// the issue title, which is free text and must not be able to break out of the
// command line.
// NOTE(review): assumes `sh` runs its argument through a POSIX shell — confirm.
const quoteShellArg = (value) => `'${String(value).replace(/'/g, `'\\''`)}'`;

const prCreateOut = sh(
  `gh -R ${quoteShellArg(REPO)} pr create --base data --head ${quoteShellArg(branch)}` +
    ` --title ${quoteShellArg(prTitle)} --body ${quoteShellArg(prBody)}`,
);
const prUrl = extractPrUrl(prCreateOut);
const prNumber = sh(`gh -R ${quoteShellArg(REPO)} pr view ${quoteShellArg(prUrl)} --json number --jq .number`);

sh(
  `gh -R ${quoteShellArg(REPO)} issue close ${quoteShellArg(ISSUE_NUMBER)} -c ` +
    quoteShellArg(`Processed by STVIZ automation. Opened PR #${prNumber}: ${prUrl}`),
);

View file

@ -4,7 +4,7 @@
const { main } = require("./src/main");
// Run the entry point; on failure print the stack (or the value itself) and
// set a non-zero exit code without terminating the process abruptly.
main().catch((e) => {
  const msg = e && e.stack ? e.stack : String(e);
  console.error(msg);
  process.exitCode = 1;
});

View file

@ -1,62 +1,64 @@
/**
 * Fetch a URL with caching disabled and parse the response body as JSON.
 *
 * @param {string} url
 * @returns {Promise<any>} Parsed JSON body.
 * @throws {Error} `HTTP <status> for <url>` when the response is not ok.
 */
export async function fetchJson(url) {
  const res = await fetch(url, { cache: "no-store" });
  if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`);
  return await res.json();
}
/**
 * Fetch a URL with caching disabled and return the response body as text.
 *
 * @param {string} url
 * @returns {Promise<string>} Response body.
 * @throws {Error} `HTTP <status> for <url>` when the response is not ok.
 */
export async function fetchText(url) {
  const res = await fetch(url, { cache: "no-store" });
  if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`);
  return await res.text();
}
/**
 * Guess the GitHub owner/repo from the current page location.
 *
 * On `<owner>.github.io` the first path segment is taken as the repo name,
 * falling back to `<owner>.github.io` when the path is empty. On any other
 * host a fixed default is returned.
 *
 * @returns {{owner: string, repo: string}}
 */
export function inferGithubOwnerRepo() {
  const host = location.hostname || "";
  const m = host.match(/^([a-z0-9-]+)\.github\.io$/i);
  if (!m) return { owner: "brennanwilkes", repo: "spirit-tracker" };

  const owner = m[1];
  const parts = (location.pathname || "/").split("/").filter(Boolean);
  const repo = parts.length >= 1 ? parts[0] : `${owner}.github.io`;
  return { owner, repo };
}
/**
 * Report whether the page is served over http(s) from localhost or 127.0.0.1.
 *
 * @returns {boolean}
 */
export function isLocalWriteMode() {
  const h = String(location.hostname || "").toLowerCase();
  const isHttp = location.protocol === "http:" || location.protocol === "https:";
  const isLoopbackHost = h === "127.0.0.1" || h === "localhost";
  return isHttp && isLoopbackHost;
}
/* ---- Local disk-backed SKU link API (only on viz/serve.js) ---- */
/**
 * Read SKU links/ignores from the `/__stviz/sku-links` endpoint.
 *
 * @returns {Promise<{links: Array, ignores: Array}>} Each field falls back to
 *   an empty array when the response does not contain an array for it.
 * @throws {Error} `HTTP <status>` when the response is not ok.
 */
export async function apiReadSkuMetaFromLocalServer() {
  const r = await fetch("/__stviz/sku-links", { cache: "no-store" });
  if (!r.ok) throw new Error(`HTTP ${r.status}`);
  const j = await r.json();
  return {
    links: Array.isArray(j?.links) ? j.links : [],
    ignores: Array.isArray(j?.ignores) ? j.ignores : [],
  };
}
/**
 * POST a `{ fromSku, toSku }` link to the `/__stviz/sku-links` endpoint.
 *
 * @param {string} fromSku
 * @param {string} toSku
 * @returns {Promise<any>} Parsed JSON response body.
 * @throws {Error} `HTTP <status>` when the response is not ok.
 */
export async function apiWriteSkuLink(fromSku, toSku) {
  const res = await fetch("/__stviz/sku-links", {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify({ fromSku, toSku }),
  });
  if (!res.ok) throw new Error(`HTTP ${res.status}`);
  return await res.json();
}
/**
 * POST a `{ skuA, skuB }` ignore pair to the `/__stviz/sku-ignores` endpoint.
 *
 * @param {string} skuA
 * @param {string} skuB
 * @returns {Promise<any>} Parsed JSON response body.
 * @throws {Error} `HTTP <status>` when the response is not ok.
 */
export async function apiWriteSkuIgnore(skuA, skuB) {
  const res = await fetch("/__stviz/sku-ignores", {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify({ skuA, skuB }),
  });
  if (!res.ok) throw new Error(`HTTP ${res.status}`);
  return await res.json();
}
/**
@ -65,50 +67,50 @@ export async function apiWriteSkuIgnore(skuA, skuB) {
* - On local server: reads via /__stviz/sku-links (disk)
*/
export async function loadSkuMetaBestEffort() {
  // Sources are tried in order; a failure of one source is intentionally
  // swallowed so the next one gets a chance. If all fail, empty lists are returned.
  const asMeta = (j) => ({
    links: Array.isArray(j?.links) ? j.links : [],
    ignores: Array.isArray(j?.ignores) ? j.ignores : [],
  });

  const staticPaths = [
    // 1) GitHub Pages / static deploy inside viz/
    "./data/sku_links.json",
    // 2) alternate static path (in case you later serve viz under a subpath)
    "/data/sku_links.json",
  ];

  for (const staticPath of staticPaths) {
    try {
      return asMeta(await fetchJson(staticPath));
    } catch {
      // fall through to the next source
    }
  }

  // 3) Local server API (disk)
  try {
    return await apiReadSkuMetaFromLocalServer();
  } catch {
    // fall through to the empty default
  }

  return { links: [], ignores: [] };
}
/* ---- GitHub history helpers ---- */
/**
 * List commits touching `path` on `branch` via the GitHub commits API.
 *
 * Requests 100 commits per page and reads at most two pages: the second page
 * is fetched only when the first one is full.
 *
 * @param {{owner: string, repo: string, branch: string, path: string}} args
 * @returns {Promise<Array>} Commit objects as returned by the API; an empty
 *   array when the first response is not an array.
 * @throws {Error} Propagates errors raised by `fetchJson`.
 */
export async function githubListCommits({ owner, repo, branch, path }) {
  const base = `https://api.github.com/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}/commits`;
  const pageUrl = (page) =>
    `${base}?sha=${encodeURIComponent(branch)}&path=${encodeURIComponent(path)}&per_page=100&page=${page}`;

  const page1 = await fetchJson(pageUrl(1));
  if (!Array.isArray(page1)) return [];
  if (page1.length !== 100) return page1;

  // A full first page suggests there may be more history.
  const page2 = await fetchJson(pageUrl(2));
  return [...page1, ...(Array.isArray(page2) ? page2 : [])];
}
/**
 * Download a file at a specific commit from raw.githubusercontent.com and parse it as JSON.
 *
 * @param {{owner: string, repo: string, sha: string, path: string}} args
 *   `path` is appended to the URL as-is (not URI-encoded).
 * @returns {Promise<any>} Parsed JSON content.
 * @throws {Error} Propagates `fetchText` errors; throws SyntaxError on invalid JSON.
 */
export async function githubFetchFileAtSha({ owner, repo, sha, path }) {
  const raw = `https://raw.githubusercontent.com/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}/${encodeURIComponent(sha)}/${path}`;
  const txt = await fetchText(raw);
  return JSON.parse(txt);
}

View file

@ -3,106 +3,106 @@ import { parsePriceToNumber, keySkuForRow, normSearchText } from "./sku.js";
// Build one row per *canonical* SKU (after applying sku map) + combined searchable text
/**
 * Collapse listing rows into one aggregate per canonical SKU.
 *
 * For each SKU the result carries the first non-empty name, a thumbnail
 * (preferring one from a row with that name), the cheapest price among live
 * (non-removed) rows, the set of live stores, the set of stores ever seen,
 * a sample URL and a normalised search string covering canonical and raw SKUs.
 * When a SKU has no live priced row, the price fields keep the values of the
 * first row seen for it.
 *
 * @param {Iterable<object>} listings - Listing rows.
 * @param {(sku: string) => string} [canonicalizeSkuFn] - Maps a raw SKU key to
 *   its canonical SKU; identity when not a function.
 * @returns {object[]} Aggregates sorted by name, then SKU.
 */
export function aggregateBySku(listings, canonicalizeSkuFn) {
  const canon = typeof canonicalizeSkuFn === "function" ? canonicalizeSkuFn : (x) => x;

  const bySku = new Map();
  // SKUs whose cheapest* fields currently come from a live row. Until a SKU is
  // in this set, its price is only the first-row fallback and any live priced
  // row must replace it, even if the fallback (possibly a removed row) is lower.
  const livePriced = new Set();

  for (const r of listings) {
    const rawSku = keySkuForRow(r);
    const sku = canon(rawSku);

    const name = String(r?.name || "");
    const url = String(r?.url || "");
    const storeLabel = String(r?.storeLabel || r?.store || "");
    const removed = Boolean(r?.removed);

    const img = normImg(r?.img || r?.image || r?.thumb || "");

    const pNum = parsePriceToNumber(r?.price);
    const pStr = String(r?.price || "");

    let agg = bySku.get(sku);
    if (!agg) {
      agg = {
        sku, // canonical sku
        name: name || "",
        img: "",
        cheapestPriceStr: pStr || "",
        cheapestPriceNum: pNum,
        cheapestStoreLabel: storeLabel || "",
        stores: new Set(), // LIVE stores only
        storesEver: new Set(), // live + removed presence (history)
        sampleUrl: url || "",
        _searchParts: [],
        searchText: "",

        _imgByName: new Map(),
        _imgAny: "",
      };
      bySku.set(sku, agg);
    }

    if (storeLabel) {
      agg.storesEver.add(storeLabel);
      if (!removed) agg.stores.add(storeLabel);
    }
    if (!agg.sampleUrl && url) agg.sampleUrl = url;

    // Keep first non-empty name, but keep thumbnail aligned to chosen name
    if (!agg.name && name) {
      agg.name = name;
      if (img) agg.img = img;
    } else if (agg.name && name === agg.name && img && !agg.img) {
      agg.img = img;
    }

    if (img) {
      if (!agg._imgAny) agg._imgAny = img;
      if (name) agg._imgByName.set(name, img);
    }

    // cheapest across LIVE rows only (so removed history doesn't "win")
    if (!removed && pNum !== null) {
      const replacesFallback = !livePriced.has(sku);
      if (replacesFallback || agg.cheapestPriceNum === null || pNum < agg.cheapestPriceNum) {
        agg.cheapestPriceNum = pNum;
        agg.cheapestPriceStr = pStr || "";
        agg.cheapestStoreLabel = storeLabel || agg.cheapestStoreLabel;
        livePriced.add(sku);
      }
    }

    // search parts: include canonical + raw sku so searching either works
    agg._searchParts.push(sku);
    if (rawSku && rawSku !== sku) agg._searchParts.push(rawSku);
    if (name) agg._searchParts.push(name);
    if (url) agg._searchParts.push(url);
    if (storeLabel) agg._searchParts.push(storeLabel);
    if (removed) agg._searchParts.push("removed");
  }

  const out = [...bySku.values()];

  for (const it of out) {
    if (!it.img) {
      const m = it._imgByName;
      if (it.name && m && m.has(it.name)) it.img = m.get(it.name) || "";
      else it.img = it._imgAny || "";
    }

    delete it._imgByName;
    delete it._imgAny;

    it.storeCount = it.stores.size;
    it.storeCountEver = it.storesEver.size;
    it.removedEverywhere = it.storeCount === 0;

    it._searchParts.push(it.sku);
    it._searchParts.push(it.name || "");
    it._searchParts.push(it.sampleUrl || "");
    it._searchParts.push(it.cheapestStoreLabel || "");
    it.searchText = normSearchText(it._searchParts.join(" | "));
    delete it._searchParts;
  }

  out.sort((a, b) => (String(a.name) + a.sku).localeCompare(String(b.name) + b.sku));
  return out;
}

View file

@ -1,59 +1,61 @@
// Replacement entities for the characters escaped by esc().
const HTML_ESCAPES = Object.freeze({ "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" });

/**
 * Escape &, <, >, " and ' for safe inclusion in HTML text or attribute values.
 *
 * @param {*} s - Value to escape; null/undefined become "".
 * @returns {string}
 */
export function esc(s) {
  return String(s ?? "").replace(/[&<>"']/g, (c) => HTML_ESCAPES[c]);
}

/**
 * Normalise an image URL: trimmed, with empty values and `data:` URIs mapped to "".
 *
 * @param {*} s
 * @returns {string}
 */
export function normImg(s) {
  const v = String(s || "").trim();
  if (!v) return "";
  if (/^data:/i.test(v)) return "";
  return v;
}

/**
 * Return the leading `YYYY-MM-DD` portion of a string, or "" when it does not start with one.
 *
 * @param {*} iso
 * @returns {string}
 */
export function dateOnly(iso) {
  const m = String(iso ?? "").match(/^(\d{4}-\d{2}-\d{2})/);
  return m ? m[1] : "";
}

// Formatter used by prettyTs(); created on first use and then reused.
let prettyTsFormat = null;

/**
 * Format a timestamp as e.g. "February 10 5:00pm" in the America/Vancouver
 * time zone, after rounding to the nearest hour.
 *
 * @param {*} iso - Any value accepted by `new Date(string)`.
 * @returns {string} Formatted text, or "" for empty or unparsable input.
 */
export function prettyTs(iso) {
  const s = String(iso || "");
  if (!s) return "";

  const t0 = new Date(s).getTime();
  if (!Number.isFinite(t0)) return "";

  // Round to nearest hour
  const d = new Date(Math.round(t0 / 3600000) * 3600000);

  prettyTsFormat ??= new Intl.DateTimeFormat("en-US", {
    timeZone: "America/Vancouver",
    month: "long",
    day: "numeric",
    hour: "numeric",
    minute: "2-digit",
    hour12: true,
  });

  // Index the parts by type; only the typed parts below are used, so the
  // literal separators produced by the formatter are discarded.
  const byType = Object.fromEntries(prettyTsFormat.formatToParts(d).map((p) => [p.type, p.value]));
  const month = byType.month ?? "";
  const day = byType.day ?? "";
  const hour = byType.hour ?? "";
  const minute = byType.minute ?? "";
  const dayPeriod = String(byType.dayPeriod || "").toLowerCase();

  return `${month} ${day} ${hour}:${minute}${dayPeriod}`;
}

/**
 * Build the HTML for a thumbnail: a lazy-loading <img> that hides itself on
 * load error, or a placeholder <div> when there is no usable image URL.
 *
 * @param {*} imgUrl - Image URL; normalised with normImg().
 * @param {string} [cls="thumb"] - CSS class for the <img>.
 * @returns {string} HTML markup with escaped attribute values.
 */
export function renderThumbHtml(imgUrl, cls = "thumb") {
  const img = normImg(imgUrl);
  if (!img) return `<div class="thumbPlaceholder"></div>`;
  return `<img referrerpolicy="no-referrer" class="${esc(cls)}" src="${esc(img)}" alt="" loading="lazy" onerror="this.style.display='none'" />`;
}

File diff suppressed because it is too large Load diff

View file

@ -2,91 +2,88 @@
import { keySkuForRow } from "../sku.js";
/**
 * True when the key, after trimming, is exactly six digits.
 *
 * @param {*} skuKey
 * @returns {boolean}
 */
function isRealSkuKey(skuKey) {
  const s = String(skuKey || "").trim();
  return /^\d{6}$/.test(s);
}
/**
 * True when the key starts with the `upc:` or `id:` prefix.
 *
 * @param {*} k
 * @returns {boolean}
 */
function isSoftSkuKey(k) {
  const s = String(k || "");
  return s.startsWith("upc:") || s.startsWith("id:");
}
/**
 * True when the key, after trimming, starts with the `u:` prefix.
 *
 * @param {*} k
 * @returns {boolean}
 */
function isUnknownSkuKey2(k) {
  const s = String(k || "").trim();
  return s.startsWith("u:");
}
/**
 * True when the lower-cased store label contains any of the substrings used
 * here to recognise BC stores.
 *
 * @param {*} label
 * @returns {boolean}
 */
function isBCStoreLabel(label) {
  const s = String(label || "").toLowerCase();
  const markers = ["bcl", "strath", "gull", "legacy", "tudor", "vessel", "arc", "vintagespirits"];
  return markers.some((marker) => s.includes(marker));
}
/**
 * True when at least one row whose SKU key equals `skuKey` has a store label
 * accepted by isBCStoreLabel().
 *
 * @param {Iterable<object>} allRows - Listing rows.
 * @param {string} skuKey - Key as produced by keySkuForRow().
 * @returns {boolean}
 */
function skuIsBC(allRows, skuKey) {
  for (const r of allRows) {
    if (keySkuForRow(r) !== skuKey) continue;
    const lab = String(r.storeLabel || r.store || "");
    if (isBCStoreLabel(lab)) return true;
  }
  return false;
}
/**
 * True when the lower-cased store label mentions "alberta", "calgary" or
 * "edmonton", or contains "ab" as a standalone word.
 *
 * @param {*} label
 * @returns {boolean}
 */
function isABStoreLabel(label) {
  const s = String(label || "").toLowerCase();
  if (s.includes("alberta") || s.includes("calgary") || s.includes("edmonton")) return true;
  return /\bab\b/.test(s);
}
/**
 * True when at least one row whose SKU key equals `skuKey` has a store label
 * accepted by isABStoreLabel().
 *
 * @param {Iterable<object>} allRows - Listing rows.
 * @param {string} skuKey - Key as produced by keySkuForRow().
 * @returns {boolean}
 */
function skuIsAB(allRows, skuKey) {
  for (const r of allRows) {
    if (keySkuForRow(r) !== skuKey) continue;
    const lab = String(r.storeLabel || r.store || "");
    if (isABStoreLabel(lab)) return true;
  }
  return false;
}
/**
 * Score how suitable a SKU key is as the canonical key of a group (higher is better).
 *
 * Base score by key kind: six-digit key 1000, `upc:`/`id:` key 200, any other
 * key that is not `u:`-prefixed 100, `u:`-prefixed key -1000. Then +25 when
 * the SKU appears at a store matched by isABStoreLabel() and -10 when it
 * appears at one matched by isBCStoreLabel().
 *
 * @param {Iterable<object>} allRows - Listing rows.
 * @param {*} skuKey
 * @returns {number}
 */
function scoreCanonical(allRows, skuKey) {
  const s = String(skuKey || "");

  let base;
  if (isRealSkuKey(s)) base = 1000;
  else if (isSoftSkuKey(s)) base = 200;
  else if (!isUnknownSkuKey2(s)) base = 100;
  else base = -1000;

  const ab = skuIsAB(allRows, s) ? 1 : 0;
  const bc = skuIsBC(allRows, s) ? 1 : 0;

  return base + ab * 25 - bc * 10;
}
/**
 * Pick the key with the highest scoreCanonical() value from `skuKeys`.
 *
 * Keys are trimmed and empty ones skipped. Ties are broken by choosing the
 * key that compares lowest as a string.
 *
 * @param {Iterable<object>} allRows - Listing rows used for scoring.
 * @param {Iterable<*>} skuKeys - Candidate keys.
 * @returns {string} The preferred key, or "" when there is no usable candidate.
 */
export function pickPreferredCanonical(allRows, skuKeys) {
  let best = "";
  let bestScore = -Infinity;

  for (const k of skuKeys) {
    const s = String(k || "").trim();
    if (!s) continue;

    const sc = scoreCanonical(allRows, s);
    if (sc > bestScore) {
      bestScore = sc;
      best = s;
    } else if (sc === bestScore && best && s < best) {
      best = s;
    }
  }

  return best;
}

View file

@ -1,78 +1,77 @@
// viz/app/linker/price.js
/**
 * Build a function that scores how plausible it is, price-wise, that two SKUs
 * are the same product.
 *
 * Prices from `allAgg` are grouped by canonical SKU (via `rules.canonicalSku`
 * when available, otherwise the SKU itself), keeping only the `kPerGroup`
 * lowest positive, finite prices per group.
 *
 * @param {object} [options]
 * @param {Iterable<{sku: *, cheapestPriceNum: *}>} [options.allAgg] - Aggregated items.
 * @param {{canonicalSku?: function(string): *}} [options.rules] - Optional canonicalizer.
 * @param {number} [options.kPerGroup=6] - Maximum prices retained per canonical group.
 * @returns {function(*, *): number} `pricePenaltyForPair(aSku, bSku)` returning a
 *   multiplier in [0.35, 1.0]; 1.0 means "no penalty" (including when either
 *   side has no known price).
 */
export function buildPricePenaltyForPair({ allAgg, rules, kPerGroup = 6 } = {}) {
  // Resolve a SKU to its canonical group key, falling back to the SKU itself.
  const canonOf = (sku) => {
    const mapped = typeof rules?.canonicalSku === "function" ? rules.canonicalSku(sku) : "";
    return String(mapped || sku);
  };

  // canonSku -> ascending array of up to kPerGroup lowest prices
  const groupPrices = new Map();

  function insertPrice(arr, p) {
    // Keep sorted ascending; dropping the tail keeps only the cheapest entries.
    let i = 0;
    while (i < arr.length && arr[i] <= p) i++;
    arr.splice(i, 0, p);
    if (arr.length > kPerGroup) arr.length = kPerGroup;
  }

  for (const it of allAgg || []) {
    if (!it) continue;
    const sku = String(it.sku || "");
    if (!sku) continue;

    // Coerce so numeric strings compare numerically; reject null/NaN/Infinity/<=0.
    const p = it.cheapestPriceNum == null ? NaN : Number(it.cheapestPriceNum);
    if (!Number.isFinite(p) || p <= 0) continue;

    const canon = canonOf(sku);
    let arr = groupPrices.get(canon);
    if (!arr) groupPrices.set(canon, (arr = []));
    insertPrice(arr, p);
  }

  function bestRelativeGap(prA, prB) {
    // Smallest |a-b| / min(a,b) over all price pairs.
    let best = Infinity;
    for (const a of prA) {
      for (const b of prB) {
        const gap = Math.abs(a - b) / Math.max(1e-9, Math.min(a, b));
        if (gap < best) best = gap;
        if (best <= 0.001) return best; // effectively identical; stop early
      }
    }
    return best;
  }

  function gapToMultiplier(gap) {
    // gap = 0.40 => 40% relative difference
    // <=35%: no penalty
    // 35-50%: ease down linearly to 0.75
    // >50%: continue down gently, floor at 0.35
    if (!(gap >= 0)) return 1.0;
    if (gap <= 0.35) return 1.0;
    if (gap <= 0.5) {
      const t = (gap - 0.35) / 0.15; // 0..1
      return 1.0 - 0.25 * t; // 1.00 -> 0.75
    }
    return Math.max(0.35, 0.75 * (0.5 / gap));
  }

  return function pricePenaltyForPair(aSku, bSku) {
    const a = String(aSku || "");
    const b = String(bSku || "");
    if (!a || !b) return 1.0;

    const prA = groupPrices.get(canonOf(a));
    const prB = groupPrices.get(canonOf(b));
    if (!prA || !prB || !prA.length || !prB.length) return 1.0;

    const gap = bestRelativeGap(prA, prB);
    if (!Number.isFinite(gap)) return 1.0;

    return gapToMultiplier(gap);
  };
}

View file

@ -3,267 +3,288 @@ import { tokenizeQuery, normSearchText } from "../sku.js";
// Ignore ultra-common / low-signal tokens in bottle names.
const SIM_STOP_TOKENS = new Set([
  // articles / prepositions / conjunctions
  "the",
  "a",
  "an",
  "and",
  "of",
  "to",
  "in",
  "for",
  "with",
  // age-statement words
  "year",
  "years",
  "yr",
  "yrs",
  "old",
  // generic product vocabulary
  "whisky",
  "whiskey",
  "scotch",
  "single",
  "malt",
  "cask",
  "finish",
  "edition",
  "release",
  "batch",
  "strength",
  "abv",
  "proof",
  "anniversary",
]);
// The literal word "smws" (case-insensitive) must appear in the name.
const SMWS_WORD_RE = /\bsmws\b/i;
// A dotted numeric code such as "12.34": 1-3 digits, a dot, 1-4 digits.
const SMWS_CODE_RE = /\b(\d{1,3}\.\d{1,4})\b/;

/**
 * Extract the dotted numeric code from a name that mentions "smws".
 *
 * @param {*} name - Product name; non-strings are coerced, falsy becomes "".
 * @returns {string} The first `N.N` code found, or "" when the name does not
 *   contain the word "smws" or has no such code.
 */
export function smwsKeyFromName(name) {
  const s = String(name || "");
  if (!SMWS_WORD_RE.test(s)) return "";
  const m = s.match(SMWS_CODE_RE);
  return m ? m[1] : "";
}
// Whole-token ordinal such as "1st", "22nd", "3rd", "12th".
const ORDINAL_RE = /^(\d+)(st|nd|rd|th)$/i;

/**
 * Reduce a token to its numeric key.
 *
 * @param {*} t - Token; coerced to string, trimmed and lower-cased.
 * @returns {string} The digits for a pure-integer token ("12") or an ordinal
 *   ("12th" -> "12"); "" for anything else, including empty input.
 */
export function numKey(t) {
  const s = String(t || "")
    .trim()
    .toLowerCase();
  if (!s) return "";
  if (/^\d+$/.test(s)) return s;
  const m = s.match(ORDINAL_RE);
  return m ? m[1] : "";
}
/**
 * Whether a token is an integer or ordinal, i.e. `numKey(t)` is non-empty.
 *
 * @param {*} t - Token to test.
 * @returns {boolean}
 */
function isNumberToken(t) {
  return Boolean(numKey(t));
}
// "12 year", "12yrs", "aged 12 years", ... (1-2 digit number before a year word)
const AGE_YEARS_RE = /\b(?:aged\s*)?(\d{1,2})\s*(?:yr|yrs|year|years)\b/i;
// "12yo" / "12 yo"
const AGE_YO_RE = /\b(\d{1,2})\s*yo\b/i;

/**
 * Extract an age statement from a product name.
 *
 * The "N year(s)/yr(s)" form is tried first, then the "N yo" form.
 *
 * @param {*} normName - Name text (coerced to string; falsy becomes "").
 * @returns {string} The age as a decimal string without leading zeros
 *   ("08" -> "8"), or "" when no age statement is found.
 */
export function extractAgeFromText(normName) {
  const s = String(normName || "");
  if (!s) return "";

  const m = s.match(AGE_YEARS_RE) || s.match(AGE_YO_RE);
  // parseInt normalises away leading zeros so "08" and "8" compare equal.
  return m && m[1] ? String(parseInt(m[1], 10)) : "";
}
// Spelling variants collapsed to one form before stop-word filtering.
const SIM_EQUIV = new Map([
  ["years", "yr"],
  ["year", "yr"],
  ["yrs", "yr"],
  ["yr", "yr"],
  ["whiskey", "whisky"],
  ["whisky", "whisky"],
  ["bourbon", "bourbon"],
]);
// Stand-alone volume-unit tokens.
const VOL_UNIT = new Set(["ml", "l", "cl", "oz", "liter", "liters", "litre", "litres"]);
const VOL_INLINE_RE = /^\d+(?:\.\d+)?(?:ml|l|cl|oz)$/i; // 700ml, 1.14l
const PCT_INLINE_RE = /^\d+(?:\.\d+)?%$/; // 46%, 40.0%
const HAS_ALNUM_RE = /[a-z0-9]/i;
const PLAIN_NUMBER_RE = /^\d+(?:\.\d+)?$/;

// Trim + lower-case a raw token; falsy values become "".
function normSimToken(raw) {
  return String(raw || "")
    .trim()
    .toLowerCase();
}

/**
 * Reduce a token list to the tokens that carry signal for name similarity.
 *
 * Drops, in order: empty / punctuation-only tokens, inline volumes ("700ml")
 * and percentages ("46%"), volume units and "abv"/"proof", a number that is
 * immediately followed by a volume unit (together with that unit), non-numeric
 * stop tokens from `SIM_STOP_TOKENS`, and duplicates. Ordinals are reduced to
 * their digits ("12th" -> "12").
 *
 * @param {*} tokens - Array of raw tokens; any non-array is treated as empty.
 * @returns {string[]} Unique normalised tokens in first-seen order.
 */
export function filterSimTokens(tokens) {
  const kept = new Set(); // insertion-ordered, so it doubles as the output list
  const arr = Array.isArray(tokens) ? tokens : [];

  for (let i = 0; i < arr.length; i++) {
    let t = normSimToken(arr[i]);
    if (!t) continue;
    if (!HAS_ALNUM_RE.test(t)) continue;

    if (VOL_INLINE_RE.test(t)) continue;
    if (PCT_INLINE_RE.test(t)) continue;

    t = SIM_EQUIV.get(t) || t;

    const nk = numKey(t);
    if (nk) t = nk;

    if (VOL_UNIT.has(t) || t === "abv" || t === "proof") continue;

    if (PLAIN_NUMBER_RE.test(t)) {
      const next = normSimToken(arr[i + 1]);
      const nextNorm = SIM_EQUIV.get(next) || next;
      if (VOL_UNIT.has(nextNorm)) {
        i++; // consume the unit token as well ("700", "ml")
        continue;
      }
    }

    // Numbers are never treated as stop tokens.
    if (!isNumberToken(t) && SIM_STOP_TOKENS.has(t)) continue;

    kept.add(t);
  }

  return Array.from(kept);
}
/**
 * Penalise pairs whose names both contain numbers but share none of them.
 *
 * @param {Array<*>} [aTokens] - Tokens of the first name.
 * @param {Array<*>} [bTokens] - Tokens of the second name.
 * @returns {number} 1.0 when either side has no numeric token or at least one
 *   numeric key (see `numKey`) is shared; 0.28 otherwise.
 */
export function numberMismatchPenalty(aTokens, bTokens) {
  const aNums = new Set((aTokens || []).map(numKey).filter(Boolean));
  const bNums = new Set((bTokens || []).map(numKey).filter(Boolean));
  if (!aNums.size || !bNums.size) return 1.0;

  for (const n of aNums) {
    if (bNums.has(n)) return 1.0;
  }
  return 0.28;
}
/**
 * Levenshtein edit distance (insert / delete / substitute, each cost 1)
 * between two strings, compared by UTF-16 code unit.
 *
 * Uses a single rolling row of length `b.length + 1`.
 *
 * @param {*} a - First string (coerced; falsy becomes "").
 * @param {*} b - Second string (coerced; falsy becomes "").
 * @returns {number} The edit distance.
 */
export function levenshtein(a, b) {
  a = String(a || "");
  b = String(b || "");
  const n = a.length;
  const m = b.length;
  if (!n) return m;
  if (!m) return n;

  // dp[j] = distance between the processed prefix of a and b[0..j)
  const dp = new Array(m + 1);
  for (let j = 0; j <= m; j++) dp[j] = j;

  for (let i = 1; i <= n; i++) {
    let prev = dp[0]; // value of dp[j-1] from the previous row (diagonal)
    dp[0] = i;
    const ca = a.charCodeAt(i - 1);
    for (let j = 1; j <= m; j++) {
      const tmp = dp[j];
      const cost = ca === b.charCodeAt(j - 1) ? 0 : 1;
      dp[j] = Math.min(dp[j] + 1, dp[j - 1] + 1, prev + cost);
      prev = tmp;
    }
  }

  return dp[m];
}
/**
 * F1-style overlap between two token lists after `filterSimTokens`.
 *
 * With `hit` = number of tokens of the smaller set found in the larger set,
 * "recall" is `hit / |smaller|`, "precision" is `hit / |larger|`, and the
 * result is their harmonic mean.
 *
 * @param {Array<*>} [aTokens] - Raw tokens of the first name.
 * @param {Array<*>} [bTokens] - Raw tokens of the second name.
 * @returns {number} A value in [0, 1]; 0 when either filtered list is empty.
 */
export function tokenContainmentScore(aTokens, bTokens) {
  const A = filterSimTokens(aTokens || []);
  const B = filterSimTokens(bTokens || []);
  if (!A.length || !B.length) return 0;

  const aSet = new Set(A);
  const bSet = new Set(B);
  const aIsSmall = aSet.size <= bSet.size;
  const small = aIsSmall ? aSet : bSet;
  const big = aIsSmall ? bSet : aSet;

  let hit = 0;
  for (const t of small) {
    if (big.has(t)) hit++;
  }

  const recall = hit / Math.max(1, small.size);
  const precision = hit / Math.max(1, big.size);
  // The 1e-9 floor avoids 0/0 when there is no overlap at all.
  return (2 * precision * recall) / Math.max(1e-9, precision + recall);
}
/**
 * Full similarity score between two product names (higher = more similar).
 *
 * Combines: first-token match, overlap of the remaining tokens, normalised
 * Levenshtein similarity of the normalised names, a number-mismatch penalty,
 * an age-statement boost/penalty and a token-containment boost.
 *
 * Names are normalised with `normSearchText` and tokenised with
 * `tokenizeQuery` (both imported from "../sku.js").
 *
 * @param {*} aName - First product name.
 * @param {*} bName - Second product name.
 * @returns {number} Non-negative score; 0 when either name normalises to
 *   nothing or has no tokens left after `filterSimTokens`.
 */
export function similarityScore(aName, bName) {
  const a = normSearchText(aName);
  const b = normSearchText(bName);
  if (!a || !b) return 0;

  const aAge = extractAgeFromText(a);
  const bAge = extractAgeFromText(b);
  const ageBoth = !!(aAge && bAge);
  const ageMatch = ageBoth && aAge === bAge;
  const ageMismatch = ageBoth && aAge !== bAge;

  const aToksRaw = tokenizeQuery(a);
  const bToksRaw = tokenizeQuery(b);

  const aToks = filterSimTokens(aToksRaw);
  const bToks = filterSimTokens(bToksRaw);
  if (!aToks.length || !bToks.length) return 0;

  const contain = tokenContainmentScore(aToksRaw, bToksRaw);

  const aFirst = aToks[0] || "";
  const bFirst = bToks[0] || "";
  const firstMatch = aFirst && bFirst && aFirst === bFirst ? 1 : 0;

  // Overlap of everything after the first token, relative to the longer tail.
  const A = new Set(aToks.slice(1));
  const B = new Set(bToks.slice(1));
  let inter = 0;
  for (const w of A) {
    if (B.has(w)) inter++;
  }
  const denom = Math.max(1, Math.max(A.size, B.size));
  const overlapTail = inter / denom;

  const d = levenshtein(a, b);
  const maxLen = Math.max(1, Math.max(a.length, b.length));
  const levSim = 1 - d / maxLen;

  // Without a first-token match, tail overlap only counts as far as containment allows.
  let gate = firstMatch ? 1.0 : Math.min(0.8, 0.06 + 0.95 * contain);

  // Short names with a different first token and weak containment are damped hard.
  const smallN = Math.min(aToks.length, bToks.length);
  if (!firstMatch && smallN <= 3 && contain < 0.78) gate *= 0.18;

  const numGate = numberMismatchPenalty(aToks, bToks);

  let s = numGate * (firstMatch * 3.0 + overlapTail * 2.2 * gate + levSim * (firstMatch ? 1.0 : 0.1 + 0.7 * contain));

  if (ageMatch) s *= 2.2;
  else if (ageMismatch) s *= 0.18;

  s *= 1 + 0.9 * contain;

  return s;
}
/**
 * Cheaper variant of `similarityScore` that takes pre-computed tokens and
 * normalised names and skips the Levenshtein term, using a shared 10-character
 * name prefix as a small bonus instead.
 *
 * @param {Array<*>} [aTokens] - Raw tokens of the first name.
 * @param {Array<*>} [bTokens] - Raw tokens of the second name.
 * @param {*} [aNormName] - Normalised first name (used for age and prefix).
 * @param {*} [bNormName] - Normalised second name (used for age and prefix).
 * @returns {number} Non-negative score; 0 when either token list is empty
 *   after `filterSimTokens`.
 */
export function fastSimilarityScore(aTokens, bTokens, aNormName, bNormName) {
  const aTokensRaw = aTokens || [];
  const bTokensRaw = bTokens || [];

  const aTokF = filterSimTokens(aTokensRaw);
  const bTokF = filterSimTokens(bTokensRaw);
  if (!aTokF.length || !bTokF.length) return 0;

  const a = String(aNormName || "");
  const b = String(bNormName || "");

  const aAge = extractAgeFromText(a);
  const bAge = extractAgeFromText(b);
  const ageBoth = !!(aAge && bAge);
  const ageMatch = ageBoth && aAge === bAge;
  const ageMismatch = ageBoth && aAge !== bAge;

  const contain = tokenContainmentScore(aTokensRaw, bTokensRaw);

  const aFirst = aTokF[0] || "";
  const bFirst = bTokF[0] || "";
  const firstMatch = aFirst && bFirst && aFirst === bFirst ? 1 : 0;

  // Overlap of everything after the first token, relative to the longer tail.
  const aTail = aTokF.slice(1);
  const bTail = bTokF.slice(1);
  const bSet = new Set(bTail);
  let inter = 0;
  for (const t of aTail) {
    if (bSet.has(t)) inter++;
  }
  const denom = Math.max(1, Math.max(aTail.length, bTail.length));
  const overlapTail = inter / denom;

  // Small bonus when the first tokens match and the names share a non-empty 10-char prefix.
  const aPrefix = a.slice(0, 10);
  const bPrefix = b.slice(0, 10);
  const pref = firstMatch && aPrefix && bPrefix && aPrefix === bPrefix ? 0.2 : 0;

  let gate = firstMatch ? 1.0 : Math.min(0.8, 0.06 + 0.95 * contain);
  const smallN = Math.min(aTokF.length, bTokF.length);
  if (!firstMatch && smallN <= 3 && contain < 0.78) gate *= 0.18;

  const numGate = numberMismatchPenalty(aTokF, bTokF);

  let s = numGate * (firstMatch * 2.4 + overlapTail * 2.0 * gate + pref);

  if (ageMatch) s *= 2.0;
  else if (ageMismatch) s *= 0.2;

  s *= 1 + 0.9 * contain;

  return s;
}

View file

@ -4,43 +4,43 @@ import { keySkuForRow } from "../sku.js";
const SIZE_TOLERANCE_ML = 8;
/**
 * Find bottle sizes mentioned in free text and return them in millilitres.
 *
 * Recognises a number followed by ml, cl, l, litre(s) or liter(s)
 * (case-insensitive, optional whitespace between number and unit).
 * Values are rounded to whole millilitres; only sizes in the range
 * 50-5000 ml are kept.
 *
 * @param {*} text - Text to scan (coerced to string; falsy becomes "").
 * @returns {number[]} Unique sizes in ml, in order of first appearance.
 */
export function parseSizesMlFromText(text) {
  const s = String(text || "").toLowerCase();
  if (!s) return [];

  const out = new Set();
  const re = /\b(\d+(?:\.\d+)?)\s*(ml|cl|l|litre|litres|liter|liters)\b/g;

  for (const [, num, unit] of s.matchAll(re)) {
    const val = parseFloat(num);
    if (!Number.isFinite(val) || val <= 0) continue;

    let ml;
    if (unit === "ml") ml = Math.round(val);
    else if (unit === "cl") ml = Math.round(val * 10);
    else ml = Math.round(val * 1000); // every remaining unit is a litre spelling

    if (ml >= 50 && ml <= 5000) out.add(ml);
  }

  return Array.from(out);
}
/**
 * Whether any size in `aSet` is within the tolerance of any size in `bSet`.
 *
 * @param {Set<number>} [aSet] - Sizes (ml) for the first item.
 * @param {Set<number>} [bSet] - Sizes (ml) for the second item.
 * @param {number} [toleranceMl=SIZE_TOLERANCE_ML] - Maximum absolute
 *   difference, in ml, still counted as the same size.
 * @returns {boolean} False when either set is missing or empty.
 */
function sizeSetsMatch(aSet, bSet, toleranceMl = SIZE_TOLERANCE_ML) {
  if (!aSet?.size || !bSet?.size) return false;
  for (const a of aSet) {
    for (const b of bSet) {
      if (Math.abs(a - b) <= toleranceMl) return true;
    }
  }
  return false;
}
/**
 * Similarity multiplier based on bottle sizes.
 *
 * @param {Set<number>} [aSet] - Sizes (ml) for the first item.
 * @param {Set<number>} [bSet] - Sizes (ml) for the second item.
 * @returns {number} 1.0 when either side has no known size or the sets match
 *   (see `sizeSetsMatch`); 0.08 when both have sizes and none match.
 */
export function sizePenalty(aSet, bSet) {
  // Unknown size on either side is not evidence of a mismatch.
  if (!aSet?.size || !bSet?.size) return 1.0;
  return sizeSetsMatch(aSet, bSet) ? 1.0 : 0.08;
}
/**
@ -48,61 +48,61 @@ export function sizePenalty(aSet, bSet) {
* This keeps linker_page.js clean and makes cache rebuild explicit when rules change.
*/
/**
 * Build a function that applies `sizePenalty` to two SKUs using the bottle
 * sizes seen for their canonical groups.
 *
 * Sizes are parsed (via `parseSizesMlFromText`) from the names of non-removed
 * rows and of aggregated items, collected per SKU, then merged per canonical
 * SKU for every SKU present in `allAgg`.
 *
 * @param {object} options
 * @param {Iterable<object>} [options.allRows] - Raw rows; `removed` rows are ignored.
 * @param {Iterable<object>} [options.allAgg] - Aggregated items with `sku` and `name`.
 * @param {{canonicalSku: function(string): *}} options.rules - Canonicalizer.
 * @returns {function(*, *): number} `sizePenaltyForPair(aSku, bSku)`.
 */
export function buildSizePenaltyForPair({ allRows, allAgg, rules }) {
  const rows = allRows || [];
  const agg = allAgg || [];

  // Canonical key for a SKU; falls back to the SKU itself so that build-time
  // and lookup-time keys always agree.
  const canonOf = (sku) => {
    const key = String(sku || "").trim();
    return key ? String(rules.canonicalSku(key) || key) : "";
  };

  const skuSizes = new Map(); // skuKey -> Set<int ml>

  const addSizes = (skuKey, text) => {
    const sizes = parseSizesMlFromText(text);
    if (!sizes.length) return;
    let set = skuSizes.get(skuKey);
    if (!set) skuSizes.set(skuKey, (set = new Set()));
    for (const ml of sizes) set.add(ml);
  };

  for (const r of rows) {
    if (!r || r.removed) continue;
    const skuKey = String(keySkuForRow(r) || "").trim();
    if (!skuKey) continue;
    addSizes(skuKey, r.name || r.title || r.productName || "");
  }

  for (const it of agg) {
    const skuKey = String(it?.sku || "").trim();
    if (!skuKey || !it?.name) continue;
    addSizes(skuKey, it.name);
  }

  const canonSizes = new Map(); // canon -> Set<int ml>

  for (const it of agg) {
    const skuKey = String(it?.sku || "").trim();
    if (!skuKey) continue;

    const canon = canonOf(skuKey);
    let canonSet = canonSizes.get(canon);
    if (!canonSet) canonSizes.set(canon, (canonSet = new Set()));

    const skuSet = skuSizes.get(skuKey);
    if (skuSet) for (const ml of skuSet) canonSet.add(ml);
  }

  return function sizePenaltyForPair(aSku, bSku) {
    const A = canonSizes.get(canonOf(aSku)) || new Set();
    const B = canonSizes.get(canonOf(bSku)) || new Set();
    return sizePenalty(A, B);
  };
}

View file

@ -1,43 +1,42 @@
// viz/app/linker/store_cache.js
/**
 * Canonical group key for a SKU.
 *
 * @param {{canonicalSku: function(string): *}} rules - Canonicalizer.
 * @param {*} skuKey - SKU key (coerced to string and trimmed).
 * @returns {string} `rules.canonicalSku(sku)` when truthy, otherwise the
 *   trimmed SKU itself; "" for a blank SKU.
 */
function canonKeyForSku(rules, skuKey) {
  const s = String(skuKey || "").trim();
  if (!s) return "";
  return String(rules.canonicalSku(s) || s);
}

/**
 * Index the store labels of every aggregated item by canonical SKU.
 *
 * @param {Iterable<{sku: *, stores?: Set<*>}>} [allAgg] - Aggregated items;
 *   null entries and items without a SKU are skipped.
 * @param {{canonicalSku: function(string): *}} rules - Canonicalizer.
 * @returns {Map<string, Set<*>>} canonSku -> Set of store labels. An item
 *   without stores still creates an (empty) entry for its group.
 */
export function buildCanonStoreCache(allAgg, rules) {
  const m = new Map(); // canonSku -> Set<storeLabel>

  for (const it of allAgg || []) {
    if (!it) continue;

    const canon = canonKeyForSku(rules, it.sku);
    if (!canon) continue;

    let set = m.get(canon);
    if (!set) m.set(canon, (set = new Set()));

    const stores = it.stores;
    if (stores && stores.size) for (const s of stores) set.add(s);
  }

  return m;
}

/**
 * Store labels recorded for the canonical group of a SKU.
 *
 * @returns {Set<*>} The cached set, or a fresh empty set when unknown.
 */
function canonStoresForSku(rules, canonStoreCache, skuKey) {
  const canon = canonKeyForSku(rules, skuKey);
  return (canon && canonStoreCache.get(canon)) || new Set();
}

/**
 * Build a predicate telling whether two SKUs' canonical groups share a store.
 *
 * @param {{canonicalSku: function(string): *}} rules - Canonicalizer.
 * @param {Map<string, Set<*>>} canonStoreCache - Result of `buildCanonStoreCache`.
 * @returns {function(*, *): boolean} `sameStoreCanon(aSku, bSku)`; false when
 *   either group has no known stores.
 */
export function makeSameStoreCanonFn(rules, canonStoreCache) {
  return function sameStoreCanon(aSku, bSku) {
    const A = canonStoresForSku(rules, canonStoreCache, String(aSku || ""));
    const B = canonStoresForSku(rules, canonStoreCache, String(bSku || ""));
    if (!A.size || !B.size) return false;

    for (const s of A) {
      if (B.has(s)) return true;
    }
    return false;
  };
}

File diff suppressed because it is too large Load diff

View file

@ -2,46 +2,46 @@
import { keySkuForRow } from "../sku.js";
/**
 * Heuristic score used to choose between candidate URLs for the same item.
 *
 * @param {{url?: *}} [r] - Row-like object carrying a `url`.
 * @returns {number} -1 when there is no URL; otherwise the URL length, plus 50
 *   if it contains a `product/<digits>/` segment, plus 10 if it contains a run
 *   of at least 8 characters from `[a-z0-9-]` (case-insensitive).
 */
function urlQuality(r) {
  const u = String(r?.url || "").trim();
  if (!u) return -1;

  let score = u.length;
  if (/\bproduct\/\d+\//.test(u)) score += 50;
  if (/[a-z0-9-]{8,}/i.test(u)) score += 10;
  return score;
}
/**
 * Pick one URL per (SKU, store) pair from the raw rows.
 *
 * When several rows supply a URL for the same pair, the one with the higher
 * `urlQuality` wins; equal scores are resolved by the lexicographically
 * smaller URL so the result is independent of row order.
 *
 * @param {Iterable<object>} [allRows] - Raw rows; null and `removed` rows, and
 *   rows lacking a SKU key, store label or URL, are skipped.
 * @returns {Map<string, Map<string, string>>} skuKey -> (storeLabel -> url).
 */
export function buildUrlBySkuStore(allRows) {
  const urlBySkuStore = new Map(); // skuKey -> Map(storeLabel -> url)

  for (const r of allRows || []) {
    if (!r || r.removed) continue;

    const skuKey = String(keySkuForRow(r) || "").trim();
    if (!skuKey) continue;

    const storeLabel = String(r.storeLabel || r.store || "").trim();
    const url = String(r.url || "").trim();
    if (!storeLabel || !url) continue;

    let byStore = urlBySkuStore.get(skuKey);
    if (!byStore) urlBySkuStore.set(skuKey, (byStore = new Map()));

    const prevUrl = byStore.get(storeLabel);
    if (prevUrl) {
      const prevScore = urlQuality({ url: prevUrl });
      const nextScore = urlQuality(r);
      const isBetter = nextScore > prevScore || (nextScore === prevScore && url < prevUrl);
      if (!isBetter) continue;
    }

    byStore.set(storeLabel, url);
  }

  return urlBySkuStore;
}

File diff suppressed because it is too large Load diff

View file

@ -15,23 +15,23 @@ import { renderStore } from "./store_page.js";
import { renderStats, destroyStatsChart } from "./stats_page.js";
/**
 * Hash-based router: renders the page matching `location.hash` into `#app`.
 *
 * Routes: `#/` -> search, `#/item/<sku>` -> item, `#/store/<label>` -> store,
 * `#/link` -> SKU linker, `#/stats` -> stats. Anything else falls back to
 * search. Does nothing when the `#app` element is missing.
 */
function route() {
  const $app = document.getElementById("app");
  if (!$app) return;

  // always clean up chart when navigating
  destroyChart();
  destroyStatsChart();

  const h = location.hash || "#/";
  const parts = h.replace(/^#\/?/, "").split("/").filter(Boolean);
  const [page, arg] = parts;

  // decodeURIComponent throws URIError on malformed escapes (e.g. "%E0%A4%A");
  // a hand-edited hash must not break navigation, so fall back to the raw text.
  const safeDecode = (segment) => {
    try {
      return decodeURIComponent(segment);
    } catch {
      return segment;
    }
  };

  if (parts.length === 0) return renderSearch($app);
  if (page === "item" && arg) return renderItem($app, safeDecode(arg));
  if (page === "store" && arg) return renderStore($app, safeDecode(arg));
  if (page === "link") return renderSkuLinker($app);
  if (page === "stats") return renderStats($app);

  return renderSearch($app);
}
window.addEventListener("hashchange", route);

View file

@ -5,225 +5,225 @@ import { applyPendingToMeta } from "./pending.js";
// Module-level cache slot; null means nothing is cached.
let CACHED = null;

/**
 * Drop the module-level cache by resetting `CACHED` to null.
 */
export function clearSkuRulesCache() {
  CACHED = null;
}
/**
 * Normalise an implicit `id:<digits>` SKU key to a zero-padded 6-digit string.
 *
 * @param {*} k - SKU key (coerced to string and trimmed).
 * @returns {string} For `id:N` with 1-6 digits (case-insensitive prefix), the
 *   digits left-padded with zeros to length 6; otherwise the trimmed input.
 */
function normalizeImplicitSkuKey(k) {
  const s = String(k || "").trim();
  const m = s.match(/^id:(\d{1,6})$/i);
  return m ? m[1].padStart(6, "0") : s;
}
/**
 * Order-independent key for a pair of SKUs.
 *
 * Both sides are passed through `normalizeImplicitSkuKey`, then joined with
 * "|" with the lexicographically smaller one first.
 *
 * @param {*} a - First SKU key.
 * @param {*} b - Second SKU key.
 * @returns {string} `"<min>|<max>"`, or "" when either side is blank.
 */
function canonicalPairKey(a, b) {
  const x = normalizeImplicitSkuKey(a);
  const y = normalizeImplicitSkuKey(b);
  if (!x || !y) return "";
  return x < y ? `${x}|${y}` : `${y}|${x}`;
}
/**
 * Build a directed fromSku -> toSku map from link records.
 *
 * Keep this for reference/debug; grouping no longer depends on direction.
 *
 * @param {*} links - Array of `{fromSku, toSku}`; any non-array is treated as empty.
 * @returns {Map<string, string>} Normalised fromSku -> toSku. Self-links and
 *   links with a blank side are skipped; a later link for the same fromSku
 *   overwrites an earlier one.
 */
function buildForwardMap(links) {
  const m = new Map();
  for (const x of Array.isArray(links) ? links : []) {
    const fromSku = normalizeImplicitSkuKey(x?.fromSku);
    const toSku = normalizeImplicitSkuKey(x?.toSku);
    if (fromSku && toSku && fromSku !== toSku) m.set(fromSku, toSku);
  }
  return m;
}
/**
 * Build the set of SKU pairs that must be ignored.
 *
 * Each record may name its two sides as `skuA`/`skuB`, `a`/`b` or
 * `left`/`right` (first truthy one wins per side).
 *
 * @param {*} ignores - Array of ignore records; any non-array is treated as empty.
 * @returns {Set<string>} Order-independent pair keys from `canonicalPairKey`;
 *   records with a blank side are skipped.
 */
function buildIgnoreSet(ignores) {
  const s = new Set();
  for (const x of Array.isArray(ignores) ? ignores : []) {
    const a = String(x?.skuA || x?.a || x?.left || "").trim();
    const b = String(x?.skuB || x?.b || x?.right || "").trim();
    const k = canonicalPairKey(a, b);
    if (k) s.add(k);
  }
  return s;
}
/* ---------------- Union-Find grouping (hardened) ---------------- */
/**
 * Disjoint-set (union-find) over string keys, with union by rank and path
 * compression. Keys are trimmed; blank keys are ignored.
 */
class DSU {
  constructor() {
    this.parent = new Map(); // key -> parent key (a root is its own parent)
    this.rank = new Map(); // root key -> rank used to keep trees shallow
  }

  /** Register `x` as its own singleton set if it is not known yet. */
  _add(x) {
    if (!this.parent.has(x)) {
      this.parent.set(x, x);
      this.rank.set(x, 0);
    }
  }

  /**
   * Find the representative (root) of the set containing `x`, registering `x`
   * if needed.
   *
   * @param {*} x - Key (coerced to string and trimmed).
   * @returns {string} The root key, or "" for a blank key.
   */
  find(x) {
    x = String(x || "").trim();
    if (!x) return "";
    this._add(x);

    // Iterative walk to the root: cannot overflow the call stack on long chains.
    let root = x;
    while (this.parent.get(root) !== root) root = this.parent.get(root);

    // Path compression: point every node on the path straight at the root.
    let cur = x;
    while (cur !== root) {
      const next = this.parent.get(cur);
      this.parent.set(cur, root);
      cur = next;
    }

    return root;
  }

  /**
   * Merge the sets containing `a` and `b`. No-op when either key is blank or
   * both are already in the same set.
   *
   * @param {*} a - First key.
   * @param {*} b - Second key.
   */
  union(a, b) {
    a = String(a || "").trim();
    b = String(b || "").trim();
    if (!a || !b || a === b) return;

    const ra = this.find(a);
    const rb = this.find(b);
    if (!ra || !rb || ra === rb) return;

    const rka = this.rank.get(ra) || 0;
    const rkb = this.rank.get(rb) || 0;

    // Attach the lower-rank root under the higher-rank one.
    if (rka < rkb) {
      this.parent.set(ra, rb);
    } else if (rkb < rka) {
      this.parent.set(rb, ra);
    } else {
      this.parent.set(rb, ra);
      this.rank.set(ra, rka + 1);
    }
  }
}
/**
 * Whether a SKU key is an "unknown" key, i.e. starts with the `u:` prefix.
 *
 * @param {*} key - SKU key (coerced to string; not trimmed).
 * @returns {boolean}
 */
function isUnknownSkuKey(key) {
  return String(key || "").startsWith("u:");
}
/**
 * Whether a SKU key consists solely of decimal digits (after trimming).
 *
 * @param {*} key - SKU key (coerced to string).
 * @returns {boolean}
 */
function isNumericSku(key) {
  return /^\d+$/.test(String(key || "").trim());
}
/**
 * Comparator giving a stable ordering used to choose a canonical
 * representative (the smallest element wins).
 *
 * Order: real (non-`u:`) keys before unknown (`u:`) keys; among two all-digit
 * keys, ascending numeric value; otherwise plain lexicographic order.
 *
 * @param {*} a - First SKU key (coerced to string and trimmed).
 * @param {*} b - Second SKU key (coerced to string and trimmed).
 * @returns {number} -1, 0 or 1, suitable for `Array.prototype.sort`.
 */
function compareSku(a, b) {
  a = String(a || "").trim();
  b = String(b || "").trim();
  if (a === b) return 0;

  const aUnknown = isUnknownSkuKey(a);
  const bUnknown = isUnknownSkuKey(b);
  if (aUnknown !== bUnknown) return aUnknown ? 1 : -1; // real first

  if (isNumericSku(a) && isNumericSku(b)) {
    // compare as integers (safe: these are small SKU strings)
    const na = Number(a);
    const nb = Number(b);
    if (Number.isFinite(na) && Number.isFinite(nb) && na !== nb) return na < nb ? -1 : 1;
    // Same numeric value but different text (e.g. "007" vs "7"): fall through.
  }

  // fallback lex
  return a < b ? -1 : 1;
}
/**
 * Group linked SKUs into connected components and pick one canonical
 * representative per component.
 *
 * Each link contributes its normalized `fromSku` and `toSku`; links where
 * either side normalizes to a falsy value are ignored. Grouping is
 * undirected (union-find), so link direction and cycles do not matter.
 * The representative of a group is its first member under `compareSku`.
 *
 * @param {Array<{fromSku: *, toSku: *}>} links - link records; a non-array is treated as empty
 * @returns {{canonBySku: Map<string, string>, groupsByCanon: Map<string, Set<string>>}}
 *   `canonBySku` maps every linked SKU to its representative;
 *   `groupsByCanon` maps each representative to the full member set.
 */
function buildGroupsAndCanonicalMap(links) {
  const dsu = new DSU();
  const all = new Set();

  for (const x of Array.isArray(links) ? links : []) {
    const a = normalizeImplicitSkuKey(x?.fromSku);
    const b = normalizeImplicitSkuKey(x?.toSku);
    if (!a || !b) continue;

    all.add(a);
    all.add(b);

    // IMPORTANT: union is undirected for grouping (hardened vs cycles)
    dsu.union(a, b);
  }

  // root -> Set(members)
  const groupsByRoot = new Map();
  for (const s of all) {
    const r = dsu.find(s);
    if (!r) continue;
    let set = groupsByRoot.get(r);
    if (!set) groupsByRoot.set(r, (set = new Set()));
    set.add(s);
  }

  // Choose a canonical representative per group
  const repByRoot = new Map();
  for (const [root, members] of groupsByRoot.entries()) {
    const arr = Array.from(members);
    arr.sort(compareSku);
    const rep = arr[0] || root;
    repByRoot.set(root, rep);
  }

  // sku -> canonical rep
  const canonBySku = new Map();
  // canonical rep -> Set(members) (what the rest of the app uses)
  const groupsByCanon = new Map();

  for (const [root, members] of groupsByRoot.entries()) {
    const rep = repByRoot.get(root) || root;
    let g = groupsByCanon.get(rep);
    if (!g) groupsByCanon.set(rep, (g = new Set([rep])));
    for (const s of members) {
      canonBySku.set(s, rep);
      g.add(s);
    }
  }

  return { canonBySku, groupsByCanon };
}
/**
 * Load the SKU link/ignore rules and build the lookup helpers around them.
 *
 * The result is memoized in the module-level `CACHED`; later calls return
 * the same object without reloading.
 *
 * @returns {Promise<object>} rules object with `links`, `ignores`,
 *   `forwardMap`, `toGroups`, `ignoreSet`, and the helper functions
 *   `canonicalSku`, `groupForCanonical`, `isIgnoredPair`, `canonicalPairKey`.
 */
export async function loadSkuRules() {
  if (CACHED) return CACHED;

  let meta = await loadSkuMetaBestEffort();

  // On GitHub Pages (read-only), overlay local pending+submitted edits from localStorage
  if (!isLocalWriteMode()) {
    meta = applyPendingToMeta(meta);
  }

  const links = Array.isArray(meta?.links) ? meta.links : [];
  const ignores = Array.isArray(meta?.ignores) ? meta.ignores : [];

  // keep forwardMap for visibility/debug; grouping uses union-find
  const forwardMap = buildForwardMap(links);

  const { canonBySku, groupsByCanon } = buildGroupsAndCanonicalMap(links);
  const ignoreSet = buildIgnoreSet(ignores);

  // Map any SKU to its group's representative; unlinked SKUs map to their normalized self.
  function canonicalSku(sku) {
    const s = normalizeImplicitSkuKey(sku);
    if (!s) return s;
    return canonBySku.get(s) || s;
  }

  // Return a fresh copy of the member set so callers cannot mutate the cached group.
  function groupForCanonical(toSku) {
    const canon = canonicalSku(toSku);
    const g = groupsByCanon.get(canon);
    return g ? new Set(g) : new Set([canon]);
  }

  function isIgnoredPair(a, b) {
    const k = canonicalPairKey(a, b);
    return k ? ignoreSet.has(k) : false;
  }

  CACHED = {
    links,
    ignores,
    forwardMap,

    // "toGroups" retained name for compatibility with existing code
    toGroups: groupsByCanon,
    ignoreSet,

    canonicalSku,
    groupForCanonical,
    isIgnoredPair,
    canonicalPairKey,
  };

  return CACHED;
}

View file

@ -3,219 +3,217 @@ const LS_KEY = "stviz:v1:pendingSkuEdits";
const LS_SUBMITTED_KEY = "stviz:v1:submittedSkuEdits";
/**
 * Parse JSON without throwing.
 *
 * @param {*} s - JSON text; falsy values are treated as the empty string
 * @returns {*} the parsed value, or null when parsing fails
 */
function safeParseJson(s) {
  try {
    return JSON.parse(String(s || ""));
  } catch {
    return null;
  }
}
/**
 * Normalize a SKU value to a trimmed string.
 *
 * @param {*} s - raw SKU; falsy values become the empty string
 * @returns {string}
 */
function normSku(s) {
  return String(s || "").trim();
}
/**
 * Build the directed dedupe key for a link (from→to).
 *
 * The two normalized SKUs are joined with "→" so that different pairs cannot
 * produce the same key (plain concatenation would make "ab"+"c" and
 * "a"+"bc" identical).
 *
 * @param {*} fromSku - source SKU
 * @param {*} toSku - target SKU
 * @returns {string} the key, or "" when either SKU is empty or both are equal
 */
function linkKey(fromSku, toSku) {
  const f = normSku(fromSku);
  const t = normSku(toSku);
  if (!f || !t || f === t) return "";
  return `${f}→${t}`;
}
/**
 * Build the order-independent dedupe key for an ignore pair.
 *
 * @param {*} a - first SKU
 * @param {*} b - second SKU
 * @returns {string} "<smaller>|<larger>" by string comparison, or "" when
 *   either SKU is empty or both are equal
 */
function pairKey(a, b) {
  const x = normSku(a);
  const y = normSku(b);
  if (!x || !y || x === y) return "";
  return x < y ? `${x}|${y}` : `${y}|${x}`;
}
/**
 * Read an edits payload from localStorage and sanitize it.
 *
 * Storage access errors and unparseable JSON both yield an empty payload.
 * Link entries are normalized to `{fromSku, toSku}` and dropped when
 * `linkKey` rejects them; ignore entries are normalized to `{skuA, skuB}`
 * (also accepting the `a`/`b` field names) and dropped when `pairKey`
 * rejects them.
 *
 * @param {string} key - localStorage key to read
 * @returns {{createdAt: string, links: Array<{fromSku: string, toSku: string}>, ignores: Array<{skuA: string, skuB: string}>}}
 */
function loadEditsFromKey(key) {
  // localStorage may be missing or may throw; treat that as "nothing stored"
  const raw = (() => {
    try {
      return localStorage.getItem(key) || "";
    } catch {
      return "";
    }
  })();

  const j = safeParseJson(raw);
  const links = Array.isArray(j?.links) ? j.links : [];
  const ignores = Array.isArray(j?.ignores) ? j.ignores : [];

  return {
    createdAt: String(j?.createdAt || ""),
    links: links
      .map((x) => ({ fromSku: normSku(x?.fromSku), toSku: normSku(x?.toSku) }))
      .filter((x) => linkKey(x.fromSku, x.toSku)),
    ignores: ignores
      .map((x) => ({ skuA: normSku(x?.skuA || x?.a), skuB: normSku(x?.skuB || x?.b) }))
      .filter((x) => pairKey(x.skuA, x.skuB)),
  };
}
/**
 * Write an edits payload to localStorage.
 *
 * @param {string} key - localStorage key to write
 * @param {{createdAt?: string, links?: Array, ignores?: Array}} edits - payload;
 *   a missing `createdAt` defaults to the current ISO timestamp and
 *   non-array `links`/`ignores` become empty arrays
 * @returns {{createdAt: string, links: Array, ignores: Array}} the object that
 *   was serialized, returned even when the write itself failed
 */
function saveEditsToKey(key, edits) {
  const out = {
    createdAt: edits?.createdAt || new Date().toISOString(),
    links: Array.isArray(edits?.links) ? edits.links : [],
    ignores: Array.isArray(edits?.ignores) ? edits.ignores : [],
  };
  try {
    localStorage.setItem(key, JSON.stringify(out));
  } catch {
    // best-effort: storage may be unavailable or full; the caller still gets `out`
  }
  return out;
}
/**
 * Load the pending (not yet submitted) edits stored under `LS_KEY`.
 *
 * @returns {{createdAt: string, links: Array, ignores: Array}}
 */
export function loadPendingEdits() {
  return loadEditsFromKey(LS_KEY);
}
/**
 * Store the pending edits under `LS_KEY`.
 *
 * @param {{createdAt?: string, links?: Array, ignores?: Array}} edits
 * @returns {{createdAt: string, links: Array, ignores: Array}} the stored payload
 */
export function savePendingEdits(edits) {
  return saveEditsToKey(LS_KEY, edits);
}
/**
 * Remove the pending edits entry (`LS_KEY`) from localStorage.
 * Failures are ignored on purpose (best-effort cleanup).
 */
export function clearPendingEdits() {
  try {
    localStorage.removeItem(LS_KEY);
  } catch {
    // storage unavailable: nothing to clear
  }
}
/**
 * Load the already-submitted edits stored under `LS_SUBMITTED_KEY`.
 *
 * @returns {{createdAt: string, links: Array, ignores: Array}}
 */
export function loadSubmittedEdits() {
  return loadEditsFromKey(LS_SUBMITTED_KEY);
}
/**
 * Store the submitted edits under `LS_SUBMITTED_KEY`.
 *
 * @param {{createdAt?: string, links?: Array, ignores?: Array}} edits
 * @returns {{createdAt: string, links: Array, ignores: Array}} the stored payload
 */
export function saveSubmittedEdits(edits) {
  return saveEditsToKey(LS_SUBMITTED_KEY, edits);
}
/**
 * Remove the submitted edits entry (`LS_SUBMITTED_KEY`) from localStorage.
 * Failures are ignored on purpose (best-effort cleanup).
 */
export function clearSubmittedEdits() {
  try {
    localStorage.removeItem(LS_SUBMITTED_KEY);
  } catch {
    // storage unavailable: nothing to clear
  }
}
/**
 * Count the pending edits.
 *
 * @returns {{links: number, ignores: number, total: number}} number of
 *   pending links, pending ignores, and their sum
 */
export function pendingCounts() {
  const e = loadPendingEdits();
  return {
    links: e.links.length,
    ignores: e.ignores.length,
    total: e.links.length + e.ignores.length,
  };
}
/**
 * Queue a new from→to link in the pending edits.
 *
 * @param {*} fromSku - source SKU
 * @param {*} toSku - target SKU
 * @returns {boolean} true when the link was added; false when the pair is
 *   invalid (`linkKey` returns "") or the same link is already pending or
 *   submitted
 */
export function addPendingLink(fromSku, toSku) {
  const f = normSku(fromSku);
  const t = normSku(toSku);
  const k = linkKey(f, t);
  if (!k) return false;

  const pending = loadPendingEdits();
  const submitted = loadSubmittedEdits();

  // a link counts as "seen" if it is already pending or already submitted
  const seen = new Set(
    [
      ...pending.links.map((x) => linkKey(x.fromSku, x.toSku)),
      ...submitted.links.map((x) => linkKey(x.fromSku, x.toSku)),
    ].filter(Boolean),
  );

  if (seen.has(k)) return false;

  pending.links.push({ fromSku: f, toSku: t });
  savePendingEdits(pending);
  return true;
}
/**
 * Queue a new ignore pair in the pending edits.
 *
 * @param {*} skuA - first SKU of the pair
 * @param {*} skuB - second SKU of the pair
 * @returns {boolean} true when the pair was added; false when the pair is
 *   invalid (`pairKey` returns "") or the same unordered pair is already
 *   pending or submitted
 */
export function addPendingIgnore(skuA, skuB) {
  const a = normSku(skuA);
  const b = normSku(skuB);
  const k = pairKey(a, b);
  if (!k) return false;

  const pending = loadPendingEdits();
  const submitted = loadSubmittedEdits();

  // a pair counts as "seen" if it is already pending or already submitted
  const seen = new Set(
    [
      ...pending.ignores.map((x) => pairKey(x.skuA, x.skuB)),
      ...submitted.ignores.map((x) => pairKey(x.skuA, x.skuB)),
    ].filter(Boolean),
  );

  if (seen.has(k)) return false;

  pending.ignores.push({ skuA: a, skuB: b });
  savePendingEdits(pending);
  return true;
}
// Merge PENDING + SUBMITTED into a meta object {links, ignores}
/**
 * Overlay the locally stored pending and submitted edits on top of `meta`.
 *
 * The input is not mutated: `links` and `ignores` are copied before entries
 * are appended. Overlay entries whose key is already present are skipped
 * (links by `linkKey`, ignores by `pairKey`).
 *
 * @param {{generatedAt?: string, links?: Array, ignores?: Array}} meta - base metadata
 * @returns {{generatedAt: string, links: Array, ignores: Array}} merged copy
 */
export function applyPendingToMeta(meta) {
  const base = {
    generatedAt: String(meta?.generatedAt || ""),
    links: Array.isArray(meta?.links) ? meta.links.slice() : [],
    ignores: Array.isArray(meta?.ignores) ? meta.ignores.slice() : [],
  };

  const p0 = loadPendingEdits();
  const p1 = loadSubmittedEdits();
  const overlay = {
    links: [...(p0.links || []), ...(p1.links || [])],
    ignores: [...(p0.ignores || []), ...(p1.ignores || [])],
  };

  // merge links (dedupe by from→to)
  const seenL = new Set(
    base.links.map((x) => linkKey(String(x?.fromSku || "").trim(), String(x?.toSku || "").trim())).filter(Boolean),
  );
  for (const x of overlay.links) {
    const k = linkKey(x.fromSku, x.toSku);
    if (!k || seenL.has(k)) continue;
    seenL.add(k);
    base.links.push({ fromSku: x.fromSku, toSku: x.toSku });
  }

  // merge ignores (dedupe by canonical pair key)
  const seenI = new Set(
    base.ignores
      .map((x) => pairKey(String(x?.skuA || x?.a || "").trim(), String(x?.skuB || x?.b || "").trim()))
      .filter(Boolean),
  );
  for (const x of overlay.ignores) {
    const k = pairKey(x.skuA, x.skuB);
    if (!k || seenI.has(k)) continue;
    seenI.add(k);
    base.ignores.push({ skuA: x.skuA, skuB: x.skuB });
  }

  return base;
}
// Move everything from pending -> submitted, then clear pending.
// Returns the moved payload (what should be sent in PR/issue).
/**
 * Append all pending edits to the submitted store (skipping entries already
 * present there), persist the result, and clear the pending store.
 *
 * When nothing is pending, storage is left untouched.
 *
 * @returns {{createdAt: string, links: Array, ignores: Array}} the pending
 *   payload as it was loaded at the start of the call
 */
export function movePendingToSubmitted() {
  const pending = loadPendingEdits();
  if (!pending.links.length && !pending.ignores.length) return pending;

  const sub = loadSubmittedEdits();

  const seenL = new Set(sub.links.map((x) => linkKey(x.fromSku, x.toSku)).filter(Boolean));
  for (const x of pending.links) {
    const k = linkKey(x.fromSku, x.toSku);
    if (!k || seenL.has(k)) continue;
    seenL.add(k);
    sub.links.push({ fromSku: x.fromSku, toSku: x.toSku });
  }

  const seenI = new Set(sub.ignores.map((x) => pairKey(x.skuA, x.skuB)).filter(Boolean));
  for (const x of pending.ignores) {
    const k = pairKey(x.skuA, x.skuB);
    if (!k || seenI.has(k)) continue;
    seenI.add(k);
    sub.ignores.push({ skuA: x.skuA, skuB: x.skuB });
  }

  saveSubmittedEdits(sub);
  clearPendingEdits();
  return pending;
}

View file

@ -1,21 +1,12 @@
import { esc, renderThumbHtml, prettyTs } from "./dom.js";
import {
tokenizeQuery,
matchesAllTokens,
displaySku,
keySkuForRow,
parsePriceToNumber,
} from "./sku.js";
import { tokenizeQuery, matchesAllTokens, displaySku, keySkuForRow, parsePriceToNumber } from "./sku.js";
import { loadIndex, loadRecent, loadSavedQuery, saveQuery } from "./state.js";
import { aggregateBySku } from "./catalog.js";
import { loadSkuRules } from "./mapping.js";
import {
smwsDistilleryCodesForQueryPrefix,
smwsDistilleryCodeFromName,
} from "./smws.js";
import { smwsDistilleryCodesForQueryPrefix, smwsDistilleryCodeFromName } from "./smws.js";
export function renderSearch($app) {
$app.innerHTML = `
$app.innerHTML = `
<div class="container">
<div class="header">
<!-- Row 1 -->
@ -50,123 +41,117 @@ export function renderSearch($app) {
</div>
`;
const $q = document.getElementById("q");
const $results = document.getElementById("results");
const $stores = document.getElementById("stores");
const $clearSearch = document.getElementById("clearSearch");
const $q = document.getElementById("q");
const $results = document.getElementById("results");
const $stores = document.getElementById("stores");
const $clearSearch = document.getElementById("clearSearch");
$q.value = loadSavedQuery();
$q.value = loadSavedQuery();
let aggBySku = new Map();
let allAgg = [];
let indexReady = false;
let aggBySku = new Map();
let allAgg = [];
let indexReady = false;
// canonicalSku -> storeLabel -> url
let URL_BY_SKU_STORE = new Map();
// canonicalSku -> storeLabel -> url
let URL_BY_SKU_STORE = new Map();
/**
 * Build a lookup of listing URLs: sku -> storeLabel -> url.
 *
 * Rows that are falsy, flagged `removed`, or missing a SKU key, store label
 * or URL are skipped. The first URL seen for a given (sku, store) wins.
 *
 * @param {Array<object>} listings - listing rows; a non-array is treated as empty
 * @param {Function} [canonicalSkuFn] - optional mapper from row SKU key to
 *   canonical SKU; when omitted the row's own SKU key is used
 * @returns {Map<string, Map<string, string>>}
 */
function buildUrlMap(listings, canonicalSkuFn) {
  const out = new Map();
  for (const r of Array.isArray(listings) ? listings : []) {
    if (!r || r.removed) continue;

    const skuKey = String(keySkuForRow(r) || "").trim();
    if (!skuKey) continue;

    const sku = String(canonicalSkuFn ? canonicalSkuFn(skuKey) : skuKey);
    if (!sku) continue;

    const storeLabel = String(r.storeLabel || r.store || "").trim();
    const url = String(r.url || "").trim();
    if (!storeLabel || !url) continue;

    let m = out.get(sku);
    if (!m) out.set(sku, (m = new Map()));
    if (!m.has(storeLabel)) m.set(storeLabel, url);
  }
  return out;
}
/**
 * Look up the listing URL for an aggregate item at a given store.
 *
 * @param {{sku?: *}} it - aggregate item; its `sku` is the first-level key
 * @param {*} storeLabel - store label used as the second-level key
 * @returns {string} the URL from `URL_BY_SKU_STORE`, or "" when there is none
 */
function urlForAgg(it, storeLabel) {
  const sku = String(it?.sku || "");
  const s = String(storeLabel || "");
  return URL_BY_SKU_STORE.get(sku)?.get(s) || "";
}
/**
 * Normalize a store label to a trimmed string.
 *
 * @param {*} s - raw label; falsy values become the empty string
 * @returns {string}
 */
function normStoreLabel(s) {
  return String(s || "").trim();
}
/**
 * Render one link button per store into `$stores`.
 *
 * Store labels are collected from every listing row, de-duplicated and
 * sorted with `localeCompare`. A single break marker is emitted after the
 * store at which the cumulative label length first reaches half of the
 * total label length. With no stores, `$stores` is emptied.
 *
 * @param {Array<object>} listings - listing rows; a non-array is treated as empty
 */
function renderStoreButtons(listings) {
  // include all stores seen (live or removed) so the selector is stable
  const set = new Set();
  for (const r of Array.isArray(listings) ? listings : []) {
    const lab = normStoreLabel(r?.storeLabel || r?.store || "");
    if (lab) set.add(lab);
  }
  const stores = Array.from(set).sort((a, b) => a.localeCompare(b));

  if (!stores.length) {
    $stores.innerHTML = "";
    return;
  }

  const totalChars = stores.reduce((n, s) => n + s.length, 0);
  const target = totalChars / 2;

  let acc = 0;
  let breakAt = stores.length;

  // find the first index where the running label length reaches half the total
  for (let i = 0; i < stores.length; i++) {
    acc += stores[i].length;
    if (acc >= target) {
      breakAt = i + 1;
      break;
    }
  }

  $stores.innerHTML = stores
    .map((s, i) => {
      const btn = `<a class="storeBtn" href="#/store/${encodeURIComponent(s)}">${esc(s)}</a>`;
      const brk =
        i === breakAt - 1 && stores.length > 1 ? `<span class="storeBreak" aria-hidden="true"></span>` : "";
      return btn + brk;
    })
    .join("");
}
function renderAggregates(items) {
if (!items.length) {
$results.innerHTML = `<div class="small">No matches.</div>`;
return;
}
function renderAggregates(items) {
if (!items.length) {
$results.innerHTML = `<div class="small">No matches.</div>`;
return;
}
const limited = items.slice(0, 80);
$results.innerHTML = limited
.map((it) => {
const storeCount = it.stores.size || 0;
const plus = storeCount > 1 ? ` +${storeCount - 1}` : "";
const price = it.cheapestPriceStr ? it.cheapestPriceStr : "(no price)";
const store = it.cheapestStoreLabel || [...it.stores][0] || "Store";
const limited = items.slice(0, 80);
$results.innerHTML = limited
.map((it) => {
const storeCount = it.stores.size || 0;
const plus = storeCount > 1 ? ` +${storeCount - 1}` : "";
const price = it.cheapestPriceStr ? it.cheapestPriceStr : "(no price)";
const store = it.cheapestStoreLabel || [...it.stores][0] || "Store";
// link must match the displayed store label
const href = urlForAgg(it, store) || String(it.sampleUrl || "").trim();
const storeBadge = href
? `<a class="badge" href="${esc(
href
)}" target="_blank" rel="noopener noreferrer" onclick="event.stopPropagation()">${esc(
store
)}${esc(plus)}</a>`
: `<span class="badge">${esc(store)}${esc(plus)}</span>`;
// link must match the displayed store label
const href = urlForAgg(it, store) || String(it.sampleUrl || "").trim();
const storeBadge = href
? `<a class="badge" href="${esc(
href,
)}" target="_blank" rel="noopener noreferrer" onclick="event.stopPropagation()">${esc(
store,
)}${esc(plus)}</a>`
: `<span class="badge">${esc(store)}${esc(plus)}</span>`;
const skuLink = `#/link/?left=${encodeURIComponent(
String(it.sku || "")
)}`;
const skuLink = `#/link/?left=${encodeURIComponent(String(it.sku || ""))}`;
return `
return `
<div class="item" data-sku="${esc(it.sku)}">
<div class="itemRow">
<div class="thumbBox">
@ -176,10 +161,10 @@ export function renderSearch($app) {
<div class="itemTop">
<div class="itemName">${esc(it.name || "(no name)")}</div>
<a class="badge mono skuLink" href="${esc(
skuLink
)}" target="_blank" rel="noopener noreferrer" onclick="event.stopPropagation()">${esc(
displaySku(it.sku)
)}</a>
skuLink,
)}" target="_blank" rel="noopener noreferrer" onclick="event.stopPropagation()">${esc(
displaySku(it.sku),
)}</a>
</div>
<div class="metaRow">
<span class="mono price">${esc(price)}</span>
@ -189,299 +174,280 @@ export function renderSearch($app) {
</div>
</div>
`;
})
.join("");
})
.join("");
for (const el of Array.from($results.querySelectorAll(".item"))) {
el.addEventListener("click", () => {
const sku = el.getAttribute("data-sku") || "";
if (!sku) return;
saveQuery($q.value);
sessionStorage.setItem("viz:lastRoute", location.hash);
location.hash = `#/item/${encodeURIComponent(sku)}`;
});
}
}
for (const el of Array.from($results.querySelectorAll(".item"))) {
el.addEventListener("click", () => {
const sku = el.getAttribute("data-sku") || "";
if (!sku) return;
saveQuery($q.value);
sessionStorage.setItem("viz:lastRoute", location.hash);
location.hash = `#/item/${encodeURIComponent(sku)}`;
});
}
}
/**
 * Percentage discount from an old price to a lower new price.
 *
 * @param {*} oldRaw - previous price, parsed with `parsePriceToNumber`
 * @param {*} newRaw - current price, parsed with `parsePriceToNumber`
 * @returns {number|null} rounded percent off (> 0), or null when either
 *   price is not finite, the old price is not positive, the new price is
 *   not lower, or the rounded percentage is not positive
 */
function salePctOff(oldRaw, newRaw) {
  const oldN = parsePriceToNumber(oldRaw);
  const newN = parsePriceToNumber(newRaw);
  if (!Number.isFinite(oldN) || !Number.isFinite(newN)) return null;
  if (!(oldN > 0)) return null;
  if (!(newN < oldN)) return null;
  const pct = Math.round(((oldN - newN) / oldN) * 100);
  return Number.isFinite(pct) && pct > 0 ? pct : null;
}
/**
 * Signed percentage change from an old price to a new price.
 *
 * @param {*} oldRaw - previous price, parsed with `parsePriceToNumber`
 * @param {*} newRaw - current price, parsed with `parsePriceToNumber`
 * @returns {number|null} rounded percent change (negative for a decrease),
 *   or null when either price is not finite or the old price is not positive
 */
function pctChange(oldRaw, newRaw) {
  const oldN = parsePriceToNumber(oldRaw);
  const newN = parsePriceToNumber(newRaw);
  if (!Number.isFinite(oldN) || !Number.isFinite(newN)) return null;
  if (!(oldN > 0)) return null;
  const pct = Math.round(((newN - oldN) / oldN) * 100);
  return Number.isFinite(pct) ? pct : null;
}
/**
 * Millisecond timestamp of a change record.
 *
 * Uses `Date.parse` on `r.ts` first and falls back to `Date.parse` on
 * `r.date`.
 *
 * @param {{ts?: *, date?: *}} r - change record
 * @returns {number} epoch milliseconds, or 0 when neither field parses
 */
function tsValue(r) {
  const t = String(r?.ts || "");
  const ms = t ? Date.parse(t) : NaN;
  if (Number.isFinite(ms)) return ms;
  const d = String(r?.date || "");
  const ms2 = d ? Date.parse(d) : NaN;
  return Number.isFinite(ms2) ? ms2 : 0;
}
// Custom priority (unchanged)
/**
 * Compute the ranking metadata for one recent-change record.
 *
 * The record's kind is normalized first: a "price_change" whose old and new
 * prices both parse becomes "price_down" or "price_up". The score is then
 * bucketed (higher sorts earlier):
 *   - price_down: 5900-9000+ when the new price matches the aggregate's
 *     cheapest price, 1000-4500+ otherwise, scaled by percent off
 *   - new item present in at most one store: 8000
 *   - restored: 5000
 *   - removed: 3000
 *   - price_up: 2000 plus percent up (clamped to 0..99)
 *   - other new items: 1100
 *   - anything else: 0
 * `tie` breaks equal scores: percent * 100000 + timestamp for price moves,
 * plain timestamp otherwise.
 *
 * @param {object} r - recent-change record (`sku`, `kind`, `oldPrice`,
 *   `newPrice`, `storeLabel`/`store`, `ts`/`date` are read)
 * @param {Function} [canonSkuFn] - optional mapper from raw to canonical SKU
 * @returns {{sku: string, kind: string, pctOff: (number|null), storeCount: number, isNewUnique: boolean, score: number, tie: number}}
 */
function rankRecent(r, canonSkuFn) {
  const rawSku = String(r?.sku || "");
  const sku = String(canonSkuFn ? canonSkuFn(rawSku) : rawSku);

  const agg = aggBySku.get(sku) || null;

  const storeLabelRaw = String(r?.storeLabel || r?.store || "").trim();
  const bestStoreRaw = String(agg?.cheapestStoreLabel || "").trim();

  const normStore = (s) =>
    String(s || "")
      .trim()
      .toLowerCase();

  // Normalize kind
  let kind = String(r?.kind || "");
  if (kind === "price_change") {
    const o = parsePriceToNumber(r?.oldPrice || "");
    const n = parsePriceToNumber(r?.newPrice || "");
    if (Number.isFinite(o) && Number.isFinite(n)) {
      if (n < o) kind = "price_down";
      else if (n > o) kind = "price_up";
    }
  }

  const pctOff = kind === "price_down" ? salePctOff(r?.oldPrice || "", r?.newPrice || "") : null;
  const pctUp = kind === "price_up" ? pctChange(r?.oldPrice || "", r?.newPrice || "") : null;

  const isNew = kind === "new";
  const storeCount = agg?.stores?.size || 0;
  const isNewUnique = isNew && storeCount <= 1;

  // Cheapest checks (use aggregate index)
  const newPriceNum = kind === "price_down" || kind === "price_up" ? parsePriceToNumber(r?.newPrice || "") : null;
  const bestPriceNum = Number.isFinite(agg?.cheapestPriceNum) ? agg.cheapestPriceNum : null;

  // prices are compared with a one-cent tolerance
  const EPS = 0.01;
  const priceMatchesBest =
    Number.isFinite(newPriceNum) && Number.isFinite(bestPriceNum)
      ? Math.abs(newPriceNum - bestPriceNum) <= EPS
      : false;

  const storeIsBest =
    normStore(storeLabelRaw) && normStore(bestStoreRaw) && normStore(storeLabelRaw) === normStore(bestStoreRaw);

  const saleIsCheapestHere = kind === "price_down" && storeIsBest && priceMatchesBest;
  const saleIsTiedCheapest = kind === "price_down" && !storeIsBest && priceMatchesBest;
  const saleIsCheapest = saleIsCheapestHere || saleIsTiedCheapest;

  // Bucketed scoring (higher = earlier)
  let score = 0;

  function saleBucketScore(isCheapest, pct) {
    const p = Number.isFinite(pct) ? pct : 0;

    if (isCheapest) {
      if (p >= 20) return 9000 + p;
      if (p >= 10) return 7000 + p;
      if (p > 0) return 6000 + p;
      return 5900;
    } else {
      if (p >= 20) return 4500 + p;
      if (p >= 10) return 1500 + p;
      if (p > 0) return 1200 + p;
      return 1000;
    }
  }

  if (kind === "price_down") {
    score = saleBucketScore(saleIsCheapest, pctOff);
  } else if (isNewUnique) {
    score = 8000;
  } else if (kind === "removed") {
    score = 3000;
  } else if (kind === "price_up") {
    score = 2000 + Math.min(99, Math.max(0, pctUp || 0));
  } else if (kind === "new") {
    score = 1100;
  } else if (kind === "restored") {
    score = 5000;
  } else {
    score = 0;
  }

  let tie = 0;
  if (kind === "price_down") tie = (pctOff || 0) * 100000 + tsValue(r);
  else if (kind === "price_up") tie = (pctUp || 0) * 100000 + tsValue(r);
  else tie = tsValue(r);

  return { sku, kind, pctOff, storeCount, isNewUnique, score, tie };
}
function renderRecent(recent, canonicalSkuFn) {
const items = Array.isArray(recent?.items) ? recent.items : [];
if (!items.length) {
$results.innerHTML = `<div class="small">Type to search…</div>`;
return;
}
function renderRecent(recent, canonicalSkuFn) {
const items = Array.isArray(recent?.items) ? recent.items : [];
if (!items.length) {
$results.innerHTML = `<div class="small">Type to search…</div>`;
return;
}
const canon =
typeof canonicalSkuFn === "function" ? canonicalSkuFn : (x) => x;
const canon = typeof canonicalSkuFn === "function" ? canonicalSkuFn : (x) => x;
const nowMs = Date.now();
const cutoffMs = nowMs - 3 * 24 * 60 * 60 * 1000;
const nowMs = Date.now();
const cutoffMs = nowMs - 3 * 24 * 60 * 60 * 1000;
function eventMs(r) {
const t = String(r?.ts || "");
const ms = t ? Date.parse(t) : NaN;
if (Number.isFinite(ms)) return ms;
function eventMs(r) {
const t = String(r?.ts || "");
const ms = t ? Date.parse(t) : NaN;
if (Number.isFinite(ms)) return ms;
const d = String(r?.date || "");
const ms2 = d ? Date.parse(d + "T00:00:00Z") : NaN;
return Number.isFinite(ms2) ? ms2 : 0;
}
const d = String(r?.date || "");
const ms2 = d ? Date.parse(d + "T00:00:00Z") : NaN;
return Number.isFinite(ms2) ? ms2 : 0;
}
const inWindow = items.filter((r) => {
const ms = eventMs(r);
return ms >= cutoffMs && ms <= nowMs;
});
const inWindow = items.filter((r) => {
const ms = eventMs(r);
return ms >= cutoffMs && ms <= nowMs;
});
if (!inWindow.length) {
$results.innerHTML = `<div class="small">No changes in the last 3 days.</div>`;
return;
}
if (!inWindow.length) {
$results.innerHTML = `<div class="small">No changes in the last 3 days.</div>`;
return;
}
const bySkuStore = new Map();
const bySkuStore = new Map();
for (const r of inWindow) {
const rawSku = String(r?.sku || "").trim();
if (!rawSku) continue;
for (const r of inWindow) {
const rawSku = String(r?.sku || "").trim();
if (!rawSku) continue;
const sku = String(canon(rawSku) || "").trim();
if (!sku) continue;
const sku = String(canon(rawSku) || "").trim();
if (!sku) continue;
const storeLabel = String(r?.storeLabel || r?.store || "Store").trim() || "Store";
const ms = eventMs(r);
const storeLabel = String(r?.storeLabel || r?.store || "Store").trim() || "Store";
const ms = eventMs(r);
let storeMap = bySkuStore.get(sku);
if (!storeMap) bySkuStore.set(sku, (storeMap = new Map()));
let storeMap = bySkuStore.get(sku);
if (!storeMap) bySkuStore.set(sku, (storeMap = new Map()));
const prev = storeMap.get(storeLabel);
if (!prev || eventMs(prev) < ms) storeMap.set(storeLabel, r);
}
const prev = storeMap.get(storeLabel);
if (!prev || eventMs(prev) < ms) storeMap.set(storeLabel, r);
}
const picked = [];
for (const [sku, storeMap] of bySkuStore.entries()) {
let best = null;
const picked = [];
for (const [sku, storeMap] of bySkuStore.entries()) {
let best = null;
for (const r of storeMap.values()) {
const meta = rankRecent(r, canon);
const ms = eventMs(r);
for (const r of storeMap.values()) {
const meta = rankRecent(r, canon);
const ms = eventMs(r);
if (
!best ||
meta.score > best.meta.score ||
(meta.score === best.meta.score && meta.tie > best.meta.tie) ||
(meta.score === best.meta.score &&
meta.tie === best.meta.tie &&
ms > best.ms)
) {
best = { r, meta, ms };
}
}
if (
!best ||
meta.score > best.meta.score ||
(meta.score === best.meta.score && meta.tie > best.meta.tie) ||
(meta.score === best.meta.score && meta.tie === best.meta.tie && ms > best.ms)
) {
best = { r, meta, ms };
}
}
if (best) picked.push(best);
}
if (best) picked.push(best);
}
const ranked = picked.sort((a, b) => {
if (b.meta.score !== a.meta.score) return b.meta.score - a.meta.score;
if (b.meta.tie !== a.meta.tie) return b.meta.tie - a.meta.tie;
return String(a.meta.sku || "").localeCompare(String(b.meta.sku || ""));
});
const ranked = picked.sort((a, b) => {
if (b.meta.score !== a.meta.score) return b.meta.score - a.meta.score;
if (b.meta.tie !== a.meta.tie) return b.meta.tie - a.meta.tie;
return String(a.meta.sku || "").localeCompare(String(b.meta.sku || ""));
});
const limited = ranked.slice(0, 140);
const limited = ranked.slice(0, 140);
$results.innerHTML =
`<div class="small">Recently changed (last 3 days):</div>` +
limited
.map(({ r, meta }) => {
const kindLabel =
meta.kind === "new"
? "NEW"
: meta.kind === "restored"
? "RESTORED"
: meta.kind === "removed"
? "REMOVED"
: meta.kind === "price_down"
? "PRICE ↓"
: meta.kind === "price_up"
? "PRICE ↑"
: meta.kind === "price_change"
? "PRICE"
: "CHANGE";
$results.innerHTML =
`<div class="small">Recently changed (last 3 days):</div>` +
limited
.map(({ r, meta }) => {
const kindLabel =
meta.kind === "new"
? "NEW"
: meta.kind === "restored"
? "RESTORED"
: meta.kind === "removed"
? "REMOVED"
: meta.kind === "price_down"
? "PRICE ↓"
: meta.kind === "price_up"
? "PRICE ↑"
: meta.kind === "price_change"
? "PRICE"
: "CHANGE";
const priceLine =
meta.kind === "new" || meta.kind === "restored" || meta.kind === "removed"
? `${esc(r.price || "")}`
: `${esc(r.oldPrice || "")}${esc(r.newPrice || "")}`;
const priceLine =
meta.kind === "new" || meta.kind === "restored" || meta.kind === "removed"
? `${esc(r.price || "")}`
: `${esc(r.oldPrice || "")}${esc(r.newPrice || "")}`;
const when = r.ts ? prettyTs(r.ts) : r.date || "";
const when = r.ts ? prettyTs(r.ts) : r.date || "";
const sku = meta.sku; // canonical SKU
const agg = aggBySku.get(sku) || null;
const img = agg?.img || "";
const sku = meta.sku; // canonical SKU
const agg = aggBySku.get(sku) || null;
const img = agg?.img || "";
const storeCount = agg?.stores?.size || 0;
const plus = storeCount > 1 ? ` +${storeCount - 1}` : "";
const storeCount = agg?.stores?.size || 0;
const plus = storeCount > 1 ? ` +${storeCount - 1}` : "";
const href = String(r.url || "").trim();
const storeBadge = href
? `<a class="badge" href="${esc(
href
)}" target="_blank" rel="noopener noreferrer" onclick="event.stopPropagation()">${esc(
(r.storeLabel || r.store || "") + plus
)}</a>`
: `<span class="badge">${esc(
(r.storeLabel || r.store || "") + plus
)}</span>`;
const href = String(r.url || "").trim();
const storeBadge = href
? `<a class="badge" href="${esc(
href,
)}" target="_blank" rel="noopener noreferrer" onclick="event.stopPropagation()">${esc(
(r.storeLabel || r.store || "") + plus,
)}</a>`
: `<span class="badge">${esc((r.storeLabel || r.store || "") + plus)}</span>`;
const dateBadge = when
? `<span class="badge mono">${esc(when)}</span>`
: "";
const dateBadge = when ? `<span class="badge mono">${esc(when)}</span>` : "";
const offBadge =
meta.kind === "price_down" && meta.pctOff !== null
? `<span class="badge" style="margin-left:6px; color:rgba(20,110,40,0.95); background:rgba(20,110,40,0.10); border:1px solid rgba(20,110,40,0.20);">[${esc(
meta.pctOff
)}% Off]</span>`
: "";
const offBadge =
meta.kind === "price_down" && meta.pctOff !== null
? `<span class="badge" style="margin-left:6px; color:rgba(20,110,40,0.95); background:rgba(20,110,40,0.10); border:1px solid rgba(20,110,40,0.20);">[${esc(
meta.pctOff,
)}% Off]</span>`
: "";
const kindBadgeStyle =
meta.kind === "new" && meta.isNewUnique
? ` style="color:rgba(20,110,40,0.95); background:rgba(20,110,40,0.10); border:1px solid rgba(20,110,40,0.20);"`
: "";
const kindBadgeStyle =
meta.kind === "new" && meta.isNewUnique
? ` style="color:rgba(20,110,40,0.95); background:rgba(20,110,40,0.10); border:1px solid rgba(20,110,40,0.20);"`
: "";
const skuLink = `#/link/?left=${encodeURIComponent(String(sku || ""))}`;
const skuLink = `#/link/?left=${encodeURIComponent(String(sku || ""))}`;
return `
return `
<div class="item" data-sku="${esc(sku)}">
<div class="itemRow">
<div class="thumbBox">
@ -491,10 +457,10 @@ export function renderSearch($app) {
<div class="itemTop">
<div class="itemName">${esc(r.name || "(no name)")}</div>
<a class="badge mono skuLink" href="${esc(
skuLink
)}" target="_blank" rel="noopener noreferrer" onclick="event.stopPropagation()">${esc(
displaySku(sku)
)}</a>
skuLink,
)}" target="_blank" rel="noopener noreferrer" onclick="event.stopPropagation()">${esc(
displaySku(sku),
)}</a>
</div>
<div class="metaRow">
<span class="badge"${kindBadgeStyle}>${esc(kindLabel)}</span>
@ -507,113 +473,103 @@ export function renderSearch($app) {
</div>
</div>
`;
})
.join("");
})
.join("");
for (const el of Array.from($results.querySelectorAll(".item"))) {
el.addEventListener("click", () => {
const sku = el.getAttribute("data-sku") || "";
if (!sku) return;
saveQuery($q.value);
sessionStorage.setItem("viz:lastRoute", location.hash);
location.hash = `#/item/${encodeURIComponent(sku)}`;
});
}
}
for (const el of Array.from($results.querySelectorAll(".item"))) {
el.addEventListener("click", () => {
const sku = el.getAttribute("data-sku") || "";
if (!sku) return;
saveQuery($q.value);
sessionStorage.setItem("viz:lastRoute", location.hash);
location.hash = `#/item/${encodeURIComponent(sku)}`;
});
}
}
function applySearch() {
if (!indexReady) return;
function applySearch() {
if (!indexReady) return;
const tokens = tokenizeQuery($q.value);
if (!tokens.length) return;
const tokens = tokenizeQuery($q.value);
if (!tokens.length) return;
const matches = allAgg.filter((it) =>
matchesAllTokens(it.searchText, tokens)
);
const matches = allAgg.filter((it) => matchesAllTokens(it.searchText, tokens));
const wantCodes = new Set(smwsDistilleryCodesForQueryPrefix($q.value));
if (!wantCodes.size) {
renderAggregates(matches);
return;
}
const wantCodes = new Set(smwsDistilleryCodesForQueryPrefix($q.value));
if (!wantCodes.size) {
renderAggregates(matches);
return;
}
const seen = new Set(matches.map((it) => String(it?.sku || "")));
const extra = [];
for (const it of allAgg) {
const sku = String(it?.sku || "");
if (!sku || seen.has(sku)) continue;
const dCode = smwsDistilleryCodeFromName(it?.name || "");
if (dCode && wantCodes.has(String(dCode))) {
extra.push(it);
seen.add(sku);
}
}
const seen = new Set(matches.map((it) => String(it?.sku || "")));
const extra = [];
for (const it of allAgg) {
const sku = String(it?.sku || "");
if (!sku || seen.has(sku)) continue;
const dCode = smwsDistilleryCodeFromName(it?.name || "");
if (dCode && wantCodes.has(String(dCode))) {
extra.push(it);
seen.add(sku);
}
}
renderAggregates([...extra, ...matches]);
}
renderAggregates([...extra, ...matches]);
}
$results.innerHTML = `<div class="small">Loading index…</div>`;
$results.innerHTML = `<div class="small">Loading index…</div>`;
Promise.all([loadIndex(), loadSkuRules()])
.then(([idx, rules]) => {
const listings = Array.isArray(idx.items) ? idx.items : [];
Promise.all([loadIndex(), loadSkuRules()])
.then(([idx, rules]) => {
const listings = Array.isArray(idx.items) ? idx.items : [];
renderStoreButtons(listings);
renderStoreButtons(listings);
allAgg = aggregateBySku(listings, rules.canonicalSku);
aggBySku = new Map(allAgg.map((x) => [String(x.sku || ""), x]));
URL_BY_SKU_STORE = buildUrlMap(listings, rules.canonicalSku);
allAgg = aggregateBySku(listings, rules.canonicalSku);
aggBySku = new Map(allAgg.map((x) => [String(x.sku || ""), x]));
URL_BY_SKU_STORE = buildUrlMap(listings, rules.canonicalSku);
indexReady = true;
$q.focus();
indexReady = true;
$q.focus();
const tokens = tokenizeQuery($q.value);
if (tokens.length) {
applySearch();
} else {
return loadRecent().then((recent) =>
renderRecent(recent, rules.canonicalSku)
);
}
})
.catch((e) => {
$results.innerHTML = `<div class="small">Failed to load: ${esc(
e.message
)}</div>`;
});
const tokens = tokenizeQuery($q.value);
if (tokens.length) {
applySearch();
} else {
return loadRecent().then((recent) => renderRecent(recent, rules.canonicalSku));
}
})
.catch((e) => {
$results.innerHTML = `<div class="small">Failed to load: ${esc(e.message)}</div>`;
});
$clearSearch.addEventListener("click", () => {
if ($q.value) {
$q.value = "";
saveQuery("");
}
loadSkuRules()
.then((rules) =>
loadRecent().then((recent) => renderRecent(recent, rules.canonicalSku))
)
.catch(() => {
$results.innerHTML = `<div class="small">Type to search…</div>`;
});
$q.focus();
});
$clearSearch.addEventListener("click", () => {
if ($q.value) {
$q.value = "";
saveQuery("");
}
loadSkuRules()
.then((rules) => loadRecent().then((recent) => renderRecent(recent, rules.canonicalSku)))
.catch(() => {
$results.innerHTML = `<div class="small">Type to search…</div>`;
});
$q.focus();
});
let t = null;
$q.addEventListener("input", () => {
saveQuery($q.value);
if (t) clearTimeout(t);
t = setTimeout(() => {
const tokens = tokenizeQuery($q.value);
if (!tokens.length) {
loadSkuRules()
.then((rules) =>
loadRecent().then((recent) => renderRecent(recent, rules.canonicalSku))
)
.catch(() => {
$results.innerHTML = `<div class="small">Type to search…</div>`;
});
return;
}
applySearch();
}, 50);
});
let t = null;
$q.addEventListener("input", () => {
saveQuery($q.value);
if (t) clearTimeout(t);
t = setTimeout(() => {
const tokens = tokenizeQuery($q.value);
if (!tokens.length) {
loadSkuRules()
.then((rules) => loadRecent().then((recent) => renderRecent(recent, rules.canonicalSku)))
.catch(() => {
$results.innerHTML = `<div class="small">Type to search…</div>`;
});
return;
}
applySearch();
}, 50);
});
}

View file

@ -1,60 +1,59 @@
/**
 * Parse a human-formatted price (e.g. "$1,299.99") into a number.
 *
 * Everything except ASCII digits and "." is stripped before conversion, so
 * currency symbols, thousands separators and a leading minus sign are dropped.
 *
 * @param {*} v - Raw price value; null/undefined are treated as empty text.
 * @returns {number|null} The parsed number, or null when nothing numeric
 *   remains or the remaining text is not a valid number (e.g. "1.2.3").
 */
export function parsePriceToNumber(v) {
  const digits = String(v ?? "").replace(/[^0-9.]/g, "");
  // Number("") is 0, which would report a missing price as a price of zero;
  // treat input with nothing numeric in it as unparseable instead.
  if (!digits) return null;
  const n = Number(digits);
  return Number.isFinite(n) ? n : null;
}
/**
 * Compute the 32-bit FNV-1a hash of a string, one UTF-16 code unit at a time.
 *
 * @param {string} str - Text to hash.
 * @returns {string} The hash as 8 lowercase, zero-padded hex digits.
 */
export function fnv1a32(str) {
  const OFFSET_BASIS = 0x811c9dc5;
  const PRIME = 0x01000193;

  let hash = OFFSET_BASIS;
  let pos = 0;
  while (pos < str.length) {
    // XOR the code unit in first, then multiply with 32-bit wraparound.
    hash = Math.imul(hash ^ str.charCodeAt(pos), PRIME);
    pos += 1;
  }

  // >>> 0 reinterprets the signed 32-bit value as unsigned before formatting.
  const unsigned = hash >>> 0;
  return unsigned.toString(16).padStart(8, "0");
}
/**
 * Build a stand-in SKU key from a row's store and URL.
 *
 * The key is "u:" followed by the fnv1a32 hash of "<store>|<url>", where the
 * store is storeLabel, then store, then the literal "store" as a last resort.
 *
 * @param {object} r - Listing row; reads storeLabel, store and url.
 * @returns {string} Key of the form "u:<8 hex digits>".
 */
export function makeSyntheticSku(r) {
  const storeName = String(r?.storeLabel || r?.store || "store");
  const link = String(r?.url || "");
  const digest = fnv1a32(`${storeName}|${link}`);
  return `u:${digest}`;
}
/**
 * Choose the key used to identify a row's SKU.
 *
 * - No SKU on the row: falls back to makeSyntheticSku(r).
 * - SKU of the form "id:<1-6 digits>" (case-insensitive): the digits,
 *   left-padded with zeros to 6 characters.
 * - Anything else: the trimmed SKU unchanged.
 *
 * @param {object} r - Listing row; reads sku.
 * @returns {string} The SKU key.
 */
export function keySkuForRow(r) {
  const rawSku = String(r?.sku || "").trim();
  if (!rawSku) return makeSyntheticSku(r);

  const idMatch = /^id:(\d{1,6})$/i.exec(rawSku);
  if (idMatch) return String(idMatch[1]).padStart(6, "0");
  return rawSku;
}
/**
 * Turn a SKU key into the text shown to the user.
 *
 * @param {*} key - SKU key; falsy values are treated as "".
 * @returns {string} "unknown" for keys starting with "u:", else the key text.
 */
export function displaySku(key) {
  const text = String(key || "");
  if (text.startsWith("u:")) return "unknown";
  return text;
}
/**
 * Report whether a SKU key carries the "u:" prefix.
 *
 * @param {*} key - SKU key; falsy values are treated as "".
 * @returns {boolean} True when the key text starts with "u:".
 */
export function isUnknownSkuKey(key) {
  const text = String(key || "");
  return text.slice(0, 2) === "u:";
}
// Normalize for search: fold accents, lowercase, punctuation -> space, collapse spaces
/**
 * Reduce arbitrary text to a lowercase, space-separated run of [a-z0-9] words.
 *
 * Accented letters are folded to their base letter first (NFKD decomposition,
 * then combining marks removed), so "Smögen" becomes "smogen" rather than
 * being split into "sm gen" by the punctuation step.
 *
 * @param {*} s - Text to normalize; null/undefined are treated as "".
 * @returns {string} Normalized text with no leading or trailing space.
 */
export function normSearchText(s) {
  return (
    String(s ?? "")
      .normalize("NFKD")
      .replace(/[\u0300-\u036f]/g, "")
      .toLowerCase()
      // Each run of non-alphanumerics (whitespace included) becomes one space,
      // so no separate whitespace-collapsing pass is needed afterwards.
      .replace(/[^a-z0-9]+/g, " ")
      .trim()
  );
}
/**
 * Split a raw search query into tokens.
 *
 * The query is passed through normSearchText, then split on single spaces;
 * empty pieces are discarded.
 *
 * @param {*} q - Raw query text.
 * @returns {string[]} Tokens, or [] when normalization yields nothing.
 */
export function tokenizeQuery(q) {
  const normalized = normSearchText(q);
  if (!normalized) return [];
  return normalized.split(" ").filter((piece) => piece.length > 0);
}
/**
 * Check that every token occurs somewhere in the haystack text.
 *
 * @param {string} hayNorm - Text to search in.
 * @param {string[]} tokens - Tokens that must all be present as substrings.
 * @returns {boolean} True when tokens is empty or every token is found.
 */
export function matchesAllTokens(hayNorm, tokens) {
  // An empty token list matches anything, without touching hayNorm at all.
  if (!tokens.length) return true;
  return tokens.every((token) => hayNorm.includes(token));
}
const s = String(v ?? "").replace(/[^0-9.]/g, "");
const n = Number(s);
return Number.isFinite(n) ? n : null;
}
/**
 * Compute the 32-bit FNV-1a hash of a string, one UTF-16 code unit at a time.
 *
 * @param {string} str - Text to hash.
 * @returns {string} The hash as 8 lowercase, zero-padded hex digits.
 */
export function fnv1a32(str) {
  const OFFSET_BASIS = 0x811c9dc5;
  const PRIME = 0x01000193;

  let hash = OFFSET_BASIS;
  let pos = 0;
  while (pos < str.length) {
    // XOR the code unit in first, then multiply with 32-bit wraparound.
    hash = Math.imul(hash ^ str.charCodeAt(pos), PRIME);
    pos += 1;
  }

  // >>> 0 reinterprets the signed 32-bit value as unsigned before formatting.
  const unsigned = hash >>> 0;
  return unsigned.toString(16).padStart(8, "0");
}
/**
 * Build a stand-in SKU key from a row's store and URL.
 *
 * The key is "u:" followed by the fnv1a32 hash of "<store>|<url>", where the
 * store is storeLabel, then store, then the literal "store" as a last resort.
 *
 * @param {object} r - Listing row; reads storeLabel, store and url.
 * @returns {string} Key of the form "u:<8 hex digits>".
 */
export function makeSyntheticSku(r) {
  const storeName = String(r?.storeLabel || r?.store || "store");
  const link = String(r?.url || "");
  const digest = fnv1a32(`${storeName}|${link}`);
  return `u:${digest}`;
}
/**
 * Choose the key used to identify a row's SKU.
 *
 * - No SKU on the row: falls back to makeSyntheticSku(r).
 * - SKU of the form "id:<1-6 digits>" (case-insensitive): the digits,
 *   left-padded with zeros to 6 characters.
 * - Anything else: the trimmed SKU unchanged.
 *
 * @param {object} r - Listing row; reads sku.
 * @returns {string} The SKU key.
 */
export function keySkuForRow(r) {
  const rawSku = String(r?.sku || "").trim();
  if (!rawSku) return makeSyntheticSku(r);

  const idMatch = /^id:(\d{1,6})$/i.exec(rawSku);
  if (idMatch) return String(idMatch[1]).padStart(6, "0");
  return rawSku;
}
/**
 * Turn a SKU key into the text shown to the user.
 *
 * @param {*} key - SKU key; falsy values are treated as "".
 * @returns {string} "unknown" for keys starting with "u:", else the key text.
 */
export function displaySku(key) {
  const text = String(key || "");
  if (text.startsWith("u:")) return "unknown";
  return text;
}
/**
 * Report whether a SKU key carries the "u:" prefix.
 *
 * @param {*} key - SKU key; falsy values are treated as "".
 * @returns {boolean} True when the key text starts with "u:".
 */
export function isUnknownSkuKey(key) {
  const text = String(key || "");
  return text.slice(0, 2) === "u:";
}
// Normalize for search: fold accents, lowercase, punctuation -> space, collapse spaces
/**
 * Reduce arbitrary text to a lowercase, space-separated run of [a-z0-9] words.
 *
 * Accented letters are folded to their base letter first (NFKD decomposition,
 * then combining marks removed), so "Smögen" becomes "smogen" rather than
 * being split into "sm gen" by the punctuation step.
 *
 * @param {*} s - Text to normalize; null/undefined are treated as "".
 * @returns {string} Normalized text with no leading or trailing space.
 */
export function normSearchText(s) {
  return (
    String(s ?? "")
      .normalize("NFKD")
      .replace(/[\u0300-\u036f]/g, "")
      .toLowerCase()
      // Each run of non-alphanumerics (whitespace included) becomes one space,
      // so no separate whitespace-collapsing pass is needed afterwards.
      .replace(/[^a-z0-9]+/g, " ")
      .trim()
  );
}
/**
 * Split a raw search query into tokens.
 *
 * The query is passed through normSearchText, then split on single spaces;
 * empty pieces are discarded.
 *
 * @param {*} q - Raw query text.
 * @returns {string[]} Tokens, or [] when normalization yields nothing.
 */
export function tokenizeQuery(q) {
  const normalized = normSearchText(q);
  if (!normalized) return [];
  return normalized.split(" ").filter((piece) => piece.length > 0);
}
/**
 * Check that every token occurs somewhere in the haystack text.
 *
 * @param {string} hayNorm - Text to search in.
 * @param {string[]} tokens - Tokens that must all be present as substrings.
 * @returns {boolean} True when tokens is empty or every token is found.
 */
export function matchesAllTokens(hayNorm, tokens) {
  // An empty token list matches anything, without touching hayNorm at all.
  if (!tokens.length) return true;
  return tokens.every((token) => hayNorm.includes(token));
}

View file

@ -2,196 +2,196 @@
import { normSearchText } from "./sku.js";
const DISTILLERIES = [
{ code: "1", name: "Glenfarclas" },
{ code: "2", name: "Glenlivet" },
{ code: "3", name: "Bowmore" },
{ code: "4", name: "Highland Park" },
{ code: "5", name: "Auchentoshan" },
{ code: "6", name: "Macduff" },
{ code: "7", name: "Longmorn" },
{ code: "8", name: "Tamdhu" },
{ code: "9", name: "Glen Grant" },
{ code: "10", name: "Bunnahabhain" },
{ code: "11", name: "Tomatin" },
{ code: "12", name: "BenRiach" },
{ code: "13", name: "Dalmore" },
{ code: "14", name: "Talisker" },
{ code: "15", name: "Glenfiddich" },
{ code: "16", name: "Glenturret" },
{ code: "17", name: "Scapa" },
{ code: "18", name: "Inchgower" },
{ code: "19", name: "Glen Garioch" },
{ code: "20", name: "Inverleven" },
{ code: "21", name: "Glenglassaugh" },
{ code: "22", name: "Glenkinchie" },
{ code: "23", name: "Bruichladdich" },
{ code: "24", name: "Macallan" },
{ code: "25", name: "Rosebank" },
{ code: "26", name: "Clynelish" },
{ code: "27", name: "Springbank" },
{ code: "28", name: "Tullibardine" },
{ code: "29", name: "Laphroaig" },
{ code: "30", name: "Glenrothes" },
{ code: "31", name: "Isle of Jura" },
{ code: "32", name: "Edradour" },
{ code: "33", name: "Ardbeg" },
{ code: "34", name: "Tamnavulin" },
{ code: "35", name: "Glen Moray" },
{ code: "36", name: "Benrinnes" },
{ code: "37", name: "Cragganmore" },
{ code: "38", name: "Caperdonich" },
{ code: "39", name: "Linkwood" },
{ code: "40", name: "Balvenie" },
{ code: "41", name: "Dailuaine" },
{ code: "42", name: "Tobermory" },
{ code: "43", name: "Port Ellen" },
{ code: "44", name: "Craigellachie" },
{ code: "45", name: "Dallas Dhu" },
{ code: "46", name: "Glenlossie" },
{ code: "47", name: "Benromach" },
{ code: "48", name: "Balmenach" },
{ code: "49", name: "St. Magdalene" },
{ code: "50", name: "Bladnoch" },
{ code: "51", name: "Bushmills" },
{ code: "52", name: "Old Pulteney" },
{ code: "53", name: "Caol Ila" },
{ code: "54", name: "Aberlour" },
{ code: "55", name: "Royal Brackla" },
{ code: "56", name: "Coleburn" },
{ code: "57", name: "Glen Mhor" },
{ code: "58", name: "Strathisla" },
{ code: "59", name: "Teaninich" },
{ code: "60", name: "Aberfeldy" },
{ code: "61", name: "Brora" },
{ code: "62", name: "Glenlochy" },
{ code: "63", name: "Glentauchers" },
{ code: "64", name: "Mannochmore" },
{ code: "65", name: "Imperial" },
{ code: "66", name: "Ardmore" },
{ code: "67", name: "Banff" },
{ code: "68", name: "Blair Athol" },
{ code: "69", name: "Glen Albyn" },
{ code: "70", name: "Balblair" },
{ code: "71", name: "Glenburgie" },
{ code: "72", name: "Miltonduff" },
{ code: "73", name: "Aultmore" },
{ code: "74", name: "North Port" },
{ code: "75", name: "Glenury / Glenury Royal" },
{ code: "76", name: "Mortlach" },
{ code: "77", name: "Glen Ord" },
{ code: "78", name: "Ben Nevis" },
{ code: "79", name: "Deanston" },
{ code: "80", name: "Glen Spey" },
{ code: "81", name: "Glen Keith" },
{ code: "82", name: "Glencadam" },
{ code: "83", name: "Convalmore" },
{ code: "84", name: "Glendullan" },
{ code: "85", name: "Glen Elgin" },
{ code: "86", name: "Glenesk" },
{ code: "87", name: "Millburn" },
{ code: "88", name: "Speyburn" },
{ code: "89", name: "Tomintoul" },
{ code: "90", name: "Pittyvaich" },
{ code: "91", name: "Dufftown" },
{ code: "92", name: "Lochside" },
{ code: "93", name: "Glen Scotia" },
{ code: "94", name: "Fettercairn" },
{ code: "95", name: "Auchroisk" },
{ code: "96", name: "GlenDronach" },
{ code: "97", name: "Littlemill" },
{ code: "98", name: "Inverleven" },
{ code: "99", name: "Glenugie" },
{ code: "100", name: "Strathmill" },
{ code: "101", name: "Knockando" },
{ code: "102", name: "Dalwhinnie" },
{ code: "103", name: "Royal Lochnagar" },
{ code: "104", name: "Glenburgie (Glencraig)" },
{ code: "105", name: "Tormore" },
{ code: "106", name: "Cardhu" },
{ code: "107", name: "Glenallachie" },
{ code: "108", name: "Allt-a-Bhainne" },
{ code: "109", name: "Miltonduff (Mosstowie)" },
{ code: "110", name: "Oban" },
{ code: "111", name: "Lagavulin" },
{ code: "112", name: "Loch Lomond (Inchmurrin / Inchmoan)" },
{ code: "113", name: "Braeval (Braes of Glenlivet)" },
{ code: "114", name: "Springbank (Longrow)" },
{ code: "115", name: "Knockdhu (AnCnoc)" },
{ code: "116", name: "Yoichi" },
{ code: "117", name: "Cooley (Unpeated)" },
{ code: "118", name: "Cooley / Connemara (Peated)" },
{ code: "119", name: "Yamazaki" },
{ code: "120", name: "Hakushu" },
{ code: "121", name: "Isle of Arran" },
{ code: "122", name: "Loch Lomond (Croftengea)" },
{ code: "123", name: "Glengoyne" },
{ code: "124", name: "Miyagikyo" },
{ code: "125", name: "Glenmorangie" },
{ code: "126", name: "Springbank (Hazelburn)" },
{ code: "127", name: "Bruichladdich (Port Charlotte)" },
{ code: "128", name: "Penderyn" },
{ code: "129", name: "Kilchoman" },
{ code: "130", name: "Chichibu" },
{ code: "131", name: "Hanyu" },
{ code: "132", name: "Karuizawa" },
{ code: "133", name: "Westland" },
{ code: "134", name: "Paul John" },
{ code: "135", name: "Loch Lomond" },
{ code: "136", name: "Eden Mill" },
{ code: "137", name: "St. Georges (The English Whisky Co.)" },
{ code: "138", name: "Nantou" },
{ code: "139", name: "Kavalan" },
{ code: "140", name: "Balcones" },
{ code: "141", name: "Fary Lochan" },
{ code: "142", name: "Breuckelen Distilling" },
{ code: "143", name: "Copperworks Distilling Co." },
{ code: "144", name: "High Coast Distillery" },
{ code: "145", name: "Smögen Whisky" },
{ code: "146", name: "Cotswolds" },
{ code: "147", name: "Archie Rose" },
{ code: "148", name: "Starward" },
{ code: "149", name: "Ardnamurchan" },
{ code: "150", name: "West Cork Distillers" },
{ code: "151", name: "Mackmyra" },
{ code: "152", name: "Shelter Point" },
{ code: "153", name: "Thy Whisky" },
{ code: "154", name: "Mosgaard Whisky" },
{ code: "155", name: "Milk & Honey Distillery" },
{ code: "156", name: "Glasgow Distillery" },
{ code: "157", name: "Distillerie de Warenghem" },
{ code: "158", name: "Yuza Distillery" },
{ code: "159", name: "Mars Shinshu" },
{ code: "160", name: "Mars Tsunuki" },
{ code: "161", name: "Nc'nean Distillery" },
{ code: "162", name: "Isle of Raasay" },
{ code: "163", name: "Isle of Harris Distillery" },
{ code: "164", name: "Penderyn" },
{ code: "165", name: "Wolfburn" },
{ code: "1", name: "Glenfarclas" },
{ code: "2", name: "Glenlivet" },
{ code: "3", name: "Bowmore" },
{ code: "4", name: "Highland Park" },
{ code: "5", name: "Auchentoshan" },
{ code: "6", name: "Macduff" },
{ code: "7", name: "Longmorn" },
{ code: "8", name: "Tamdhu" },
{ code: "9", name: "Glen Grant" },
{ code: "10", name: "Bunnahabhain" },
{ code: "11", name: "Tomatin" },
{ code: "12", name: "BenRiach" },
{ code: "13", name: "Dalmore" },
{ code: "14", name: "Talisker" },
{ code: "15", name: "Glenfiddich" },
{ code: "16", name: "Glenturret" },
{ code: "17", name: "Scapa" },
{ code: "18", name: "Inchgower" },
{ code: "19", name: "Glen Garioch" },
{ code: "20", name: "Inverleven" },
{ code: "21", name: "Glenglassaugh" },
{ code: "22", name: "Glenkinchie" },
{ code: "23", name: "Bruichladdich" },
{ code: "24", name: "Macallan" },
{ code: "25", name: "Rosebank" },
{ code: "26", name: "Clynelish" },
{ code: "27", name: "Springbank" },
{ code: "28", name: "Tullibardine" },
{ code: "29", name: "Laphroaig" },
{ code: "30", name: "Glenrothes" },
{ code: "31", name: "Isle of Jura" },
{ code: "32", name: "Edradour" },
{ code: "33", name: "Ardbeg" },
{ code: "34", name: "Tamnavulin" },
{ code: "35", name: "Glen Moray" },
{ code: "36", name: "Benrinnes" },
{ code: "37", name: "Cragganmore" },
{ code: "38", name: "Caperdonich" },
{ code: "39", name: "Linkwood" },
{ code: "40", name: "Balvenie" },
{ code: "41", name: "Dailuaine" },
{ code: "42", name: "Tobermory" },
{ code: "43", name: "Port Ellen" },
{ code: "44", name: "Craigellachie" },
{ code: "45", name: "Dallas Dhu" },
{ code: "46", name: "Glenlossie" },
{ code: "47", name: "Benromach" },
{ code: "48", name: "Balmenach" },
{ code: "49", name: "St. Magdalene" },
{ code: "50", name: "Bladnoch" },
{ code: "51", name: "Bushmills" },
{ code: "52", name: "Old Pulteney" },
{ code: "53", name: "Caol Ila" },
{ code: "54", name: "Aberlour" },
{ code: "55", name: "Royal Brackla" },
{ code: "56", name: "Coleburn" },
{ code: "57", name: "Glen Mhor" },
{ code: "58", name: "Strathisla" },
{ code: "59", name: "Teaninich" },
{ code: "60", name: "Aberfeldy" },
{ code: "61", name: "Brora" },
{ code: "62", name: "Glenlochy" },
{ code: "63", name: "Glentauchers" },
{ code: "64", name: "Mannochmore" },
{ code: "65", name: "Imperial" },
{ code: "66", name: "Ardmore" },
{ code: "67", name: "Banff" },
{ code: "68", name: "Blair Athol" },
{ code: "69", name: "Glen Albyn" },
{ code: "70", name: "Balblair" },
{ code: "71", name: "Glenburgie" },
{ code: "72", name: "Miltonduff" },
{ code: "73", name: "Aultmore" },
{ code: "74", name: "North Port" },
{ code: "75", name: "Glenury / Glenury Royal" },
{ code: "76", name: "Mortlach" },
{ code: "77", name: "Glen Ord" },
{ code: "78", name: "Ben Nevis" },
{ code: "79", name: "Deanston" },
{ code: "80", name: "Glen Spey" },
{ code: "81", name: "Glen Keith" },
{ code: "82", name: "Glencadam" },
{ code: "83", name: "Convalmore" },
{ code: "84", name: "Glendullan" },
{ code: "85", name: "Glen Elgin" },
{ code: "86", name: "Glenesk" },
{ code: "87", name: "Millburn" },
{ code: "88", name: "Speyburn" },
{ code: "89", name: "Tomintoul" },
{ code: "90", name: "Pittyvaich" },
{ code: "91", name: "Dufftown" },
{ code: "92", name: "Lochside" },
{ code: "93", name: "Glen Scotia" },
{ code: "94", name: "Fettercairn" },
{ code: "95", name: "Auchroisk" },
{ code: "96", name: "GlenDronach" },
{ code: "97", name: "Littlemill" },
{ code: "98", name: "Inverleven" },
{ code: "99", name: "Glenugie" },
{ code: "100", name: "Strathmill" },
{ code: "101", name: "Knockando" },
{ code: "102", name: "Dalwhinnie" },
{ code: "103", name: "Royal Lochnagar" },
{ code: "104", name: "Glenburgie (Glencraig)" },
{ code: "105", name: "Tormore" },
{ code: "106", name: "Cardhu" },
{ code: "107", name: "Glenallachie" },
{ code: "108", name: "Allt-a-Bhainne" },
{ code: "109", name: "Miltonduff (Mosstowie)" },
{ code: "110", name: "Oban" },
{ code: "111", name: "Lagavulin" },
{ code: "112", name: "Loch Lomond (Inchmurrin / Inchmoan)" },
{ code: "113", name: "Braeval (Braes of Glenlivet)" },
{ code: "114", name: "Springbank (Longrow)" },
{ code: "115", name: "Knockdhu (AnCnoc)" },
{ code: "116", name: "Yoichi" },
{ code: "117", name: "Cooley (Unpeated)" },
{ code: "118", name: "Cooley / Connemara (Peated)" },
{ code: "119", name: "Yamazaki" },
{ code: "120", name: "Hakushu" },
{ code: "121", name: "Isle of Arran" },
{ code: "122", name: "Loch Lomond (Croftengea)" },
{ code: "123", name: "Glengoyne" },
{ code: "124", name: "Miyagikyo" },
{ code: "125", name: "Glenmorangie" },
{ code: "126", name: "Springbank (Hazelburn)" },
{ code: "127", name: "Bruichladdich (Port Charlotte)" },
{ code: "128", name: "Penderyn" },
{ code: "129", name: "Kilchoman" },
{ code: "130", name: "Chichibu" },
{ code: "131", name: "Hanyu" },
{ code: "132", name: "Karuizawa" },
{ code: "133", name: "Westland" },
{ code: "134", name: "Paul John" },
{ code: "135", name: "Loch Lomond" },
{ code: "136", name: "Eden Mill" },
{ code: "137", name: "St. Georges (The English Whisky Co.)" },
{ code: "138", name: "Nantou" },
{ code: "139", name: "Kavalan" },
{ code: "140", name: "Balcones" },
{ code: "141", name: "Fary Lochan" },
{ code: "142", name: "Breuckelen Distilling" },
{ code: "143", name: "Copperworks Distilling Co." },
{ code: "144", name: "High Coast Distillery" },
{ code: "145", name: "Smögen Whisky" },
{ code: "146", name: "Cotswolds" },
{ code: "147", name: "Archie Rose" },
{ code: "148", name: "Starward" },
{ code: "149", name: "Ardnamurchan" },
{ code: "150", name: "West Cork Distillers" },
{ code: "151", name: "Mackmyra" },
{ code: "152", name: "Shelter Point" },
{ code: "153", name: "Thy Whisky" },
{ code: "154", name: "Mosgaard Whisky" },
{ code: "155", name: "Milk & Honey Distillery" },
{ code: "156", name: "Glasgow Distillery" },
{ code: "157", name: "Distillerie de Warenghem" },
{ code: "158", name: "Yuza Distillery" },
{ code: "159", name: "Mars Shinshu" },
{ code: "160", name: "Mars Tsunuki" },
{ code: "161", name: "Nc'nean Distillery" },
{ code: "162", name: "Isle of Raasay" },
{ code: "163", name: "Isle of Harris Distillery" },
{ code: "164", name: "Penderyn" },
{ code: "165", name: "Wolfburn" },
];
// Lookup table derived from DISTILLERIES: the code as a string plus the
// distillery name run through normSearchText, ready for prefix matching.
const DIST = DISTILLERIES.map((d) => ({
  code: String(d.code),
  nameNorm: normSearchText(d.name),
}));

// The standalone word "smws", in any letter case.
const SMWS_WORD_RE = /\bsmws\b/i;
// "<1-3 digits>.<1-4 digits>", e.g. "35.123"; group 1 is the part before the dot.
const SMWS_CODE_RE = /\b(\d{1,3})\.(\d{1,4})\b/;
/**
 * Find the distillery codes whose normalized name starts with the query.
 *
 * The query is normalized with normSearchText; queries shorter than two
 * characters after normalization match nothing.
 *
 * @param {*} qRaw - Raw query text.
 * @returns {string[]} Distinct matching codes, in DIST order.
 */
export function smwsDistilleryCodesForQueryPrefix(qRaw) {
  const q = normSearchText(qRaw);
  if (!q || q.length < 2) return [];

  // A Set so that a code listed more than once in DIST is reported only once.
  const out = new Set();
  for (const d of DIST) {
    if (d.nameNorm.startsWith(q)) out.add(d.code);
  }
  return Array.from(out);
}
// If a listing name contains "SMWS" and an SMWS code like 35.123, returns "35" (distillery code).
/**
 * Extract the distillery part of an SMWS cask code from a listing name.
 *
 * @param {*} name - Listing name; falsy values are treated as "".
 * @returns {string} The digits before the dot of the first code-like match,
 *   or "" when the name lacks the word "SMWS" or contains no such code.
 */
export function smwsDistilleryCodeFromName(name) {
  const s = String(name || "");
  if (!SMWS_WORD_RE.test(s)) return "";
  const m = s.match(SMWS_CODE_RE);
  return m ? String(m[1] || "") : "";
}

View file

@ -4,34 +4,34 @@ let INDEX = null;
let RECENT = null;
/**
 * Load ./data/index.json, remembering the result in the module-level INDEX.
 *
 * Later calls return the remembered value without fetching again. A failed
 * fetch rejects and leaves INDEX unset, so the next call retries.
 *
 * @returns {Promise<*>} Whatever fetchJson resolves to for the index file.
 */
export async function loadIndex() {
  if (INDEX) return INDEX;
  INDEX = await fetchJson("./data/index.json");
  return INDEX;
}
/**
 * Load ./data/recent.json, remembering the result in the module-level RECENT.
 *
 * Never rejects: if the fetch fails, an empty payload ({ count: 0, items: [] })
 * is stored instead, and that fallback is then returned by later calls too.
 *
 * @returns {Promise<*>} The fetched payload, or the empty fallback.
 */
export async function loadRecent() {
  if (RECENT) return RECENT;
  try {
    RECENT = await fetchJson("./data/recent.json");
  } catch {
    // Callers get an empty list rather than an error when the file cannot be loaded.
    RECENT = { count: 0, items: [] };
  }
  return RECENT;
}
// persist search box value across navigation
const Q_LS_KEY = "stviz:v1:search:q";
/**
 * Read the saved search-box text from localStorage under Q_LS_KEY.
 *
 * @returns {string} The stored text, or "" when nothing is stored or
 *   localStorage cannot be read.
 */
export function loadSavedQuery() {
  try {
    return localStorage.getItem(Q_LS_KEY) || "";
  } catch {
    // Any storage error is treated the same as "nothing saved".
    return "";
  }
}
/**
 * Store the search-box text in localStorage under Q_LS_KEY.
 *
 * @param {*} v - Value to store; null/undefined are stored as "".
 * @returns {void}
 */
export function saveQuery(v) {
  try {
    localStorage.setItem(Q_LS_KEY, String(v ?? ""));
  } catch {
    // Best-effort: a storage failure must not interrupt the caller.
  }
}

File diff suppressed because it is too large Load diff

View file

@ -1,184 +1,213 @@
/**
 * Reduce a store label or id to lowercase ASCII letters and digits only.
 *
 * @param {*} s - Label or id; falsy values are treated as "".
 * @returns {string} The text lowercased with every other character removed.
 */
function normalizeId(s) {
  return String(s || "")
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "");
}
// Map normalized store *labels* to canonical ids used by OVERRIDES
const ALIASES = {
  strathliquor: "strath",
  vesselliquor: "vessel",
  tudorhouse: "tudor",
  coopworldofwhisky: "coop",

  // identity entries: these labels are already canonical ids
  kensingtonwinemarket: "kensingtonwinemarket",
  gullliquor: "gullliquor",
  legacyliquor: "legacyliquor",
  vintagespirits: "vintagespirits",
  kegncork: "kegncork",

  // short forms
  gull: "gullliquor",
  legacy: "legacyliquor",
  vintage: "vintagespirits",
  kwm: "kensingtonwinemarket",
};
// Pinned colors: stores listed here always get exactly this color
const OVERRIDES = {
  strath: "#76B7FF",
  bsw: "#E9DF7A",
  kensingtonwinemarket: "#F2C200",
  vessel: "#FFFFFF",
  gullliquor: "#6B0F1A",
  kegncork: "#111111",
  legacyliquor: "#7B4A12",
  vintagespirits: "#E34A2C",

  craftcellars: "#E31B23",
  maltsandgrains: "#A67C52",

  // aliases (same color as the store they stand for)
  gull: "#6B0F1A",
  legacy: "#7B4A12",
  vintage: "#E34A2C",
  kwm: "#F2C200",
};
// High-contrast qualitative palette (30 distinct colors, each listed once)
const PALETTE = [
  "#1F77B4",
  "#FF7F0E",
  "#2CA02C",
  "#D62728",
  "#9467BD",
  "#8C564B",
  "#E377C2",
  "#7F7F7F",
  "#17BECF",
  "#BCBD22",
  "#AEC7E8",
  "#FFBB78",
  "#98DF8A",
  "#FF9896",
  "#C5B0D5",
  "#C49C94",
  "#F7B6D2",
  "#C7C7C7",
  "#9EDAE5",
  "#DBDB8D",
  "#393B79",
  "#637939",
  "#8C6D31",
  "#843C39",
  "#7B4173",
  "#3182BD",
  "#31A354",
  "#756BB1",
  "#636363",
  "#E6550D",
];
/**
 * Remove duplicate values, keeping the first occurrence of each.
 *
 * @param {Iterable<*>} arr - Values to de-duplicate.
 * @returns {Array<*>} New array of distinct values in first-seen order.
 */
function uniq(arr) {
  return [...new Set(arr)];
}
/**
 * Resolve a store label or id to its canonical id.
 *
 * The input is normalized with normalizeId; if ALIASES has an entry for the
 * normalized form, that entry is returned, otherwise the normalized form itself.
 *
 * @param {*} s - Store label or id.
 * @returns {string} Canonical store id ("" for empty input).
 */
function canonicalId(s) {
  const id = normalizeId(s);
  // Own-property lookup only: a plain ALIASES[id] would also return members
  // inherited from Object.prototype for ids such as "constructor".
  const alias = Object.prototype.hasOwnProperty.call(ALIASES, id) ? ALIASES[id] : "";
  return alias || id;
}
/**
 * Merge two lists of store ids into one list of distinct canonical ids.
 *
 * Each entry is passed through canonicalId; entries that come back empty are
 * dropped and duplicates are removed, keeping first-seen order (base first).
 *
 * @param {*} base - First list; anything that is not an array counts as empty.
 * @param {*} extra - Second list; anything that is not an array counts as empty.
 * @returns {string[]} Distinct canonical ids.
 */
function buildUniverse(base, extra) {
  const a = Array.isArray(base) ? base : [];
  const b = Array.isArray(extra) ? extra : [];
  return uniq([...a, ...b].map((id) => canonicalId(id)).filter(Boolean));
}
// Keep mapping stable even if page sees a subset: the universe always starts
// from every OVERRIDES key plus this fixed list of known store ids
// (buildUniverse canonicalizes and de-duplicates them).
const DEFAULT_UNIVERSE = buildUniverse(Object.keys(OVERRIDES), [
  "bcl",
  "bsw",
  "coop",
  "craftcellars",
  "gullliquor",
  "gull",
  "kegncork",
  "kwm",
  "kensingtonwinemarket",
  "legacy",
  "legacyliquor",
  "maltsandgrains",
  "sierrasprings",
  "strath",
  "tudor",
  "vessel",
  "vintage",
  "vintagespirits",
  "willowpark",
  "arc",
]);
/**
 * Tells whether a color is exactly the six-digit hex white `#FFFFFF`.
 *
 * The comparison ignores surrounding whitespace and letter case. Other
 * spellings of white (e.g. `#FFF`, `white`) are not recognized.
 *
 * @param {*} c - Color value; null/undefined/empty are treated as "".
 * @returns {boolean} True only for `#FFFFFF` (case-insensitive, trimmed).
 */
function isWhiteHex(c) {
	return (
		String(c || "")
			.trim()
			.toUpperCase() === "#FFFFFF"
	);
}
/**
 * Builds a deterministic store-id -> color map.
 *
 * The universe is `DEFAULT_UNIVERSE` plus `extraUniverse`, canonicalized,
 * de-duplicated and sorted, so the result depends only on the set of ids and
 * not on the order they were supplied in.
 *
 * @param {Array<*>} [extraUniverse=[]] - Additional store keys/labels to
 *   include beyond the default universe.
 * @returns {Map<string, string>} Canonical store id -> hex color.
 */
export function buildStoreColorMap(extraUniverse = []) {
	const universe = buildUniverse(DEFAULT_UNIVERSE, extraUniverse).sort();

	const used = new Set();
	const map = new Map();

	// Pin overrides first so their colors are known before the palette is filtered.
	for (const id of universe) {
		const c = OVERRIDES[id];
		if (c) {
			map.set(id, c);
			used.add(String(c).toUpperCase());
		}
	}

	// Filter palette to avoid collisions with pinned colors and keep white/black reserved.
	const palette = PALETTE.map((c) => String(c).toUpperCase()).filter(
		(c) => !used.has(c) && c !== "#FFFFFF" && c !== "#111111",
	);

	// Hand out the remaining palette colors in order, wrapping around (and thus
	// reusing colors) once there are more stores than palette entries.
	let pi = 0;
	for (const id of universe) {
		if (map.has(id)) continue;
		if (pi >= palette.length) pi = 0;
		const c = palette[pi++];
		map.set(id, c);
		used.add(c);
	}

	return map;
}
/**
 * Returns the display color for a store.
 *
 * Resolution order:
 *  1. no usable id -> neutral grey `#7F7F7F`;
 *  2. a pinned color in `OVERRIDES`;
 *  3. the entry in `colorMap`, when a map-like object is supplied;
 *  4. a palette color picked from the id's length and first character code,
 *     so an unknown id still gets the same color every time.
 *
 * @param {*} storeKeyOrLabel - Store key or label; canonicalized internally.
 * @param {{get: function(string): (string|undefined)}} [colorMap] - Usually
 *   the map returned by `buildStoreColorMap`.
 * @returns {string} A hex color string.
 */
export function storeColor(storeKeyOrLabel, colorMap) {
	const id = canonicalId(storeKeyOrLabel);
	if (!id) return "#7F7F7F";

	const forced = OVERRIDES[id];
	if (forced) return forced;

	if (colorMap && typeof colorMap.get === "function") {
		const c = colorMap.get(id);
		if (c) return c;
	}

	return PALETTE[(id.length + id.charCodeAt(0)) % PALETTE.length];
}
/**
 * Line width to use for a dataset drawn in the given color.
 *
 * @param {*} color - Dataset color.
 * @returns {number} 2.5 when the color is `#FFFFFF`, otherwise 1.5.
 */
export function datasetStrokeWidth(color) {
	return isWhiteHex(color) ? 2.5 : 1.5;
}
/**
 * Point radius to use for a dataset drawn in the given color.
 *
 * @param {*} color - Dataset color.
 * @returns {number} 2.8 when the color is `#FFFFFF`, otherwise 2.2.
 */
export function datasetPointRadius(color) {
	return isWhiteHex(color) ? 2.8 : 2.2;
}
/**
 * Restricts a number to the inclusive range [lo, hi].
 *
 * @param {number} v - Value to clamp.
 * @param {number} lo - Lower bound.
 * @param {number} hi - Upper bound.
 * @returns {number} `lo` if v < lo, `hi` if v > hi, otherwise `v`.
 */
function clamp(v, lo, hi) {
	return Math.max(lo, Math.min(hi, v));
}
/**
 * Parses a six-digit hex color into its red/green/blue components.
 *
 * @param {*} hex - Color such as `#76B7FF` or `76B7FF` (case-insensitive).
 * @returns {{r: number, g: number, b: number}|null} Components in 0-255, or
 *   null when the input is not exactly six hexadecimal digits (short forms
 *   like `#FFF` and named colors are not supported).
 */
function hexToRgb(hex) {
	const digits = String(hex).replace("#", "");
	// Validate the digits as well as the length: parseInt would otherwise turn
	// garbage into NaN (rendered as black) or silently parse only a valid prefix.
	if (!/^[0-9a-f]{6}$/i.test(digits)) return null;
	const n = parseInt(digits, 16);
	return {
		r: (n >> 16) & 255,
		g: (n >> 8) & 255,
		b: n & 255,
	};
}
/**
 * Formats red/green/blue components as a `#rrggbb` string (lower-case digits).
 *
 * Each component is rounded to the nearest integer and clamped to 0-255
 * before being written as two hex digits.
 *
 * @param {{r: number, g: number, b: number}} rgb - Color components.
 * @returns {string} The color as `#rrggbb`.
 */
function rgbToHex({ r, g, b }) {
	const h = (x) => clamp(Math.round(x), 0, 255).toString(16).padStart(2, "0");
	return `#${h(r)}${h(g)}${h(b)}`;
}
/**
 * Lightens a hex color by mixing it with white.
 *
 * Each channel moves `amount` of the way from its current value towards 255,
 * so 0 leaves the color unchanged and 1 yields white.
 *
 * @param {string} hex - Six-digit hex color, with or without a leading `#`.
 * @param {number} [amount=0.25] - Mix fraction, intended range 0-1.
 * @returns {string} The lightened color as `#rrggbb`, or `hex` unchanged when
 *   it cannot be parsed.
 */
export function lighten(hex, amount = 0.25) {
	const rgb = hexToRgb(hex);
	if (!rgb) return hex;
	return rgbToHex({
		r: rgb.r + (255 - rgb.r) * amount,
		g: rgb.g + (255 - rgb.g) * amount,
		b: rgb.b + (255 - rgb.b) * amount,
	});
}

File diff suppressed because it is too large Load diff

View file

@ -9,151 +9,151 @@ const root = path.resolve(__dirname); // viz/
const projectRoot = path.resolve(__dirname, ".."); // repo root
// Content-Type header value for each file extension the static server knows.
// Lookups are by `path.extname()` result (leading dot included); anything not
// listed is served as application/octet-stream.
const MIME = {
	".html": "text/html; charset=utf-8",
	".js": "application/javascript; charset=utf-8",
	".css": "text/css; charset=utf-8",
	".json": "application/json; charset=utf-8",
	".png": "image/png",
	".jpg": "image/jpeg",
	".jpeg": "image/jpeg",
	".svg": "image/svg+xml",
};
/**
 * Maps a request URL path to an absolute file path inside `root`.
 *
 * The query string is dropped, percent-escapes are decoded, backslashes are
 * treated as forward slashes, and the result is joined onto `root` and
 * normalized.
 *
 * @param {string} urlPath - Raw request path, optionally with a query string.
 * @returns {string|null} The normalized absolute path, or null when the path
 *   is malformed (bad percent-escape, embedded NUL) or resolves outside `root`.
 */
function safePath(urlPath) {
	let decoded;
	try {
		decoded = decodeURIComponent(urlPath.split("?")[0]);
	} catch {
		// Malformed percent-escape (URIError): reject instead of letting it propagate.
		return null;
	}
	// fs APIs reject NUL bytes with an exception; refuse them up front.
	if (decoded.includes("\0")) return null;

	const norm = path.join(root, decoded.replace(/\\/g, "/"));

	// Compare against root *plus a separator*: a bare prefix check would also
	// accept sibling directories such as "<root>-backup".
	const prefix = root.endsWith(path.sep) ? root : root + path.sep;
	if (norm !== root && !norm.startsWith(prefix)) return null;
	return norm;
}
// Project-level file (shared by viz + report tooling).
// This single JSON file stores both the SKU links and the SKU ignore pairs;
// it lives under the repo root's data/ directory rather than under viz/.
const LINKS_FILE = path.join(projectRoot, "data", "sku_links.json");
/**
 * Loads the SKU link metadata from `LINKS_FILE`.
 *
 * Reading is best-effort: a missing file is treated as "nothing stored yet",
 * and any other failure (unreadable file, invalid JSON) is reported on stderr
 * and likewise falls back to empty lists, so the server keeps working.
 *
 * @returns {{generatedAt: string, links: Array, ignores: Array}} The stored
 *   metadata. `links`/`ignores` default to empty arrays when absent or not
 *   arrays; `generatedAt` defaults to the current time as an ISO string.
 */
function readMeta() {
	try {
		const raw = fs.readFileSync(LINKS_FILE, "utf8");
		const obj = JSON.parse(raw);

		const links = obj && Array.isArray(obj.links) ? obj.links : [];
		const ignores = obj && Array.isArray(obj.ignores) ? obj.ignores : [];

		return { generatedAt: obj?.generatedAt || new Date().toISOString(), links, ignores };
	} catch (e) {
		// ENOENT just means nothing has been saved yet. Anything else is worth a
		// warning, because the next write will replace the unreadable file.
		if (!e || e.code !== "ENOENT") {
			const reason = String(e && e.message ? e.message : e);
			process.stderr.write(`Warning: could not read ${LINKS_FILE}: ${reason}\n`);
		}
	}
	return { generatedAt: new Date().toISOString(), links: [], ignores: [] };
}
/**
 * Persists the SKU link metadata to `LINKS_FILE` as pretty-printed JSON.
 *
 * Note that `obj.generatedAt` is overwritten on the passed-in object with the
 * current time before it is serialized. The parent directory is created if it
 * does not exist yet.
 *
 * @param {object} obj - Metadata to store (as produced by `readMeta`).
 * @returns {void}
 * @throws {Error} Whatever `fs.mkdirSync` / `fs.writeFileSync` throw.
 */
function writeMeta(obj) {
	obj.generatedAt = new Date().toISOString();
	fs.mkdirSync(path.dirname(LINKS_FILE), { recursive: true });
	fs.writeFileSync(LINKS_FILE, JSON.stringify(obj, null, 2) + "\n", "utf8");
}
/**
 * Sends a complete response with a plain-text default content type.
 *
 * @param {import("http").ServerResponse} res - Response to write to.
 * @param {number} code - HTTP status code.
 * @param {string|Buffer} body - Response body.
 * @param {Object<string, string>} [headers] - Extra headers; these override
 *   the default `Content-Type` when they set it.
 * @returns {void}
 */
function send(res, code, body, headers) {
	// Spreading null/undefined is a no-op, so no fallback object is needed.
	res.writeHead(code, { "Content-Type": "text/plain; charset=utf-8", ...headers });
	res.end(body);
}
/**
 * Sends a complete JSON response.
 *
 * @param {import("http").ServerResponse} res - Response to write to.
 * @param {number} code - HTTP status code.
 * @param {*} obj - Value to serialize with `JSON.stringify` as the body.
 * @returns {void}
 */
function sendJson(res, code, obj) {
	res.writeHead(code, { "Content-Type": "application/json; charset=utf-8" });
	res.end(JSON.stringify(obj));
}
/**
 * Buffers the request body, parses it as JSON (an empty body counts as `{}`)
 * and passes the result to `handle`. Anything thrown while parsing or inside
 * `handle` is reported to the client as a 400 JSON error.
 */
function withJsonBody(req, res, handle) {
	let body = "";
	req.on("data", (c) => (body += c));
	req.on("end", () => {
		try {
			handle(JSON.parse(body || "{}"));
		} catch (e) {
			sendJson(res, 400, { ok: false, error: String(e && e.message ? e.message : e) });
		}
	});
}

/** `/__stviz/sku-links`: GET lists links and ignores, POST appends one `{ fromSku, toSku }` link. */
function handleSkuLinks(req, res) {
	if (req.method === "GET") {
		const obj = readMeta();
		return sendJson(res, 200, { ok: true, count: obj.links.length, links: obj.links, ignores: obj.ignores });
	}

	if (req.method === "POST") {
		return withJsonBody(req, res, (inp) => {
			const fromSku = String(inp.fromSku || "").trim();
			const toSku = String(inp.toSku || "").trim();
			if (!fromSku || !toSku) return sendJson(res, 400, { ok: false, error: "fromSku/toSku required" });

			const obj = readMeta();
			obj.links.push({ fromSku, toSku, createdAt: new Date().toISOString() });
			writeMeta(obj);

			return sendJson(res, 200, { ok: true, count: obj.links.length, file: "data/sku_links.json" });
		});
	}

	return send(res, 405, "Method Not Allowed");
}

/** `/__stviz/sku-ignores`: GET lists ignore pairs, POST appends one `{ skuA, skuB }` pair. */
function handleSkuIgnores(req, res) {
	if (req.method === "GET") {
		const obj = readMeta();
		return sendJson(res, 200, { ok: true, count: obj.ignores.length, ignores: obj.ignores });
	}

	if (req.method === "POST") {
		return withJsonBody(req, res, (inp) => {
			const skuA = String(inp.skuA || "").trim();
			const skuB = String(inp.skuB || "").trim();
			if (!skuA || !skuB) return sendJson(res, 400, { ok: false, error: "skuA/skuB required" });
			if (skuA === skuB) return sendJson(res, 400, { ok: false, error: "skuA and skuB must differ" });

			const obj = readMeta();
			obj.ignores.push({ skuA, skuB, createdAt: new Date().toISOString() });
			writeMeta(obj);

			return sendJson(res, 200, { ok: true, count: obj.ignores.length, file: "data/sku_links.json" });
		});
	}

	return send(res, 405, "Method Not Allowed");
}

/** Serves a file from the viz directory; directories resolve to their index.html. */
function serveStatic(rawUrl, res) {
	let file;
	try {
		file = safePath(rawUrl === "/" ? "/index.html" : rawUrl);
	} catch {
		// e.g. a malformed percent-escape in the path
		file = null;
	}
	// A NUL byte would make the fs calls below throw instead of reporting an error.
	if (!file || file.includes("\0")) return send(res, 400, "Bad path");

	try {
		if (fs.statSync(file).isDirectory()) file = path.join(file, "index.html");
	} catch {
		// Missing or unreadable path: fall through, readFile reports it as 404.
	}

	fs.readFile(file, (err, buf) => {
		if (err) return send(res, 404, "Not found");
		const ext = path.extname(file).toLowerCase();
		res.writeHead(200, { "Content-Type": MIME[ext] || "application/octet-stream" });
		res.end(buf);
	});
}

// Local dev server: a tiny JSON API for the SKU link file plus static file
// serving for everything else.
const server = http.createServer((req, res) => {
	const u = req.url || "/";

	let url;
	try {
		url = new URL(u, "http://127.0.0.1");
	} catch {
		// Request targets such as "//" are not parseable as URLs; answer 400
		// rather than letting the exception take the server down.
		return send(res, 400, "Bad request");
	}

	// Local API: read/write sku links + ignore pairs on disk (only exists when using this local server)
	if (url.pathname === "/__stviz/sku-links") return handleSkuLinks(req, res);
	if (url.pathname === "/__stviz/sku-ignores") return handleSkuIgnores(req, res);

	// Static
	return serveStatic(u, res);
});
// Listen on loopback only; the port comes from $PORT and defaults to 8080.
const port = Number(process.env.PORT || 8080);

server.listen(port, "127.0.0.1", () => {
	process.stdout.write(`Serving ${root} on http://127.0.0.1:${port}\n`);
	process.stdout.write(`SKU links file: ${LINKS_FILE}\n`);
});