mirror of
https://github.com/samsonjs/spirit-tracker.git
synced 2026-03-25 09:25:51 +00:00
UX Improvements
This commit is contained in:
parent
e9f8f805c5
commit
7a33d51c90
73 changed files with 13094 additions and 13094 deletions
|
|
@ -4,7 +4,7 @@
|
||||||
const { main } = require("../src/main");

// Run the CLI. On failure print the stack (or the stringified value when the
// rejection is not an Error) and flag a non-zero exit code without killing the
// process abruptly, so pending output can still flush.
main().catch((e) => {
  const msg = e && e.stack ? e.stack : String(e);
  console.error(msg);
  process.exitCode = 1;
});
|
||||||
|
|
|
||||||
474
src/core/http.js
474
src/core/http.js
|
|
@ -7,327 +7,327 @@ const { setTimeout: setTimeoutCb, clearTimeout } = require("timers");
|
||||||
/* ---------------- Errors ---------------- */
|
/* ---------------- Errors ---------------- */
|
||||||
|
|
||||||
/**
 * Error type used to mark a failure as worth retrying.
 *
 * @param {string} msg - Human-readable description of the failure.
 * @param {{ cause?: unknown }} [options] - Optional standard Error options
 *   (e.g. `{ cause }`) forwarded to the Error constructor.
 */
class RetryableError extends Error {
  constructor(msg, options) {
    super(msg, options);
    this.name = "RetryableError";
  }
}
|
||||||
|
|
||||||
/**
 * Decide whether a thrown value represents a transient failure.
 *
 * A value is retryable when it (or any error on its `cause` chain) is an
 * abort, a RetryableError, or carries a known transient network marker in its
 * message or `code`.
 *
 * @param {unknown} e - The thrown value (Error, string, or anything else).
 * @returns {boolean} true when the caller should retry.
 */
function isRetryable(e) {
  const transient = /ECONNRESET|ENOTFOUND|EAI_AGAIN|ETIMEDOUT|socket hang up|fetch failed/i;

  // Walk the cause chain: wrapped failures (e.g. a generic TypeError whose
  // cause carries the socket error code) are otherwise misclassified as fatal.
  // The depth bound guards against cyclic `cause` references.
  let cur = e;
  for (let depth = 0; cur && depth < 5; depth++) {
    if (cur.name === "AbortError") return true;
    if (cur instanceof RetryableError) return true;
    if (transient.test(String(cur.message || cur))) return true;
    if (typeof cur.code === "string" && transient.test(cur.code)) return true;
    cur = cur.cause;
  }
  return false;
}
|
||||||
|
|
||||||
/* ---------------- Backoff ---------------- */
|
/* ---------------- Backoff ---------------- */
|
||||||
|
|
||||||
/**
 * Compute the delay before the next retry.
 *
 * Exponential: 500ms doubled per attempt, capped at 12000ms, plus a random
 * 0-399ms jitter.
 *
 * @param {number} attempt - Zero-based attempt index.
 * @returns {number} Delay in milliseconds.
 */
function backoffMs(attempt) {
  const base = Math.min(12000, 500 * 2 ** attempt);
  const jitter = Math.floor(Math.random() * 400);
  return base + jitter;
}
|
||||||
|
|
||||||
/**
 * Read the `retry-after` header of a response and convert it to milliseconds.
 *
 * Accepts both forms of the header: a number of seconds, or a date string
 * (converted to the time remaining from now).
 *
 * @param {{ headers?: { get?: (name: string) => string | null } }} res - Response-like object.
 * @returns {number} Non-negative delay in ms; 0 when the header is missing or unparseable.
 */
function retryAfterMs(res) {
  const ra = res?.headers?.get ? res.headers.get("retry-after") : null;
  if (!ra) return 0;

  // Numeric form: delay in seconds.
  const secs = Number(String(ra).trim());
  if (Number.isFinite(secs)) return Math.max(0, secs * 1000);

  // Date form: delay until that instant.
  const dt = Date.parse(String(ra));
  if (Number.isFinite(dt)) return Math.max(0, dt - Date.now());

  return 0;
}
|
||||||
|
|
||||||
/* ---------------- Utils ---------------- */
|
/* ---------------- Utils ---------------- */
|
||||||
|
|
||||||
/**
 * Read a response body as text without ever throwing.
 *
 * Used on error paths where the body is only wanted for diagnostics, so a
 * failed read deliberately degrades to an empty string.
 *
 * @param {{ text: () => Promise<string> }} res - Response-like object.
 * @returns {Promise<string>} The body text, or "" when reading fails.
 */
async function safeText(res) {
  try {
    return await res.text();
  } catch {
    return "";
  }
}
|
||||||
|
|
||||||
/**
 * Extract the host (hostname plus port, when present) from a URL.
 *
 * @param {string} u - Absolute URL.
 * @returns {string} The host, or "" when the URL cannot be parsed.
 */
function hostFromUrl(u) {
  try {
    return new URL(u).host || "";
  } catch {
    return "";
  }
}
|
||||||
|
|
||||||
/* ---------------- Cookies (simple jar) ---------------- */
|
/* ---------------- Cookies (simple jar) ---------------- */
|
||||||
|
|
||||||
/**
 * Create a minimal in-memory cookie jar keyed by host.
 *
 * Storage shape: host -> Map(cookieName -> "name=value").
 * Only the name/value pair of each Set-Cookie line is kept; attributes such as
 * Path, Expires or Max-Age are ignored, so cookies are never expired or scoped
 * by path.
 *
 * @returns {{
 *   storeFromResponse: (url: string, res: object) => void,
 *   cookieHeaderFor: (url: string) => string
 * }}
 */
function createCookieJar() {
  const jar = new Map();

  // Parse one Set-Cookie line into { name, pair }, or null when malformed.
  function parseSetCookieLine(line) {
    const s = String(line || "").trim();
    if (!s) return null;

    const first = s.split(";")[0] || "";
    const eq = first.indexOf("=");
    if (eq <= 0) return null; // no "=" or empty name

    const name = first.slice(0, eq).trim();
    const value = first.slice(eq + 1).trim();
    if (!name) return null;

    return { name, pair: `${name}=${value}` };
  }

  // Return every Set-Cookie line of a Headers-like object as an array.
  function getSetCookieArray(headers) {
    if (headers && typeof headers.getSetCookie === "function") {
      try {
        const arr = headers.getSetCookie();
        return Array.isArray(arr) ? arr : [];
      } catch {
        // Best effort: fall through to the combined-header path below.
      }
    }

    const one = headers?.get ? headers.get("set-cookie") : null;
    if (!one) return [];

    // A combined header joins cookies with commas, but Expires dates contain
    // commas too: only split on a comma that is followed by a "name=" token.
    return String(one)
      .split(/,(?=[^;,]*=)/g)
      .map((x) => x.trim())
      .filter(Boolean);
  }

  // Record the cookies set by `res` under the host of the final URL
  // (res.url when available, otherwise the requested url).
  function storeFromResponse(url, res) {
    const host = hostFromUrl(res?.url || url);
    if (!host) return;

    const lines = getSetCookieArray(res?.headers);
    if (!lines.length) return;

    let m = jar.get(host);
    if (!m) {
      m = new Map();
      jar.set(host, m);
    }

    for (const line of lines) {
      const c = parseSetCookieLine(line);
      if (c) m.set(c.name, c.pair);
    }
  }

  // Build the Cookie request-header value for `url` ("" when none stored).
  function cookieHeaderFor(url) {
    const host = hostFromUrl(url);
    if (!host) return "";
    const m = jar.get(host);
    if (!m || m.size === 0) return "";
    return [...m.values()].join("; ");
  }

  return { storeFromResponse, cookieHeaderFor };
}
|
||||||
|
|
||||||
/* ---------------- HTTP client ---------------- */
|
/* ---------------- HTTP client ---------------- */
|
||||||
|
|
||||||
/**
 * Create an HTTP client with retries, per-host pacing and a cookie jar.
 *
 * Pacing rules enforced per host:
 *  - at most `maxHostInflight` (1) request in flight at a time;
 *  - at least `minHostIntervalMs` (2500ms) between request starts;
 *  - a 429 extends the host cooldown by Retry-After, or by 15-20s when the
 *    header is absent.
 *
 * @param {object} opts
 * @param {number} opts.maxRetries - Retries after the first attempt.
 * @param {number} opts.timeoutMs - Per-attempt timeout covering both the
 *   request and the reading of the response body.
 * @param {string} opts.defaultUa - User-Agent used when a call passes none.
 * @param {object} [opts.logger] - Optional logger; `dbg` and `warn` are used when present.
 * @returns {{ fetchTextWithRetry: Function, fetchJsonWithRetry: Function, inflightStr: Function }}
 */
function createHttpClient({ maxRetries, timeoutMs, defaultUa, logger }) {
  let inflight = 0;
  let reqSeq = 0;

  const cookieJar = createCookieJar();

  // host -> epoch ms when next request is allowed
  const hostNextOkAt = new Map();

  // Conservative pacing defaults (slow > blocked)
  const minHostIntervalMs = 2500;

  // Per-host inflight clamp (prevents bursts when global concurrency is high)
  const hostInflight = new Map();
  const maxHostInflight = 1;

  function inflightStr() {
    return `inflight=${inflight}`;
  }

  // Wait (polling every 50ms) for a free slot on the url's host, take it, and
  // return the function that releases it.
  async function acquireHost(url) {
    const host = hostFromUrl(url);
    if (!host) return () => {};

    while (true) {
      const cur = hostInflight.get(host) || 0;
      if (cur < maxHostInflight) {
        hostInflight.set(host, cur + 1);
        return () => {
          const n = (hostInflight.get(host) || 1) - 1;
          if (n <= 0) hostInflight.delete(host);
          else hostInflight.set(host, n);
        };
      }
      await sleep(50);
    }
  }

  // Pre-pacing reservation: reserve the next slot BEFORE the fetch is sent
  async function throttleHost(url) {
    const host = hostFromUrl(url);
    if (!host) return;

    while (true) {
      const now = Date.now();
      const next = hostNextOkAt.get(host) || 0;
      const wait = next - now;

      if (wait > 0) {
        logger?.dbg?.(`THROTTLE host=${host} wait=${wait}ms`);
        await sleep(wait);
        continue; // re-check: the cooldown may have been extended meanwhile
      }

      // Reserve immediately to prevent concurrent pass-through
      hostNextOkAt.set(host, now + minHostIntervalMs);
      return;
    }
  }

  // Push the host's next-allowed time out after a response.
  function noteHost(url, extraDelayMs = 0) {
    const host = hostFromUrl(url);
    if (!host) return;

    const now = Date.now();
    const current = hostNextOkAt.get(host) || 0;

    // Extend (never shorten) any existing cooldown
    const target = now + minHostIntervalMs + Math.max(0, extraDelayMs);
    hostNextOkAt.set(host, Math.max(current, target));

    logger?.dbg?.(`HOST-PACE host=${host} nextOkIn=${Math.max(0, (hostNextOkAt.get(host) || 0) - Date.now())}ms`);
  }

  /**
   * Fetch `url`, retrying retryable failures with backoff.
   *
   * @param {string} url
   * @param {string} tag - Label used in log lines.
   * @param {string} [ua] - User-Agent override.
   * @param {object} [options]
   * @param {"text"|"json"} [options.mode="text"]
   * @param {string} [options.method="GET"]
   * @param {object} [options.headers={}] - Extra headers; they win over defaults.
   * @param {*} [options.body=null]
   * @param {boolean} [options.cookies=true] - Use and update the cookie jar.
   * @returns {Promise<object>} `{ text | json, ms, bytes, status, finalUrl }`.
   * @throws The last error when it is not retryable or retries are exhausted.
   */
  async function fetchWithRetry(
    url,
    tag,
    ua,
    { mode = "text", method = "GET", headers = {}, body = null, cookies = true } = {},
  ) {
    for (let attempt = 0; attempt <= maxRetries; attempt++) {
      const reqId = ++reqSeq;
      const start = Date.now();

      inflight++;
      logger?.dbg?.(
        `REQ#${reqId} START ${tag} attempt=${attempt + 1}/${maxRetries + 1} ${url} (${inflightStr()})`,
      );

      const releaseHost = await acquireHost(url);

      try {
        await throttleHost(url);

        const ctrl = new AbortController();
        const t = setTimeoutCb(() => ctrl.abort(), timeoutMs);

        // The timer stays armed until the body has been consumed: fetch()
        // resolves as soon as headers arrive, and clearing the timer at that
        // point would let a stalled body read hang forever.
        try {
          // A caller-supplied Cookie header takes precedence over the jar.
          const cookieHdr =
            cookies && !("Cookie" in headers) && !("cookie" in headers) ? cookieJar.cookieHeaderFor(url) : "";

          const res = await fetch(url, {
            method,
            redirect: "follow",
            headers: {
              "user-agent": ua || defaultUa,
              "accept-language": "en-US,en;q=0.9",
              ...(mode === "text"
                ? { accept: "text/html,application/xhtml+xml", "cache-control": "no-cache" }
                : { accept: "application/json, text/plain, */*" }),
              ...(cookieHdr ? { cookie: cookieHdr } : {}),
              ...headers,
            },
            body,
            signal: ctrl.signal,
          });

          const status = res.status;
          const finalUrl = res.url || url;
          const elapsed = Date.now() - start; // time to headers, not to full body

          // Always pace the host a bit after any response
          noteHost(finalUrl);
          if (cookies) cookieJar.storeFromResponse(url, res);

          logger?.dbg?.(`REQ#${reqId} HTTP ${status} ${tag} ms=${elapsed} finalUrl=${finalUrl}`);

          if (status === 429) {
            let raMs = retryAfterMs(res);

            // If no Retry-After header, enforce a real cooldown (Shopify often omits it)
            if (raMs <= 0) raMs = 15000 + Math.floor(Math.random() * 5000);

            noteHost(finalUrl, raMs);
            logger?.dbg?.(`REQ#${reqId} 429 retryAfterMs=${raMs} host=${hostFromUrl(finalUrl)}`);
            throw new RetryableError("HTTP 429");
          }

          if (status === 408 || (status >= 500 && status <= 599)) {
            throw new RetryableError(`HTTP ${status}`);
          }

          // Remaining 4xx are treated as permanent; include a body excerpt.
          if (status >= 400) {
            const bodyTxt = await safeText(res);
            throw new Error(`HTTP ${status} bodyHead=${String(bodyTxt).slice(0, 160).replace(/\s+/g, " ")}`);
          }

          if (mode === "json") {
            const txt = await res.text();
            let json;
            try {
              json = JSON.parse(txt);
            } catch (e) {
              throw new RetryableError(`Bad JSON: ${e?.message || e}`);
            }
            return { json, ms: elapsed, bytes: txt.length, status, finalUrl };
          }

          const text = await res.text();
          // Very short HTML is treated as a failed response and retried.
          if (!text || text.length < 200) {
            throw new RetryableError(`Short HTML bytes=${text.length}`);
          }

          return { text, ms: elapsed, bytes: text.length, status, finalUrl };
        } finally {
          clearTimeout(t);
        }
      } catch (e) {
        const retryable = isRetryable(e);
        const host = hostFromUrl(url);
        const nextOk = hostNextOkAt.get(host) || 0;

        logger?.dbg?.(
          `REQ#${reqId} FAIL ${tag} retryable=${retryable} err=${e?.message || e} host=${host} nextOkIn=${Math.max(
            0,
            nextOk - Date.now(),
          )}ms`,
        );

        if (!retryable || attempt === maxRetries) throw e;

        // Never retry before the host cooldown has elapsed.
        let delay = backoffMs(attempt);
        if (nextOk > Date.now()) delay = Math.max(delay, nextOk - Date.now());

        logger?.warn?.(`Request failed, retrying in ${delay}ms (${attempt + 1}/${maxRetries})`);
        // NOTE: the host slot is still held while sleeping; it is released in
        // the finally below, after the backoff.
        await sleep(delay);
      } finally {
        releaseHost();
        inflight--;
        logger?.dbg?.(`REQ#${reqId} END ${tag} (${inflightStr()})`);
      }
    }

    throw new Error("unreachable");
  }

  function fetchTextWithRetry(url, tag, ua, opts) {
    return fetchWithRetry(url, tag, ua, { mode: "text", ...(opts || {}) });
  }

  function fetchJsonWithRetry(url, tag, ua, opts) {
    return fetchWithRetry(url, tag, ua, { mode: "json", ...(opts || {}) });
  }

  return { fetchTextWithRetry, fetchJsonWithRetry, inflightStr };
}
|
||||||
|
|
||||||
module.exports = { createHttpClient, RetryableError };
|
||||||
|
|
|
||||||
|
|
@ -4,55 +4,55 @@ const { C, color } = require("../utils/ansi");
|
||||||
const { ts } = require("../utils/time");
|
const { ts } = require("../utils/time");
|
||||||
|
|
||||||
/**
 * Create a console logger with timestamped, optionally colored prefixes.
 *
 * Color is applied only when requested AND stdout is a TTY. `info` and `dbg`
 * print only when `debug` is true. `err` writes to stderr; everything else
 * writes to stdout.
 *
 * @param {object} [opts]
 * @param {boolean} [opts.debug=false] - Enable `info` and `dbg` output.
 * @param {boolean} [opts.colorize=true] - Request colored output.
 * @returns {object} Logger exposing the level functions, the styling helpers
 *   and the resolved `debug` / `isTTY` / `colorize` flags.
 */
function createLogger({ debug = false, colorize: wantColor = true } = {}) {
  const isTTY = Boolean(process.stdout && process.stdout.isTTY);
  const enabled = Boolean(wantColor && isTTY);

  // Build the "[LABEL <timestamp>] " prefix in the given color.
  const prefix = (label, code) => color(`[${label} ${ts()}] `, code, enabled);

  function ok(msg) {
    console.log(prefix("OK", C.green) + String(msg));
  }

  function warn(msg) {
    console.log(prefix("WARN", C.yellow) + String(msg));
  }

  function err(msg) {
    console.error(prefix("ERR", C.red) + String(msg));
  }

  function info(msg) {
    if (debug) console.log(prefix("INFO", C.cyan) + String(msg));
  }

  function dbg(msg) {
    if (debug) console.log(prefix("DEBUG", C.gray) + String(msg));
  }

  function dim(s) {
    return color(s, C.dim, enabled);
  }

  function bold(s) {
    return color(s, C.bold, enabled);
  }

  function paint(s, code) {
    return color(s, code, enabled);
  }

  return {
    debug,
    isTTY,
    colorize: enabled,
    C,
    ok,
    warn,
    err,
    info,
    dbg,
    dim,
    bold,
    color: paint,
  };
}
|
||||||
|
|
||||||
module.exports = { createLogger };
|
||||||
|
|
|
||||||
268
src/main.js
268
src/main.js
|
|
@ -16,189 +16,169 @@ const { runAllStores } = require("./tracker/run_all");
|
||||||
const { renderFinalReport } = require("./tracker/report");
|
const { renderFinalReport } = require("./tracker/report");
|
||||||
const { ensureDir } = require("./tracker/db");
|
const { ensureDir } = require("./tracker/db");
|
||||||
|
|
||||||
// User-Agent sent when neither a store nor a call supplies its own.
const DEFAULT_UA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0 Safari/537.36";
|
|
||||||
|
|
||||||
/**
 * Resolve a directory setting to a usable path.
 *
 * @param {string|undefined|null} p - Configured directory; may be empty.
 * @param {string} fallback - Returned as-is when `p` is empty or blank.
 * @returns {string} `p` itself when absolute, otherwise `p` joined onto the
 *   current working directory.
 */
function resolveDir(p, fallback) {
  const v = String(p || "").trim();
  if (!v) return fallback;
  return path.isAbsolute(v) ? v : path.join(process.cwd(), v);
}
|
||||||
|
|
||||||
/**
 * Read the value of a command-line flag.
 *
 * Supports:
 *   --stores=a,b
 *   --stores a,b
 *
 * The space-separated form wins when both are present. The token following a
 * bare flag is taken verbatim, even if it looks like another flag.
 *
 * @param {string[]} argv - Arguments (without node and script path).
 * @param {string} flag - Flag name including dashes, e.g. "--stores".
 * @returns {string} The value, or "" when the flag is absent or has no value.
 */
function getFlagValue(argv, flag) {
  const idx = argv.indexOf(flag);
  if (idx >= 0) return argv[idx + 1] || "";

  const pref = `${flag}=`;
  const inline = argv.find((a) => a.startsWith(pref));
  return inline === undefined ? "" : inline.slice(pref.length);
}
|
||||||
|
|
||||||
/**
 * Normalize a store identifier for loose comparison: lowercase it and drop
 * every character that is not a-z or 0-9.
 *
 * @param {*} s - Value to normalize; nullish values become "".
 * @returns {string} The normalized token (possibly empty).
 */
function normToken(s) {
  return String(s || "")
    .toLowerCase()
    .trim()
    .replace(/[^a-z0-9]+/g, "");
}
|
||||||
|
|
||||||
/**
 * Split a comma-separated store filter into trimmed, non-empty entries.
 *
 * @param {*} raw - Raw filter value, e.g. "a, b".
 * @returns {string[]} The entries in their original order; [] when blank.
 */
function parseStoresFilter(raw) {
  const v = String(raw || "").trim();
  if (!v) return [];
  return v
    .split(",")
    .map((x) => x.trim())
    .filter(Boolean);
}
|
||||||
|
|
||||||
/**
 * Select the stores named in a comma-separated filter.
 *
 * Each requested name is compared, after normalization, against every store's
 * key, name and host. Names that normalize to nothing are ignored.
 *
 * @param {Array<{key: string, name?: string, host?: string}>} stores - All known stores.
 * @param {string} wantedListRaw - Raw filter value; blank means "no filter".
 * @returns {Array} `stores` itself when the filter is blank, otherwise the
 *   matched stores in request order, de-duplicated by key.
 * @throws {Error} When a requested name matches no store; the message lists
 *   the unknown names and the available stores.
 */
function filterStoresOrThrow(stores, wantedListRaw) {
  const wanted = parseStoresFilter(wantedListRaw);
  if (!wanted.length) return stores;

  // normalized key/name/host -> first store that carries it
  const byToken = new Map();
  for (const s of stores) {
    for (const token of [s.key, s.name, s.host].map(normToken)) {
      if (token && !byToken.has(token)) byToken.set(token, s);
    }
  }

  // key -> store; a Map keeps request order and de-dupes by key
  // (in case name+key both matched)
  const matched = new Map();
  const missing = [];

  for (const w of wanted) {
    // Normalize each requested name individually so the raw name reported in
    // the error is always the one that actually failed to match.
    const wn = normToken(w);
    if (!wn) continue;

    const hit = byToken.get(wn);
    if (!hit) {
      missing.push(w);
      continue;
    }
    if (!matched.has(hit.key)) matched.set(hit.key, hit);
  }

  if (missing.length) {
    const avail = stores.map((s) => `${s.key}${s.name ? ` (${s.name})` : ""}`).join(", ");
    throw new Error(`Unknown store(s) in --stores: ${missing.join(", ")}\nAvailable: ${avail}`);
  }

  return [...matched.values()];
}
|
||||||
|
|
||||||
async function main() {
|
async function main() {
|
||||||
if (typeof fetch !== "function") {
|
if (typeof fetch !== "function") {
|
||||||
throw new Error(
|
throw new Error("Global fetch() not found. Please use Node.js 18+ (or newer). ");
|
||||||
"Global fetch() not found. Please use Node.js 18+ (or newer). "
|
}
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
const argv = process.argv.slice(2);
|
const argv = process.argv.slice(2);
|
||||||
const args = parseArgs(argv);
|
const args = parseArgs(argv);
|
||||||
|
|
||||||
const logger = createLogger({ debug: args.debug, colorize: true });
|
const logger = createLogger({ debug: args.debug, colorize: true });
|
||||||
|
|
||||||
const config = {
|
const config = {
|
||||||
debug: args.debug,
|
debug: args.debug,
|
||||||
maxPages: args.maxPages,
|
maxPages: args.maxPages,
|
||||||
concurrency: args.concurrency ?? clampInt(process.env.CONCURRENCY, 6, 1, 64),
|
concurrency: args.concurrency ?? clampInt(process.env.CONCURRENCY, 6, 1, 64),
|
||||||
staggerMs:
|
staggerMs: args.staggerMs ?? clampInt(process.env.STAGGER_MS, 150, 0, 5000),
|
||||||
args.staggerMs ?? clampInt(process.env.STAGGER_MS, 150, 0, 5000),
|
maxRetries: clampInt(process.env.MAX_RETRIES, 6, 0, 20),
|
||||||
maxRetries: clampInt(process.env.MAX_RETRIES, 6, 0, 20),
|
timeoutMs: clampInt(process.env.TIMEOUT_MS, 25000, 1000, 120000),
|
||||||
timeoutMs: clampInt(process.env.TIMEOUT_MS, 25000, 1000, 120000),
|
discoveryGuess: args.guess ?? clampInt(process.env.DISCOVERY_GUESS, 20, 1, 5000),
|
||||||
discoveryGuess:
|
discoveryStep: args.step ?? clampInt(process.env.DISCOVERY_STEP, 5, 1, 500),
|
||||||
args.guess ?? clampInt(process.env.DISCOVERY_GUESS, 20, 1, 5000),
|
categoryConcurrency: clampInt(process.env.CATEGORY_CONCURRENCY, 5, 1, 64),
|
||||||
discoveryStep:
|
defaultUa: DEFAULT_UA,
|
||||||
args.step ?? clampInt(process.env.DISCOVERY_STEP, 5, 1, 500),
|
defaultParseProducts: parseProductsSierra,
|
||||||
categoryConcurrency: clampInt(process.env.CATEGORY_CONCURRENCY, 5, 1, 64),
|
dbDir: resolveDir(args.dataDir ?? process.env.DATA_DIR, path.join(process.cwd(), "data", "db")),
|
||||||
defaultUa: DEFAULT_UA,
|
reportDir: resolveDir(args.reportDir ?? process.env.REPORT_DIR, path.join(process.cwd(), "reports")),
|
||||||
defaultParseProducts: parseProductsSierra,
|
};
|
||||||
dbDir: resolveDir(
|
|
||||||
args.dataDir ?? process.env.DATA_DIR,
|
|
||||||
path.join(process.cwd(), "data", "db")
|
|
||||||
),
|
|
||||||
reportDir: resolveDir(
|
|
||||||
args.reportDir ?? process.env.REPORT_DIR,
|
|
||||||
path.join(process.cwd(), "reports")
|
|
||||||
),
|
|
||||||
};
|
|
||||||
|
|
||||||
ensureDir(config.dbDir);
|
ensureDir(config.dbDir);
|
||||||
ensureDir(config.reportDir);
|
ensureDir(config.reportDir);
|
||||||
|
|
||||||
const http = createHttpClient({
|
const http = createHttpClient({
|
||||||
maxRetries: config.maxRetries,
|
maxRetries: config.maxRetries,
|
||||||
timeoutMs: config.timeoutMs,
|
timeoutMs: config.timeoutMs,
|
||||||
defaultUa: config.defaultUa,
|
defaultUa: config.defaultUa,
|
||||||
logger,
|
logger,
|
||||||
});
|
});
|
||||||
const stores = createStores({ defaultUa: config.defaultUa });
|
const stores = createStores({ defaultUa: config.defaultUa });
|
||||||
|
|
||||||
const storesFilterRaw =
|
const storesFilterRaw = getFlagValue(argv, "--stores") || String(process.env.STORES || "").trim();
|
||||||
getFlagValue(argv, "--stores") || String(process.env.STORES || "").trim();
|
|
||||||
|
|
||||||
const storesToRun = filterStoresOrThrow(stores, storesFilterRaw);
|
const storesToRun = filterStoresOrThrow(stores, storesFilterRaw);
|
||||||
if (storesFilterRaw) {
|
if (storesFilterRaw) {
|
||||||
logger.info(`Stores filter: ${storesToRun.map((s) => s.key).join(", ")}`);
|
logger.info(`Stores filter: ${storesToRun.map((s) => s.key).join(", ")}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
const report = await runAllStores(storesToRun, { config, logger, http });
|
const report = await runAllStores(storesToRun, { config, logger, http });
|
||||||
|
|
||||||
const meaningful =
|
const meaningful =
|
||||||
(report?.totals?.newCount || 0) +
|
(report?.totals?.newCount || 0) +
|
||||||
(report?.totals?.updatedCount || 0) +
|
(report?.totals?.updatedCount || 0) +
|
||||||
(report?.totals?.removedCount || 0) +
|
(report?.totals?.removedCount || 0) +
|
||||||
(report?.totals?.restoredCount || 0) +
|
(report?.totals?.restoredCount || 0) +
|
||||||
(report?.totals?.metaChangedCount || 0) >
|
(report?.totals?.metaChangedCount || 0) >
|
||||||
0;
|
0;
|
||||||
|
|
||||||
const reportTextColor = renderFinalReport(report, {
|
const reportTextColor = renderFinalReport(report, {
|
||||||
dbDir: config.dbDir,
|
dbDir: config.dbDir,
|
||||||
colorize: logger.colorize,
|
colorize: logger.colorize,
|
||||||
});
|
});
|
||||||
process.stdout.write(reportTextColor);
|
process.stdout.write(reportTextColor);
|
||||||
|
|
||||||
if (!meaningful) {
|
if (!meaningful) {
|
||||||
logger.ok("No meaningful changes; skipping report write.");
|
logger.ok("No meaningful changes; skipping report write.");
|
||||||
process.exitCode = 3; // special "no-op" code
|
process.exitCode = 3; // special "no-op" code
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const reportTextPlain = renderFinalReport(report, {
|
const reportTextPlain = renderFinalReport(report, {
|
||||||
dbDir: config.dbDir,
|
dbDir: config.dbDir,
|
||||||
colorize: false,
|
colorize: false,
|
||||||
});
|
});
|
||||||
const file = path.join(
|
const file = path.join(config.reportDir, `${isoTimestampFileSafe(new Date())}.txt`);
|
||||||
config.reportDir,
|
try {
|
||||||
`${isoTimestampFileSafe(new Date())}.txt`
|
fs.writeFileSync(file, reportTextPlain, "utf8");
|
||||||
);
|
logger.ok(`Report saved: ${logger.dim(file)}`);
|
||||||
try {
|
} catch (e) {
|
||||||
fs.writeFileSync(file, reportTextPlain, "utf8");
|
logger.warn(`Report save failed: ${e?.message || e}`);
|
||||||
logger.ok(`Report saved: ${logger.dim(file)}`);
|
}
|
||||||
} catch (e) {
|
|
||||||
logger.warn(`Report save failed: ${e?.message || e}`);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = { main };
|
module.exports = { main };
|
||||||
|
|
||||||
// Run main() only when this file is executed directly, not when it is required.
if (require.main === module) {
  // No logger is available at this level, so report straight to stderr and
  // flag failure through the exit code.
  const reportFatal = (err) => {
    const text = err && err.stack ? err.stack : String(err);
    console.error(text);
    process.exitCode = 1;
  };

  main().catch(reportFatal);
}
|
||||||
|
|
|
||||||
|
|
@ -11,368 +11,374 @@ const { buildDbObject, writeJsonAtomic } = require("../tracker/db");
|
||||||
const { addCategoryResultToReport } = require("../tracker/report");
|
const { addCategoryResultToReport } = require("../tracker/report");
|
||||||
|
|
||||||
/**
 * Formats a byte count with humanBytes and right-aligns it to 8 columns.
 * A falsy count (undefined, null, 0, NaN) is formatted as 0.
 */
function kbStr(bytes) {
  const label = humanBytes(bytes || 0);
  return label.padStart(8, " ");
}
|
||||||
|
|
||||||
/**
 * Formats a millisecond duration as seconds, right-aligned to 7 columns.
 * Durations that round to under 10s keep one decimal ("1.2s"); longer ones are
 * whole seconds ("12s"). Non-finite input is treated as 0.
 */
function secStr(ms) {
  const seconds = Number.isFinite(ms) ? ms / 1000 : 0;
  const rounded = Math.round(seconds * 10) / 10;

  let text;
  if (rounded < 10) {
    text = `${rounded.toFixed(1)}s`;
  } else {
    text = `${Math.round(seconds)}s`;
  }
  return text.padStart(7, " ");
}
|
||||||
|
|
||||||
/**
 * Renders "i/total", left-padding i (via padLeft) to the printed width of total.
 */
function pageStr(i, total) {
  const width = String(total).length;
  const current = padLeft(i, width);
  return `${current}/${total}`;
}
|
||||||
|
|
||||||
/**
 * Renders done/total as a whole percentage (rounded down), padded to 3 columns
 * plus a "%" sign. A falsy total yields 0%.
 */
function pctStr(done, total) {
  let pct = 0;
  if (total) {
    pct = Math.floor((done / total) * 100);
  }
  const padded = padLeft(pct, 3);
  return `${padded}%`;
}
|
||||||
|
|
||||||
/**
 * Parses a loosely formatted numeric value (e.g. "$1,234.50") into a number.
 *
 * Every character other than digits and "." is stripped before parsing, so
 * currency symbols and thousands separators are ignored (and so is a minus sign).
 *
 * @param {*} v - Raw value; null/undefined are treated as empty.
 * @returns {number} The parsed number, or NaN when there is nothing numeric to parse.
 */
function toNum(v) {
  const s = String(v ?? "").trim();
  if (!s) return NaN;

  const digits = s.replace(/[^0-9.]/g, "");
  // Number("") is 0, so text without any digits (e.g. "N/A") must be rejected
  // explicitly instead of being reported as a zero amount.
  if (!digits) return NaN;

  const n = Number(digits);
  return Number.isFinite(n) ? n : NaN;
}
|
||||||
|
|
||||||
/**
 * Formats a value as a dollar amount with two decimals (e.g. "$12.50").
 * Returns "" when toNum cannot produce a finite amount greater than zero.
 */
function money(v) {
  const amount = toNum(v);
  const usable = Number.isFinite(amount) && amount > 0;
  return usable ? `$${amount.toFixed(2)}` : "";
}
|
||||||
|
|
||||||
/**
 * Chooses which of regular_price / sale_price / net_price to display for a product.
 *
 * The sale price wins when it is positive and either the item is flagged is_sale,
 * the sale price undercuts the regular price, or there is no usable net price
 * above it. Otherwise the net price is used, then the regular price.
 * Returns the formatted amount, or "" when no positive price exists.
 */
function pickBestPrice(p) {
  const positive = (n) => Number.isFinite(n) && n > 0;

  const reg = toNum(p?.regular_price);
  const sale = toNum(p?.sale_price);
  const net = toNum(p?.net_price);

  if (positive(sale)) {
    const flaggedSale = p?.is_sale === true;
    const belowRegular = positive(reg) && sale < reg;
    // Some feeds put the current price in sale_price even without flags.
    const notAboveNet = !positive(net) || sale <= net;
    if (flaggedSale || belowRegular || notAboveNet) return money(sale);
  }

  if (positive(net)) return money(net);
  if (positive(reg)) return money(reg);
  return "";
}
|
||||||
|
|
||||||
/**
 * Turns a possibly relative URL into an absolute one.
 *
 * Protocol-relative URLs ("//host/x") get an https: scheme, http(s) URLs are
 * returned untouched, and anything else is resolved against base after leading
 * slashes are removed. If resolution throws, the trimmed input is returned as-is.
 * Empty or missing input yields "".
 */
function normAbsUrl(raw, base) {
  const text = String(raw || "").trim();
  if (text === "") return "";
  if (text.startsWith("//")) return `https:${text}`;
  if (/^https?:\/\//i.test(text)) return text;

  const relative = text.replace(/^\/+/, "");
  try {
    const resolved = new URL(relative, base);
    return resolved.toString();
  } catch {
    return text;
  }
}
|
||||||
|
|
||||||
/**
 * Decides whether an ARC product should be treated as in stock.
 *
 * Kept strict on purpose (only in-stock items are shown): an explicit
 * available_for_sale === false always wins, then a numeric on_hand decides,
 * and only when on_hand is absent does available_for_sale truthiness apply.
 *
 * @param {object|null|undefined} p - Raw product record from the API.
 * @returns {boolean} true when the product counts as in stock.
 */
function isInStock(p) {
  // available_for_sale is the strongest signal; on_hand is a good secondary signal.
  if (p && p.available_for_sale === false) return false;

  // Number(null) and Number("") are both 0, so a null/blank on_hand must be
  // recognised as "missing" before coercion; otherwise it reads as zero stock.
  const rawOnHand = p?.on_hand;
  const hasOnHand = rawOnHand != null && String(rawOnHand).trim() !== "";
  if (hasOnHand) {
    const onHand = Number(rawOnHand);
    if (Number.isFinite(onHand)) return onHand > 0;
  }

  // If on_hand is missing, fall back to available_for_sale truthiness.
  return Boolean(p?.available_for_sale);
}
|
||||||
|
|
||||||
/**
 * Maps an ARC image reference onto the public Barnet CDN.
 *
 * URLs already under https://s.barnetnetwork.com/img/m/ are returned unchanged.
 * Otherwise the scheme+host and leading slashes are dropped and the remaining
 * path is appended to the CDN prefix. Empty input and data: URIs yield "".
 */
function arcNormalizeImg(raw) {
  const cdnPrefix = "https://s.barnetnetwork.com/img/m/";

  const src = String(raw || "").trim();
  if (src === "") return "";

  // already public
  if (/^https?:\/\/s\.barnetnetwork\.com\/img\/m\//i.test(src)) return src;

  // site-relative -> public CDN
  const path = src.replace(/^https?:\/\/[^/]+/i, "").replace(/^\/+/, "");

  // common case: "custom/all/BC398280.png" OR "bc_lrs/000046/0000466854.jpg"
  const isKnownFolder = /^(custom\/|bc_lrs\/)/i.test(path);
  // fallback: any other non-empty path is still tried on the CDN
  const isOtherPath = path !== "" && !/^data:/i.test(path);

  return isKnownFolder || isOtherPath ? `${cdnPrefix}${path}` : "";
}
|
||||||
|
|
||||||
/**
 * Converts one raw ARC API product into the tracker's item shape.
 *
 * @param {object} p - Raw product record from the ARC products API.
 * @param {object} ctx - Scan context; reads ctx.store.host and ctx.store.name.
 * @returns {{name: string, price: string, url: string, sku: string, img: string}|null}
 *   The tracked item, or null when the product is missing, out of stock, or
 *   lacks a URL or a name.
 */
function arcItemToTracked(p, ctx) {
  if (!p) return null;
  if (!isInStock(p)) return null;

  const url = normAbsUrl(p.url, `https://${ctx.store.host}/`);
  if (!url) return null;

  const name = cleanText(p.description || p.name || "");
  if (!name) return null;

  const price = pickBestPrice(p);

  // Prefer the CSPC id when it is a plain 1-11 digit number.
  const rawCspcId = String(p?.cspcid ?? "").trim();
  const hasCspcId = /^\d{1,11}$/.test(rawCspcId);

  // Number(null) and Number("") are both 0, which would hand every id-less
  // product the same bogus SKU "id:0"; treat a null/blank id as absent instead.
  const rawId = String(p?.id ?? "").trim();
  const id = rawId ? Number(rawId) : NaN;

  let rawSku = "";
  if (hasCspcId) rawSku = `id:${rawCspcId}`;
  else if (Number.isFinite(id)) rawSku = `id:${id}`;

  // Fall back to the raw key when normalization yields nothing.
  const sku = normalizeSkuKey(rawSku, { storeLabel: ctx?.store?.name, url }) || rawSku || "";

  const img = arcNormalizeImg(p.image || p.image_url || p.img || "");

  return { name, price, url, sku, img };
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
 * Reads the "category" and "sub_category" query parameters from a category
 * start URL. Missing parameters, or a URL that cannot be parsed, give "".
 */
function parseCategoryParamsFromStartUrl(startUrl) {
  let params;
  try {
    params = new URL(startUrl).searchParams;
  } catch {
    return { category: "", sub: "" };
  }

  return {
    category: params.get("category") || "",
    sub: params.get("sub_category") || "",
  };
}
|
||||||
|
|
||||||
/**
 * Guards against a partial scan wiping out most of the stored catalogue.
 *
 * When this run discovered fewer than 60% of the URLs held in the previous DB,
 * every previously active (non-removed) item that was not seen is copied into
 * `discovered`, so the later merge does not mark it as removed.
 *
 * @param {{byUrl: Map<string, object>}|null|undefined} prevDb - Previous DB snapshot.
 * @param {Map<string, object>} discovered - Items found this run; mutated in place.
 * @param {object} ctx - Scan context; uses ctx.logger.warn (if present) and ctx.catPrefixOut.
 * @param {string} reason - Short description included in the warning message.
 * @returns {boolean} true when prior items were preserved, false when nothing was done.
 */
function avoidMassRemoval(prevDb, discovered, ctx, reason) {
  const prevSize = prevDb?.byUrl?.size || 0;
  const discSize = discovered?.size || 0;

  // Nothing to protect, or no collection to preserve items into.
  if (prevSize <= 0 || !discovered) return false;

  // An empty result is the most extreme partial scan (ratio 0), so it must be
  // preserved too; bailing out on it would let the merge remove every item.
  const ratio = discSize / prevSize;
  if (ratio >= 0.6) return false;

  ctx.logger.warn?.(
    `${ctx.catPrefixOut} | ARC partial scan (${discSize}/${prevSize}); preserving DB to avoid removals (${reason}).`,
  );

  // Preserve prior active items not seen this run.
  for (const [u, it] of prevDb.byUrl.entries()) {
    if (!it || it.removed) continue;
    if (!discovered.has(u)) discovered.set(u, it);
  }
  return true;
}
|
||||||
|
|
||||||
/**
 * Scans one ARC category through the shop's JSON products API, merges the
 * result into the category DB, writes the DB file and records the outcome on
 * the report.
 *
 * Paging stops on an empty page, a short page, a repeated page, two pages in a
 * row without new items, a failed request, or the page cap.
 *
 * @param {object} ctx - Scan context (store, cat, config, http, logger, dbFile, catPrefixOut).
 * @param {object} prevDb - Previous DB snapshot passed to the merge.
 * @param {object} report - Run report; categories and totals are updated in place.
 * @returns {Promise<void>} Resolves when the category is done; returns early
 *   without touching the DB when no sub_category can be determined.
 */
async function scanCategoryArcApi(ctx, prevDb, report) {
  const t0 = Date.now();

  // Warm cookies / session (Barnet-based shops sometimes need this).
  // Deliberately best-effort: a failed warm-up must not stop the API scan.
  try {
    await ctx.http.fetchTextWithRetry(ctx.cat.startUrl, `arc:warm:${ctx.cat.key}`, ctx.store.ua);
  } catch (_) {}

  const { category: urlCat, sub: urlSub } = parseCategoryParamsFromStartUrl(ctx.cat.startUrl);
  const category = String(ctx.cat.arcCategory || urlCat || "Spirits").trim();
  const subCategory = String(ctx.cat.arcSubCategory || urlSub || "").trim();

  if (!subCategory) {
    ctx.logger.warn(`${ctx.catPrefixOut} | ARC missing sub_category; skipping scan.`);
    return;
  }

  const apiBase = new URL(`https://${ctx.store.host}/api/shop/${ctx.store.shopId}/products`);
  const discovered = new Map();

  // A missing or non-numeric maxPages means "no limit". Without this guard an
  // undefined value turns the cap into NaN, the loop never runs, and the empty
  // result would mark the whole category as removed.
  const configuredMax = Number(ctx.config.maxPages);
  const maxPagesCap = ctx.config.maxPages == null || !Number.isFinite(configuredMax) ? 5000 : configuredMax;
  const hardCap = Math.min(5000, Math.max(1, maxPagesCap));

  let donePages = 0;
  let aborted = false;

  // Pagination safety
  let pageSize = 0; // inferred from first non-empty page
  const seenPageFingerprints = new Set();
  let stagnantPages = 0;

  for (let page = 1; page <= hardCap; page++) {
    const u = new URL(apiBase.toString());
    u.searchParams.set("p", String(page));
    u.searchParams.set("show_on_web", "true");
    u.searchParams.set("sort_by", String(ctx.cat.sortBy || "price_desc"));
    u.searchParams.set("category", category);
    u.searchParams.set("sub_category", subCategory);
    u.searchParams.set("varital_name", "");
    u.searchParams.set("no_item_found", "No item found.");
    u.searchParams.set("avail_for_sale", "false");
    u.searchParams.set("_dc", String(Date.now()));

    let r;
    try {
      r = await ctx.http.fetchJsonWithRetry(u.toString(), `arc:api:${ctx.cat.key}:p${page}`, ctx.store.ua, {
        method: "GET",
        headers: {
          Accept: "application/json, */*",
          "X-Requested-With": "XMLHttpRequest",
          Referer: ctx.cat.startUrl,
        },
      });
    } catch (e) {
      ctx.logger.warn(`${ctx.catPrefixOut} | ARC API page ${page} failed: ${e?.message || e}`);
      aborted = true;
      break;
    }

    const arr = Array.isArray(r?.json?.items) ? r.json.items : [];
    donePages++;

    const rawCount = arr.length;

    // One formatter for both progress lines of this page. The total page count
    // is unknown up front, so the running count is shown on both sides.
    const logPage = (kept) => {
      const status = (r?.status || "").toString().padEnd(3);
      ctx.logger.ok(
        `${ctx.catPrefixOut} | API Page ${pageStr(donePages, donePages)} | ${status} | raw=${padLeft(rawCount, 3)} kept=${padLeft(kept, 3)} | bytes=${kbStr(r?.bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r?.ms)}`,
      );
    };

    // Log early (even for empty)
    logPage(0);

    if (!rawCount) break;

    // Infer page size from first non-empty page
    if (!pageSize) pageSize = rawCount;

    // Detect wrap/repeat: fingerprint by ids+urls (stable enough)
    const fp = arr
      .map((p) => `${p?.id || ""}:${p?.url || ""}`)
      .sort()
      .join("|");
    if (fp && seenPageFingerprints.has(fp)) {
      ctx.logger.warn(`${ctx.catPrefixOut} | ARC pagination repeated at p=${page}; stopping.`);
      break;
    }
    if (fp) seenPageFingerprints.add(fp);

    const before = discovered.size;

    let kept = 0;
    for (const p of arr) {
      const it = arcItemToTracked(p, ctx);
      if (!it) continue;
      discovered.set(it.url, it);
      kept++;
    }

    // Re-log with kept filled in (overwriting the first line is not possible; emit a second line)
    logPage(kept);

    // Stop condition #1: last page (short page)
    if (pageSize && rawCount < pageSize) break;

    // Stop condition #2: no new uniques for 2 pages (safety)
    if (discovered.size === before) stagnantPages++;
    else stagnantPages = 0;

    if (stagnantPages >= 2) {
      ctx.logger.warn(`${ctx.catPrefixOut} | ARC pagination stalled (no new items); stopping.`);
      break;
    }
  }

  // A failed request means the listing is incomplete; keep unseen items alive.
  if (aborted) {
    avoidMassRemoval(prevDb, discovered, ctx, `api pages=${donePages} sub=${subCategory}`);
  }

  ctx.logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}`);

  const { merged, newItems, updatedItems, removedItems, restoredItems, metaChangedItems } = mergeDiscoveredIntoDb(
    prevDb,
    discovered,
    { storeLabel: ctx.store.name },
  );

  const dbObj = buildDbObject(ctx, merged);
  writeJsonAtomic(ctx.dbFile, dbObj);

  ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`);

  const elapsedMs = Date.now() - t0;
  ctx.logger.ok(
    `${ctx.catPrefixOut} | Done in ${secStr(elapsedMs)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Meta=${metaChangedItems.length} Total(DB)=${merged.size}`,
  );

  report.categories.push({
    store: ctx.store.name,
    label: ctx.cat.label,
    key: ctx.cat.key,
    dbFile: ctx.dbFile,
    scannedPages: Math.max(1, donePages),
    discoveredUnique: discovered.size,
    newCount: newItems.length,
    updatedCount: updatedItems.length,
    removedCount: removedItems.length,
    restoredCount: restoredItems.length,
    metaChangedCount: metaChangedItems.length,
    elapsedMs,
  });
  report.totals.newCount += newItems.length;
  report.totals.updatedCount += updatedItems.length;
  report.totals.removedCount += removedItems.length;
  report.totals.restoredCount += restoredItems.length;
  report.totals.metaChangedCount += metaChangedItems.length;

  addCategoryResultToReport(
    report,
    ctx.store.name,
    ctx.cat.label,
    newItems,
    updatedItems,
    removedItems,
    restoredItems,
  );
}
|
||||||
|
|
||||||
/**
 * Builds the store definition for ARC Liquor (Kelowna, Harvey Ave).
 *
 * All categories are "Spirits" sub-categories scanned through the ARC API with
 * the same price-descending sort, so they are generated from the sub-category
 * names instead of being spelled out one by one.
 */
function createStore(defaultUa) {
  const host = "kelownaharveyave.armstrong.coop";
  const arcCategory = "Spirits";

  const toCategory = (arcSubCategory) => ({
    key: `spirits-${arcSubCategory.toLowerCase()}`,
    label: `${arcCategory} - ${arcSubCategory}`,
    startUrl: `https://${host}/products?category=${arcCategory}&sub_category=${arcSubCategory}`,
    arcCategory,
    arcSubCategory,
    sortBy: "price_desc",
  });

  return {
    key: "arc",
    name: "ARC Liquor",
    host,
    shopId: "644-290",
    ua: defaultUa,
    scanCategory: scanCategoryArcApi,
    categories: ["Rum", "Scotch", "Whiskey"].map(toCategory),
  };
}
|
||||||
|
|
||||||
module.exports = { createStore };
|
module.exports = { createStore };
|
||||||
|
|
|
||||||
|
|
@ -9,332 +9,360 @@ const { buildDbObject, writeJsonAtomic } = require("../tracker/db");
|
||||||
const { addCategoryResultToReport } = require("../tracker/report");
|
const { addCategoryResultToReport } = require("../tracker/report");
|
||||||
|
|
||||||
/**
 * Formats a byte count with humanBytes and right-aligns it to 8 columns.
 */
function kbStr(bytes) {
  const label = humanBytes(bytes);
  return label.padStart(8, " ");
}
|
||||||
|
|
||||||
/**
 * Formats a millisecond duration as seconds, right-aligned to 7 columns:
 * one decimal when it rounds to under 10s, whole seconds otherwise.
 * Non-finite input counts as 0.
 */
function secStr(ms) {
  const seconds = Number.isFinite(ms) ? ms / 1000 : 0;
  const rounded = Math.round(seconds * 10) / 10;
  const text = rounded < 10 ? `${rounded.toFixed(1)}s` : `${Math.round(seconds)}s`;
  return text.padStart(7, " ");
}
|
||||||
|
|
||||||
/**
 * Renders "i/total" with i left-padded (via padLeft) to the width of total.
 */
function pageStr(i, total) {
  const totalText = String(total);
  const current = padLeft(i, totalText.length);
  return `${current}/${total}`;
}
|
||||||
|
|
||||||
/**
 * Renders done/total as a floored whole percentage padded to 3 columns,
 * followed by "%". A falsy total yields 0%.
 */
function pctStr(done, total) {
  let pct = 0;
  if (total) {
    pct = Math.floor((done / total) * 100);
  }
  return `${padLeft(pct, 3)}%`;
}
|
||||||
|
|
||||||
/**
 * Formats a value as a dollar amount with two decimals, or "" when the value
 * does not coerce to a finite number.
 */
function cad(n) {
  const amount = Number(n);
  return Number.isFinite(amount) ? `$${amount.toFixed(2)}` : "";
}
|
||||||
|
|
||||||
/**
 * Coerces a price-like value to a number.
 *
 * Numbers are returned unchanged. Anything else is stringified and stripped of
 * every character other than digits and "." before parsing, so "$1,299" gives 1299.
 *
 * @param {*} n - Raw value.
 * @returns {number} The parsed number, or NaN for null/undefined, blank text,
 *   or text without any digits.
 */
function asNumber(n) {
  if (n == null) return NaN;
  if (typeof n === "number") return n;

  const t = String(n).trim();
  if (!t) return NaN;

  const digits = t.replace(/[^0-9.]/g, "");
  // Number("") is 0, so digit-free text such as "N/A" must not be parsed;
  // otherwise it would surface downstream as a real zero amount.
  if (!digits) return NaN;

  return Number(digits);
}
|
||||||
|
|
||||||
/**
 * Extracts the total hit count from a search response.
 * Accepts hits.total as a plain number or as an object with a numeric `value`
 * (ES-style); anything else counts as 0.
 */
function bclTotalHits(json) {
  const total = json?.hits?.total;
  if (typeof total === "number") return total;

  const nested = total ? total.value : undefined; // ES-style { value: n }
  return typeof nested === "number" ? nested : 0;
}
|
||||||
|
|
||||||
/**
 * Decides whether a BCL product record should be treated as in stock.
 *
 * Textual availability fields are checked first, in order: "out of stock"
 * means false, "in stock" or "available" means true. If no text decides, a
 * numeric availableUnits decides. With no signal at all the product is assumed
 * to be in stock.
 *
 * @param {object|null|undefined} src - Raw product record.
 * @returns {boolean} true when the product counts as in stock.
 */
function bclIsInStock(src) {
  const candidates = [
    src?.availability_override,
    src?.availability,
    src?.availabilityText,
    src?.availabilityStatus,
    src?.availability_status,
    src?.stockStatus,
    src?.stock_status,
    src?.status,
    src?.statusText,
  ]
    .map((v) => (v == null ? "" : String(v)))
    .filter(Boolean);

  for (const s of candidates) {
    if (/out of stock/i.test(s)) return false;
    if (/\bin stock\b/i.test(s)) return true;
    if (/\bavailable\b/i.test(s)) return true; // "Available Feb 07"
  }

  // Number(null) and Number("") are both 0, so a null/blank availableUnits must
  // be treated as "no signal" rather than as zero units.
  const rawUnits = src?.availableUnits;
  const hasUnits = rawUnits != null && String(rawUnits).trim() !== "";
  if (hasUnits) {
    const units = Number(rawUnits);
    if (Number.isFinite(units)) return units > 0;
  }

  return true;
}
|
||||||
|
|
||||||
/**
 * Turn a possibly relative or protocol-relative URL into an absolute one.
 *
 * - blank / falsy input       -> ""
 * - already http(s)           -> returned untouched (after trimming)
 * - protocol-relative ("//x") -> prefixed with "https:"
 * - anything else             -> resolved against the BCL site root
 *
 * @param {*} raw - Candidate URL.
 * @returns {string} Absolute URL, or the trimmed input if it cannot be resolved.
 */
function bclNormalizeAbsUrl(raw) {
  const trimmed = String(raw || "").trim();
  if (trimmed === "") return "";
  if (/^https?:\/\//i.test(trimmed)) return trimmed;
  if (trimmed.startsWith("//")) return `https:${trimmed}`;

  let resolved = trimmed;
  try {
    resolved = new URL(trimmed, "https://www.bcliquorstores.com/").toString();
  } catch {
    // Unresolvable input is handed back as-is.
  }
  return resolved;
}
|
||||||
|
|
||||||
/**
 * Pick the best available image URL from a BCL product record.
 *
 * Single-valued image fields are tried first, in order. After that only the first
 * entry of each image list is considered; that entry may be a string or an object
 * exposing `src` / `url`.
 *
 * @param {object|null|undefined} src - The `_source` object of a browse hit.
 * @returns {string} Absolute image URL, or "" when the record carries none.
 */
function bclPickImage(src) {
  const isNonBlankString = (v) => typeof v === "string" && v.trim() !== "";

  const direct = [
    src?.imageUrl,
    src?.imageURL,
    src?.image,
    src?.thumbnail,
    src?.thumbnailUrl,
    src?.thumbnailURL,
    src?.primaryImage,
    src?.primaryImageUrl,
  ].find(isNonBlankString);
  if (direct !== undefined) return bclNormalizeAbsUrl(direct);

  for (const list of [src?.images, src?.imageUrls, src?.image_urls]) {
    if (!Array.isArray(list) || list.length === 0) continue;

    const head = list[0];
    if (isNonBlankString(head)) return bclNormalizeAbsUrl(head);

    if (head && typeof head === "object") {
      const nested = String(head.src || head.url || "").trim();
      if (nested) return bclNormalizeAbsUrl(nested);
    }
  }

  return "";
}
|
||||||
|
|
||||||
/**
 * Convert one browse hit into a tracker item.
 *
 * @param {object|null|undefined} hit - Raw hit; the product record lives in `_source`.
 * @returns {{name: string, price: *, url: string, sku: string, img: string}|null}
 *   The item, or null when the hit has no record, no SKU, no name, or is not in stock.
 */
function bclHitToItem(hit) {
  const src = hit?._source || null;
  if (src === null) return null;

  const skuRaw = src.sku == null ? "" : String(src.sku).trim();
  if (skuRaw === "") return null;

  const name = String(src.name || "").trim();
  if (name === "") return null;

  // Sale support: currentPrice wins when it parses to a finite number; otherwise regularPrice.
  const current = asNumber(src.currentPrice);
  const regular = asNumber(src.regularPrice);
  const price = cad(Number.isFinite(current) ? current : regular);

  // SKU key: keep whatever normalizeCspc accepts; anything else becomes an explicit
  // soft key "id:<raw sku>". That covers short numeric BCL product ids (141, 596, ...)
  // as well as non-numeric values.
  const sku = normalizeCspc(skuRaw) || `id:${skuRaw}`;

  if (!bclIsInStock(src)) return null;

  // Both the product page and the image are addressed by the raw SKU.
  // BCL appears to serve .jpg (not .jpeg) for these imagecache URLs; always https.
  const encodedSku = encodeURIComponent(skuRaw);
  const url = `https://www.bcliquorstores.com/product/${encodedSku}`;
  const img = `https://www.bcliquorstores.com/sites/default/files/imagecache/height400px/${encodedSku}.jpg`;

  return { name, price, url, sku, img };
}
|
||||||
|
|
||||||
/**
 * Fetch one page of the BCL "spirits" browse listing as JSON.
 *
 * @param {object} ctx - Scan context; reads `cat.bclType`, `cat.key`, `store.ua` and `http`.
 * @param {number} page1 - Page number as sent to the endpoint (the scan starts at 1).
 * @param {number} size - Page size.
 * @returns {Promise<*>} Whatever `ctx.http.fetchJsonWithRetry` resolves to.
 */
async function bclFetchBrowsePage(ctx, page1, size) {
  const category = "spirits";
  const sort = "featuredProducts:desc";
  const type = ctx.cat.bclType; // e.g. "rum" or "whisky / whiskey"
  const pageText = String(page1);

  const endpoint = new URL("https://www.bcliquorstores.com/ajax/browse");
  const query = [
    ["category", category],
    ["type", type],
    ["sort", sort],
    ["size", String(size)],
    ["page", pageText],
  ];
  for (const [key, value] of query) endpoint.searchParams.set(key, value);

  // The Referer mimics the catalogue page the browser would be on for this listing.
  const refererQuery = [
    ["category", category],
    ["type", type],
    ["sort", sort],
    ["page", pageText],
  ]
    .map(([key, value]) => `${key}=${encodeURIComponent(value)}`)
    .join("&");
  const referer = `https://www.bcliquorstores.com/product-catalogue?${refererQuery}`;

  const requestOptions = {
    method: "GET",
    headers: {
      Accept: "application/json, text/plain, */*",
      Referer: referer,
      Origin: "https://www.bcliquorstores.com",
    },
  };

  const response = await ctx.http.fetchJsonWithRetry(
    endpoint.toString(),
    `bcl:${ctx.cat.key}:p${page1}`,
    ctx.store.ua,
    requestOptions,
  );
  return response;
}
|
||||||
|
|
||||||
/**
 * Record one category's scan outcome on the run report.
 *
 * Pushes the per-category summary, adds every diff count to the run totals exactly
 * once, and forwards the item lists to addCategoryResultToReport. Shared by the
 * success and failure paths of scanCategoryBCLAjax so their bookkeeping cannot drift.
 *
 * @param {object} ctx - Scan context (reads `store.name`, `cat.label`, `cat.key`, `dbFile`).
 * @param {object} report - Run report; `categories` and `totals` are mutated.
 * @param {{scannedPages: number, discoveredUnique: number, elapsedMs: number}} summary
 * @param {{newItems: Array, updatedItems: Array, removedItems: Array, restoredItems: Array}} diff
 */
function bclRecordCategoryResult(ctx, report, summary, diff) {
  const { newItems, updatedItems, removedItems, restoredItems } = diff;

  report.categories.push({
    store: ctx.store.name,
    label: ctx.cat.label,
    key: ctx.cat.key,
    dbFile: ctx.dbFile,
    scannedPages: summary.scannedPages,
    discoveredUnique: summary.discoveredUnique,
    newCount: newItems.length,
    updatedCount: updatedItems.length,
    removedCount: removedItems.length,
    restoredCount: restoredItems.length,
    elapsedMs: summary.elapsedMs,
  });

  report.totals.newCount += newItems.length;
  report.totals.updatedCount += updatedItems.length;
  report.totals.removedCount += removedItems.length;
  report.totals.restoredCount += restoredItems.length;

  addCategoryResultToReport(
    report,
    ctx.store.name,
    ctx.cat.label,
    newItems,
    updatedItems,
    removedItems,
    restoredItems,
  );
}

/**
 * Scan one BCL category through the site's JSON browse endpoint and persist the result.
 *
 * Page 1 is fetched up front to learn the total hit count. All pages are then mapped
 * through parallelMapStaggered (using ctx.config.concurrency and ctx.config.staggerMs),
 * reusing the already-fetched first page. Hits are converted with bclHitToItem,
 * de-duplicated by product URL, merged into the previous DB, written to ctx.dbFile and
 * summarised on `report`.
 *
 * If the first page cannot be fetched, a warning is logged and an empty discovery set
 * is merged and reported instead; the function does not throw in that case.
 *
 * @param {object} ctx - Scan context (store, cat, config, http, logger, dbFile, catPrefixOut).
 * @param {*} prevDb - Previous DB state, passed straight to mergeDiscoveredIntoDb.
 * @param {object} report - Run report; `categories` and `totals` are mutated.
 * @returns {Promise<void>}
 */
async function scanCategoryBCLAjax(ctx, prevDb, report) {
  const t0 = Date.now();
  const size = 24;

  let first;
  try {
    first = await bclFetchBrowsePage(ctx, 1, size);
  } catch (e) {
    ctx.logger.warn(`${ctx.catPrefixOut} | BCL browse fetch failed: ${e?.message || e}`);

    // NOTE(review): merging an empty discovery set presumably marks existing items as
    // removed; confirm that is the intended reaction to a fetch failure.
    const discovered = new Map();
    const diff = mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel: ctx.store.name });
    const dbObj = buildDbObject(ctx, diff.merged);
    writeJsonAtomic(ctx.dbFile, dbObj);

    bclRecordCategoryResult(
      ctx,
      report,
      { scannedPages: 1, discoveredUnique: 0, elapsedMs: Date.now() - t0 },
      diff,
    );
    return;
  }

  const total = bclTotalHits(first?.json);
  const totalPages = Math.max(1, Math.ceil(total / size));
  // maxPages === null means "no cap".
  const scanPages = ctx.config.maxPages === null ? totalPages : Math.min(ctx.config.maxPages, totalPages);

  ctx.logger.ok(
    `${ctx.catPrefixOut} | Total=${total} Size=${size} Pages: ${scanPages}${scanPages !== totalPages ? ` (cap from ${totalPages})` : ""}`,
  );

  const pageNums = [];
  for (let p = 1; p <= scanPages; p++) pageNums.push(p);

  let donePages = 0;

  const perPageItems = await require("../utils/async").parallelMapStaggered(
    pageNums,
    ctx.config.concurrency,
    ctx.config.staggerMs,
    async (page1, idx) => {
      // Page 1 was already fetched above; do not request it a second time.
      const r = page1 === 1 ? first : await bclFetchBrowsePage(ctx, page1, size);
      const hits = Array.isArray(r?.json?.hits?.hits) ? r.json.hits.hits : [];

      const items = [];
      for (const h of hits) {
        const it = bclHitToItem(h);
        if (it) items.push(it);
      }

      donePages++;
      ctx.logger.ok(
        `${ctx.catPrefixOut} | Page ${pageStr(idx + 1, pageNums.length)} | ${String(r.status || "").padEnd(3)} | ${pctStr(donePages, pageNums.length)} | items=${padLeft(
          items.length,
          3,
        )} | bytes=${kbStr(r.bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}`,
      );

      return items;
    },
  );

  // De-duplicate by product URL; a later occurrence replaces an earlier one.
  const discovered = new Map();
  let dups = 0;
  for (const arr of perPageItems) {
    for (const it of arr) {
      if (discovered.has(it.url)) dups++;
      discovered.set(it.url, it);
    }
  }

  ctx.logger.ok(
    `${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}${dups ? ` (${dups} dups)` : ""}`,
  );

  const diff = mergeDiscoveredIntoDb(prevDb, discovered, {
    storeLabel: ctx.store.name,
  });
  const { merged, newItems, updatedItems, removedItems, restoredItems } = diff;

  const dbObj = buildDbObject(ctx, merged);
  writeJsonAtomic(ctx.dbFile, dbObj);

  ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`);

  const elapsed = Date.now() - t0;
  ctx.logger.ok(
    `${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`,
  );

  // Each total is bumped exactly once here; the previous inline bookkeeping added
  // updatedItems.length to report.totals.updatedCount twice on this path.
  bclRecordCategoryResult(
    ctx,
    report,
    { scannedPages: scanPages, discoveredUnique: discovered.size, elapsedMs: elapsed },
    diff,
  );
}
|
||||||
|
|
||||||
/**
 * Build the BCL (bcliquorstores.com) store descriptor.
 *
 * @param {string} defaultUa - User-Agent stored on the descriptor as `ua`.
 * @returns {object} Store descriptor with its categories and scan function.
 */
function createStore(defaultUa) {
  // startUrl is informational only; the scan goes through ajax/browse and is driven by bclType.
  const whisky = {
    key: "whisky",
    label: "Whisky / Whiskey",
    startUrl:
      "https://www.bcliquorstores.com/product-catalogue?category=spirits&type=whisky%20/%20whiskey&sort=featuredProducts:desc&page=1",
    bclType: "whisky / whiskey",
  };

  const rum = {
    key: "rum",
    label: "Rum",
    startUrl:
      "https://www.bcliquorstores.com/product-catalogue?category=spirits&type=rum&sort=featuredProducts:desc&page=1",
    bclType: "rum",
  };

  const store = {
    key: "bcl",
    name: "BCL",
    host: "www.bcliquorstores.com",
    ua: defaultUa,
    scanCategory: scanCategoryBCLAjax, // JSON-driven (async browse)
    categories: [whisky, rum],
  };
  return store;
}
|
||||||
|
|
||||||
module.exports = { createStore };
|
module.exports = { createStore };
|
||||||
|
|
|
||||||
|
|
@ -14,380 +14,416 @@ const BSW_ALGOLIA_API_KEY = "1aa0c19fe6a0931340570bd358c2c9d2";
|
||||||
// Algolia multi-query endpoint; the host name is built from the lower-cased application id.
const BSW_ALGOLIA_URL = `https://${BSW_ALGOLIA_APP_ID.toLowerCase()}-dsn.algolia.net/1/indexes/*/queries`;
|
||||||
|
|
||||||
/**
 * Format a number as a dollar amount with thousands separators and two decimals.
 *
 * @param {*} n - Amount; anything that is not a finite number yields "".
 * @returns {string} e.g. "$1,234.50", or "" for non-finite input.
 */
function usd(n) {
  if (Number.isFinite(n)) {
    const amount = n.toLocaleString("en-US", { minimumFractionDigits: 2, maximumFractionDigits: 2 });
    return `$${amount}`;
  }
  return "";
}
|
||||||
|
|
||||||
/**
 * Find the collection id embedded in a collection page's HTML.
 *
 * Several known spellings are tried in order; the first one followed by at least
 * six digits wins.
 *
 * @param {*} html - Page HTML (falsy values are treated as an empty document).
 * @returns {number|null} The collection id, or null when none of the patterns match.
 */
function bswExtractCollectionIdFromHtml(html) {
  const text = String(html || "");

  const idPatterns = [
    /collection_ids%3A(\d{6,})/i,
    /collection_ids\s*:\s*(\d{6,})/i,
    /"collection_ids"\s*:\s*(\d{6,})/i,
    /"collection_id"\s*:\s*(\d{6,})/i,
    /collection_id\s*=\s*(\d{6,})/i,
    /collectionId["']?\s*[:=]\s*["']?(\d{6,})/i,
    /data-collection-id=["'](\d{6,})["']/i,
  ];

  for (const pattern of idPatterns) {
    const digits = pattern.exec(text)?.[1];
    if (digits) return Number.parseInt(digits, 10);
  }

  return null;
}
|
||||||
|
|
||||||
/**
 * Format a raw price value from a search hit as a display string.
 *
 * Strings that already contain "$" are returned with whitespace removed. Other
 * strings are parsed from their digits and dots; if that does not yield a number the
 * trimmed text is returned unchanged. Numbers are divided by 100 when `hintCents` is
 * set, or when they are integers of 100000 or more (assumed to be cents).
 *
 * @param {*} value - Raw price (string, number, null/undefined, or anything else).
 * @param {boolean} [hintCents] - True when a numeric `value` is known to be in cents.
 * @returns {string} Formatted price, the original text if unparseable, or "".
 */
function bswFormatPrice(value, hintCents) {
  if (value === null || value === undefined) return "";

  if (typeof value === "string") {
    const t = value.trim();
    if (!t) return "";
    if (t.includes("$")) return t.replace(/\s+/g, "");

    const numericPart = t.replace(/[^0-9.]/g, "");
    // Number("") is 0, so text without any digits (e.g. "Sold out") must be caught
    // here or it would be rendered as "$0.00".
    if (numericPart === "") return t;

    const n = Number(numericPart);
    if (!Number.isFinite(n)) return t;
    return usd(n);
  }

  if (typeof value === "number") {
    const treatAsCents = Boolean(hintCents) || (Number.isInteger(value) && value >= 100000);
    return usd(treatAsCents ? value / 100 : value);
  }

  return "";
}
|
||||||
|
|
||||||
/**
 * Choose which price field of a hit to display.
 *
 * Top-level fields are checked first (cent-denominated ones before the rest), then
 * the first variant. A field counts as present when it is neither null nor undefined.
 *
 * @param {object|null|undefined} hit - Search hit.
 * @returns {{val: *, cents: boolean}} The raw value plus whether it is in cents;
 *   `{ val: null, cents: false }` when nothing was found.
 */
function bswPickPrice(hit) {
  // [field name, value is in cents], in priority order.
  const hitFields = [
    ["price_cents", true],
    ["compare_at_price_cents", true],
    ["price", false],
    ["price_min", false],
    ["priceMin", false],
    ["min_price", false],
    ["variants_min_price", false],
  ];
  const variantFields = [
    ["price_cents", true],
    ["compare_at_price_cents", true],
    ["price", false],
  ];

  const firstPresent = (obj, fields) => {
    for (const [field, cents] of fields) {
      const val = obj[field];
      if (val != null) return { val, cents };
    }
    return null;
  };

  if (hit) {
    const fromHit = firstPresent(hit, hitFields);
    if (fromHit) return fromHit;

    const firstVariant = Array.isArray(hit.variants) ? hit.variants[0] : null;
    if (firstVariant) {
      const fromVariant = firstPresent(firstVariant, variantFields);
      if (fromVariant) return fromVariant;
    }
  }

  return { val: null, cents: false };
}
|
||||||
|
|
||||||
|
|
||||||
/**
 * Convert one Algolia hit into a tracker item.
 *
 * @param {object|null|undefined} hit - Raw search hit.
 * @returns {{name: string, price: string, url: string, sku: *, img: string}|null}
 *   The item, or null when the hit yields no name or no URL.
 */
function bswHitToItem(hit) {
  const rawTitle = hit && (hit.title || hit.name || hit.product_title || hit.product_name || "");
  const name = cleanText(rawTitle);

  // Prefer an explicit URL; otherwise build the product URL from the handle/slug.
  const handle = hit && (hit.handle || hit.product_handle || hit.slug || "");
  let url = hit && (hit.url || hit.product_url);
  if (!url) {
    url = handle ? `https://www.bswliquor.com/products/${String(handle).replace(/^\/+/, "")}` : "";
  }

  const picked = bswPickPrice(hit);
  const price = bswFormatPrice(picked.val, picked.cents);

  const rawSku = hit?.sku || hit?.SKU || hit?.cspc || hit?.CSPC || "";
  const sku = normalizeCspc(rawSku);

  const img = bswPickImage(hit);

  const usable = Boolean(name) && Boolean(url);
  return usable ? { name, price, url, sku, img } : null;
}
|
||||||
|
|
||||||
/**
 * Query one page of in-stock products for a collection from the Algolia index.
 *
 * @param {object} ctx - Scan context; reads `cat.key`, `store.ua` and `http`.
 * @param {number|string} collectionId - Collection to filter on.
 * @param {*} ruleContext - Optional rule context; omitted from the query when falsy.
 * @param {number} page0 - Page number as sent to Algolia.
 * @param {number} hitsPerPage - Page size.
 * @returns {Promise<*>} Whatever `ctx.http.fetchJsonWithRetry` resolves to.
 */
async function bswFetchAlgoliaPage(ctx, collectionId, ruleContext, page0, hitsPerPage) {
  const filtersExpr = `collection_ids:${collectionId} AND (inventory_available:"true")`;

  const paramParts = [
    `facets=%5B%22price%22%2C%22*%22%5D`,
    `filters=${encodeURIComponent(filtersExpr)}`,
    `hitsPerPage=${encodeURIComponent(String(hitsPerPage))}`,
    `page=${encodeURIComponent(String(page0))}`,
    `query=`,
    `clickAnalytics=true`,
    `maxValuesPerFacet=100`,
  ];
  if (ruleContext) paramParts.push(`ruleContexts=${encodeURIComponent(String(ruleContext))}`);
  const params = paramParts.join("&");

  const payload = JSON.stringify({ requests: [{ indexName: "shopify_products", params }] });

  const requestOptions = {
    method: "POST",
    headers: {
      Accept: "*/*",
      "content-type": "application/x-www-form-urlencoded",
      Origin: "https://www.bswliquor.com",
      Referer: "https://www.bswliquor.com/",
      "x-algolia-api-key": BSW_ALGOLIA_API_KEY,
      "x-algolia-application-id": BSW_ALGOLIA_APP_ID,
    },
    body: payload,
  };

  const response = await ctx.http.fetchJsonWithRetry(
    BSW_ALGOLIA_URL,
    `algolia:${ctx.cat.key}:p${page0}`,
    ctx.store.ua,
    requestOptions,
  );
  return response;
}
|
||||||
|
|
||||||
/**
 * Render a byte count via humanBytes, left-padded with spaces to 8 characters
 * for column-aligned log output.
 */
function kbStr(bytes) {
  const label = humanBytes(bytes);
  return label.padStart(8, " ");
}
|
||||||
|
|
||||||
/**
 * Render a duration in milliseconds as seconds, left-padded to 7 characters.
 *
 * Durations that round to less than 10s keep one decimal ("1.2s"); longer ones are
 * shown as whole seconds ("12s"). Non-finite input is treated as 0.
 *
 * @param {*} ms - Duration in milliseconds.
 * @returns {string} Padded label such as "   1.2s" or "    12s".
 */
function secStr(ms) {
  const seconds = Number.isFinite(ms) ? ms / 1000 : 0;
  const roundedToTenth = Math.round(seconds * 10) / 10;

  const label = roundedToTenth < 10 ? `${roundedToTenth.toFixed(1)}s` : `${Math.round(seconds)}s`;
  return label.padStart(7, " ");
}
|
||||||
|
|
||||||
/**
 * Render "i/total", passing `i` through padLeft with the character width of `total`
 * so progress lines align.
 */
function pageStr(i, total) {
  const width = String(total).length;
  const current = padLeft(i, width);
  return `${current}/${total}`;
}
|
||||||
|
|
||||||
/**
 * Render progress as a whole percentage (rounded down), passed through padLeft
 * with width 3 and followed by "%". A falsy `total` yields 0%.
 */
function pctStr(done, total) {
  let pct = 0;
  if (total) pct = Math.floor((done / total) * 100);
  return `${padLeft(pct, 3)}%`;
}
|
||||||
|
|
||||||
/**
 * Turn a possibly relative or protocol-relative URL into an absolute one.
 *
 * - blank / falsy input       -> ""
 * - already http(s)           -> returned untouched (after trimming)
 * - protocol-relative ("//x") -> prefixed with "https:"
 * - anything else             -> resolved against the BSW site root
 *
 * @param {*} raw - Candidate URL.
 * @returns {string} Absolute URL, or the trimmed input if it cannot be resolved.
 */
function bswNormalizeAbsUrl(raw) {
  const trimmed = String(raw || "").trim();
  if (trimmed === "") return "";
  if (/^https?:\/\//i.test(trimmed)) return trimmed;
  if (trimmed.startsWith("//")) return `https:${trimmed}`;

  let resolved = trimmed;
  try {
    resolved = new URL(trimmed, "https://www.bswliquor.com/").toString();
  } catch {
    // Unresolvable input is handed back as-is.
  }
  return resolved;
}
|
||||||
|
|
||||||
/**
 * Extract an absolute image URL from a value that may be a URL string or an
 * image-like object.
 *
 * For objects, the first non-blank string among a fixed list of properties is used.
 *
 * @param {*} v - URL string, image object, or anything else.
 * @returns {string} Absolute URL, or "" when nothing usable is found.
 */
function bswNormalizeImg(v) {
  if (!v) return "";
  if (typeof v === "string") return bswNormalizeAbsUrl(v);
  if (typeof v !== "object") return "";

  const urlFields = [
    "src",
    "url",
    "originalSrc",
    "original_src",
    "original",
    "secure_url",
    "large",
    "medium",
    "small",
  ];
  const found = urlFields.map((field) => v[field]).find((c) => typeof c === "string" && c.trim() !== "");

  return found === undefined ? "" : bswNormalizeAbsUrl(found);
}
|
||||||
|
|
||||||
/**
 * Pick the best available image URL from an Algolia hit.
 *
 * Single-valued image fields are tried first, in order. If none produces a URL, every
 * entry of `images` and then `media` is tried.
 *
 * @param {object|null|undefined} hit - Search hit.
 * @returns {string} Absolute image URL, or "" when the hit carries none.
 */
function bswPickImage(hit) {
  const singleFields = [
    "image",
    "image_url",
    "imageUrl",
    "imageURL",
    "featured_image",
    "featured_image_url",
    "featuredImage",
    "featuredImageUrl",
    "product_image",
    "product_image_url",
    "productImage",
    "productImageUrl",
    "thumbnail",
    "thumbnail_url",
    "thumbnailUrl",
  ];

  for (const field of singleFields) {
    const resolved = bswNormalizeImg(hit?.[field]);
    if (resolved) return resolved;
  }

  for (const list of [hit?.images, hit?.media]) {
    if (!Array.isArray(list)) continue;
    for (const entry of list) {
      const resolved = bswNormalizeImg(entry);
      if (resolved) return resolved;
    }
  }

  return "";
}
|
||||||
|
|
||||||
|
|
||||||
async function scanCategoryBSWAlgolia(ctx, prevDb, report) {
|
async function scanCategoryBSWAlgolia(ctx, prevDb, report) {
|
||||||
const t0 = Date.now();
|
const t0 = Date.now();
|
||||||
|
|
||||||
let collectionId = Number.isFinite(ctx.cat.bswCollectionId) ? ctx.cat.bswCollectionId : null;
|
let collectionId = Number.isFinite(ctx.cat.bswCollectionId) ? ctx.cat.bswCollectionId : null;
|
||||||
if (!collectionId) {
|
if (!collectionId) {
|
||||||
try {
|
try {
|
||||||
const { text: html } = await ctx.http.fetchTextWithRetry(ctx.cat.startUrl, `bsw:html:${ctx.cat.key}`, ctx.store.ua);
|
const { text: html } = await ctx.http.fetchTextWithRetry(
|
||||||
collectionId = bswExtractCollectionIdFromHtml(html);
|
ctx.cat.startUrl,
|
||||||
if (collectionId) ctx.logger.ok(`${ctx.catPrefixOut} | BSW discovered collectionId=${collectionId}`);
|
`bsw:html:${ctx.cat.key}`,
|
||||||
else ctx.logger.warn(`${ctx.catPrefixOut} | BSW could not discover collectionId from HTML.`);
|
ctx.store.ua,
|
||||||
} catch (e) {
|
);
|
||||||
ctx.logger.warn(`${ctx.catPrefixOut} | BSW HTML fetch failed for collectionId discovery: ${e?.message || e}`);
|
collectionId = bswExtractCollectionIdFromHtml(html);
|
||||||
}
|
if (collectionId) ctx.logger.ok(`${ctx.catPrefixOut} | BSW discovered collectionId=${collectionId}`);
|
||||||
}
|
else ctx.logger.warn(`${ctx.catPrefixOut} | BSW could not discover collectionId from HTML.`);
|
||||||
|
} catch (e) {
|
||||||
|
ctx.logger.warn(
|
||||||
|
`${ctx.catPrefixOut} | BSW HTML fetch failed for collectionId discovery: ${e?.message || e}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!collectionId) {
|
if (!collectionId) {
|
||||||
ctx.logger.warn(`${ctx.catPrefixOut} | BSW missing collectionId; defaulting to 1 page with 0 items.`);
|
ctx.logger.warn(`${ctx.catPrefixOut} | BSW missing collectionId; defaulting to 1 page with 0 items.`);
|
||||||
|
|
||||||
const discovered = new Map();
|
const discovered = new Map();
|
||||||
const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel: ctx.store.name });
|
const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(
|
||||||
const dbObj = buildDbObject(ctx, merged);
|
prevDb,
|
||||||
writeJsonAtomic(ctx.dbFile, dbObj);
|
discovered,
|
||||||
|
{ storeLabel: ctx.store.name },
|
||||||
|
);
|
||||||
|
const dbObj = buildDbObject(ctx, merged);
|
||||||
|
writeJsonAtomic(ctx.dbFile, dbObj);
|
||||||
|
|
||||||
ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`);
|
ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`);
|
||||||
|
|
||||||
const elapsed = Date.now() - t0;
|
const elapsed = Date.now() - t0;
|
||||||
report.categories.push({
|
report.categories.push({
|
||||||
store: ctx.store.name,
|
store: ctx.store.name,
|
||||||
label: ctx.cat.label,
|
label: ctx.cat.label,
|
||||||
key: ctx.cat.key,
|
key: ctx.cat.key,
|
||||||
dbFile: ctx.dbFile,
|
dbFile: ctx.dbFile,
|
||||||
scannedPages: 1,
|
scannedPages: 1,
|
||||||
discoveredUnique: 0,
|
discoveredUnique: 0,
|
||||||
newCount: newItems.length,
|
newCount: newItems.length,
|
||||||
updatedCount: updatedItems.length,
|
updatedCount: updatedItems.length,
|
||||||
removedCount: removedItems.length,
|
removedCount: removedItems.length,
|
||||||
restoredCount: restoredItems.length,
|
restoredCount: restoredItems.length,
|
||||||
elapsedMs: elapsed,
|
elapsedMs: elapsed,
|
||||||
});
|
});
|
||||||
report.totals.newCount += newItems.length;
|
report.totals.newCount += newItems.length;
|
||||||
report.totals.updatedCount += updatedItems.length;
|
report.totals.updatedCount += updatedItems.length;
|
||||||
report.totals.removedCount += removedItems.length;
|
report.totals.removedCount += removedItems.length;
|
||||||
report.totals.restoredCount += restoredItems.length;
|
report.totals.restoredCount += restoredItems.length;
|
||||||
addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems);
|
addCategoryResultToReport(
|
||||||
return;
|
report,
|
||||||
}
|
ctx.store.name,
|
||||||
|
ctx.cat.label,
|
||||||
|
newItems,
|
||||||
|
updatedItems,
|
||||||
|
removedItems,
|
||||||
|
restoredItems,
|
||||||
|
);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
const ruleContext = ctx.cat.bswRuleContext || "";
|
const ruleContext = ctx.cat.bswRuleContext || "";
|
||||||
const hitsPerPage = 50;
|
const hitsPerPage = 50;
|
||||||
|
|
||||||
const first = await bswFetchAlgoliaPage(ctx, collectionId, ruleContext, 0, hitsPerPage);
|
const first = await bswFetchAlgoliaPage(ctx, collectionId, ruleContext, 0, hitsPerPage);
|
||||||
const result0 = first?.json?.results?.[0] || null;
|
const result0 = first?.json?.results?.[0] || null;
|
||||||
const nbPages = result0 && Number.isFinite(result0.nbPages) ? result0.nbPages : 1;
|
const nbPages = result0 && Number.isFinite(result0.nbPages) ? result0.nbPages : 1;
|
||||||
|
|
||||||
const totalPages = Math.max(1, nbPages);
|
const totalPages = Math.max(1, nbPages);
|
||||||
const scanPages = ctx.config.maxPages === null ? totalPages : Math.min(ctx.config.maxPages, totalPages);
|
const scanPages = ctx.config.maxPages === null ? totalPages : Math.min(ctx.config.maxPages, totalPages);
|
||||||
ctx.logger.ok(`${ctx.catPrefixOut} | Pages: ${scanPages}${scanPages !== totalPages ? ` (cap from ${totalPages})` : ""}`);
|
ctx.logger.ok(
|
||||||
|
`${ctx.catPrefixOut} | Pages: ${scanPages}${scanPages !== totalPages ? ` (cap from ${totalPages})` : ""}`,
|
||||||
|
);
|
||||||
|
|
||||||
const pageIdxs = [];
|
const pageIdxs = [];
|
||||||
for (let p = 0; p < scanPages; p++) pageIdxs.push(p);
|
for (let p = 0; p < scanPages; p++) pageIdxs.push(p);
|
||||||
|
|
||||||
let donePages = 0;
|
let donePages = 0;
|
||||||
|
|
||||||
const perPageItems = await require("../utils/async").parallelMapStaggered(pageIdxs, ctx.config.concurrency, ctx.config.staggerMs, async (page0, idx) => {
|
const perPageItems = await require("../utils/async").parallelMapStaggered(
|
||||||
const pnum = idx + 1;
|
pageIdxs,
|
||||||
const r = page0 === 0 ? first : await bswFetchAlgoliaPage(ctx, collectionId, ruleContext, page0, hitsPerPage);
|
ctx.config.concurrency,
|
||||||
|
ctx.config.staggerMs,
|
||||||
|
async (page0, idx) => {
|
||||||
|
const pnum = idx + 1;
|
||||||
|
const r =
|
||||||
|
page0 === 0 ? first : await bswFetchAlgoliaPage(ctx, collectionId, ruleContext, page0, hitsPerPage);
|
||||||
|
|
||||||
const res0 = r?.json?.results?.[0] || null;
|
const res0 = r?.json?.results?.[0] || null;
|
||||||
const hits = res0 && Array.isArray(res0.hits) ? res0.hits : [];
|
const hits = res0 && Array.isArray(res0.hits) ? res0.hits : [];
|
||||||
|
|
||||||
const items = [];
|
const items = [];
|
||||||
for (const h of hits) {
|
for (const h of hits) {
|
||||||
const it = bswHitToItem(h);
|
const it = bswHitToItem(h);
|
||||||
if (it) items.push(it);
|
if (it) items.push(it);
|
||||||
}
|
}
|
||||||
|
|
||||||
donePages++;
|
donePages++;
|
||||||
ctx.logger.ok(
|
ctx.logger.ok(
|
||||||
`${ctx.catPrefixOut} | Page ${pageStr(pnum, pageIdxs.length)} | ${String(r.status || "").padEnd(3)} | ${pctStr(donePages, pageIdxs.length)} | items=${padLeft(
|
`${ctx.catPrefixOut} | Page ${pageStr(pnum, pageIdxs.length)} | ${String(r.status || "").padEnd(3)} | ${pctStr(donePages, pageIdxs.length)} | items=${padLeft(
|
||||||
items.length,
|
items.length,
|
||||||
3
|
3,
|
||||||
)} | bytes=${kbStr(r.bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}`
|
)} | bytes=${kbStr(r.bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}`,
|
||||||
);
|
);
|
||||||
|
|
||||||
return items;
|
return items;
|
||||||
});
|
},
|
||||||
|
);
|
||||||
|
|
||||||
const discovered = new Map();
|
const discovered = new Map();
|
||||||
let dups = 0;
|
let dups = 0;
|
||||||
for (const arr of perPageItems) {
|
for (const arr of perPageItems) {
|
||||||
for (const it of arr) {
|
for (const it of arr) {
|
||||||
if (discovered.has(it.url)) dups++;
|
if (discovered.has(it.url)) dups++;
|
||||||
discovered.set(it.url, it);
|
discovered.set(it.url, it);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx.logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}${dups ? ` (${dups} dups)` : ""}`);
|
ctx.logger.ok(
|
||||||
|
`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}${dups ? ` (${dups} dups)` : ""}`,
|
||||||
|
);
|
||||||
|
|
||||||
const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel: ctx.store.name });
|
const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, {
|
||||||
|
storeLabel: ctx.store.name,
|
||||||
|
});
|
||||||
|
|
||||||
const dbObj = buildDbObject(ctx, merged);
|
const dbObj = buildDbObject(ctx, merged);
|
||||||
writeJsonAtomic(ctx.dbFile, dbObj);
|
writeJsonAtomic(ctx.dbFile, dbObj);
|
||||||
|
|
||||||
ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`);
|
ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`);
|
||||||
|
|
||||||
const elapsed = Date.now() - t0;
|
const elapsed = Date.now() - t0;
|
||||||
ctx.logger.ok(
|
ctx.logger.ok(
|
||||||
`${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`
|
`${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`,
|
||||||
);
|
);
|
||||||
|
|
||||||
report.categories.push({
|
report.categories.push({
|
||||||
store: ctx.store.name,
|
store: ctx.store.name,
|
||||||
label: ctx.cat.label,
|
label: ctx.cat.label,
|
||||||
key: ctx.cat.key,
|
key: ctx.cat.key,
|
||||||
dbFile: ctx.dbFile,
|
dbFile: ctx.dbFile,
|
||||||
scannedPages: scanPages,
|
scannedPages: scanPages,
|
||||||
discoveredUnique: discovered.size,
|
discoveredUnique: discovered.size,
|
||||||
newCount: newItems.length,
|
newCount: newItems.length,
|
||||||
updatedCount: updatedItems.length,
|
updatedCount: updatedItems.length,
|
||||||
removedCount: removedItems.length,
|
removedCount: removedItems.length,
|
||||||
restoredCount: restoredItems.length,
|
restoredCount: restoredItems.length,
|
||||||
elapsedMs: elapsed,
|
elapsedMs: elapsed,
|
||||||
});
|
});
|
||||||
report.totals.newCount += newItems.length;
|
report.totals.newCount += newItems.length;
|
||||||
report.totals.updatedCount += updatedItems.length;
|
report.totals.updatedCount += updatedItems.length;
|
||||||
report.totals.removedCount += removedItems.length;
|
report.totals.removedCount += removedItems.length;
|
||||||
report.totals.restoredCount += restoredItems.length;
|
report.totals.restoredCount += restoredItems.length;
|
||||||
|
|
||||||
addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems);
|
addCategoryResultToReport(
|
||||||
|
report,
|
||||||
|
ctx.store.name,
|
||||||
|
ctx.cat.label,
|
||||||
|
newItems,
|
||||||
|
updatedItems,
|
||||||
|
removedItems,
|
||||||
|
restoredItems,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
function createStore(defaultUa) {
|
function createStore(defaultUa) {
|
||||||
return {
|
return {
|
||||||
key: "bsw",
|
key: "bsw",
|
||||||
name: "BSW",
|
name: "BSW",
|
||||||
host: "www.bswliquor.com",
|
host: "www.bswliquor.com",
|
||||||
ua: defaultUa,
|
ua: defaultUa,
|
||||||
scanCategory: scanCategoryBSWAlgolia,
|
scanCategory: scanCategoryBSWAlgolia,
|
||||||
categories: [
|
categories: [
|
||||||
{
|
{
|
||||||
key: "scotch-whisky",
|
key: "scotch-whisky",
|
||||||
label: "Scotch Whisky",
|
label: "Scotch Whisky",
|
||||||
startUrl: "https://www.bswliquor.com/collections/scotch-whisky?page=1",
|
startUrl: "https://www.bswliquor.com/collections/scotch-whisky?page=1",
|
||||||
bswRuleContext: "scotch-whisky",
|
bswRuleContext: "scotch-whisky",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
key: "rum",
|
key: "rum",
|
||||||
label: "Rum",
|
label: "Rum",
|
||||||
startUrl: "https://www.bswliquor.com/collections/rum?page=1",
|
startUrl: "https://www.bswliquor.com/collections/rum?page=1",
|
||||||
bswRuleContext: "rum",
|
bswRuleContext: "rum",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
key: "whisky",
|
key: "whisky",
|
||||||
label: "Whisky",
|
label: "Whisky",
|
||||||
startUrl: "https://www.bswliquor.com/collections/whisky?page=1",
|
startUrl: "https://www.bswliquor.com/collections/whisky?page=1",
|
||||||
bswRuleContext: "whisky",
|
bswRuleContext: "whisky",
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = { createStore };
|
module.exports = { createStore };
|
||||||
|
|
|
||||||
|
|
@ -11,20 +11,20 @@ const { addCategoryResultToReport } = require("../tracker/report");
|
||||||
/* ---------------- formatting ---------------- */
|
/* ---------------- formatting ---------------- */
|
||||||
|
|
||||||
function kbStr(bytes) {
|
function kbStr(bytes) {
|
||||||
return humanBytes(bytes).padStart(8, " ");
|
return humanBytes(bytes).padStart(8, " ");
|
||||||
}
|
}
|
||||||
function secStr(ms) {
|
function secStr(ms) {
|
||||||
const s = Number.isFinite(ms) ? ms / 1000 : 0;
|
const s = Number.isFinite(ms) ? ms / 1000 : 0;
|
||||||
const t = Math.round(s * 10) / 10;
|
const t = Math.round(s * 10) / 10;
|
||||||
return (t < 10 ? `${t.toFixed(1)}s` : `${Math.round(s)}s`).padStart(7, " ");
|
return (t < 10 ? `${t.toFixed(1)}s` : `${Math.round(s)}s`).padStart(7, " ");
|
||||||
}
|
}
|
||||||
function pageStr(i, total) {
|
function pageStr(i, total) {
|
||||||
const w = String(total).length;
|
const w = String(total).length;
|
||||||
return `${padLeft(i, w)}/${total}`;
|
return `${padLeft(i, w)}/${total}`;
|
||||||
}
|
}
|
||||||
function pctStr(done, total) {
|
function pctStr(done, total) {
|
||||||
const pct = total ? Math.floor((done / total) * 100) : 0;
|
const pct = total ? Math.floor((done / total) * 100) : 0;
|
||||||
return `${padLeft(pct, 3)}%`;
|
return `${padLeft(pct, 3)}%`;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------- co-op specifics ---------------- */
|
/* ---------------- co-op specifics ---------------- */
|
||||||
|
|
@ -33,327 +33,352 @@ const BASE = "https://shoponlinewhisky-wine.coopwinespiritsbeer.com";
|
||||||
const REFERER = `${BASE}/worldofwhisky`;
|
const REFERER = `${BASE}/worldofwhisky`;
|
||||||
|
|
||||||
function coopHeaders(ctx, sourcepage) {
|
function coopHeaders(ctx, sourcepage) {
|
||||||
const coop = ctx.store.coop;
|
const coop = ctx.store.coop;
|
||||||
return {
|
return {
|
||||||
Accept: "application/json, text/javascript, */*; q=0.01",
|
Accept: "application/json, text/javascript, */*; q=0.01",
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
Origin: BASE,
|
Origin: BASE,
|
||||||
Referer: REFERER,
|
Referer: REFERER,
|
||||||
|
|
||||||
// these 4 are required on their API calls (matches browser)
|
// these 4 are required on their API calls (matches browser)
|
||||||
SessionKey: coop.sessionKey,
|
SessionKey: coop.sessionKey,
|
||||||
chainID: coop.chainId,
|
chainID: coop.chainId,
|
||||||
storeID: coop.storeId,
|
storeID: coop.storeId,
|
||||||
appVersion: coop.appVersion,
|
appVersion: coop.appVersion,
|
||||||
|
|
||||||
AUTH_TOKEN: "null",
|
AUTH_TOKEN: "null",
|
||||||
CONNECTION_ID: "null",
|
CONNECTION_ID: "null",
|
||||||
SESSION_ID: coop.sessionId || "null",
|
SESSION_ID: coop.sessionId || "null",
|
||||||
TIMESTAMP: String(Date.now()),
|
TIMESTAMP: String(Date.now()),
|
||||||
sourcepage,
|
sourcepage,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
async function coopFetchText(ctx, url, label, { headers } = {}) {
|
async function coopFetchText(ctx, url, label, { headers } = {}) {
|
||||||
return await ctx.http.fetchTextWithRetry(url, label, ctx.store.ua, {
|
return await ctx.http.fetchTextWithRetry(url, label, ctx.store.ua, {
|
||||||
method: "GET",
|
method: "GET",
|
||||||
headers: headers || {},
|
headers: headers || {},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function extractVar(html, re) {
|
function extractVar(html, re) {
|
||||||
const m = String(html || "").match(re);
|
const m = String(html || "").match(re);
|
||||||
return m ? String(m[1] || "").trim() : "";
|
return m ? String(m[1] || "").trim() : "";
|
||||||
}
|
}
|
||||||
|
|
||||||
async function ensureCoopBootstrap(ctx) {
|
async function ensureCoopBootstrap(ctx) {
|
||||||
const coop = ctx.store.coop;
|
const coop = ctx.store.coop;
|
||||||
if (coop.sessionKey && coop.chainId && coop.storeId && coop.appVersion) return;
|
if (coop.sessionKey && coop.chainId && coop.storeId && coop.appVersion) return;
|
||||||
|
|
||||||
const r = await coopFetchText(ctx, REFERER, "coop:bootstrap", {
|
const r = await coopFetchText(ctx, REFERER, "coop:bootstrap", {
|
||||||
headers: {
|
headers: {
|
||||||
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||||
Referer: REFERER,
|
Referer: REFERER,
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
const html = r?.text || "";
|
const html = r?.text || "";
|
||||||
if (r?.status !== 200 || !html) {
|
if (r?.status !== 200 || !html) {
|
||||||
throw new Error(`coop bootstrap failed: GET ${REFERER} => ${r.status}`);
|
throw new Error(`coop bootstrap failed: GET ${REFERER} => ${r.status}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Values are in <script> var SESSIONKEY = "..."; etc.
|
// Values are in <script> var SESSIONKEY = "..."; etc.
|
||||||
coop.sessionKey = extractVar(html, /var\s+SESSIONKEY\s*=\s*"([^"]+)"/i);
|
coop.sessionKey = extractVar(html, /var\s+SESSIONKEY\s*=\s*"([^"]+)"/i);
|
||||||
coop.chainId = extractVar(html, /var\s+chainID\s*=\s*"([^"]+)"/i);
|
coop.chainId = extractVar(html, /var\s+chainID\s*=\s*"([^"]+)"/i);
|
||||||
coop.storeId = extractVar(html, /var\s+store_unique_id\s*=\s*"([^"]+)"/i);
|
coop.storeId = extractVar(html, /var\s+store_unique_id\s*=\s*"([^"]+)"/i);
|
||||||
coop.appVersion = extractVar(html, /var\s+CLIENTVERSION\s*=\s*"([^"]+)"/i);
|
coop.appVersion = extractVar(html, /var\s+CLIENTVERSION\s*=\s*"([^"]+)"/i);
|
||||||
|
|
||||||
if (!coop.sessionKey || !coop.chainId || !coop.storeId || !coop.appVersion) {
|
|
||||||
throw new Error(
|
|
||||||
`coop bootstrap missing values: sessionKey=${!!coop.sessionKey} chainId=${!!coop.chainId} storeId=${!!coop.storeId} appVersion=${!!coop.appVersion}`
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
if (!coop.sessionKey || !coop.chainId || !coop.storeId || !coop.appVersion) {
|
||||||
|
throw new Error(
|
||||||
|
`coop bootstrap missing values: sessionKey=${!!coop.sessionKey} chainId=${!!coop.chainId} storeId=${!!coop.storeId} appVersion=${!!coop.appVersion}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
async function ensureCoopSession(ctx) {
|
async function ensureCoopSession(ctx) {
|
||||||
const coop = ctx.store.coop;
|
const coop = ctx.store.coop;
|
||||||
if (coop.sessionId) return;
|
if (coop.sessionId) return;
|
||||||
await ensureCoopBootstrap(ctx);
|
await ensureCoopBootstrap(ctx);
|
||||||
|
|
||||||
const r = await ctx.http.fetchJsonWithRetry(
|
const r = await ctx.http.fetchJsonWithRetry(
|
||||||
`${BASE}/api/account/createsession`,
|
`${BASE}/api/account/createsession`,
|
||||||
`coop:createsession`,
|
`coop:createsession`,
|
||||||
ctx.store.ua,
|
ctx.store.ua,
|
||||||
{
|
{
|
||||||
method: "POST",
|
method: "POST",
|
||||||
headers: coopHeaders(ctx, "/worldofwhisky"),
|
headers: coopHeaders(ctx, "/worldofwhisky"),
|
||||||
// browser sends Content-Length: 0; easiest equivalent:
|
// browser sends Content-Length: 0; easiest equivalent:
|
||||||
body: "",
|
body: "",
|
||||||
}
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
const sid =
|
const sid = r?.json?.SessionID || r?.json?.sessionID || r?.json?.sessionId || r?.json?.SessionId || "";
|
||||||
r?.json?.SessionID ||
|
|
||||||
r?.json?.sessionID ||
|
|
||||||
r?.json?.sessionId ||
|
|
||||||
r?.json?.SessionId ||
|
|
||||||
"";
|
|
||||||
|
|
||||||
if (!sid) {
|
if (!sid) {
|
||||||
throw new Error(
|
throw new Error(`createSession: missing SessionID (status=${r?.status})`);
|
||||||
`createSession: missing SessionID (status=${r?.status})`
|
}
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
coop.sessionId = sid;
|
coop.sessionId = sid;
|
||||||
coop.anonymousUserId = r?.json?.AnonymousUserID ?? null;
|
coop.anonymousUserId = r?.json?.AnonymousUserID ?? null;
|
||||||
}
|
}
|
||||||
|
|
||||||
function normalizeAbsUrl(raw) {
|
function normalizeAbsUrl(raw) {
|
||||||
const s = String(raw || "").trim();
|
const s = String(raw || "").trim();
|
||||||
if (!s) return "";
|
if (!s) return "";
|
||||||
if (s.startsWith("//")) return `https:${s}`;
|
if (s.startsWith("//")) return `https:${s}`;
|
||||||
if (/^https?:\/\//i.test(s)) return s;
|
if (/^https?:\/\//i.test(s)) return s;
|
||||||
try {
|
try {
|
||||||
return new URL(s, `${BASE}/`).toString();
|
return new URL(s, `${BASE}/`).toString();
|
||||||
} catch {
|
} catch {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function productUrlFromId(productId) {
|
function productUrlFromId(productId) {
|
||||||
return `${REFERER}#/product/${encodeURIComponent(String(productId))}`;
|
return `${REFERER}#/product/${encodeURIComponent(String(productId))}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
function productFromApi(p) {
|
function productFromApi(p) {
|
||||||
if (!p || p.IsActive === false) return null;
|
if (!p || p.IsActive === false) return null;
|
||||||
|
|
||||||
const name = String(p.Name || "").trim();
|
const name = String(p.Name || "").trim();
|
||||||
if (!name) return null;
|
if (!name) return null;
|
||||||
|
|
||||||
const productId = p.ProductID;
|
const productId = p.ProductID;
|
||||||
if (!productId) return null;
|
if (!productId) return null;
|
||||||
|
|
||||||
const url = productUrlFromId(productId);
|
const url = productUrlFromId(productId);
|
||||||
|
|
||||||
const price =
|
const price = p?.CountDetails?.PriceText || (Number.isFinite(p?.Price) ? `$${Number(p.Price).toFixed(2)}` : "");
|
||||||
p?.CountDetails?.PriceText ||
|
|
||||||
(Number.isFinite(p?.Price) ? `$${Number(p.Price).toFixed(2)}` : "");
|
|
||||||
|
|
||||||
|
const upc = String(p.UPC || "").trim();
|
||||||
|
|
||||||
const upc = String(p.UPC || "").trim();
|
let rawKey = "";
|
||||||
|
if (upc) rawKey = `upc:${upc}`;
|
||||||
|
else if (p.ProductStoreID) rawKey = `id:${String(p.ProductStoreID).trim()}`;
|
||||||
|
else if (p.ProductID) rawKey = `id:${String(p.ProductID).trim()}`;
|
||||||
|
|
||||||
let rawKey = "";
|
const sku = normalizeSkuKey(rawKey, { storeLabel: "Co-op World of Whisky", url });
|
||||||
if (upc) rawKey = `upc:${upc}`;
|
|
||||||
else if (p.ProductStoreID) rawKey = `id:${String(p.ProductStoreID).trim()}`;
|
|
||||||
else if (p.ProductID) rawKey = `id:${String(p.ProductID).trim()}`;
|
|
||||||
|
|
||||||
const sku = normalizeSkuKey(rawKey, { storeLabel: "Co-op World of Whisky", url });
|
const img = normalizeAbsUrl(p.ImageURL);
|
||||||
|
|
||||||
const img = normalizeAbsUrl(p.ImageURL);
|
return {
|
||||||
|
name,
|
||||||
return {
|
price,
|
||||||
name,
|
url,
|
||||||
price,
|
sku,
|
||||||
url,
|
upc,
|
||||||
sku,
|
productId,
|
||||||
upc,
|
productStoreId: p.ProductStoreID || null,
|
||||||
productId,
|
img,
|
||||||
productStoreId: p.ProductStoreID || null,
|
};
|
||||||
img,
|
}
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------- scanner ---------------- */
|
/* ---------------- scanner ---------------- */
|
||||||
|
|
||||||
async function fetchCategoryPage(ctx, categoryId, page) {
|
async function fetchCategoryPage(ctx, categoryId, page) {
|
||||||
await ensureCoopSession(ctx);
|
await ensureCoopSession(ctx);
|
||||||
|
|
||||||
const doReq = () =>
|
const doReq = () =>
|
||||||
ctx.http.fetchJsonWithRetry(
|
ctx.http.fetchJsonWithRetry(
|
||||||
`${BASE}/api/v2/products/category/${categoryId}`,
|
`${BASE}/api/v2/products/category/${categoryId}`,
|
||||||
`coop:${ctx.cat.key}:p${page}`,
|
`coop:${ctx.cat.key}:p${page}`,
|
||||||
ctx.store.ua,
|
ctx.store.ua,
|
||||||
{
|
{
|
||||||
method: "POST",
|
method: "POST",
|
||||||
headers: coopHeaders(ctx, `/category/${ctx.cat.coopSlug}`),
|
headers: coopHeaders(ctx, `/category/${ctx.cat.coopSlug}`),
|
||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
page,
|
page,
|
||||||
Filters: {
|
Filters: {
|
||||||
Filters: [],
|
Filters: [],
|
||||||
LastSelectedFilter: null,
|
LastSelectedFilter: null,
|
||||||
SearchWithinTerm: null,
|
SearchWithinTerm: null,
|
||||||
},
|
},
|
||||||
orderby: null,
|
orderby: null,
|
||||||
}),
|
}),
|
||||||
}
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
let r = await doReq();
|
let r = await doReq();
|
||||||
|
|
||||||
// one fast retry on invalid_session: refresh SessionID and repeat
|
// one fast retry on invalid_session: refresh SessionID and repeat
|
||||||
if (r?.json?.type === "invalid_session") {
|
if (r?.json?.type === "invalid_session") {
|
||||||
ctx.store.coop.sessionId = "";
|
ctx.store.coop.sessionId = "";
|
||||||
await ensureCoopSession(ctx);
|
await ensureCoopSession(ctx);
|
||||||
r = await doReq();
|
r = await doReq();
|
||||||
}
|
}
|
||||||
|
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
function avoidMassRemoval(prevDb, discovered, ctx) {
|
function avoidMassRemoval(prevDb, discovered, ctx) {
|
||||||
const prev = prevDb?.size || 0;
|
const prev = prevDb?.size || 0;
|
||||||
const curr = discovered.size;
|
const curr = discovered.size;
|
||||||
if (!prev || !curr) return;
|
if (!prev || !curr) return;
|
||||||
if (curr / prev >= 0.6) return;
|
if (curr / prev >= 0.6) return;
|
||||||
|
|
||||||
ctx.logger.warn(
|
ctx.logger.warn(`${ctx.catPrefixOut} | Partial scan (${curr}/${prev}); preserving DB`);
|
||||||
`${ctx.catPrefixOut} | Partial scan (${curr}/${prev}); preserving DB`
|
|
||||||
);
|
|
||||||
|
|
||||||
for (const [k, v] of prevDb.entries()) {
|
for (const [k, v] of prevDb.entries()) {
|
||||||
if (!discovered.has(k)) discovered.set(k, v);
|
if (!discovered.has(k)) discovered.set(k, v);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scanCategoryCoop(ctx, prevDb, report) {
|
async function scanCategoryCoop(ctx, prevDb, report) {
|
||||||
const t0 = Date.now();
|
const t0 = Date.now();
|
||||||
const discovered = new Map();
|
const discovered = new Map();
|
||||||
|
|
||||||
const maxPages =
|
const maxPages = ctx.config.maxPages === null ? 500 : Math.min(ctx.config.maxPages, 500);
|
||||||
ctx.config.maxPages === null ? 500 : Math.min(ctx.config.maxPages, 500);
|
|
||||||
|
|
||||||
let done = 0;
|
let done = 0;
|
||||||
|
|
||||||
for (let page = 1; page <= maxPages; page++) {
|
for (let page = 1; page <= maxPages; page++) {
|
||||||
let r;
|
let r;
|
||||||
try {
|
try {
|
||||||
r = await fetchCategoryPage(ctx, ctx.cat.coopCategoryId, page);
|
r = await fetchCategoryPage(ctx, ctx.cat.coopCategoryId, page);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
ctx.logger.warn(
|
ctx.logger.warn(`${ctx.catPrefixOut} | page ${page} failed: ${e?.message || e}`);
|
||||||
`${ctx.catPrefixOut} | page ${page} failed: ${e?.message || e}`
|
break;
|
||||||
);
|
}
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
const arr = Array.isArray(r?.json?.Products?.Result)
|
const arr = Array.isArray(r?.json?.Products?.Result) ? r.json.Products.Result : [];
|
||||||
? r.json.Products.Result
|
|
||||||
: [];
|
|
||||||
|
|
||||||
done++;
|
done++;
|
||||||
|
|
||||||
let kept = 0;
|
let kept = 0;
|
||||||
for (const p of arr) {
|
for (const p of arr) {
|
||||||
const it = productFromApi(p);
|
const it = productFromApi(p);
|
||||||
if (!it) continue;
|
if (!it) continue;
|
||||||
discovered.set(it.url, it);
|
discovered.set(it.url, it);
|
||||||
kept++;
|
kept++;
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx.logger.ok(
|
ctx.logger.ok(
|
||||||
`${ctx.catPrefixOut} | Page ${padLeft(page, 3)} | ${String(
|
`${ctx.catPrefixOut} | Page ${padLeft(page, 3)} | ${String(r.status || "").padEnd(
|
||||||
r.status || ""
|
3,
|
||||||
).padEnd(3)} | items=${padLeft(kept, 3)} | bytes=${kbStr(
|
)} | items=${padLeft(kept, 3)} | bytes=${kbStr(
|
||||||
r.bytes
|
r.bytes,
|
||||||
)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}`
|
)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}`,
|
||||||
);
|
);
|
||||||
|
|
||||||
if (!arr.length) break;
|
if (!arr.length) break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (prevDb) avoidMassRemoval(prevDb, discovered, ctx);
|
if (prevDb) avoidMassRemoval(prevDb, discovered, ctx);
|
||||||
|
|
||||||
ctx.logger.ok(`${ctx.catPrefixOut} | Unique products: ${discovered.size}`);
|
ctx.logger.ok(`${ctx.catPrefixOut} | Unique products: ${discovered.size}`);
|
||||||
|
|
||||||
const { merged, newItems, updatedItems, removedItems, restoredItems } =
|
const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, {
|
||||||
mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel: ctx.store.name });
|
storeLabel: ctx.store.name,
|
||||||
|
});
|
||||||
|
|
||||||
const dbObj = buildDbObject(ctx, merged);
|
const dbObj = buildDbObject(ctx, merged);
|
||||||
writeJsonAtomic(ctx.dbFile, dbObj);
|
writeJsonAtomic(ctx.dbFile, dbObj);
|
||||||
|
|
||||||
const elapsed = Date.now() - t0;
|
const elapsed = Date.now() - t0;
|
||||||
|
|
||||||
report.categories.push({
|
report.categories.push({
|
||||||
store: ctx.store.name,
|
store: ctx.store.name,
|
||||||
label: ctx.cat.label,
|
label: ctx.cat.label,
|
||||||
key: ctx.cat.key,
|
key: ctx.cat.key,
|
||||||
dbFile: ctx.dbFile,
|
dbFile: ctx.dbFile,
|
||||||
scannedPages: done,
|
scannedPages: done,
|
||||||
discoveredUnique: discovered.size,
|
discoveredUnique: discovered.size,
|
||||||
newCount: newItems.length,
|
newCount: newItems.length,
|
||||||
updatedCount: updatedItems.length,
|
updatedCount: updatedItems.length,
|
||||||
removedCount: removedItems.length,
|
removedCount: removedItems.length,
|
||||||
restoredCount: restoredItems.length,
|
restoredCount: restoredItems.length,
|
||||||
elapsedMs: elapsed,
|
elapsedMs: elapsed,
|
||||||
});
|
});
|
||||||
|
|
||||||
report.totals.newCount += newItems.length;
|
report.totals.newCount += newItems.length;
|
||||||
report.totals.updatedCount += updatedItems.length;
|
report.totals.updatedCount += updatedItems.length;
|
||||||
report.totals.removedCount += removedItems.length;
|
report.totals.removedCount += removedItems.length;
|
||||||
report.totals.restoredCount += restoredItems.length;
|
report.totals.restoredCount += restoredItems.length;
|
||||||
|
|
||||||
addCategoryResultToReport(
|
addCategoryResultToReport(
|
||||||
report,
|
report,
|
||||||
ctx.store.name,
|
ctx.store.name,
|
||||||
ctx.cat.label,
|
ctx.cat.label,
|
||||||
newItems,
|
newItems,
|
||||||
updatedItems,
|
updatedItems,
|
||||||
removedItems,
|
removedItems,
|
||||||
restoredItems
|
restoredItems,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------- store ---------------- */
|
/* ---------------- store ---------------- */
|
||||||
|
|
||||||
/**
 * Builds the store definition for Co-op World of Whisky.
 *
 * @param {string} defaultUa - User-Agent string stored on the definition as `ua`.
 * @returns {object} Store definition: identity fields, the category scanner,
 *   the `coop` session settings and the list of categories to scan.
 */
function createStore(defaultUa) {
  return {
    key: "coop",
    name: "Co-op World of Whisky",
    host: "shoponlinewhisky-wine.coopwinespiritsbeer.com",
    ua: defaultUa,
    scanCategory: scanCategoryCoop,

    // put your captured values here (or pull from env)
    coop: {
      sessionKey: "",
      chainId: "",
      storeId: "",
      appVersion: "",
      sessionId: "", // set by ensureCoopSession()
      anonymousUserId: null,
    },

    categories: [
      {
        key: "canadian-whisky",
        label: "Canadian Whisky",
        coopSlug: "canadian_whisky",
        coopCategoryId: 4,
        startUrl: `${REFERER}#/category/canadian_whisky`,
      },
      {
        key: "bourbon-whiskey",
        label: "Bourbon Whiskey",
        coopSlug: "bourbon_whiskey",
        coopCategoryId: 9,
        startUrl: `${REFERER}#/category/bourbon_whiskey`,
      },
      {
        key: "scottish-single-malts",
        label: "Scottish Single Malts",
        coopSlug: "scottish_single_malts",
        coopCategoryId: 6,
        startUrl: `${REFERER}#/category/scottish_single_malts`,
      },
      {
        key: "scottish-blends",
        label: "Scottish Whisky Blends",
        coopSlug: "scottish_whisky_blends",
        coopCategoryId: 5,
        startUrl: `${REFERER}#/category/scottish_whisky_blends`,
      },
      {
        key: "american-whiskey",
        label: "American Whiskey",
        coopSlug: "american_whiskey",
        coopCategoryId: 8,
        startUrl: `${REFERER}#/category/american_whiskey`,
      },
      {
        key: "world-whisky",
        label: "World Whisky",
        coopSlug: "world_international",
        coopCategoryId: 10,
        startUrl: `${REFERER}#/category/world_international`,
      },
      {
        key: "rum",
        label: "Rum",
        coopSlug: "spirits_rum",
        coopCategoryId: 24,
        startUrl: `${REFERER}#/category/spirits_rum`,
      },
    ],
  };
}
|
||||||
|
|
||||||
module.exports = { createStore };
|
module.exports = { createStore };
|
||||||
|
|
|
||||||
|
|
@ -13,142 +13,123 @@ const { buildDbObject, writeJsonAtomic } = require("../tracker/db");
|
||||||
const { addCategoryResultToReport } = require("../tracker/report");
|
const { addCategoryResultToReport } = require("../tracker/report");
|
||||||
|
|
||||||
/**
 * Tells whether a listing page's HTML carries one of the "no results" markers.
 *
 * @param {string} html - Raw listing-page HTML; null/undefined are treated as "".
 * @returns {boolean} True when the markup contains the `collection--empty`
 *   token or the text "No products found" (both matched case-insensitively).
 */
function craftCellarsIsEmptyListingPage(html) {
  const s = String(html || "");
  if (/collection--empty\b/i.test(s)) return true;
  if (/No products found/i.test(s)) return true;
  return false;
}
|
||||||
|
|
||||||
/**
 * Normalizes a product URL by dropping its query string and fragment, so the
 * same product reached through different links maps to a single key.
 *
 * @param {string} raw - URL to canonicalize.
 * @returns {string} The URL without `?query` and `#hash`. When `raw` cannot be
 *   parsed as an absolute URL, `raw` is returned as a string ("" for falsy input).
 */
function canonicalizeCraftProductUrl(raw) {
  try {
    const u = new URL(String(raw));
    u.search = "";
    u.hash = "";
    return u.toString();
  } catch {
    // Not an absolute URL: hand the input back untouched instead of failing.
    return String(raw || "");
  }
}
|
||||||
|
|
||||||
/**
 * Pulls a display price out of one product-card HTML block.
 *
 * Preference order: the first dollar amount following a "sale price" label,
 * then the first one following a "regular price" label, then the first dollar
 * amount anywhere in the block.
 *
 * @param {string} block - HTML of a single product card.
 * @returns {string} The amount as written in the markup with inner whitespace
 *   removed (e.g. "$1,234.50"), or "" when no dollar amount is present.
 */
function extractShopifyCardPrice(block) {
  const b = String(block || "");

  // All "$<amount>" tokens in a text, with whitespace between "$" and digits removed.
  const dollars = (txt) => [...String(txt).matchAll(/\$\s*[\d,]+(?:\.\d{2})?/g)].map((m) => m[0].replace(/\s+/g, ""));

  // split(...)[1] is the text between the first label occurrence and the next one (or the end).
  const saleRegion = b.split(/sale price/i)[1] || "";
  const saleD = dollars(saleRegion);
  if (saleD.length) return saleD[0];

  const regRegion = b.split(/regular price/i)[1] || "";
  const regD = dollars(regRegion);
  if (regD.length) return regD[0];

  const any = dollars(b);
  return any[0] || "";
}
|
||||||
|
|
||||||
/**
 * Parses product cards out of a Craft Cellars listing page.
 *
 * Tries to narrow the HTML down to the product grid first (an element with id
 * `ProductGridContainer` or `product-grid`, whichever match is longer) and
 * falls back to the whole page when that candidate contains no product links.
 *
 * @param {string} html - Listing-page HTML.
 * @param {object} ctx - Scan context, forwarded to the inner parser.
 * @returns {Array<{name: string, price: string, url: string, img: string}>}
 *   Products found on the page, unique by URL.
 */
function parseProductsCraftCellars(html, ctx) {
  const s = String(html || "");

  // NOTE(review): the lazy `[\s\S]*?</div>` stops at the first closing div, so
  // for nested markup the candidate is only the head of the grid; the
  // `/products/` check below then falls back to the full page.
  const g1 = s.match(/<div\b[^>]*id=["']ProductGridContainer["'][^>]*>[\s\S]*?<\/div>/i)?.[0] || "";
  const g2 = s.match(/<div\b[^>]*id=["']product-grid["'][^>]*>[\s\S]*?<\/div>/i)?.[0] || "";

  const gridCandidate = g1.length > g2.length ? g1 : g2;
  const grid = /\/products\//i.test(gridCandidate) ? gridCandidate : s;

  return parseProductsCraftCellarsInner(grid, ctx);
}
|
||||||
|
|
||||||
/**
 * Extracts products from a chunk of listing HTML.
 *
 * The HTML is cut into `<li>` blocks; when fewer than five are found it is cut
 * into `<div class="... card ...">` blocks instead. Each block must yield a
 * `/products/` link and a non-empty name to be kept.
 *
 * @param {string} html - Listing HTML (ideally just the product grid).
 * @param {object} ctx - Scan context; `ctx.store.host` is used as the base for
 *   relative links, defaulting to "craftcellars.ca".
 * @returns {Array<{name: string, price: string, url: string, img: string}>}
 *   One entry per canonical product URL; a later card with the same URL
 *   replaces an earlier one.
 */
function parseProductsCraftCellarsInner(html, ctx) {
  const s = String(html || "");
  const items = [];

  let blocks = [...s.matchAll(/<li\b[^>]*>[\s\S]*?<\/li>/gi)].map((m) => m[0]);
  if (blocks.length < 5) {
    blocks = [...s.matchAll(/<div\b[^>]*class=["'][^"']*\bcard\b[^"']*["'][^>]*>[\s\S]*?<\/div>/gi)].map(
      (m) => m[0],
    );
  }

  const base = `https://${(ctx && ctx.store && ctx.store.host) || "craftcellars.ca"}/`;

  for (const block of blocks) {
    // Prefer an href that sits on an <a> tag; accept any href as a fallback.
    const href =
      block.match(/<a\b[^>]*href=["']([^"']*\/products\/[^"']+)["']/i)?.[1] ||
      block.match(/href=["']([^"']*\/products\/[^"']+)["']/i)?.[1];
    if (!href) continue;

    let url = "";
    try {
      url = new URL(decodeHtml(href), base).toString();
    } catch {
      continue;
    }
    url = canonicalizeCraftProductUrl(url);

    // Name candidates, most specific first: text of the first element inside
    // the product link, a product link inside an h2/h3, then the link's inner HTML.
    const nameHtml =
      block.match(/<a\b[^>]*href=["'][^"']*\/products\/[^"']+["'][^>]*>\s*<[^>]*>\s*([^<]{2,200}?)\s*</i)?.[1] ||
      block.match(
        /<h[23]\b[^>]*>[\s\S]*?<a\b[^>]*\/products\/[^"']+[^>]*>([\s\S]*?)<\/a>[\s\S]*?<\/h[23]>/i,
      )?.[1] ||
      block.match(/<a\b[^>]*href=["'][^"']*\/products\/[^"']+["'][^>]*>([\s\S]*?)<\/a>/i)?.[1];

    const name = sanitizeName(stripTags(decodeHtml(nameHtml || "")));
    if (!name) continue;

    const price = extractShopifyCardPrice(block);
    const img = extractFirstImgUrl(block, base);

    items.push({ name, price, url, img });
  }

  // De-duplicate by URL; Map keeps first-seen order while the last value wins.
  const uniq = new Map();
  for (const it of items) uniq.set(it.url, it);
  return [...uniq.values()];
}
|
||||||
|
|
||||||
/**
 * Formats a price value as a dollar string with thousands separators and
 * exactly two decimals, e.g. "1234.5" -> "$1,234.50".
 *
 * @param {string|number} s - Price as delivered by the products JSON.
 * @returns {string} Formatted price, or "" when the input holds no digits or
 *   does not reduce to a finite number (e.g. "1.2.3").
 */
function usdFromShopifyPriceStr(s) {
  const cleaned = String(s ?? "").replace(/[^0-9.]/g, "");
  // Number("") is 0, so without this guard a missing price would be reported as "$0.00".
  if (!/\d/.test(cleaned)) return "";

  const n = Number(cleaned);
  if (!Number.isFinite(n)) return "";
  return `$${n.toLocaleString("en-US", {
    minimumFractionDigits: 2,
    maximumFractionDigits: 2,
  })}`;
}
|
||||||
|
|
||||||
/**
 * Returns a numeric config value, or a fallback when it is not a finite number.
 *
 * @param {*} v - Candidate value; only finite numbers are accepted (numeric
 *   strings, NaN, Infinity, null and undefined all fall back).
 * @param {*} fallback - Value returned when `v` is rejected.
 * @returns {*} `v` when `Number.isFinite(v)`, otherwise `fallback`.
 */
function cfgNum(v, fallback) {
  return Number.isFinite(v) ? v : fallback;
}
|
||||||
|
|
||||||
/* ---------- NEW: product page SKU extractor ---------- */
|
/* ---------- NEW: product page SKU extractor ---------- */
|
||||||
/**
 * Reads the SKU from a product page's HTML.
 *
 * Looks for `<strong>SKU:</strong> ... <span>VALUE</span>` first, then for a
 * bare "SKU: VALUE" anywhere in the page.
 *
 * @param {string} html - Product-page HTML.
 * @returns {*} Result of `normalizeCspc` applied to the extracted text
 *   (applied to "" when no SKU is found).
 */
function extractCraftSkuFromProductPageHtml(html) {
  const s = String(html || "");

  // allow any content between </strong> and <span> (including newlines, other tags)
  const m =
    s.match(/<strong>\s*SKU:\s*<\/strong>[\s\S]{0,200}?<span>\s*([^<]{1,80}?)\s*<\/span>/i) ||
    s.match(/\bSKU:\s*([A-Za-z0-9][A-Za-z0-9\-_/ ]{0,40})/i);

  const raw = m && m[1] ? stripTags(decodeHtml(m[1])) : "";
  return normalizeCspc(raw);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -158,283 +139,233 @@ function extractCraftSkuFromProductPageHtml(html) {
|
||||||
* - product page HTML is final SKU fallback
|
* - product page HTML is final SKU fallback
|
||||||
*/
|
*/
|
||||||
/**
 * Scans one Craft Cellars category, updates the category DB file and appends
 * the outcome to `report`.
 *
 * Steps, in order:
 *  1. Walk the HTML listing pages; these decide which products are discovered.
 *  2. If any were found, walk the collection's `products.json` and take SKU,
 *     price and image for products already seen in the HTML.
 *  3. Combine HTML data, JSON data and the previous DB entry per URL.
 *  4. For entries whose SKU still needs detail, fetch the product page and try
 *     to read the SKU there (best effort).
 *  5. Merge into the previous DB, write it, and record counts in `report`.
 *
 * @param {object} ctx - Scan context (`cat`, `store`, `config`, `http`,
 *   `logger`, `dbFile`, `catPrefixOut` are read here).
 * @param {object} prevDb - Previous DB; `prevDb.byUrl` is consulted for cached
 *   SKU/image values.
 * @param {object} report - Run report; `categories` and `totals` are mutated.
 * @returns {Promise<void>}
 * @throws {Error} When the category start URL has no `/collections/<handle>` path.
 */
async function scanCategoryCraftCellars(ctx, prevDb, report) {
  const t0 = Date.now();
  const PAGE_CAP = 200;

  const perPageDelayMs = Math.max(0, cfgNum(ctx?.cat?.pageStaggerMs, cfgNum(ctx?.cat?.discoveryDelayMs, 0))) || 0;
  const perJsonPageDelayMs = Math.max(0, cfgNum(ctx?.cat?.jsonPageDelayMs, perPageDelayMs));

  const htmlMap = new Map();

  // A missing or non-numeric maxPages falls back to the cap. Previously an
  // undefined value produced NaN here and the listing loop never ran.
  const configuredMax = ctx.config.maxPages;
  const maxPages =
    configuredMax == null || Number.isNaN(Number(configuredMax)) ? PAGE_CAP : Math.min(configuredMax, PAGE_CAP);

  let htmlPagesFetched = 0;
  let emptyStreak = 0;

  /* ---------- 1) HTML listing pages ---------- */
  for (let p = 1; p <= maxPages; p++) {
    if (p > 1 && perPageDelayMs > 0) await sleep(perPageDelayMs);

    const pageUrl = makePageUrlShopifyQueryPage(ctx.cat.startUrl, p);
    const { text: html } = await ctx.http.fetchTextWithRetry(
      pageUrl,
      `craft:html:${ctx.cat.key}:p${p}`,
      ctx.store.ua,
    );
    htmlPagesFetched++;

    if (craftCellarsIsEmptyListingPage(html)) break;

    const items = parseProductsCraftCellars(html, ctx);
    if (!items.length) {
      // Tolerate one unparseable page; stop after two in a row.
      emptyStreak++;
      if (emptyStreak >= 2) break;
      continue;
    }
    emptyStreak = 0;

    for (const it of items) {
      const url = canonicalizeCraftProductUrl(it.url);
      if (!url) continue;
      htmlMap.set(url, {
        name: it.name || "",
        price: it.price || "",
        url,
        img: it.img || "",
      });
    }
  }

  if (!htmlMap.size) {
    ctx.logger.warn(`${ctx.catPrefixOut} | HTML listing returned 0 items; refusing JSON-only discovery`);
  }

  /* ---------- 2) collection products.json (enrichment only) ---------- */
  const jsonMap = new Map();

  if (htmlMap.size) {
    const start = new URL(ctx.cat.startUrl);
    const m = start.pathname.match(/^\/collections\/([^/]+)/i);
    if (!m) throw new Error(`CraftCellars: couldn't extract collection handle from ${ctx.cat.startUrl}`);
    const collectionHandle = m[1];

    const limit = 250;
    let jsonPage = 1;
    let jsonPagesFetched = 0;

    while (true) {
      if (jsonPage > 1 && perJsonPageDelayMs > 0) await sleep(perJsonPageDelayMs);

      const url = `https://${ctx.store.host}/collections/${collectionHandle}/products.json?limit=${limit}&page=${jsonPage}`;
      const r = await ctx.http.fetchJsonWithRetry(url, `craft:coljson:${ctx.cat.key}:p${jsonPage}`, ctx.store.ua);

      const products = Array.isArray(r?.json?.products) ? r.json.products : [];
      jsonPagesFetched++;

      if (!products.length) break;

      for (const p of products) {
        const handle = String(p?.handle || "");
        if (!handle) continue;

        const prodUrl = canonicalizeCraftProductUrl(`https://${ctx.store.host}/products/${handle}`);
        // JSON never adds products; it only enriches those seen in the HTML.
        if (!htmlMap.has(prodUrl)) continue;

        const variants = Array.isArray(p?.variants) ? p.variants : [];
        const v = variants.find((x) => x && x.available === true) || variants[0] || null;

        const sku = normalizeCspc(v?.sku || "");
        const price = v?.price ? usdFromShopifyPriceStr(v.price) : "";

        let img = "";
        const images = Array.isArray(p?.images) ? p.images : [];
        if (images[0]) {
          img = typeof images[0] === "string" ? images[0] : String(images[0]?.src || images[0]?.url || "");
        }
        if (!img && p?.image) img = String(p.image?.src || p.image?.url || p.image || "");
        img = String(img || "").trim();
        // Protocol-relative URL -> absolute https URL.
        if (img.startsWith("//")) img = `https:${img}`;

        jsonMap.set(prodUrl, { sku, price, img });
      }

      // A short page is the last page; PAGE_CAP guards against endless paging.
      if (products.length < limit) break;
      if (++jsonPage > PAGE_CAP) break;
    }

    ctx.logger.ok(`${ctx.catPrefixOut} | HTML pages=${htmlPagesFetched} JSON pages=${jsonPagesFetched}`);
  }

  /* ---------- 3) combine HTML + JSON + previous DB ---------- */
  const discovered = new Map();
  for (const [url, it] of htmlMap.entries()) {
    const j = jsonMap.get(url);
    const prev = prevDb?.byUrl?.get(url) || null;

    discovered.set(url, {
      name: it.name,
      price: j?.price || it.price || "",
      url,
      // reuse cached SKU unless we found something better this run
      sku: pickBetterSku(j?.sku || "", prev?.sku || ""),
      // reuse cached image if we didn't find one
      img: j?.img || it.img || prev?.img || "",
    });
  }

  /* ---------- NEW: product page SKU fallback (cached; only when needed) ---------- */
  const perProductSkuDelayMs = Math.max(
    0,
    cfgNum(ctx?.cat?.skuPageDelayMs, cfgNum(ctx?.cat?.jsonPageDelayMs, perPageDelayMs)),
  );

  let skuPagesFetched = 0;

  for (const it of discovered.values()) {
    // only hit product pages when missing/synthetic
    if (!needsSkuDetail(it.sku)) continue;

    if (perProductSkuDelayMs > 0) await sleep(perProductSkuDelayMs);

    try {
      const { text } = await ctx.http.fetchTextWithRetry(
        it.url,
        `craft:prodpage:${ctx.cat.key}:${Buffer.from(it.url).toString("base64").slice(0, 24)}`,
        ctx.store.ua,
      );
      skuPagesFetched++;

      const sku = extractCraftSkuFromProductPageHtml(text);
      if (sku) it.sku = sku;
    } catch {
      /* best effort: a failed product page leaves the existing SKU in place */
    }
  }

  ctx.logger.ok(`${ctx.catPrefixOut} | SKU fallback pages=${skuPagesFetched}`);
  ctx.logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}`);

  /* ---------- 5) merge, persist, report ---------- */
  const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, {
    storeLabel: ctx.store.name,
  });

  const dbObj = buildDbObject(ctx, merged);
  writeJsonAtomic(ctx.dbFile, dbObj);

  const elapsed = Date.now() - t0;

  report.categories.push({
    store: ctx.store.name,
    label: ctx.cat.label,
    key: ctx.cat.key,
    dbFile: ctx.dbFile,
    scannedPages: htmlPagesFetched,
    discoveredUnique: discovered.size,
    newCount: newItems.length,
    updatedCount: updatedItems.length,
    removedCount: removedItems.length,
    restoredCount: restoredItems.length,
    elapsedMs: elapsed,
  });

  report.totals.newCount += newItems.length;
  report.totals.updatedCount += updatedItems.length;
  report.totals.removedCount += removedItems.length;
  report.totals.restoredCount += restoredItems.length;

  addCategoryResultToReport(
    report,
    ctx.store.name,
    ctx.cat.label,
    newItems,
    updatedItems,
    removedItems,
    restoredItems,
  );
}
|
||||||
|
|
||||||
/**
 * Builds the store definition for Craft Cellars.
 *
 * @param {string} defaultUa - User-Agent string stored on the definition as `ua`.
 * @returns {object} Store definition: identity fields, the category scanner,
 *   the listing-page helpers and the categories to scan with their delay settings.
 */
function createStore(defaultUa) {
  return {
    key: "craftcellars",
    name: "Craft Cellars",
    host: "craftcellars.ca",
    ua: defaultUa,

    scanCategory: scanCategoryCraftCellars,

    parseProducts: parseProductsCraftCellars,
    makePageUrl: makePageUrlShopifyQueryPage,
    isEmptyListingPage: craftCellarsIsEmptyListingPage,

    categories: [
      {
        key: "whisky",
        label: "Whisky",
        startUrl: "https://craftcellars.ca/collections/whisky?filter.v.availability=1",
        pageConcurrency: 1,
        pageStaggerMs: 10000,
        discoveryDelayMs: 10000,
        skuPageDelayMs: 12000,
      },
      {
        key: "rum",
        label: "Rum",
        startUrl: "https://craftcellars.ca/collections/rum?filter.v.availability=1",
        pageConcurrency: 1,
        pageStaggerMs: 10000,
        discoveryDelayMs: 10000,
        skuPageDelayMs: 12000,
      },
    ],
  };
}
|
||||||
|
|
||||||
module.exports = { createStore };
|
module.exports = { createStore };
|
||||||
|
|
|
||||||
|
|
@ -6,12 +6,12 @@ const { normalizeCspc, pickBetterSku, needsSkuDetail } = require("../utils/sku")
|
||||||
const { makePageUrl } = require("../utils/url");
|
const { makePageUrl } = require("../utils/url");
|
||||||
|
|
||||||
/**
 * Guesses from a product tile's HTML whether the product is in stock.
 *
 * An `outofstock` token always wins. Otherwise an `in-stock` or `instock`
 * token, or a text node of the form "<n> in stock", counts as in stock.
 *
 * @param {string} block - HTML of one product tile.
 * @returns {boolean} True only when an in-stock marker is present and no
 *   `outofstock` marker is.
 */
function looksInStock(block) {
  const s = String(block || "");
  if (/\boutofstock\b/i.test(s)) return false;
  if (/\bin-stock\b/i.test(s)) return true;
  if (/\binstock\b/i.test(s)) return true;
  if (/>\s*\d+\s+in\s+stock\s*</i.test(s)) return true;
  // No marker matched. (The former trailing `in-stock` re-test could never be true here.)
  return false;
}
|
||||||
|
|
||||||
// Gull product tiles commonly contain two amounts:
|
// Gull product tiles commonly contain two amounts:
|
||||||
|
|
@ -19,116 +19,111 @@ function looksInStock(block) {
|
||||||
// - deposit (e.g. 0.10) inside the "price suffix"
|
// - deposit (e.g. 0.10) inside the "price suffix"
|
||||||
// We extract all amounts and pick the last one >= 1.00 (sale price if present).
|
// We extract all amounts and pick the last one >= 1.00 (sale price if present).
|
||||||
/**
 * Extracts the current price from a Gull product tile.
 *
 * Collects every WooCommerce price amount in the tile, discards amounts below
 * 1.00 (deposits / small fees) and returns the last remaining one.
 *
 * @param {string} block - HTML of one product tile.
 * @returns {string} Price formatted as "$<amount>" with two decimals, or ""
 *   when the tile holds no amount of at least 1.00.
 */
function extractGullPriceFromBlock(block) {
  const s = String(block || "");
  const nums = [];

  // Match WooCommerce "Price amount" blocks, pull out the BDI contents,
  // then strip tags/entities and parse as float.
  const re =
    /<span\b[^>]*class=["'][^"']*\bwoocommerce-Price-amount\b[^"']*["'][^>]*>\s*<bdi\b[^>]*>([\s\S]*?)<\/bdi>/gi;

  for (const m of s.matchAll(re)) {
    const raw = cleanText(decodeHtml(m[1] || "")); // e.g. "$24.05"
    const n = parseFloat(String(raw).replace(/[^0-9.]/g, ""));
    if (Number.isFinite(n)) nums.push(n);
  }

  // Filter out bottle deposits / tiny fees (usually 0.10, 0.20, etc.)
  const big = nums.filter((n) => n >= 1.0);

  if (!big.length) return "";

  // If sale price exists, Woo often renders old then new; taking the last >=1
  // typically yields the current price.
  const chosen = big[big.length - 1];

  // Normalize formatting
  return `$${chosen.toFixed(2)}`;
}
|
||||||
|
|
||||||
// Gull SKUs are often NOT 6 digits (e.g. 67424).
|
// Gull SKUs are often NOT 6 digits (e.g. 67424).
|
||||||
// If it's not 6 digits, represent as id:<digits> to avoid normalizeCspc turning it into u:SHA.
|
// If it's not 6 digits, represent as id:<digits> to avoid normalizeCspc turning it into u:SHA.
|
||||||
/**
 * Normalizes a raw Gull SKU into the identifier form used in the DB.
 *
 * - Values already prefixed with `id:` or `u:` are returned unchanged.
 * - The first standalone run of 3-10 digits is used when present: six digits
 *   go through `normalizeCspc`, any other length becomes `id:<digits>`.
 * - Anything else is handed to `normalizeCspc` as-is.
 *
 * @param {string} raw - SKU text taken from a tile or product page.
 * @returns {*} Normalized SKU.
 */
function normalizeGullSku(raw) {
  const s = cleanText(decodeHtml(String(raw || ""))).trim();

  // already in a stable prefixed form
  if (/^(id:|u:)/i.test(s)) return s;

  // digits-only SKU (from page / tile)
  const digits = s.match(/\b(\d{3,10})\b/)?.[1] || "";
  if (digits) {
    if (digits.length === 6) return normalizeCspc(digits);
    return `id:${digits}`;
  }

  // fall back to existing normalizer (may yield u:...)
  return normalizeCspc(s);
}
|
||||||
|
|
||||||
/**
 * Report whether a SKU looks like a generated `u:<hex>` key.
 *
 * When we fall back to normalizeCspc(url), we may end up with a generated
 * u:XXXXXXXX key instead of a real store SKU.
 *
 * @param {*} sku - Candidate SKU; nullish/falsy values are treated as "".
 * @returns {boolean} true for `u:` followed by 8 to 128 hex characters (case-insensitive).
 */
function isGeneratedUrlSku(sku) {
  const s = String(sku || "");
  // the DB holds u:<8 hex> keys, so accept 8+
  return /^u:[0-9a-f]{8,128}$/i.test(s);
}
|
||||||
|
|
||||||
/**
 * Extract the SKU from Gull product page HTML.
 *
 * @param {*} html - Product page markup; nullish values are treated as "".
 * @returns {string} The SKU normalized via normalizeGullSku, or "" when no SKU is found.
 */
function extractGullSkuFromProductPage(html) {
  const s = String(html || "");

  // Most reliable: <span class="sku">67424</span>
  const m1 = s.match(/<span\b[^>]*class=["'][^"']*\bsku\b[^"']*["'][^>]*>\s*([0-9]{3,10})\s*<\/span>/i);
  if (m1?.[1]) return normalizeGullSku(m1[1]);

  // Fallback: "SKU: 67424" text
  const m2 = s.match(/\bSKU:\s*([0-9]{3,10})\b/i);
  if (m2?.[1]) return normalizeGullSku(m2[1]);

  return "";
}
|
||||||
|
|
||||||
/**
 * Serial limiter: ensures at least minIntervalMs between request starts.
 *
 * Tasks run one at a time, in the order they were scheduled. A task that
 * throws or rejects only rejects its own promise; later tasks still run.
 *
 * @param {number} minIntervalMs - Minimum spacing between task starts, in milliseconds.
 * @returns {(fn: () => any) => Promise<any>} schedule(fn): queues fn and settles with its result.
 */
function createMinIntervalLimiter(minIntervalMs) {
  let lastStart = 0;
  let chain = Promise.resolve();

  return function schedule(fn) {
    const run = chain.then(async () => {
      const waitMs = Math.max(0, lastStart + minIntervalMs - Date.now());
      if (waitMs) await new Promise((r) => setTimeout(r, waitMs));
      lastStart = Date.now();
      return fn();
    });

    // The queue itself must never stay rejected: otherwise one failed task
    // would make every later schedule() call fail without running its fn.
    chain = run.catch(() => {});
    return run;
  };
}
|
||||||
|
|
||||||
/**
 * Fetch a URL as text, retrying only on HTTP 429.
 *
 * @param {string} url - URL to fetch.
 * @param {object} opts
 * @param {Function} opts.fetchFn - fetch-like function called as fetchFn(url, { headers }).
 * @param {object} [opts.headers] - Request headers forwarded to fetchFn.
 * @param {number} [opts.maxRetries=2] - Number of retries allowed after a 429.
 * @returns {Promise<string>} The response body text.
 * @throws {Error} `HTTP <status> fetching <url>` for a non-OK, non-429 response, or
 *   `HTTP 429 fetching <url>` once the retries are exhausted.
 */
async function fetchWith429Backoff(url, { fetchFn, headers, maxRetries = 2 }) {
  let attempt = 0;

  while (true) {
    const res = await fetchFn(url, { headers });

    if (res.status !== 429) {
      if (!res.ok) throw new Error(`HTTP ${res.status} fetching ${url}`);
      return await res.text();
    }

    if (attempt >= maxRetries) throw new Error(`HTTP 429 fetching ${url}`);

    // Respect Retry-After if present; otherwise progressive backoff.
    const ra = res.headers && typeof res.headers.get === "function" ? res.headers.get("retry-after") : null;
    const raText = typeof ra === "string" ? ra.trim() : "";

    let waitSec = 15 * (attempt + 1);
    if (/^\d+$/.test(raText)) {
      // delay-seconds form
      waitSec = parseInt(raText, 10);
    } else if (/[a-z]/i.test(raText)) {
      // HTTP-date form; the letter check keeps odd numeric values on the default backoff
      const retryAt = Date.parse(raText);
      if (Number.isFinite(retryAt)) waitSec = Math.max(0, (retryAt - Date.now()) / 1000);
    }

    await new Promise((r) => setTimeout(r, waitSec * 1000));
    attempt++;
  }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -137,142 +132,133 @@ async function fetchWith429Backoff(url, { fetchFn, headers, maxRetries = 2 }) {
|
||||||
*
|
*
|
||||||
* NEW: accepts prevDb so we can skip fetch if URL already has a good SKU cached.
|
* NEW: accepts prevDb so we can skip fetch if URL already has a good SKU cached.
|
||||||
*/
|
*/
|
||||||
async function hydrateGullSkus(items, { fetchFn, ua, minIntervalMs = 12000, maxRetries = 2, prevDb } = {}) {
  // Mutates the given items in place (only their `sku` field) and returns the same array.
  if (!fetchFn) throw new Error("hydrateGullSkus requires opts.fetchFn");

  // Product-page fetches are serialized and spaced by minIntervalMs.
  const schedule = createMinIntervalLimiter(minIntervalMs);

  const headers = {
    "user-agent": ua || "Mozilla/5.0",
    accept: "text/html,application/xhtml+xml",
  };

  for (const it of items || []) {
    if (!it || !it.url) continue;

    // NEW: if DB already has a good SKU, reuse it and skip fetch
    const prev = prevDb?.byUrl?.get(it.url) || null;
    if (prev?.sku && !needsSkuDetail(prev.sku)) {
      it.sku = pickBetterSku(it.sku, prev.sku);
      continue;
    }

    if (!isGeneratedUrlSku(it.sku)) continue; // only where required

    // A failed fetch rejects here and stops hydration of the remaining items.
    const html = await schedule(() => fetchWith429Backoff(it.url, { fetchFn, headers, maxRetries }));

    const realSku = extractGullSkuFromProductPage(html);
    if (realSku) it.sku = pickBetterSku(realSku, it.sku);
  }

  return items;
}
|
||||||
|
|
||||||
/**
 * Parse a Gull category listing page into product items.
 *
 * @param {*} html - Listing page markup; nullish values are treated as "".
 * @param {object} [ctx] - Optional context; ctx.store.host overrides the default host for resolving URLs.
 * @returns {Array<{name: string, price: *, url: string, sku: *, img: *}>} One entry per product URL
 *   (when a URL repeats, the last occurrence wins). Tiles that fail looksInStock, or that have
 *   no product link, an unparsable URL or no title, are skipped.
 */
function parseProductsGull(html, ctx) {
  const s = String(html || "");
  const items = [];

  // split on <li class="product ...">
  const parts = s.split(/<li\b[^>]*class=["'][^"']*\bproduct\b[^"']*["'][^>]*>/i);
  if (parts.length <= 1) return items;

  const base = `https://${(ctx && ctx.store && ctx.store.host) || "gullliquorstore.com"}/`;

  // parts[0] is the markup before the first product tile
  for (let i = 1; i < parts.length; i++) {
    const block = '<li class="product"' + parts[i];

    if (!looksInStock(block)) continue;

    const hrefM = block.match(
      /<a\b[^>]*href=["']([^"']+)["'][^>]*class=["'][^"']*\bwoocommerce-LoopProduct-link\b/i,
    );
    if (!hrefM || !hrefM[1]) continue;

    let url;
    try {
      url = new URL(decodeHtml(hrefM[1]), base).toString();
    } catch {
      // href could not be resolved to a URL: skip this tile
      continue;
    }

    const titleM = block.match(
      /<h2\b[^>]*class=["'][^"']*\bwoocommerce-loop-product__title\b[^"']*["'][^>]*>([\s\S]*?)<\/h2>/i,
    );
    const name = cleanText(decodeHtml(titleM ? titleM[1] : ""));
    if (!name) continue;

    const price = extractGullPriceFromBlock(block);

    const skuRaw =
      block.match(/\bdata-product_sku=["']([^"']+)["']/i)?.[1] ||
      block.match(/\bSKU\b[^0-9]{0,30}(\d{3,10})\b/i)?.[1] ||
      url; // OK fallback; hydrateGullSkus will only re-fetch when this becomes u:...

    const sku = normalizeGullSku(skuRaw);

    const img = extractFirstImgUrl(block, base);

    items.push({ name, price, url, sku, img });
  }

  // de-duplicate by URL
  const uniq = new Map();
  for (const it of items) uniq.set(it.url, it);
  return [...uniq.values()];
}
|
||||||
|
|
||||||
/**
 * Build the store definition for Gull Liquor.
 *
 * @param {string} defaultUa - User-agent string stored on the definition as `ua`.
 * @returns {object} Store definition: identity fields, parser, SKU hydration hook,
 *   paging function and the category list.
 */
function createStore(defaultUa) {
  return {
    key: "gull",
    name: "Gull Liquor",
    host: "gullliquorstore.com",
    ua: defaultUa,
    parseProducts: parseProductsGull,

    // Optional hook callers can use to post-process items:
    // only hits product pages when sku is u:...
    hydrateSkus: hydrateGullSkus,
    productPageMinIntervalMs: 12000, // slow by default; Gull is strict

    makePageUrl, // enables /page/N/ paging
    categories: [
      {
        key: "whisky",
        label: "Whisky",
        startUrl: "https://gullliquorstore.com/product-category/spirits/?spirit_type=whisky",
        discoveryStartPage: 3,
        discoveryStep: 2,
        pageConcurrency: 1,
        pageStaggerMs: 10000,
        discoveryDelayMs: 10000,
      },
      {
        key: "rum",
        label: "Rum",
        startUrl: "https://gullliquorstore.com/product-category/spirits/?spirit_type=rum",
        discoveryStartPage: 3,
        discoveryStep: 2,
        pageConcurrency: 1,
        pageStaggerMs: 10000,
        discoveryDelayMs: 10000,
      },
    ],
  };
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
createStore,
|
createStore,
|
||||||
parseProductsGull,
|
parseProductsGull,
|
||||||
hydrateGullSkus,
|
hydrateGullSkus,
|
||||||
extractGullSkuFromProductPage,
|
extractGullSkuFromProductPage,
|
||||||
isGeneratedUrlSku,
|
isGeneratedUrlSku,
|
||||||
normalizeGullSku,
|
normalizeGullSku,
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -18,24 +18,24 @@ const { createStore: createWillowPark } = require("./willowpark");
|
||||||
const { createStore: createArc } = require("./arc");
|
const { createStore: createArc } = require("./arc");
|
||||||
|
|
||||||
/**
 * Build the list of all store definitions.
 *
 * @param {object} [opts]
 * @param {string} [opts.defaultUa] - User-agent passed to every store factory.
 * @returns {object[]} One store definition per factory, in the order listed below.
 */
function createStores({ defaultUa } = {}) {
  return [
    createKWM(defaultUa),
    createCraftCellars(defaultUa),
    createSierra(defaultUa),
    createGull(defaultUa),
    createCoop(defaultUa),
    createStrath(defaultUa),
    createBCL(defaultUa),
    createBSW(defaultUa),
    createWillowPark(defaultUa),
    createVessel(defaultUa),
    createMaltsAndGrains(defaultUa),
    createKegNCork(defaultUa),
    createTudor(defaultUa),
    createVintage(defaultUa),
    createLegacy(defaultUa),
    createArc(defaultUa),
  ];
}
|
||||||
|
|
||||||
// Public API of the stores index module.
module.exports = { createStores, parseProductsSierra };
|
||||||
|
|
|
||||||
|
|
@ -4,75 +4,74 @@ const { decodeHtml, cleanText, stripTags, extractFirstImgUrl } = require("../uti
|
||||||
const { makePageUrlQueryParam } = require("../utils/url");
|
const { makePageUrlQueryParam } = require("../utils/url");
|
||||||
|
|
||||||
/**
 * Build the URL of a Keg N Cork listing page by delegating to
 * makePageUrlQueryParam with the "page" query parameter.
 *
 * @param {string} baseUrl - Category start URL.
 * @param {number} pageNum - Page number to request.
 * @returns {string} Whatever makePageUrlQueryParam returns for these arguments.
 */
function makePageUrlKegNCork(baseUrl, pageNum) {
  return makePageUrlQueryParam(baseUrl, "page", pageNum);
}
|
||||||
|
|
||||||
/**
 * Parse a Keg N Cork category listing page into product items.
 *
 * @param {*} html - Listing page markup; nullish values are treated as "".
 * @param {object} [ctx] - Optional context; ctx.store.host sets the host used for image URLs and
 *   ctx.logger.dbg (if present) receives a debug line.
 * @returns {Array<{name: string, price: string, url: string, img: *}>} One entry per product URL
 *   (when a URL repeats, the last occurrence wins). `price` is "" when none is found.
 */
function parseProductsKegNCork(html, ctx) {
  const s = String(html || "");
  const items = [];

  const base = `https://${ctx?.store?.host || "kegncork.com"}/`;

  const blocks = s.split(/<li\b[^>]*class=["'][^"']*\bproduct\b[^"']*["'][^>]*>/i);
  // ctx is optional (see `base` above), so guard it here too
  ctx?.logger?.dbg?.(`parseProductsKegNCork: li.product blocks=${Math.max(0, blocks.length - 1)} bytes=${s.length}`);

  // blocks[0] is the markup before the first product tile
  for (let i = 1; i < blocks.length; i++) {
    const block = "<li" + blocks[i];

    const mTitle = block.match(
      /<h4\b[^>]*class=["'][^"']*\bcard-title\b[^"']*["'][^>]*>[\s\S]*?<a\b[^>]*href=["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>/i,
    );
    if (!mTitle) continue;

    const url = decodeHtml(mTitle[1]).trim();
    const name = cleanText(decodeHtml(mTitle[2]));
    // only absolute http(s) product links with a non-empty name are kept
    if (!url || !/^https?:\/\//i.test(url) || !name) continue;

    let price = "";
    const mPrice = block.match(/data-product-price-without-tax[^>]*>\s*([^<]+)\s*</i);
    if (mPrice && mPrice[1]) {
      const p = cleanText(decodeHtml(mPrice[1])).replace(/\s+/g, "");
      if (p) price = p.startsWith("$") ? p : `$${p}`;
    } else {
      // no dedicated price element: take the first dollar amount in the price section text
      const priceSection = block.match(/data-test-info-type=["']price["'][\s\S]*?<\/div>\s*<\/div>/i)?.[0] || "";
      const mDollar = cleanText(decodeHtml(stripTags(priceSection))).match(/\$\s*\d+(?:\.\d{2})?/);
      if (mDollar) price = mDollar[0].replace(/\s+/g, "");
    }

    const img = extractFirstImgUrl(block, base);

    items.push({ name, price, url, img });
  }

  // de-duplicate by URL
  const uniq = new Map();
  for (const it of items) uniq.set(it.url, it);
  return [...uniq.values()];
}
|
||||||
|
|
||||||
|
|
||||||
/**
 * Build the store definition for Keg N Cork.
 *
 * @param {string} defaultUa - User-agent string stored on the definition as `ua`.
 * @returns {object} Store definition: identity fields, parser, paging function and categories.
 */
function createStore(defaultUa) {
  return {
    key: "kegncork",
    name: "Keg N Cork",
    host: "kegncork.com",
    ua: defaultUa,
    parseProducts: parseProductsKegNCork,
    makePageUrl: makePageUrlKegNCork,
    categories: [
      {
        key: "whisky",
        label: "Whisky",
        startUrl: "https://kegncork.com/whisky/?page=1",
        discoveryStartPage: 5,
      },
      {
        key: "rum",
        label: "Rum",
        startUrl: "https://kegncork.com/rum/?page=1",
        discoveryStartPage: 1,
      },
    ],
  };
}
|
||||||
|
|
||||||
// Public API of the Keg N Cork store module.
module.exports = { createStore };
|
||||||
|
|
|
||||||
|
|
@ -6,184 +6,186 @@ const { normalizeCspc } = require("../utils/sku");
|
||||||
const { normalizeBaseUrl } = require("../utils/url");
|
const { normalizeBaseUrl } = require("../utils/url");
|
||||||
|
|
||||||
/**
 * Build the URL of a Kensington Wine Market listing page.
 *
 * The base URL is first passed through normalizeBaseUrl and its hash is dropped.
 * Page 1 (or lower) removes any `page` query parameter; higher pages set `page=<n>`.
 *
 * @param {string} baseUrl - Category start URL.
 * @param {number} pageNum - Page number to request.
 * @returns {string} The page URL.
 */
function makePageUrlKWM(baseUrl, pageNum) {
  const u = new URL(normalizeBaseUrl(baseUrl));
  u.hash = "";

  if (pageNum <= 1) {
    u.searchParams.delete("page");
    u.search = u.searchParams.toString() ? `?${u.searchParams.toString()}` : "";
    return u.toString();
  }

  u.searchParams.set("page", String(pageNum));
  u.search = `?${u.searchParams.toString()}`;
  return u.toString();
}
|
||||||
|
|
||||||
/**
 * Collect the full markup of <div> elements whose class attribute contains
 * className as a whole word, including their nested <div> content.
 *
 * The end of each element is found by counting "<div" openings against
 * "</div>" closings, so the input is scanned as text, not parsed as HTML.
 *
 * @param {*} html - Markup to scan; nullish values are treated as "".
 * @param {string} className - Class name to look for (escaped via escapeRe before use in a RegExp).
 * @param {number} maxBlocks - Stop once this many blocks have been collected.
 * @returns {string[]} Outer HTML of each matching div, in document order.
 */
function extractDivBlocksByExactClass(html, className, maxBlocks) {
  const out = [];
  const s = String(html || "");

  const re = new RegExp(`<div\\b[^>]*class=["'][^"']*\\b${escapeRe(className)}\\b[^"']*["'][^>]*>`, "gi");

  let m;
  while ((m = re.exec(s))) {
    if (out.length >= maxBlocks) break;

    const startTagEnd = m.index + m[0].length;
    let i = startTagEnd;
    let depth = 1; // the matched opening tag itself

    while (i < s.length) {
      const nextOpen = s.indexOf("<div", i);
      const nextClose = s.indexOf("</div>", i);
      // unbalanced markup: nothing is collected for this match
      if (nextClose === -1) break;

      if (nextOpen !== -1 && nextOpen < nextClose) {
        depth++;
        i = nextOpen + 4;
        continue;
      }
      depth--;
      if (depth === 0) {
        out.push(s.slice(m.index, nextClose + 6));
        // resume the search after this block so nested matches are not reported separately
        re.lastIndex = nextClose + 6;
        break;
      }
      i = nextClose + 6;
    }
  }
  return out;
}
|
||||||
|
|
||||||
/**
 * Find the href of the `product-link` anchor inside a product block.
 *
 * An empty anchor (`<a ...></a>`) is preferred; otherwise any anchor carrying
 * the `product-link` class is used. Both attribute orders are accepted.
 *
 * @param {string} block - Markup of one product block.
 * @returns {string} The trimmed href, or "" when no such anchor exists.
 */
function kwmExtractProductLinkHref(block) {
  // 1) empty product-link anchor, class before href or href before class
  let m =
    block.match(/<a\b[^>]*class=["'][^"']*\bproduct-link\b[^"']*["'][^>]*href=["']([^"']+)["'][^>]*>\s*<\/a>/i) ||
    block.match(/<a\b[^>]*href=["']([^"']+)["'][^>]*class=["'][^"']*\bproduct-link\b[^"']*["'][^>]*>\s*<\/a>/i);

  if (m && m[1]) return m[1].trim();

  // 2) any product-link anchor, regardless of its content
  m =
    block.match(/<a\b[^>]*class=["'][^"']*\bproduct-link\b[^"']*["'][^>]*href=["']([^"']+)["']/i) ||
    block.match(/<a\b[^>]*href=["']([^"']+)["'][^>]*class=["'][^"']*\bproduct-link\b[^"']*["']/i);

  return m && m[1] ? m[1].trim() : "";
}
|
||||||
|
|
||||||
/**
 * Extract the product name from a product block.
 *
 * Uses the `data-item` attribute when present, otherwise the text of the first <h6>.
 *
 * @param {string} block - Markup of one product block.
 * @returns {string} The name after sanitizeName, or "" when neither source is present.
 */
function kwmExtractName(block) {
  const dataItem = extractHtmlAttr(block, "data-item");
  if (dataItem) return sanitizeName(dataItem);

  const m = block.match(/<h6\b[^>]*>\s*([\s\S]*?)\s*<\/h6>/i);
  if (m && m[1]) return sanitizeName(stripTags(m[1]));

  return "";
}
|
||||||
|
|
||||||
/**
 * Return the inner HTML of the first <div> whose class attribute contains
 * className as a whole word.
 *
 * The closing tag is found by counting "<div" openings against "</div>" closings.
 *
 * @param {string} html - Markup to scan.
 * @param {string} className - Class name to look for (escaped via escapeRe before use in a RegExp).
 * @returns {string} Inner HTML of the div, or "" when it is missing or never closed.
 */
function kwmExtractFirstDivByClass(html, className) {
  const re = new RegExp(`<div\\b[^>]*class=["'][^"']*\\b${escapeRe(className)}\\b[^"']*["'][^>]*>`, "i");
  const m = re.exec(html);
  if (!m) return "";
  const start = m.index + m[0].length;

  let i = start;
  let depth = 1; // the matched opening tag itself
  while (i < html.length) {
    const nextOpen = html.indexOf("<div", i);
    const nextClose = html.indexOf("</div>", i);
    if (nextClose === -1) break;

    if (nextOpen !== -1 && nextOpen < nextClose) {
      depth++;
      i = nextOpen + 4;
      continue;
    }
    depth--;
    if (depth === 0) return html.slice(start, nextClose);
    i = nextClose + 6;
  }
  return "";
}
|
||||||
|
|
||||||
/**
 * Extract the current price from a product block.
 *
 * Prefers the `data-price` attribute; otherwise reads the first dollar amount
 * in the `product-price` div after removing struck-through (`strike`) spans.
 *
 * @param {string} block - Markup of one product block.
 * @returns {string} Price such as "$24.05", or "" when none is found.
 */
function kwmExtractPrice(block) {
  const m = block.match(/\bdata-price=["']([^"']+)["']/i);
  if (m && m[1]) {
    const digits = String(m[1]).trim().replace(/[^0-9.]/g, "");
    const value = Number(digits);
    // A malformed attribute (e.g. "1.2.3") must not produce "$NaN";
    // fall through to the rendered price instead.
    if (digits && Number.isFinite(value)) return `$${value.toFixed(2)}`;
  }

  const priceDiv = kwmExtractFirstDivByClass(block, "product-price");
  if (!priceDiv) return "";

  // drop the struck-through (old) price so the first amount left is the current one
  const cleaned = String(priceDiv).replace(
    /<span\b[^>]*class=["'][^"']*\bstrike\b[^"']*["'][^>]*>[\s\S]*?<\/span>/gi,
    " ",
  );

  const txt = cleanText(decodeHtml(stripTags(cleaned)));
  const dollars = [...txt.matchAll(/\$\s*\d+(?:\.\d{2})?/g)];
  if (dollars.length) return dollars[0][0].replace(/\s+/g, "");

  return "";
}
|
||||||
|
|
||||||
/**
 * Parse a Kensington Wine Market listing page into product items.
 *
 * @param {*} html - Listing page markup; nullish values are treated as "".
 * @param {object} [ctx] - Optional context; ctx.store.host overrides the default host for resolving
 *   URLs and ctx.logger.dbg (if present) receives a debug line.
 * @returns {Array<{name: string, price: string, url: string, sku: *, img: *}>} One entry per product URL
 *   (when a URL repeats, the last occurrence wins). Blocks containing "OUT OF STOCK", or lacking a
 *   product link, a resolvable URL or a name, are skipped.
 */
function parseProductsKWM(html, ctx) {
  const s = String(html || "");
  const base = `https://${ctx?.store?.host || "kensingtonwinemarket.com"}/`;

  const blocks = extractDivBlocksByExactClass(s, "product-wrap", 5000);
  // ctx is optional (see `base` above), so guard it here too
  ctx?.logger?.dbg?.(`parseProductsKWM: productWrapBlocks=${blocks.length} bytes=${s.length}`);

  const items = [];
  for (const block of blocks) {
    if (/OUT OF STOCK/i.test(block)) continue;

    const href = kwmExtractProductLinkHref(block);
    if (!href) continue;

    let url;
    try {
      url = new URL(decodeHtml(href), base).toString();
    } catch {
      // href could not be resolved to a URL: skip this block
      continue;
    }

    const name = kwmExtractName(block);
    if (!name) continue;

    const price = kwmExtractPrice(block);
    // the SKU key is derived from the product URL
    const sku = normalizeCspc(url);

    const img = extractFirstImgUrl(block, base);

    items.push({ name, price, url, sku, img });
  }

  // de-duplicate by URL
  const uniq = new Map();
  for (const it of items) uniq.set(it.url, it);
  return [...uniq.values()];
}
|
||||||
|
|
||||||
|
|
||||||
/**
 * Build the store definition for Kensington Wine Market.
 *
 * @param {string} defaultUa - User-agent string stored on the definition as `ua`.
 * @returns {object} Store definition: identity fields, parser, paging function and categories.
 */
function createStore(defaultUa) {
  return {
    key: "kwm",
    name: "Kensington Wine Market",
    host: "kensingtonwinemarket.com",
    ua: defaultUa,
    parseProducts: parseProductsKWM,
    makePageUrl: makePageUrlKWM,
    categories: [
      {
        key: "scotch",
        label: "Scotch",
        startUrl: "https://kensingtonwinemarket.com/products/scotch/",
        discoveryStartPage: 200,
      },
      {
        key: "rum",
        label: "Rum",
        startUrl: "https://kensingtonwinemarket.com/products/liqu/rum/",
        discoveryStartPage: 20,
      },
    ],
  };
}
|
||||||
|
|
||||||
// Public API of the Kensington Wine Market store module.
module.exports = { createStore };
|
||||||
|
|
|
||||||
|
|
@ -9,57 +9,57 @@ const { buildDbObject, writeJsonAtomic } = require("../tracker/db");
|
||||||
const { addCategoryResultToReport } = require("../tracker/report");
|
const { addCategoryResultToReport } = require("../tracker/report");
|
||||||
|
|
||||||
/**
 * Format a byte count for log columns: humanBytes output left-padded with spaces to width 8.
 *
 * @param {number} bytes - Byte count handed to humanBytes.
 * @returns {string} Padded size string (longer values are not truncated).
 */
function kbStr(bytes) {
  return humanBytes(bytes).padStart(8, " ");
}
|
||||||
|
|
||||||
/**
 * Turn a raw SKU-like value into the key used for this store's items.
 *
 * Resolution order:
 *   1. empty input            -> ""
 *   2. `normalizeCspc(raw)`   -> returned as-is when truthy
 *   3. first standalone run of 1-11 digits -> `id:<digits>`
 *   4. otherwise              -> `normalizeSkuKey(raw, { storeLabel, url })`
 *
 * @param {*} rawSku - Raw SKU (or any text to mine for an id); null/undefined count as empty.
 * @param {{storeLabel?: string, url?: string}} [opts] - Context forwarded to `normalizeSkuKey`.
 *   Defaults to `{}` so a one-argument call no longer throws on destructuring.
 * @returns {string} Normalized key, or "" when the input is empty.
 */
function normalizeLegacySku(rawSku, { storeLabel, url } = {}) {
  const raw = String(rawSku ?? "").trim();
  if (!raw) return "";

  const cspc = normalizeCspc(raw);
  if (cspc) return cspc;

  const m = raw.match(/\b(\d{1,11})\b/);
  if (m && m[1]) return `id:${m[1]}`;

  return normalizeSkuKey(raw, { storeLabel, url });
}
|
||||||
|
|
||||||
/**
 * Format a millisecond duration as seconds for the progress log.
 *
 * Durations that round to less than 10.0s keep one decimal ("1.2s");
 * longer ones are rounded to whole seconds ("12s"). Non-finite input is
 * treated as 0.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Text left-padded with spaces to 7 characters.
 */
function secStr(ms) {
  const s = Number.isFinite(ms) ? ms / 1000 : 0;
  const tenths = Math.round(s * 10) / 10;
  const out = tenths < 10 ? `${tenths.toFixed(1)}s` : `${Math.round(s)}s`;
  return out.padStart(7, " ");
}
|
||||||
|
|
||||||
/**
 * Render an "i/total" page counter.
 *
 * @param {number} i - Current page number.
 * @param {number} total - Total page count; its digit count sets the width passed to `padLeft` for `i`.
 * @returns {string} `<padded i>/<total>`.
 */
function pageStr(i, total) {
  const leftW = String(total).length;
  return `${padLeft(i, leftW)}/${total}`;
}
|
||||||
|
|
||||||
/**
 * Render progress as a whole-number percentage.
 *
 * @param {number} done - Completed units.
 * @param {number} total - Total units; a falsy total yields 0 instead of dividing by zero.
 * @returns {string} Floored percentage passed through `padLeft(pct, 3)` and followed by "%".
 */
function pctStr(done, total) {
  const pct = total ? Math.floor((done / total) * 100) : 0;
  return `${padLeft(pct, 3)}%`;
}
|
||||||
|
|
||||||
/**
 * Format a numeric value as a dollar price string ("$12.50").
 *
 * @param {*} n - Number or numeric string.
 * @returns {string} `$` + value with two decimals, or "" when the input is
 *   missing (null / undefined / blank string) or not a finite number.
 */
function cad(n) {
  // Number(null) and Number("") are both 0; without this guard a missing
  // price would render as "$0.00" and stop callers' `||` fallback chains.
  if (n == null || (typeof n === "string" && n.trim() === "")) return "";

  const x = Number(n);
  if (!Number.isFinite(x)) return "";
  return `$${x.toFixed(2)}`;
}
|
||||||
|
|
||||||
/**
 * Make a possibly relative / protocol-relative URL absolute.
 *
 * @param {*} raw - URL text; falsy values are treated as empty.
 * @returns {string} "" for empty input; `https:`-prefixed for "//host/..." input;
 *   the input unchanged when it already starts with http(s)://; otherwise the
 *   input resolved against https://www.legacyliquorstore.com/ (or the trimmed
 *   input itself if resolution throws).
 */
function normalizeAbsUrl(raw) {
  const s = String(raw || "").trim();
  if (!s) return "";
  if (s.startsWith("//")) return `https:${s}`;
  if (/^https?:\/\//i.test(s)) return s;

  try {
    return new URL(s, "https://www.legacyliquorstore.com/").toString();
  } catch {
    // Best effort: hand back the trimmed input rather than failing the item.
    return s;
  }
}
|
||||||
|
|
||||||
// GraphQL endpoint that legacyFetchPage POSTs the products query to.
const LEGACY_GQL_URL = "https://production-storefront-api-hagnfhf3sq-uc.a.run.app/graphql";
|
||||||
|
|
@ -140,184 +140,201 @@ query(
|
||||||
`;
|
`;
|
||||||
|
|
||||||
/**
 * Find the first variant of a product that has stock.
 *
 * @param {{variants?: Array<{quantity?: *}>}|null|undefined} p - Product record.
 * @returns {object|null} First entry of `p.variants` whose `quantity` coerces to a
 *   finite number greater than 0, or null when there is none (or `variants`
 *   is not an array).
 */
function pickInStockVariant(p) {
  const vars = Array.isArray(p?.variants) ? p.variants : [];
  for (const v of vars) {
    const q = Number(v?.quantity);
    if (Number.isFinite(q) && q > 0) return v;
  }
  return null;
}
|
||||||
|
|
||||||
/**
 * Convert one product record from the storefront API into a tracker item.
 *
 * @param {object} p - Product record; reads `slug`, `name`, `priceFrom`, `priceTo`, `variants`.
 * @param {object} ctx - Scan context; reads `ctx.cat.key` and `ctx.store.name`.
 * @returns {{name: string, price: string, url: string, sku: string, img: string}|null}
 *   The item, or null when the product has no in-stock variant, no slug, or no usable name.
 */
function legacyProductToItem(p, ctx) {
  const v = pickInStockVariant(p);
  if (!v) return null;

  const slug = String(p?.slug || "").trim();
  if (!slug) return null;

  const base = "https://www.legacyliquorstore.com";
  // Matches observed pattern: /LL/product/spirits/<category>/<slug>
  const url = new URL(
    `/LL/product/spirits/${encodeURIComponent(ctx.cat.key)}/${encodeURIComponent(slug)}`,
    base,
  ).toString();

  // Prefer the variant's full name; otherwise join product name and variant short name.
  const nameRaw =
    String(v?.fullName || "").trim() ||
    [String(p?.name || "").trim(), String(v?.shortName || "").trim()].filter(Boolean).join(" | ");
  const name = String(nameRaw || "").trim();
  if (!name) return null;

  const price = cad(v?.price) || cad(p?.priceFrom) || cad(p?.priceTo) || "";

  // Fall back to mining the product URL when the variant SKU yields nothing.
  const sku =
    normalizeLegacySku(v?.sku, { storeLabel: ctx.store.name, url }) ||
    normalizeLegacySku(url, { storeLabel: ctx.store.name, url }) ||
    "";
  const img = normalizeAbsUrl(v?.image || "");

  return { name, price, url, sku, img };
}
|
||||||
|
|
||||||
/**
 * Fetch one page of products for the current category from the GraphQL endpoint.
 *
 * @param {object} ctx - Scan context; reads `ctx.cat.allTags`, `ctx.cat.key`,
 *   `ctx.store.ua` and calls `ctx.http.fetchJsonWithRetry`.
 * @param {string|null} pageCursor - Cursor for the page to fetch; falsy requests the first page.
 * @param {number} pageLimit - Page size sent as the `pageLimit` variable.
 * @returns {Promise<*>} Whatever `ctx.http.fetchJsonWithRetry` resolves to.
 */
async function legacyFetchPage(ctx, pageCursor, pageLimit) {
  const body = {
    query: PRODUCTS_QUERY,
    variables: {
      allTags: ctx.cat.allTags || null,
      anyTags: null,
      collectionSlug: null,
      countries: null,
      isBestSeller: null,
      isNewArrival: null,
      isFeatured: null,
      isFeaturedOnHomepage: null,
      isOnSale: null,
      isStaffPick: null,
      pageCursor: pageCursor || null,
      pageLimit: pageLimit,
      pointsMin: null,
      priceMin: null,
      priceMax: null,
      quantityMin: null,
      regions: null,
      brandValue: null,
      searchValue: null,
      sortOrder: "asc",
      sortBy: "name",
      storeId: "LL",
    },
  };

  return await ctx.http.fetchJsonWithRetry(
    LEGACY_GQL_URL,
    // Second argument is a per-request tag built from category key and cursor.
    `legacy:${ctx.cat.key}:${pageCursor || "first"}`,
    ctx.store.ua,
    {
      method: "POST",
      headers: {
        Accept: "application/json",
        "content-type": "application/json",
        Origin: "https://www.legacyliquorstore.com",
        Referer: "https://www.legacyliquorstore.com/",
      },
      body: JSON.stringify(body),
    },
  );
}
|
||||||
|
|
||||||
/**
 * Scan one Legacy Liquor category: page through the API, merge the discovered
 * items into the previous DB, write the DB file and record results in the report.
 *
 * @param {object} ctx - Scan context; reads `config.maxPages`, `logger`, `http`,
 *   `catPrefixOut`, `store`, `cat`, `dbFile`.
 * @param {*} prevDb - Previous DB state, passed straight to `mergeDiscoveredIntoDb`.
 * @param {object} report - Mutated in place: a row is pushed onto `report.categories`
 *   and the counters in `report.totals` are incremented.
 * @returns {Promise<void>}
 */
async function scanCategoryLegacyLiquor(ctx, prevDb, report) {
  const t0 = Date.now();
  const pageLimit = 100;

  // Keyed by item URL, so a product seen on two pages is stored once.
  const discovered = new Map();

  let cursor = null;
  let page = 0;
  let done = 0;
  // A null maxPages means "no configured limit"; 5000 is the hard safety cap.
  const maxPagesCap = ctx.config.maxPages === null ? 5000 : ctx.config.maxPages;

  while (page < maxPagesCap) {
    page++;

    let r;
    try {
      r = await legacyFetchPage(ctx, cursor, pageLimit);
    } catch (e) {
      // NOTE(review): a failed fetch stops paging, but the partial `discovered`
      // map is still merged and written below — confirm mergeDiscoveredIntoDb
      // does not treat the unseen items as removed.
      ctx.logger.warn(`${ctx.catPrefixOut} | LegacyLiquor fetch failed p${page}: ${e?.message || e}`);
      break;
    }

    const items = r?.json?.data?.products?.items;
    const next = r?.json?.data?.products?.nextPageCursor;

    const arr = Array.isArray(items) ? items : [];
    let kept = 0;

    for (const p of arr) {
      const it = legacyProductToItem(p, ctx);
      if (!it) continue;
      discovered.set(it.url, it);
      kept++;
    }

    done++;
    // The total page count is unknown while paging, so `done` is passed as both current and total.
    ctx.logger.ok(
      `${ctx.catPrefixOut} | Page ${pageStr(done, done)} | ${String(r.status || "").padEnd(3)} | ${pctStr(done, done)} | kept=${padLeft(
        kept,
        3,
      )} | bytes=${kbStr(r.bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}`,
    );

    if (!next || !arr.length) break;
    if (next === cursor) break; // safety
    cursor = next;
  }

  const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, {
    storeLabel: ctx.store.name,
  });
  const dbObj = buildDbObject(ctx, merged);
  writeJsonAtomic(ctx.dbFile, dbObj);

  const elapsed = Date.now() - t0;
  ctx.logger.ok(
    `${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`,
  );

  report.categories.push({
    store: ctx.store.name,
    label: ctx.cat.label,
    key: ctx.cat.key,
    dbFile: ctx.dbFile,
    scannedPages: Math.max(1, page),
    discoveredUnique: discovered.size,
    newCount: newItems.length,
    updatedCount: updatedItems.length,
    removedCount: removedItems.length,
    restoredCount: restoredItems.length,
    elapsedMs: elapsed,
  });
  report.totals.newCount += newItems.length;
  report.totals.updatedCount += updatedItems.length;
  report.totals.removedCount += removedItems.length;
  report.totals.restoredCount += restoredItems.length;

  addCategoryResultToReport(
    report,
    ctx.store.name,
    ctx.cat.label,
    newItems,
    updatedItems,
    removedItems,
    restoredItems,
  );
}
|
||||||
|
|
||||||
/**
 * Build the store definition object for Legacy Liquor.
 *
 * Unlike the HTML-parsing stores, this definition supplies its own
 * `scanCategory` implementation (`scanCategoryLegacyLiquor`).
 *
 * @param {string} defaultUa - Value stored on the definition as `ua`.
 * @returns {object} Store definition with identity fields, `scanCategory` and categories.
 */
function createStore(defaultUa) {
  return {
    key: "legacyliquor",
    name: "Legacy Liquor",
    host: "www.legacyliquorstore.com",
    ua: defaultUa,
    scanCategory: scanCategoryLegacyLiquor,
    categories: [
      {
        key: "whisky",
        label: "Whisky",
        startUrl: "https://www.legacyliquorstore.com/LL/category/spirits/whisky",
        // Sent as the `allTags` GraphQL variable by legacyFetchPage.
        allTags: ["spirits", "whisky"],
      },
      {
        key: "rum",
        label: "Rum",
        startUrl: "https://www.legacyliquorstore.com/LL/category/spirits/rum",
        allTags: ["spirits", "rum"],
      },
    ],
  };
}
|
||||||
|
|
||||||
// Public surface of this store module: only the factory is exported.
module.exports = { createStore };
|
||||||
|
|
|
||||||
|
|
@ -5,103 +5,104 @@ const { normalizeCspc } = require("../utils/sku");
|
||||||
const { extractPriceFromTmbBlock } = require("../utils/woocommerce");
|
const { extractPriceFromTmbBlock } = require("../utils/woocommerce");
|
||||||
|
|
||||||
/**
 * Category filter: keep everything except out-of-stock items and items whose
 * category slugs contain "gin", "tequila" or "mezcal" as a whole word.
 *
 * @param {{inStock?: boolean, cats?: string[]}|null|undefined} item - Parsed product.
 * @returns {boolean} false when `item.inStock === false` or an excluded
 *   category matches (case-insensitive, `\b`-delimited); true otherwise.
 */
function allowMaltsExcludeGinTequilaMezcal(item) {
  if (item && item.inStock === false) return false;

  const cats = Array.isArray(item?.cats) ? item.cats : [];
  const has = (re) => cats.some((c) => re.test(String(c || "")));

  if (has(/\bgin\b/i)) return false;
  if (has(/\btequila\b/i) || has(/\bmezcal\b/i)) return false;

  return true;
}
|
||||||
|
|
||||||
/**
 * Parse a Malts & Grains shop listing page into product items.
 *
 * Each `<li class="... product ...">` block is examined; blocks marked out of
 * stock, or lacking a product link or title, are skipped.
 *
 * @param {string} html - Listing page HTML; falsy input is treated as "".
 * @param {object} [ctx] - Parse context; optionally reads `ctx.logger.dbg` and `ctx.store.host`.
 * @returns {Array<{name: string, price: *, url: string, sku: *, img: *, cats: string[], inStock: boolean}>}
 *   Items de-duplicated by URL (the last occurrence wins).
 */
function parseProductsMaltsAndGrains(html, ctx) {
  const s = String(html || "");
  const items = [];

  const re = /<li\b[^>]*class=["'][^"']*\bproduct\b[^"']*["'][^>]*>[\s\S]*?<\/li>/gi;
  const blocks = [...s.matchAll(re)].map((m) => m[0] || "");
  // Guard `ctx` here too: the host lookup below already tolerates a missing ctx.
  ctx?.logger?.dbg?.(`parseProductsMaltsAndGrains: li.product blocks=${blocks.length} bytes=${s.length}`);

  const base = `https://${(ctx && ctx.store && ctx.store.host) || "maltsandgrains.store"}/`;

  for (const block of blocks) {
    const classAttr = extractHtmlAttr(block, "class");

    const isOut =
      /\boutofstock\b/i.test(classAttr) ||
      /ast-shop-product-out-of-stock/i.test(block) ||
      />\s*out of stock\s*</i.test(block);
    if (isOut) continue;

    // Category slugs come from the `product_cat-<slug>` class names.
    const cats = [];
    for (const m of String(classAttr || "").matchAll(/\bproduct_cat-([a-z0-9_-]+)\b/gi)) {
      const v = String(m[1] || "")
        .trim()
        .toLowerCase();
      if (v) cats.push(v);
    }

    // Product link: try href-before-class, then class-before-href, then any /product/ href.
    let href =
      block.match(
        /<a\b[^>]*href=["']([^"']+)["'][^>]*class=["'][^"']*\b(woocommerce-LoopProduct-link|woocommerce-loop-product__link|ast-loop-product__link)\b/i,
      )?.[1] ||
      block.match(
        /<a\b[^>]*class=["'][^"']*\b(woocommerce-LoopProduct-link|woocommerce-loop-product__link|ast-loop-product__link)\b[^"']*["'][^>]*href=["']([^"']+)["']/i,
      )?.[2] ||
      block.match(/<a\b[^>]*href=["']([^"']*\/product\/[^"']+)["']/i)?.[1];

    if (!href) continue;

    let url = "";
    try {
      url = new URL(decodeHtml(href), base).toString();
    } catch {
      continue;
    }
    if (!/^https?:\/\//i.test(url)) continue;

    const mTitle = block.match(
      /<h2\b[^>]*class=["'][^"']*\bwoocommerce-loop-product__title\b[^"']*["'][^>]*>([\s\S]*?)<\/h2>/i,
    );
    const name = mTitle && mTitle[1] ? cleanText(decodeHtml(stripTags(mTitle[1]))) : "";
    if (!name) continue;

    const price = extractPriceFromTmbBlock(block);

    const sku = normalizeCspc(
      block.match(/\bdata-product_sku=["']([^"']+)["']/i)?.[1] ||
        block.match(/\bSKU[:\s]*([0-9]{6})\b/i)?.[1] ||
        "",
    );

    const img = extractFirstImgUrl(block, base);

    items.push({ name, price, url, sku, img, cats, inStock: true });
  }

  const uniq = new Map();
  for (const it of items) uniq.set(it.url, it);
  return [...uniq.values()];
}
|
||||||
|
|
||||||
|
|
||||||
/**
 * Build the store definition object for Malts & Grains.
 *
 * @param {string} defaultUa - Value stored on the definition as `ua`.
 * @returns {object} Store definition with identity fields, `parseProducts` and categories.
 */
function createStore(defaultUa) {
  return {
    key: "maltsandgrains",
    name: "Malts & Grains",
    host: "maltsandgrains.store",
    ua: defaultUa,
    parseProducts: parseProductsMaltsAndGrains,
    categories: [
      {
        key: "all-minus-gin-tequila-mezcal",
        label: "All Spirits",
        startUrl: "https://maltsandgrains.store/shop/page/1/",
        // NOTE(review): presumably a page-number hint for pagination discovery — confirm against the scanner.
        discoveryStartPage: 15,
        // Despite the property name, this predicate receives the whole item, not just its URL.
        allowUrl: allowMaltsExcludeGinTequilaMezcal,
      },
    ],
  };
}
|
||||||
|
|
||||||
// Public surface of this store module: only the factory is exported.
module.exports = { createStore };
|
||||||
|
|
|
||||||
|
|
@ -10,226 +10,222 @@ const { mergeDiscoveredIntoDb } = require("../tracker/merge");
|
||||||
const { addCategoryResultToReport } = require("../tracker/report");
|
const { addCategoryResultToReport } = require("../tracker/report");
|
||||||
|
|
||||||
/**
 * Category filter: keep only items on sierraspringsliquor.ca whose URL
 * contains "rum", "whisky" or "whiskey" as a whole word.
 *
 * @param {{url?: string}|null|undefined} item - Parsed product.
 * @returns {boolean} true when the lower-cased URL is on the expected host and
 *   matches the rum/whisk(e)y word pattern; false otherwise (including no URL).
 */
function allowSierraUrlRumWhisky(item) {
  const u = item && item.url ? String(item.url) : "";
  const s = u.toLowerCase();
  if (!/^https?:\/\/sierraspringsliquor\.ca\//.test(s)) return false;
  return /\b(rum|whisk(?:e)?y)\b/.test(s);
}

// Keep old name referenced historically in this store config
const allowSierraSpiritsLiquorUrlRumWhisky = allowSierraUrlRumWhisky;
|
||||||
|
|
||||||
/**
 * Format a WooCommerce Store API `prices` object as a display string.
 *
 * The raw amount is taken from `price`, then `regular_price`, then
 * `sale_price` (first non-nullish), stripped to digits and divided by
 * 10^currency_minor_unit.
 *
 * @param {object|null|undefined} prices - The product's `prices` object.
 * @returns {string|null} `<prefix><amount><suffix>`, or null when there is no
 *   prices object, no raw amount, or the raw amount contains no digits.
 */
function formatWooStorePrice(prices) {
  if (!prices) return null;

  const minor = Number.isFinite(prices.currency_minor_unit) ? prices.currency_minor_unit : 2;
  const raw = prices.price ?? prices.regular_price ?? prices.sale_price;
  if (raw == null) return null;

  const digits = String(raw).replace(/[^\d]/g, "");
  // Number("") is 0, so an empty/non-numeric raw value would otherwise be
  // reported as a real price of zero.
  if (!digits) return null;

  const n = Number(digits);
  if (!Number.isFinite(n)) return null;

  const value = (n / Math.pow(10, minor)).toFixed(minor);
  const prefix = prices.currency_prefix ?? prices.currency_symbol ?? "$";
  const suffix = prices.currency_suffix ?? "";
  return `${prefix}${value}${suffix}`;
}
|
||||||
|
|
||||||
/**
 * Parse a WooCommerce Store API products response (a JSON array) into items.
 *
 * @param {string} payload - Response body text.
 * @param {object} [ctx] - Parse context; optionally reads `ctx.store.name` and
 *   the `ctx.cat.allowUrl` item predicate.
 * @returns {Array<{name: string, price: string|null, url: string, sku: *, img: string|null}>}
 *   Items de-duplicated by URL (the last occurrence wins). Empty when the
 *   payload is not valid JSON or not an array.
 */
function parseWooStoreProductsJson(payload, ctx) {
  const items = [];

  let data = null;
  try {
    data = JSON.parse(payload);
  } catch (_) {
    // Not JSON: report "no items" rather than failing the scan.
    return items;
  }

  if (!Array.isArray(data)) return items;

  for (const p of data) {
    const url = p && p.permalink ? String(p.permalink) : "";
    if (!url) continue;

    const name = p && p.name ? cleanText(decodeHtml(String(p.name))) : "";
    if (!name) continue;

    const price = formatWooStorePrice(p.prices);

    // Prefer a non-blank string SKU; otherwise fall back to the product id.
    // `p.id || p.id === 0` keeps a numeric id of 0, which the previous
    // `p.id ?? p.id === 0` (parsed as `p.id ?? (p.id === 0)`) discarded.
    const skuText = typeof p.sku === "string" ? p.sku.trim() : "";
    const rawSku = skuText || (p.id || p.id === 0 ? String(p.id) : "");

    // Purely numeric values are tagged so they are treated as ids.
    const taggedSku = /^\d{1,11}$/.test(rawSku) ? `id:${rawSku}` : rawSku;
    const sku = normalizeSkuKey(taggedSku, { storeLabel: ctx?.store?.name, url });

    const firstSrc = Array.isArray(p.images) ? p.images[0]?.src : undefined;
    const img = firstSrc ? String(firstSrc) : null;

    const item = { name, price, url, sku, img };

    const allowUrl = ctx?.cat?.allowUrl;
    if (typeof allowUrl === "function" && !allowUrl(item)) continue;

    items.push(item);
  }

  const uniq = new Map();
  for (const it of items) uniq.set(it.url, it);
  return [...uniq.values()];
}
|
||||||
|
|
||||||
/**
 * Parse a WooCommerce HTML listing page into product items.
 *
 * The page is split on `<li`; chunks whose class contains `product` (but not
 * `product-category`) are treated as product cards.
 *
 * @param {string} html - Listing page HTML; falsy input is treated as "".
 * @param {object} [ctx] - Parse context; optionally reads `ctx.store.host`,
 *   `ctx.store.name` and the `ctx.cat.allowUrl` item predicate.
 * @returns {Array<{name: string, price: *, url: string, sku: *, img: *}>}
 *   Items de-duplicated by URL (the last occurrence wins).
 */
function parseWooProductsHtml(html, ctx) {
  const s = String(html || "");
  const items = [];

  const base = `https://${(ctx && ctx.store && ctx.store.host) || "sierraspringsliquor.ca"}/`;
  const parts = s.split(/<li\b/i);

  // parts[0] is whatever precedes the first <li, so start at 1.
  for (let i = 1; i < parts.length; i++) {
    const chunk = "<li" + parts[i];

    if (!/class=["'][^"']*\bproduct\b/i.test(chunk)) continue;
    if (/class=["'][^"']*\bproduct-category\b/i.test(chunk)) continue;

    // Cut the chunk at its first </li> (5 characters long) when present.
    const endIdx = chunk.search(/<\/li>/i);
    const block = endIdx >= 0 ? chunk.slice(0, endIdx + 5) : chunk;

    // First link that is not an add-to-cart / cart / checkout action.
    const hrefs = [...block.matchAll(/<a\b[^>]*href=["']([^"']+)["']/gi)].map((m) => m[1]);
    const href = hrefs.find((h) => !/add-to-cart=|\/cart\/|\/checkout\//i.test(h)) || "";
    if (!href) continue;

    let url;
    try {
      url = new URL(decodeHtml(href), base).toString();
    } catch {
      // A malformed href should only drop this card, not abort the whole page.
      continue;
    }

    const nameHtml =
      block.match(
        /<h2\b[^>]*class=["'][^"']*woocommerce-loop-product__title[^"']*["'][^>]*>([\s\S]*?)<\/h2>/i,
      )?.[1] ||
      block.match(/<h3\b[^>]*>([\s\S]*?)<\/h3>/i)?.[1] ||
      "";
    const name = cleanText(decodeHtml(nameHtml));
    if (!name) continue;

    const price = extractPriceFromTmbBlock(block);

    const rawSku =
      block.match(/\bdata-product_sku=["']([^"']+)["']/i)?.[1] ||
      block.match(/\bdata-product_id=["']([^"']+)["']/i)?.[1] ||
      "";

    // Purely numeric values are tagged so they are treated as ids.
    const taggedSku = /^\d{1,11}$/.test(String(rawSku).trim())
      ? `id:${String(rawSku).trim()}`
      : String(rawSku || "").trim();

    const sku = normalizeSkuKey(taggedSku, { storeLabel: ctx?.store?.name, url });
    const img = extractFirstImgUrl(block, base);

    const item = { name, price, url, sku, img };

    const allowUrl = ctx?.cat?.allowUrl;
    if (typeof allowUrl === "function" && !allowUrl(item)) continue;

    items.push(item);
  }

  const uniq = new Map();
  for (const it of items) uniq.set(it.url, it);
  return [...uniq.values()];
}
|
||||||
|
|
||||||
/**
 * Parse a Sierra Springs listing response into product items.
 *
 * Formats are tried in this order:
 *  1. JSON (body starts with "[" or "{") -> delegated to parseWooStoreProductsJson.
 *  2. Theme HTML made of `<div class="tmb ...">` tiles -> parsed here.
 *  3. Anything else -> delegated to parseWooProductsHtml.
 *
 * @param {string} body - Raw response text.
 * @param {object} ctx - Scan context; reads ctx.store.host, ctx.store.name,
 *   ctx.cat.allowUrl and ctx.logger.dbg when present.
 * @returns {Array<object>} Items; tiles yield { name, price, url, sku, img }, de-duplicated by url.
 */
function parseProductsSierra(body, ctx) {
  const s = String(body || "");
  const t = s.trimStart();
  // Debug logging is optional; never let a missing ctx/logger break parsing.
  const dbg = (msg) => ctx?.logger?.dbg?.(msg);

  if (t.startsWith("[") || t.startsWith("{")) {
    const jsonItems = parseWooStoreProductsJson(s, ctx);
    dbg(`parseProductsSierra: storeApiItems=${jsonItems.length} bytes=${s.length}`);
    return jsonItems;
  }

  const blocks = s.split(/<div class="tmb\b/i);
  dbg(`parseProductsSierra: tmbBlocks=${Math.max(0, blocks.length - 1)} bytes=${s.length}`);

  if (blocks.length <= 1) {
    // No theme tiles on the page: fall back to the generic Woo HTML parser.
    const woo = parseWooProductsHtml(s, ctx);
    dbg(`parseProductsSierra: wooItems=${woo.length} bytes=${s.length}`);
    return woo;
  }

  const base = `https://${(ctx && ctx.store && ctx.store.host) || "sierraspringsliquor.ca"}/`;
  const allowUrl = ctx?.cat?.allowUrl;

  // Keyed by url so a later tile for the same product replaces the earlier one.
  const uniq = new Map();

  for (let i = 1; i < blocks.length; i++) {
    // split() consumed the delimiter; put it back so the tile is self-contained.
    const block = '<div class="tmb' + blocks[i];

    const titleMatch = block.match(
      /<h3\b[^>]*class=["'][^"']*t-entry-title[^"']*["'][^>]*>\s*<a\b[^>]*href=["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>\s*<\/h3>/i,
    );
    if (!titleMatch) continue;

    let url;
    try {
      url = new URL(decodeHtml(titleMatch[1]), base).toString();
    } catch {
      // A malformed href should drop this one tile, not abort the whole page.
      continue;
    }

    const name = cleanText(decodeHtml(titleMatch[2]));
    if (!name) continue;

    const price = extractPriceFromTmbBlock(block);

    const rawSku =
      block.match(/\bdata-product_sku=["']([^"']+)["']/i)?.[1] ||
      block.match(/\bSKU[:\s]*([0-9]{6})\b/i)?.[1] ||
      "";

    // Purely numeric SKUs (1-11 digits) are tagged "id:<digits>" before normalization.
    const trimmedSku = String(rawSku).trim();
    const taggedSku = /^\d{1,11}$/.test(trimmedSku) ? `id:${trimmedSku}` : rawSku;

    const sku = normalizeSkuKey(taggedSku, { storeLabel: ctx?.store?.name, url });
    const img = extractFirstImgUrl(block, base);

    const item = { name, price, url, sku, img };

    if (typeof allowUrl === "function" && !allowUrl(item)) continue;

    uniq.set(item.url, item);
  }

  return [...uniq.values()];
}
|
||||||
|
|
||||||
/**
 * Find the numeric `term-<id>` class in a category page's HTML.
 *
 * A `term-<digits>` that follows `tax-product_cat` (within 400 non-quote
 * characters) is preferred; otherwise the first `term-<digits>` anywhere is used.
 *
 * @param {string} html - Category page markup.
 * @returns {number|null} The id as a number, or null when nothing matches.
 */
function extractProductCatTermId(html) {
  const page = String(html || "");

  // Typical body classes contain: "tax-product_cat term-<slug> term-1131 ..."
  const patterns = [/tax-product_cat[^"']{0,400}\bterm-(\d{1,10})\b/i, /\bterm-(\d{1,10})\b/i];

  for (const pattern of patterns) {
    const hit = page.match(pattern);
    if (!hit) continue;
    const id = Number(hit[1]);
    return Number.isFinite(id) ? id : null;
  }

  return null;
}
|
||||||
|
|
||||||
/**
 * Resolve the Woo category id for ctx.cat.
 *
 * Lookup order: ctx.cat.wooCategoryId (manual override), then the cached
 * ctx.cat._wooCategoryId, then a fetch of ctx.cat.startUrl whose HTML is
 * scanned with extractProductCatTermId. The outcome (id or null) is stored
 * back on ctx.cat._wooCategoryId.
 *
 * @param {object} ctx - Scan context (cat, store, http, logger, catPrefixOut).
 * @returns {Promise<number|null>} The category id, or null when it cannot be inferred.
 */
async function getWooCategoryIdForCat(ctx) {
  // allow manual override if you ever want it
  const manual = ctx?.cat?.wooCategoryId;
  if (Number.isFinite(manual)) return ctx.cat.wooCategoryId;

  // cache per category object
  const cached = ctx?.cat?._wooCategoryId;
  if (Number.isFinite(cached)) return ctx.cat._wooCategoryId;

  // infer from the HTML category page so startUrl stays stable (DB filenames stay stable)
  const response = await ctx.http.fetchTextWithRetry(ctx.cat.startUrl, "discover", ctx.store.ua);
  const { text, finalUrl } = response;
  const termId = extractProductCatTermId(text);

  if (termId) {
    ctx.logger.ok(`${ctx.catPrefixOut} | Woo category id: ${termId} (${finalUrl || ctx.cat.startUrl})`);
    ctx.cat._wooCategoryId = termId;
    return termId;
  }

  ctx.logger.warn(
    `${ctx.catPrefixOut} | Could not infer product_cat term id from category page; falling back to HTML parsing only.`,
  );
  ctx.cat._wooCategoryId = null;
  return null;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -237,163 +233,157 @@ async function getWooCategoryIdForCat(ctx) {
|
||||||
* while keeping original startUrl (so DB hashes and "source" stay unchanged).
|
* while keeping original startUrl (so DB hashes and "source" stay unchanged).
|
||||||
*/
|
*/
|
||||||
/**
 * Scan one category through the Woo Store API (`/wp-json/wc/store/v1/products`),
 * merge the discovered products into the previous DB, write the DB file and
 * append the results to `report`.
 *
 * Returns without writing or reporting anything when no category id can be resolved.
 *
 * @param {object} ctx - Scan context (store, cat, http, logger, config, dbFile, catPrefixOut).
 * @param {*} prevDb - Previous DB state, passed through to mergeDiscoveredIntoDb.
 * @param {object} report - Mutable report; `categories` and `totals` are updated in place.
 * @returns {Promise<void>}
 */
async function scanCategoryWooStoreApi(ctx, prevDb, report) {
  const { logger } = ctx;
  const t0 = Date.now();

  const perPage = Number.isFinite(ctx.cat.perPage) ? ctx.cat.perPage : 100;
  const discovered = new Map();

  const catId = await getWooCategoryIdForCat(ctx);
  if (!catId) return;

  const apiBase = new URL(`https://${ctx.store.host}/wp-json/wc/store/v1/products`);
  apiBase.searchParams.set("per_page", String(perPage));
  apiBase.searchParams.set("category", String(catId));

  // Loop-invariant pieces, computed once.
  const allow = ctx?.cat?.allowUrl;
  // IMPORTANT:
  // Parse WITHOUT allowUrl so pagination is based on real API page size
  const ctxNoFilter = typeof allow === "function" ? { ...ctx, cat: { ...ctx.cat, allowUrl: null } } : ctx;
  const parseProducts = ctx.store.parseProducts || ctx.config.defaultParseProducts;

  const hardCap = 500;
  let page = 1;
  // Number of pages actually fetched. `page` itself overshoots by one when the
  // hard cap (rather than a break) ends the loop, so it is not used for reporting.
  let scannedPages = 0;

  while (page <= hardCap) {
    apiBase.searchParams.set("page", String(page));
    const pageUrl = apiBase.toString();

    const { text, status, bytes, ms, finalUrl } = await ctx.http.fetchTextWithRetry(
      pageUrl,
      `page:${ctx.store.key}:${ctx.cat.key}:${page}`,
      ctx.store.ua,
    );
    scannedPages++;

    const itemsAll = parseProducts(text, ctxNoFilter, finalUrl);
    const rawCount = itemsAll.length;

    // Now apply allowUrl AFTER pagination logic
    const items = typeof allow === "function" ? itemsAll.filter((it) => allow(it)) : [...itemsAll];

    logger.ok(
      `${ctx.catPrefixOut} | Page ${String(page).padStart(3, " ")} | ${String(status).padStart(3, " ")} | raw=${String(rawCount).padStart(3, " ")} kept=${String(items.length).padStart(3, " ")} | bytes=${String(bytes || 0).padStart(8, " ")} | ${(ms / 1000).toFixed(1).padStart(6, " ")}s`,
    );

    // Stop only when the API page itself is empty
    if (!rawCount) break;

    for (const it of items) discovered.set(it.url, it);

    // Last page if API returned fewer than perPage
    if (rawCount < perPage) break;

    page++;
  }

  logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}`);

  const { merged, newItems, updatedItems, removedItems, restoredItems, metaChangedItems } = mergeDiscoveredIntoDb(
    prevDb,
    discovered,
    { storeLabel: ctx.store.name },
  );

  const dbObj = buildDbObject(ctx, merged);
  writeJsonAtomic(ctx.dbFile, dbObj);

  logger.ok(`${ctx.catPrefixOut} | DB saved: ${logger.dim(ctx.dbFile)} (${dbObj.count} items)`);

  const elapsedMs = Date.now() - t0;

  report.categories.push({
    store: ctx.store.name,
    label: ctx.cat.label,
    key: ctx.cat.key,
    dbFile: ctx.dbFile,
    scannedPages,
    discoveredUnique: discovered.size,
    newCount: newItems.length,
    updatedCount: updatedItems.length,
    removedCount: removedItems.length,
    restoredCount: restoredItems.length,
    metaChangedCount: metaChangedItems.length,
    elapsedMs,
  });

  report.totals.newCount += newItems.length;
  report.totals.updatedCount += updatedItems.length;
  report.totals.removedCount += removedItems.length;
  report.totals.restoredCount += restoredItems.length;
  report.totals.metaChangedCount += metaChangedItems.length;

  addCategoryResultToReport(
    report,
    ctx.store.name,
    ctx.cat.label,
    newItems,
    updatedItems,
    removedItems,
    restoredItems,
  );
}
|
||||||
|
|
||||||
/**
 * Build the Sierra Springs store definition.
 *
 * @param {string} defaultUa - User-agent string stored on the returned object as `ua`.
 * @returns {object} Store config: key, name, host, ua, parseProducts, scanCategory, categories.
 */
function createStore(defaultUa) {
  // Paging settings shared by every category; spread into each entry so every
  // category remains its own object.
  const paging = { discoveryStartPage: 1, perPage: 100 };

  // RESTORED: original 4 categories, unchanged startUrl so DB hashes match
  const categories = [
    {
      key: "whisky",
      label: "Whisky",
      startUrl: "https://sierraspringsliquor.ca/product-category/whisky-2/",
      ...paging,
    },
    {
      key: "fine-rare",
      label: "Fine & Rare",
      startUrl: "https://sierraspringsliquor.ca/product-category/fine-rare/",
      ...paging,
    },
    {
      key: "spirits-liquor",
      label: "Spirits / Liquor",
      startUrl: "https://sierraspringsliquor.ca/product-category/spirits-liquor/",
      ...paging,
      allowUrl: allowSierraSpiritsLiquorUrlRumWhisky,
    },
    {
      key: "spirits",
      label: "Spirits",
      startUrl: "https://sierraspringsliquor.ca/product-category/spirits/",
      ...paging,
    },
  ];

  return {
    key: "sierrasprings",
    name: "Sierra Springs",
    host: "sierraspringsliquor.ca",
    ua: defaultUa,
    parseProducts: parseProductsSierra,

    // store-only override (no changes outside this file)
    scanCategory: scanCategoryWooStoreApi,

    categories,
  };
}
|
||||||
|
|
||||||
module.exports = { createStore, parseProductsSierra };
|
module.exports = { createStore, parseProductsSierra };
|
||||||
|
|
|
||||||
|
|
@ -10,502 +10,508 @@ const { buildDbObject, writeJsonAtomic } = require("../tracker/db");
|
||||||
const { addCategoryResultToReport } = require("../tracker/report");
|
const { addCategoryResultToReport } = require("../tracker/report");
|
||||||
|
|
||||||
/**
 * Format a byte count with humanBytes and right-align it to 8 characters.
 *
 * @param {number} bytes - Byte count, passed straight to humanBytes.
 * @returns {string} Space-padded label (at least 8 characters).
 */
function kbStr(bytes) {
  const label = humanBytes(bytes);
  return label.padStart(8, " ");
}
|
||||||
|
|
||||||
/**
 * Format a millisecond duration as seconds, right-aligned to 7 characters.
 *
 * Below 10s (after rounding to tenths) one decimal is shown ("1.2s"); from
 * 10s upward whole seconds are shown ("12s"). Non-finite input counts as 0.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Space-padded label.
 */
function secStr(ms) {
  const seconds = Number.isFinite(ms) ? ms / 1000 : 0;
  const roundedToTenths = Math.round(seconds * 10) / 10;
  const label = roundedToTenths < 10 ? `${roundedToTenths.toFixed(1)}s` : `${Math.round(seconds)}s`;
  return label.padStart(7, " ");
}
|
||||||
|
|
||||||
/**
 * Render "i/total", with `i` padded via padLeft to the printed width of `total`.
 *
 * @param {number} i - Current page number.
 * @param {number} total - Total page count.
 * @returns {string} For example " 3/12".
 */
function pageStr(i, total) {
  const current = padLeft(i, String(total).length);
  return `${current}/${total}`;
}
|
||||||
|
|
||||||
/**
 * Render done/total as a floored whole percentage, padded via padLeft to width 3.
 * A falsy `total` yields 0%.
 *
 * @param {number} done - Completed count.
 * @param {number} total - Total count.
 * @returns {string} Percentage label ending in "%".
 */
function pctStr(done, total) {
  let percent = 0;
  if (total) percent = Math.floor((done / total) * 100);
  return `${padLeft(percent, 3)}%`;
}
|
||||||
|
|
||||||
/**
 * Split markup into chunks that each begin at an `<article` opening tag.
 *
 * Text before the first `<article` is discarded. The tag name is matched
 * case-insensitively but always re-emitted in lowercase.
 *
 * @param {string} html - Page markup.
 * @returns {string[]} One chunk per `<article`, each running up to the next one (or end of input).
 */
function extractArticles(html) {
  const [, ...chunks] = String(html || "").split(/<article\b/i);
  return chunks.map((chunk) => `<article${chunk}`);
}
|
||||||
|
|
||||||
/**
 * Extract the first dollar amount from a string and strip whitespace and
 * thousands separators from it.
 *
 * Accepts both comma-grouped ("$1,234.56") and ungrouped ("$1234.56") amounts.
 * The grouped form must contain at least one ",ddd" group; otherwise the plain
 * digit run is taken, so an ungrouped amount is never cut off after three digits.
 *
 * @param {string} str - Text that may contain a price.
 * @returns {string} Normalized amount such as "$1234.56", or "" when no price is found.
 */
function normalizePrice(str) {
  const s = String(str || "");
  const m = s.match(/\$\s*(?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d{2})?/);
  if (!m) return "";
  return m[0].replace(/[\s,]/g, "");
}
|
||||||
|
|
||||||
/**
 * Choose the price to report for one product `<article>`.
 *
 * Any `whiskyfolk-price` div is blanked out first so its amount is never
 * picked. The remaining markup is then checked in priority order:
 *  1. the amount inside `<ins>`,
 *  2. the text right after a `regular-price-card` element,
 *  3. the first amount inside a `product-price` div, or anywhere if there is none.
 *
 * @param {string} articleHtml - Markup of a single article.
 * @returns {string} Result of normalizePrice on the chosen text.
 */
function pickPriceFromArticle(articleHtml) {
  const html = String(articleHtml || "");
  const stripped = html.replace(
    /<div\b[^>]*class=["'][^"']*\bwhiskyfolk-price\b[^"']*["'][^>]*>[\s\S]*?<\/div>/gi,
    " ",
  );

  const sale = stripped.match(/<ins\b[^>]*>[\s\S]*?(\$[\s\S]{0,32}?)<\/ins>/i);
  if (sale?.[1]) return normalizePrice(sale[1]);

  const regular = stripped.match(/class=["'][^"']*\bregular-price-card\b[^"']*["'][^>]*>\s*([^<]+)/i);
  if (regular?.[1]) return normalizePrice(regular[1]);

  const priceBox = stripped.match(/<div\b[^>]*class=["'][^"']*\bproduct-price\b[^"']*["'][^>]*>([\s\S]*?)<\/div>/i);
  if (priceBox?.[1]) return normalizePrice(priceBox[1]);

  return normalizePrice(stripped);
}
|
||||||
|
|
||||||
/**
 * Read a numeric product id from an article's markup.
 *
 * Sources are tried in order: the `<article id="...">` attribute, a
 * `post-<digits>` token, then a `data-product_id` attribute.
 *
 * @param {string} articleHtml - Markup of a single article.
 * @returns {number} The id, or 0 when none of the sources match.
 */
function extractProductIdFromArticle(articleHtml) {
  const html = String(articleHtml || "");

  const sources = [
    /<article\b[^>]*\bid=["'](\d{1,10})["']/i,
    /\bpost-(\d{1,10})\b/i,
    /\bdata-product_id=["'](\d{1,10})["']/i,
  ];

  for (const pattern of sources) {
    const found = html.match(pattern);
    if (found && found[1]) return Number(found[1]);
  }

  return 0;
}
|
||||||
|
|
||||||
/**
 * Read a six-digit SKU from an article's markup.
 *
 * Looks first at a `data-product_sku="dddddd"` attribute, then at a
 * standalone "SKU" label followed within 20 non-digit characters by six digits.
 *
 * @param {string} articleHtml - Markup of a single article.
 * @returns {string} The six digits, or "" when neither form is present.
 */
function extractSkuFromArticle(articleHtml) {
  const html = String(articleHtml || "");

  const sources = [/\bdata-product_sku=["'](\d{6})["']/i, /\bSKU\b[^0-9]{0,20}(\d{6})\b/i];

  for (const pattern of sources) {
    const found = html.match(pattern);
    if (found && found[1]) return found[1];
  }

  return "";
}
|
||||||
|
|
||||||
/**
 * Derive an "id:<digits>" key from an image URL whose path segment starts
 * with 1-11 digits followed by "-" or "_".
 *
 * @param {string} imgUrl - Image URL.
 * @returns {string} "id:<digits>", or "" when no such segment exists.
 */
function idFromImageUrl(imgUrl) {
  // /1487-1_... or /1487_... or /1487-... => 1487
  const found = /\/(\d{1,11})(?=[-_])/.exec(String(imgUrl || ""));
  if (!found || !found[1]) return "";
  return `id:${found[1]}`;
}
|
||||||
|
|
||||||
/**
 * Decide from an article's markup whether the product looks purchasable.
 *
 * Any out-of-stock marker wins over every in-stock marker. With no marker of
 * either kind the article is treated as not in stock.
 *
 * @param {string} articleHtml - Markup of a single article.
 * @returns {boolean} true when an in-stock marker is present and no out-of-stock marker is.
 */
function looksInStock(articleHtml) {
  const a = String(articleHtml || "");

  const unavailableMarkers = [/\boutofstock\b/i, /Currently\s+Unavailable/i];
  if (unavailableMarkers.some((re) => re.test(a))) return false;

  const availableMarkers = [
    /\binstock\b/i,
    /\bBottles\s+(?:Remaining|Available)\b/i,
    /Only\s+\d+\s+Bottle\s+Left/i,
    /10\+\s*Bottles\s+Available/i,
  ];

  // No trailing re-test of "instock": it is already covered by the list, so
  // reaching the end simply means no marker matched.
  return availableMarkers.some((re) => re.test(a));
}
|
||||||
|
|
||||||
/**
 * Turn one `<article>` chunk into a product record.
 *
 * Returns null when the article does not look in stock, has no link, has a
 * link that cannot be resolved to a URL, or yields an empty name.
 *
 * @param {string} articleHtml - Markup of a single article.
 * @returns {{name: string, price: string, url: string, sku: string, productId: number, img: *}|null}
 */
function parseProductFromArticle(articleHtml) {
  const html = String(articleHtml || "");
  const siteBase = "https://www.strathliquor.com/";

  if (!looksInStock(html)) return null;

  // The first anchor in the article is taken as the product link.
  const link = html.match(/<a\b[^>]*href=["']([^"']+)["']/i);
  if (!link || !link[1]) return null;

  let url;
  try {
    url = new URL(decodeHtml(link[1]), siteBase).toString();
  } catch {
    return null;
  }

  // Inner text of a matched heading: tags stripped, entities decoded, cleaned.
  const headingText = (m) => cleanText(decodeHtml(stripTags((m && m[1]) || "")));

  const titleMatch = html.match(/<h2\b[^>]*class=["'][^"']*\bproduct-title\b[^"']*["'][^>]*>([\s\S]*?)<\/h2>/i);
  const subtitleMatch = html.match(/<h3\b[^>]*class=["'][^"']*\bproduct-subtitle\b[^"']*["'][^>]*>([\s\S]*?)<\/h3>/i);
  const title = headingText(titleMatch);
  const sub = headingText(subtitleMatch);

  const name = cleanText([title, sub].filter(Boolean).join(" - "));
  if (!name) return null;

  const price = pickPriceFromArticle(html);
  const productId = extractProductIdFromArticle(html);
  const img = extractFirstImgUrl(html, siteBase);

  // SKU preference: explicit SKU in the markup, then the image-derived id, then the URL-derived value.
  const skuFromHtml = extractSkuFromArticle(html);
  const skuFromImg = idFromImageUrl(img);
  const fallbackSku = normalizeCspc(url) || "";
  const sku = skuFromHtml || skuFromImg || fallbackSku;

  return { name, price, url, sku, productId, img };
}
|
||||||
|
|
||||||
/* ---------------- Store API paging ---------------- */
|
/* ---------------- Store API paging ---------------- */
|
||||||
|
|
||||||
/**
 * Build the Woo Store API products URL that corresponds to a category page URL.
 *
 * The API URL lives on the same hostname and always sorts by date, newest first.
 * Two filters are carried over from the category URL's query string:
 *  - `_sfm__stock_status` containing "instock" -> `stock_status=instock`
 *  - `_sfm__regular_price` of the form "<min>+<max>" -> `min_price` / `max_price`
 *
 * @param {string} startUrl - Absolute category page URL.
 * @returns {URL} Store API URL with the derived query parameters.
 * @throws {TypeError} When startUrl is not a valid absolute URL.
 */
function buildStoreApiBaseUrlFromCategoryUrl(startUrl) {
  const u = new URL(startUrl);
  const api = new URL(`https://${u.hostname}/wp-json/wc/store/v1/products`);

  api.searchParams.set("order", "desc");
  api.searchParams.set("orderby", "date");

  const stock = u.searchParams.get("_sfm__stock_status");
  if (stock && /instock/i.test(stock)) api.searchParams.set("stock_status", "instock");

  const pr = u.searchParams.get("_sfm__regular_price");
  if (pr) {
    // searchParams.get() form-decodes the value, so a literal "+" in the query
    // arrives here as a space and only "%2B" arrives as "+". Accept both.
    const m = String(pr).match(/^\s*([0-9]+)\s*[+\s]\s*([0-9]+)\s*$/);
    if (m) {
      api.searchParams.set("min_price", m[1]);
      api.searchParams.set("max_price", m[2]);
    }
  }

  return api;
}
|
||||||
|
|
||||||
/**
 * Check whether a product carries a category with the given slug.
 *
 * Slugs are compared after trimming and lower-casing. An empty `wanted`
 * matches every product.
 *
 * @param {object} p - Product; `p.categories` is used when it is an array.
 * @param {string} wanted - Slug to look for.
 * @returns {boolean} true when `wanted` is empty or some category slug equals it.
 */
function hasCategorySlug(p, wanted) {
  const normalize = (value) =>
    String(value || "")
      .trim()
      .toLowerCase();

  const target = normalize(wanted);
  if (!target) return true;

  if (!Array.isArray(p?.categories)) return false;
  return p.categories.some((category) => normalize(category?.slug) === target);
}
|
||||||
|
|
||||||
/**
 * Return the product's absolute link, taken from `permalink` or else `link`.
 *
 * @param {object} p - Product object.
 * @returns {string} The trimmed link when it starts with "http", otherwise "".
 */
function normalizeProductUrl(p) {
  const link = String(p?.permalink || p?.link || "").trim();
  if (!link.startsWith("http")) return "";
  return link;
}
|
||||||
|
|
||||||
/**
 * Produce a plain-text product name from `p.name`.
 *
 * Store API names can contain HTML entities (for example an encoded dash) and
 * occasionally markup such as <em>, so the value is run through stripTags,
 * then decodeHtml, then cleanText.
 *
 * @param {object} p - Product object.
 * @returns {string} Result of cleanText on the processed name.
 */
function normalizeProductName(p) {
  const rawName = String(p?.name || "");
  const withoutTags = stripTags(rawName);
  const decoded = decodeHtml(withoutTags);
  return cleanText(decoded);
}
|
||||||
|
|
||||||
/**
 * Pick an image URL for a product.
 *
 * Walks `p.images` (when it is an array) and returns the first non-empty
 * string found on an entry, checking the entry itself, then `src`,
 * `thumbnail` and `url`. If none is found, falls back to `p.image`,
 * `p.image_url` or `p.imageUrl`. Protocol-relative URLs ("//host/...") get
 * an "https:" prefix.
 *
 * @param {object} p - Product object.
 * @returns {string} Image URL, or "" when none is available.
 */
function normalizeProductImage(p) {
  const withScheme = (url) => (url.startsWith("//") ? `https:${url}` : url);
  const stringOrEmpty = (value) => (typeof value === "string" ? value : "");

  const entries = Array.isArray(p?.images) ? p.images : [];
  for (const entry of entries) {
    if (!entry) continue;

    const candidate =
      stringOrEmpty(entry) ||
      stringOrEmpty(entry?.src) ||
      stringOrEmpty(entry?.thumbnail) ||
      stringOrEmpty(entry?.url);

    const trimmed = String(candidate || "").trim();
    if (trimmed) return withScheme(trimmed);
  }

  const fallback = String(p?.image || p?.image_url || p?.imageUrl || "").trim();
  return fallback ? withScheme(fallback) : "";
}
|
||||||
|
|
||||||
/**
 * Convert an integer amount in minor units into a decimal string, using only
 * string operations (no floating point).
 *
 * Example: ("11035", 2) -> "110.35"; ("5", 2) -> "0.05"; ("42", 0) -> "42".
 *
 * @param {string|number} valueStr - Amount in minor units; must consist of digits only.
 * @param {number|string} minorUnit - Number of fractional digits, 0 through 6.
 * @returns {string} Decimal string, or "" when either argument is unusable.
 */
function toMoneyStringFromMinorUnits(valueStr, minorUnit) {
  const places = Number(minorUnit);
  const placesUsable = Number.isFinite(places) && places >= 0 && places <= 6;
  if (!placesUsable) return "";

  const digits = String(valueStr || "").trim();
  if (!/^\d+$/.test(digits)) return "";

  // No fractional part requested: the digits are already the whole amount.
  if (places === 0) return digits;

  // Left-pad with zeros so there are at least `places` digits to split off.
  const padded = digits.padStart(places, "0");
  const splitAt = padded.length - places;
  const whole = padded.length === places ? "0" : padded.slice(0, splitAt);
  const fraction = padded.slice(splitAt);

  return `${whole}.${fraction}`;
}
|
||||||
|
|
||||||
/**
 * Produce a display price ("$110.35") for a Store API product.
 *
 * Prefers the structured `p.prices` object: the sale price when present,
 * otherwise the regular price. An all-digit amount accompanied by
 * `currency_minor_unit` is converted from minor units first (e.g. "11035"
 * with minor unit 2 => 110.35). When `prices` yields nothing usable, the raw
 * `price` / `price_html` text is handed to `normalizePrice`.
 *
 * @param {object} p - Product record; may be null/undefined.
 * @returns {string} Formatted price, or whatever `normalizePrice` returns for the fallback text.
 */
function normalizeProductPrice(p) {
	const priceInfo = p?.prices;

	if (priceInfo && typeof priceInfo === "object") {
		const minorUnit = priceInfo.currency_minor_unit;
		const [saleText, regularText] = [priceInfo.sale_price, priceInfo.regular_price].map((x) =>
			String(x || "").trim(),
		);
		const amountText = saleText || regularText;

		if (amountText) {
			const looksLikeMinorUnits = /^\d+$/.test(amountText) && minorUnit !== undefined && minorUnit !== null;
			// Keep the untouched text whenever conversion is not applicable or fails.
			const decimalText =
				(looksLikeMinorUnits && toMoneyStringFromMinorUnits(amountText, minorUnit)) || amountText;

			const amount = Number(decimalText);
			if (Number.isFinite(amount) && amount >= 0) return `$${amount.toFixed(2)}`;
		}
	}

	const fallbackText = String(p?.price || p?.price_html || "").trim();
	return normalizePrice(fallbackText);
}
|
||||||
|
|
||||||
/**
 * Return the product SKU only when it is exactly six digits.
 *
 * @param {object} p - Product record; may be null/undefined.
 * @returns {string} The trimmed six-digit SKU, or "" for anything else.
 */
function normalizeProductSku(p) {
	const candidate = String(p?.sku || "").trim();
	return /^\d{6}$/.test(candidate) ? candidate : "";
}
|
||||||
|
|
||||||
/**
 * Coerce the product id to a number.
 *
 * @param {object} p - Product record; may be null/undefined.
 * @returns {number} `Number(p.id)` when that is finite, otherwise 0.
 */
function normalizeProductId(p) {
	const parsed = Number(p?.id);
	if (Number.isFinite(parsed)) return parsed;
	return 0;
}
|
||||||
|
|
||||||
/**
 * Fetch one page of the Store API product listing.
 *
 * Copies `apiBaseUrl`, sets its `page` / `per_page` query parameters and
 * issues a GET through `ctx.http.fetchJsonWithRetry`, sending the category
 * start URL as the Referer.
 *
 * @param {object} ctx - Scan context; reads `ctx.http`, `ctx.cat.key`, `ctx.cat.startUrl`, `ctx.store.ua`.
 * @param {URL|string} apiBaseUrl - Base endpoint; existing query parameters are kept.
 * @param {number} page - Page number to request.
 * @param {number} perPage - Page size to request.
 * @returns {Promise<*>} Whatever `fetchJsonWithRetry` resolves to.
 */
async function fetchStoreApiPage(ctx, apiBaseUrl, page, perPage) {
	const pageUrl = new URL(apiBaseUrl.toString());
	const paging = [
		["page", page],
		["per_page", perPage],
	];
	for (const [param, value] of paging) {
		pageUrl.searchParams.set(param, String(value));
	}

	const requestLabel = `strath:storeapi:${ctx.cat.key}:p${page}`;
	const requestOptions = {
		method: "GET",
		headers: {
			Accept: "application/json",
			Referer: ctx.cat.startUrl,
		},
	};

	const response = await ctx.http.fetchJsonWithRetry(pageUrl.toString(), requestLabel, ctx.store.ua, requestOptions);
	return response;
}
|
||||||
|
|
||||||
/**
 * Guard against a partial scan wiping most of the stored catalogue.
 *
 * When this run discovered fewer than 60% as many items as the previous DB
 * holds, a warning is logged and every previous entry missing from
 * `discovered` is copied into it (mutating `discovered`), so the later merge
 * does not see those entries as removed.
 *
 * Note that nothing is done when either collection is empty; a scan that
 * found zero items is therefore not protected by this function.
 *
 * @param {Map} prevDb - Previous DB contents keyed the same way as `discovered`.
 * @param {Map} discovered - Items found in this run; may be extended in place.
 * @param {object} ctx - Scan context; reads `ctx.logger.warn` and `ctx.catPrefixOut`.
 * @param {string} reason - Free-text detail appended to the warning.
 * @returns {boolean} true when previous entries were carried over, false otherwise.
 */
function avoidMassRemoval(prevDb, discovered, ctx, reason) {
	const sizeOf = (coll) => (coll && typeof coll.size === "number" ? coll.size : 0);
	const before = sizeOf(prevDb);
	const found = sizeOf(discovered);

	const nothingToCompare = before <= 0 || found <= 0;
	if (nothingToCompare) return false;

	const coverage = found / Math.max(1, before);
	if (coverage >= 0.6) return false;

	ctx.logger.warn?.(
		`${ctx.catPrefixOut} | Strath partial scan (${found}/${before}); preserving DB to avoid removals (${reason}).`,
	);

	if (!prevDb || typeof prevDb.entries !== "function") return false;

	for (const [key, item] of prevDb.entries()) {
		if (discovered.has(key)) continue;
		discovered.set(key, item);
	}
	return true;
}
|
||||||
|
|
||||||
/**
 * Scan one Strath category and persist the result.
 *
 * Steps, in order:
 *   1. Fetch the category listing HTML and seed `discovered` with the products
 *      parsed from it (a failed fetch is logged and the scan continues).
 *   2. Page through the Store API, overwriting/adding entries keyed by product URL.
 *   3. Run the partial-scan guard against the previous DB.
 *   4. Merge into the previous DB, write the DB file, and record the outcome in `report`.
 *
 * @param {object} ctx - Scan context (http client, logger, store, category, config, dbFile).
 * @param {Map} prevDb - Previous DB contents for this category.
 * @param {object} report - Run report; `categories` and `totals` are updated in place.
 * @returns {Promise<void>}
 */
async function scanCategoryStrath(ctx, prevDb, report) {
	const t0 = Date.now();

	// Listing HTML (seed + sanity)
	let html = "";
	let listingFinalUrl = ctx.cat.startUrl;
	let listingStatus = 0;
	let listingBytes = 0;
	let listingMs = 0;

	try {
		const r = await ctx.http.fetchTextWithRetry(ctx.cat.startUrl, `strath:html:${ctx.cat.key}`, ctx.store.ua);
		html = r.text || "";
		listingFinalUrl = r.finalUrl || ctx.cat.startUrl;
		listingStatus = r.status || 0;
		listingBytes = r.bytes || 0;
		listingMs = r.ms || 0;
	} catch (e) {
		// Best effort: the Store API pass below still runs with an empty listing.
		ctx.logger.warn(`${ctx.catPrefixOut} | Strath listing HTML fetch failed: ${e?.message || e}`);
	}

	// Products found in this run, keyed by product URL.
	const discovered = new Map();

	const listingArticles = extractArticles(html);
	let listingItems = 0;
	for (const art of listingArticles) {
		const it = parseProductFromArticle(art);
		if (it) {
			discovered.set(it.url, it);
			listingItems++;
		}
	}

	ctx.logger.ok(
		`${ctx.catPrefixOut} | Page ${pageStr(1, 1)} | ${String(listingStatus || "").padEnd(3)} | ${pctStr(1, 1)} | items=${padLeft(
			listingItems,
			3,
		)} | bytes=${kbStr(listingBytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(listingMs)}`,
	);

	// Derive the API endpoint from the URL the listing request ended up at.
	const apiBase = buildStoreApiBaseUrlFromCategoryUrl(listingFinalUrl || ctx.cat.startUrl);

	const perPage = 100;
	// A null maxPages means "no configured limit"; 5000 is the hard safety cap.
	const maxPagesCap = ctx.config.maxPages === null ? 5000 : ctx.config.maxPages;

	const wantedSlug = String(ctx.cat.apiCategorySlug || "")
		.trim()
		.toLowerCase();

	let donePages = 0;
	// Consecutive API pages on which the slug filter kept nothing.
	let emptyMatchPages = 0;

	for (let page = 1; page <= maxPagesCap; page++) {
		let r;
		try {
			r = await fetchStoreApiPage(ctx, apiBase, page, perPage);
		} catch (e) {
			// Stop paging on the first failed page; what was collected so far is kept.
			ctx.logger.warn?.(`${ctx.catPrefixOut} | Strath Store API page ${page} failed: ${e?.message || e}`);
			break;
		}

		const arr = Array.isArray(r?.json) ? r.json : [];
		donePages++;

		// An empty (or non-array) page ends the listing.
		if (!arr.length) break;

		let kept = 0;

		for (const p of arr) {
			// Skip anything explicitly marked as not in stock; a missing status is let through.
			const stock = String(p?.stock_status || "").toLowerCase();
			if (stock && stock !== "instock") continue;

			if (wantedSlug && !hasCategorySlug(p, wantedSlug)) continue;

			const url = normalizeProductUrl(p);
			if (!url) continue;

			const name = normalizeProductName(p);
			if (!name) continue;

			const price = normalizeProductPrice(p);
			const sku = normalizeProductSku(p);
			const productId = normalizeProductId(p);

			// Entry already recorded for this URL in this run (listing HTML or an earlier API page).
			const prev = discovered.get(url) || null;

			// Prefer the API image; otherwise keep the image already recorded.
			const apiImg = normalizeProductImage(p) || "";
			const img = apiImg || (prev && prev.img) || "";

			// SKU preference: API sku, then an id taken from the image URL, then one derived from the product URL.
			const skuFromApiImg = idFromImageUrl(apiImg);
			const fallbackSku = sku || skuFromApiImg || normalizeCspc(url) || "";

			const newSku = sku || fallbackSku;
			const mergedSku = pickBetterSku(newSku, prev && prev.sku);

			discovered.set(url, {
				name,
				price,
				url,
				sku: mergedSku,
				productId,
				img,
			});
			kept++;
		}

		ctx.logger.ok(
			`${ctx.catPrefixOut} | API Page ${pageStr(donePages, donePages)} | ${(r?.status || "").toString().padEnd(3)} | kept=${padLeft(
				kept,
				3,
			)} | bytes=${kbStr(r.bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}`,
		);

		if (wantedSlug) {
			if (kept === 0) emptyMatchPages++;
			else emptyMatchPages = 0;

			// If filter is tight (rum), stop after 2 empty pages in a row.
			if (emptyMatchPages >= 2) break;
		}

		// A short page is the last page.
		if (arr.length < perPage) break;
	}

	// Carry over previous entries when this run looks like a partial scan.
	if (prevDb && typeof prevDb.size === "number") {
		avoidMassRemoval(prevDb, discovered, ctx, `storeapi pages=${donePages} slug=${wantedSlug || "none"}`);
	}

	ctx.logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}`);

	const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, {
		storeLabel: ctx.store.name,
	});

	const dbObj = buildDbObject(ctx, merged);
	writeJsonAtomic(ctx.dbFile, dbObj);

	ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`);

	const elapsed = Date.now() - t0;
	ctx.logger.ok(
		`${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`,
	);

	report.categories.push({
		store: ctx.store.name,
		label: ctx.cat.label,
		key: ctx.cat.key,
		dbFile: ctx.dbFile,
		// +1 accounts for the listing HTML page fetched before the API pages.
		scannedPages: 1 + Math.max(0, donePages),
		discoveredUnique: discovered.size,
		newCount: newItems.length,
		updatedCount: updatedItems.length,
		removedCount: removedItems.length,
		restoredCount: restoredItems.length,
		elapsedMs: elapsed,
	});
	report.totals.newCount += newItems.length;
	report.totals.updatedCount += updatedItems.length;
	report.totals.removedCount += removedItems.length;
	report.totals.restoredCount += restoredItems.length;

	addCategoryResultToReport(
		report,
		ctx.store.name,
		ctx.cat.label,
		newItems,
		updatedItems,
		removedItems,
		restoredItems,
	);
}
|
||||||
|
|
||||||
/**
 * Build the Strath Liquor store definition.
 *
 * @param {string} defaultUa - User-Agent string stored on the definition as `ua`.
 * @returns {object} Store descriptor: identity fields, the category scanner, and the category list.
 */
function createStore(defaultUa) {
	const whisky = {
		key: "whisky",
		label: "Whisky",
		apiCategorySlug: "whisky",
		startUrl:
			"https://www.strathliquor.com/whisky/?_sfm__stock_status=instock&_sfm__regular_price=0+6000&_sfm_product_abv=20+75&orderby=date",
	};

	const rum = {
		key: "spirits-rum",
		label: "Spirits - Rum",
		apiCategorySlug: "rum",
		startUrl:
			"https://www.strathliquor.com/spirits/?_sfm__stock_status=instock&_sfm__regular_price=0+600&_sfm_product_type=Rum&_sfm_product_abv=10+75&orderby=date",
	};

	const store = {
		key: "strath",
		name: "Strath Liquor",
		host: "www.strathliquor.com",
		ua: defaultUa,
		scanCategory: scanCategoryStrath,
		categories: [whisky, rum],
	};
	return store;
}
|
||||||
|
|
||||||
// Public surface of this store module: only the store factory is exported.
module.exports = { createStore };
|
||||||
|
|
|
||||||
|
|
@ -19,192 +19,191 @@ const GQL_URL = "https://production-storefront-api-mlwv4nj3rq-uc.a.run.app/graph
|
||||||
/* ---------------- formatting ---------------- */
|
/* ---------------- formatting ---------------- */
|
||||||
|
|
||||||
/**
 * Format a byte count for log columns.
 *
 * @param {number} bytes - Size in bytes, passed to `humanBytes`.
 * @returns {string} The `humanBytes` text, left-padded with spaces to 8 characters.
 */
function kbStr(bytes) {
	const label = humanBytes(bytes);
	return label.padStart(8, " ");
}
|
||||||
/**
 * Format a duration in milliseconds as seconds for log columns.
 *
 * Durations that round to under 10 seconds keep one decimal ("1.2s"); longer
 * ones are shown as whole seconds ("12s"). Non-finite or non-number input is
 * treated as 0.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} The label, left-padded with spaces to 7 characters.
 */
function secStr(ms) {
	const seconds = Number.isFinite(ms) ? ms / 1000 : 0;
	const tenths = Math.round(seconds * 10) / 10;

	let label;
	if (tenths < 10) {
		label = `${tenths.toFixed(1)}s`;
	} else {
		label = `${Math.round(seconds)}s`;
	}
	return label.padStart(7, " ");
}
|
||||||
/**
 * Format a "current/total" page indicator.
 *
 * @param {number} i - Current page.
 * @param {number} total - Total pages.
 * @returns {string} `i` padded via `padLeft` to the width of `total`, then "/" and `total`.
 */
function pageStr(i, total) {
	const width = String(total).length;
	const current = padLeft(i, width);
	return `${current}/${total}`;
}
|
||||||
|
|
||||||
/* ---------------- helpers ---------------- */
|
/* ---------------- helpers ---------------- */
|
||||||
|
|
||||||
/**
 * Format a numeric value as a dollar amount with two decimals.
 *
 * @param {*} n - Value coerced with `Number()`.
 * @returns {string} e.g. "$12.50", or "" when the coerced value is not finite.
 */
function money(n) {
	const amount = Number(n);
	if (!Number.isFinite(amount)) return "";
	return `$${amount.toFixed(2)}`;
}
|
||||||
|
|
||||||
/**
 * Return the first non-blank string found among the arguments.
 *
 * Each argument is checked in order. A string counts when it is non-empty
 * after trimming. An array is searched element by element: string elements
 * are trimmed, object elements contribute their `url`, `src` or `image`
 * field. Any other argument type is skipped.
 *
 * @param {...*} vals - Candidate values.
 * @returns {string} The first trimmed match, or "" when there is none.
 */
function firstNonEmptyStr(...vals) {
	// Text carried by a single array element ("" when it carries none).
	const textOf = (entry) => {
		if (typeof entry === "string") return entry.trim();
		if (entry && typeof entry === "object") {
			return String(entry.url || entry.src || entry.image || "").trim();
		}
		return "";
	};

	for (const candidate of vals) {
		if (typeof candidate === "string") {
			const trimmed = candidate.trim();
			if (trimmed) return trimmed;
			continue;
		}
		if (!Array.isArray(candidate)) continue;

		for (const entry of candidate) {
			const text = textOf(entry);
			if (text) return text;
		}
	}
	return "";
}
|
||||||
|
|
||||||
/**
 * Turn a possibly relative URL into an absolute one.
 *
 * Protocol-relative input gets an "https:" prefix, http(s) URLs are returned
 * as given, and anything else is resolved against `BASE`. If resolution
 * throws, the trimmed input is returned unchanged.
 *
 * @param {*} raw - URL-ish value; falsy values yield "".
 * @returns {string} Absolute URL, the trimmed input on failure, or "".
 */
function normalizeAbsUrl(raw) {
	const candidate = String(raw || "").trim();
	if (candidate === "") return "";

	const isProtocolRelative = candidate.startsWith("//");
	if (isProtocolRelative) return `https:${candidate}`;

	const hasHttpScheme = /^https?:\/\//i.test(candidate);
	if (hasHttpScheme) return candidate;

	let resolved = candidate;
	try {
		resolved = new URL(candidate, `${BASE}/`).toString();
	} catch {
		// Unresolvable input: hand back the trimmed text as-is.
	}
	return resolved;
}
|
||||||
|
|
||||||
/**
 * Tell whether a SKU is missing or synthetic.
 *
 * SKUs of the form "u:*" are URL-hash fallbacks, so they count as synthetic
 * and are eligible for repair, as is an empty SKU.
 *
 * @param {*} sku - SKU value; falsy values count as empty.
 * @returns {boolean} true for an empty SKU or one starting with "u:" (any case).
 */
function isSyntheticSku(sku) {
	const trimmed = String(sku || "").trim();
	if (trimmed === "") return true;
	return /^u:/i.test(trimmed);
}
|
||||||
|
|
||||||
/**
 * Normalize a raw Tudor SKU.
 *
 * - Values already prefixed "id:" or "u:" are returned as they are.
 * - Anything shorter than 6 characters is namespaced as "id:<sku>" to reduce
 *   collisions.
 * - Longer all-digit SKUs are returned untouched. They are deliberately NOT
 *   passed through normalizeCspc, since some normalizers hash arbitrary strings.
 * - Everything else goes through normalizeCspc, falling back to the trimmed
 *   input when that returns an empty value.
 *
 * @param {*} rawSku - SKU as received; falsy values yield "".
 * @returns {string} The normalized SKU, or "".
 */
function normalizeTudorSku(rawSku) {
	const trimmed = String(rawSku || "").trim();
	if (trimmed === "") return "";

	const alreadyNamespaced = /^id:/i.test(trimmed) || /^u:/i.test(trimmed);
	if (alreadyNamespaced) return trimmed;

	// Short SKUs, numeric or not, all get the same "id:" namespace.
	if (trimmed.length < 6) return `id:${trimmed}`;

	// numeric SKU of 6+ digits
	if (/^\d+$/.test(trimmed)) return trimmed;

	return normalizeCspc(trimmed) || trimmed;
}
|
||||||
|
|
||||||
/**
 * Build the public product page URL for a product slug.
 *
 * Site URLs look like: /TUDOR_HOUSE_0/product/spirits/<subcat>/<slug>
 *
 * @param {object} ctx - Scan context; `ctx.cat.tudorRootSlug` defaults to "spirits", `ctx.cat.tudorSubSlug` to "".
 * @param {string} slug - Product slug.
 * @returns {string} Absolute URL under `BASE`, with each path segment URI-encoded.
 */
function tudorProductUrl(ctx, slug) {
	const category = ctx?.cat;
	const segments = [category?.tudorRootSlug || "spirits", category?.tudorSubSlug || "", slug].map((part) =>
		encodeURIComponent(part),
	);
	return new URL(`/${STORE_ID}/product/${segments.join("/")}`, BASE).toString();
}
|
||||||
|
|
||||||
/**
 * Extract a bottle volume in millilitres from a variant.
 *
 * Reads the first non-empty of `volume`, `shortName`, `fullName` and looks for
 * a number followed by "ML" or "L" (case-insensitive), e.g. "1.75L", "1L",
 * "750ML".
 *
 * @param {object} v - Variant record; may be null/undefined.
 * @returns {number|null} Rounded millilitres, or null when no volume is found.
 */
function parseVolumeMl(v) {
	const text = String(v?.volume || v?.shortName || v?.fullName || "").toUpperCase();

	const found = /(\d+(?:\.\d+)?)\s*(ML|L)\b/.exec(text);
	if (found === null) return null;

	const [, digits, unit] = found;
	const amount = Number(digits);
	if (!Number.isFinite(amount)) return null;

	const mlPerUnit = unit === "L" ? 1000 : 1;
	return Math.round(amount * mlPerUnit);
}
|
||||||
|
|
||||||
/**
 * Choose the representative variant of a product.
 *
 * Only in-stock variants (quantity > 0) are considered when there are any;
 * otherwise all variants are. Among the candidates the largest parsed volume
 * wins, ties are broken by the higher price, and on a full tie the earlier
 * variant is kept. Unknown volume or price ranks below any known value.
 *
 * @param {object} p - Product record with an optional `variants` array.
 * @returns {object|null} The chosen variant, or null when there are none.
 */
function tudorPickVariant(p) {
	const variants = Array.isArray(p?.variants) ? p.variants : [];
	const available = variants.filter((v) => Number(v?.quantity) > 0);
	const pool = available.length ? available : variants;

	if (!pool.length) return null;
	if (pool.length === 1) return pool[0];

	// Sort keys for one variant; -1 stands in for "unknown".
	const rank = (variant) => {
		const volume = parseVolumeMl(variant);
		const price = Number(variant?.price);
		return {
			variant,
			volume: volume == null ? -1 : volume,
			price: Number.isFinite(price) ? price : -1,
		};
	};

	let leader = rank(pool[0]);
	for (let idx = 1; idx < pool.length; idx += 1) {
		const challenger = rank(pool[idx]);

		// 1) largest volume wins
		if (challenger.volume > leader.volume) {
			leader = challenger;
			continue;
		}
		if (challenger.volume < leader.volume) continue;

		// 2) tie-break: higher price wins
		if (challenger.price > leader.price) leader = challenger;
	}

	return leader.variant;
}
|
||||||
/**
 * Pull the displayed price out of a product page's HTML.
 *
 * The search is narrowed to the main price container when one is found (a div
 * whose class contains "price-container", else one whose class contains the
 * word "price"), to avoid picking up unrelated amounts. Retail-price spans are
 * blanked out so the live price is matched first.
 *
 * @param {*} html - Page markup; falsy values are treated as "".
 * @returns {number|null} The first "$" amount found, or null when there is none.
 */
function parseDisplayPriceFromHtml(html) {
	const markup = String(html || "");

	const containerPatterns = [
		/<div[^>]*class=["'][^"']*price-container[^"']*["'][^>]*>([\s\S]{0,800})<\/div>/i,
		/<div[^>]*class=["'][^"']*\bprice\b[^"']*["'][^>]*>([\s\S]{0,800})<\/div>/i,
	];

	// Fall back to the whole document when no container matches.
	let scope = markup;
	for (const pattern of containerPatterns) {
		const hit = markup.match(pattern);
		if (hit) {
			scope = hit[1];
			break;
		}
	}

	const withoutRetail = scope.replace(
		/<span[^>]*class=["'][^"']*retail-price[^"']*["'][^>]*>[\s\S]*?<\/span>/gi,
		" ",
	);

	const priceMatch = /\$\s*([0-9]+(?:\.[0-9]{2})?)/.exec(withoutRetail);
	if (priceMatch === null) return null;

	const value = Number(priceMatch[1]);
	return Number.isFinite(value) ? value : null;
}
|
||||||
|
|
||||||
|
|
||||||
/**
 * Return the first non-blank variant SKU of a product.
 *
 * @param {object} p - Product record with an optional `variants` array.
 * @returns {string} The trimmed SKU of the first variant that has one, or "".
 */
function pickAnySkuFromProduct(p) {
	const variants = Array.isArray(p?.variants) ? p.variants : [];
	for (let idx = 0; idx < variants.length; idx += 1) {
		const candidate = String(variants[idx]?.sku || "").trim();
		if (candidate !== "") return candidate;
	}
	return "";
}
|
||||||
|
|
||||||
/**
 * Return the first in-stock variant, falling back to the first variant.
 *
 * @param {object} p - Product record with an optional `variants` array.
 * @returns {object|null} First variant with quantity > 0, else the first variant, else null.
 */
function pickInStockVariantWithFallback(p) {
	const variants = Array.isArray(p?.variants) ? p.variants : [];
	for (const variant of variants) {
		if (Number(variant?.quantity) > 0) return variant;
	}
	return variants[0] || null;
}
|
||||||
|
|
||||||
/* ---------------- GraphQL ---------------- */
|
/* ---------------- GraphQL ---------------- */
|
||||||
|
|
||||||
/**
 * POST a GraphQL request to the storefront API.
 *
 * @param {object} ctx - Scan context; uses `ctx.http.fetchJsonWithRetry` and `ctx.store.ua`.
 * @param {string} label - Request label passed through to the HTTP helper.
 * @param {string} query - GraphQL query text.
 * @param {object} variables - GraphQL variables.
 * @returns {Promise<*>} Whatever `fetchJsonWithRetry` resolves to.
 */
async function tudorGql(ctx, label, query, variables) {
	const headers = {
		Accept: "application/json",
		"content-type": "application/json",
		Origin: BASE,
		Referer: `${BASE}/`,
	};
	const requestOptions = {
		method: "POST",
		headers,
		body: JSON.stringify({ query, variables }),
	};

	const response = await ctx.http.fetchJsonWithRetry(GQL_URL, label, ctx.store.ua, requestOptions);
	return response;
}
|
||||||
|
|
||||||
/* ---------------- GQL queries ---------------- */
|
/* ---------------- GQL queries ---------------- */
|
||||||
|
|
@ -300,63 +299,63 @@ const PRODUCTS_BY_SKU_QUERY = `
|
||||||
`;
|
`;
|
||||||
|
|
||||||
/**
 * Fetch one page of products for the current category via GraphQL.
 *
 * Products are requested 100 at a time, sorted by name ascending, filtered by
 * `ctx.cat.tudorAllTags` (or ["spirits", <tudorSubSlug>] when that is unset).
 *
 * @param {object} ctx - Scan context; reads `ctx.cat.tudorAllTags`, `ctx.cat.tudorSubSlug`, `ctx.cat.key`.
 * @param {string|null} [cursor] - Page cursor from the previous page; falsy for the first page.
 * @returns {Promise<object>} The `data.products` payload of the response.
 * @throws {Error} When the status is not 200 or the payload has no `products`;
 *   the message carries the GraphQL error messages, or the HTTP status when there are none.
 */
async function fetchProductsPage(ctx, cursor) {
	const { cat } = ctx;
	const tagFilter = cat.tudorAllTags || ["spirits", cat.tudorSubSlug];

	const response = await tudorGql(ctx, `tudor:gql:products:${cat.key}`, PRODUCTS_QUERY, {
		storeId: STORE_ID,
		allTags: tagFilter,
		anyTags: null,
		pageCursor: cursor || null,
		pageLimit: 100,
		sortBy: "name",
		sortOrder: "asc",
		priceMin: null,
		priceMax: null,
		quantityMin: null,
	});

	const productsPage = response?.json?.data?.products;
	if (response?.status === 200 && productsPage) return productsPage;

	const reported = response?.json?.errors;
	const detail =
		Array.isArray(reported) && reported.length
			? reported.map((err) => err?.message || String(err)).join(" | ")
			: `HTTP ${response?.status}`;
	throw new Error(`Tudor products query failed: ${detail}`);
}
|
||||||
|
|
||||||
/* ---------------- GQL bySku helper (image-only within budget) ---------------- */
|
/* ---------------- GQL bySku helper (image-only within budget) ---------------- */
|
||||||
|
|
||||||
/**
 * Look up a single product by SKU through GraphQL, memoized on `ctx._tudorSkuCache`.
 *
 * Misses (including non-200 responses) are cached as `null`, so each SKU
 * triggers at most one request per context unless the request itself throws.
 *
 * @param {object} ctx - Scan context; `ctx._tudorSkuCache` is created on first use.
 * @param {*} sku - SKU to look up; stringified and trimmed.
 * @returns {Promise<?object>} First item of `productsBySku.items`, or `null`.
 */
async function fetchProductBySku(ctx, sku) {
  const key = String(sku || "").trim();
  if (key === "") return null;

  if (!ctx._tudorSkuCache) {
    ctx._tudorSkuCache = new Map();
  }
  if (ctx._tudorSkuCache.has(key)) {
    return ctx._tudorSkuCache.get(key);
  }

  const label = `tudor:gql:bySku:${ctx.cat.key}:${key}`;
  const res = await tudorGql(ctx, label, PRODUCTS_BY_SKU_QUERY, { sku: key, storeId: STORE_ID });

  const hasItems = res?.status === 200 && res?.json?.data?.productsBySku?.items?.length;
  const product = hasItems ? res.json.data.productsBySku.items[0] || null : null;

  ctx._tudorSkuCache.set(key, product);
  return product;
}
|
||||||
|
|
||||||
/**
 * Try to find an image URL for a product by looking it up by SKU.
 *
 * @param {object} ctx - Scan context, forwarded to `fetchProductBySku`.
 * @param {*} skuProbe - SKU used for the lookup.
 * @returns {Promise<?{img: string}>} `{ img }` when an image was found, otherwise `null`.
 */
async function supplementImageFromSku(ctx, skuProbe) {
  const product = await fetchProductBySku(ctx, skuProbe);
  if (!product) return null;

  const variant = pickInStockVariantWithFallback(product);

  // Variant image first, then the product-level image fields in this fixed order.
  const candidate = firstNonEmptyStr(
    variant?.image,
    product?.gulpImages,
    product?.posImages,
    product?.customImages,
    product?.imageIds,
  );
  const img = normalizeAbsUrl(candidate);

  if (!img) return null;
  return { img };
}
|
||||||
|
|
||||||
/* ---------------- HTML product page fallback (SKU + optional image) ---------------- */
|
/* ---------------- HTML product page fallback (SKU + optional image) ---------------- */
|
||||||
|
|
@ -366,333 +365,335 @@ const DETAIL_HTML_BUDGET_DEFAULT = 200;
|
||||||
const DETAIL_GQL_BUDGET_DEFAULT = 10;
|
const DETAIL_GQL_BUDGET_DEFAULT = 10;
|
||||||
|
|
||||||
/**
 * Extract a SKU from a product page's HTML.
 *
 * Patterns are tried in priority order and the first hit wins:
 *   1. a visible block such as `<div class="sku ...">SKU: 67433</div>`
 *   2. any `SKU: <token>` text
 *   3. a `"sku":"67433"` entry in embedded preloaded JSON
 *
 * @param {*} html - Page HTML; falsy values are treated as an empty string.
 * @returns {string} The trimmed SKU, or `""` when nothing matched.
 */
function parseSkuFromHtml(html) {
  const text = String(html || "");

  const patternsByPriority = [
    />\s*SKU:\s*([A-Za-z0-9._-]+)\s*</i,
    /\bSKU:\s*([A-Za-z0-9._-]+)\b/i,
    /"sku"\s*:\s*"([^"]+)"/i,
  ];

  for (const pattern of patternsByPriority) {
    const hit = text.match(pattern);
    if (hit && hit[1]) return String(hit[1]).trim();
  }

  return "";
}
|
||||||
|
|
||||||
/**
 * Extract the social-preview image URL from a page's meta tags.
 *
 * `og:image` is preferred over `twitter:image`. Both attribute orders are
 * accepted (`property=... content=...` and `content=... property=...`),
 * because HTML does not fix the order of attributes inside a tag. `[^>]*`
 * keeps every match inside a single tag.
 *
 * @param {*} html - Page HTML; falsy values are treated as an empty string.
 * @returns {string} The trimmed image URL, or `""` when no tag matched.
 */
function parseOgImageFromHtml(html) {
  const s = String(html || "");

  const patternsByPriority = [
    /property=["']og:image["'][^>]*content=["']([^"']+)["']/i,
    /content=["']([^"']+)["'][^>]*property=["']og:image["']/i,
    /name=["']twitter:image["'][^>]*content=["']([^"']+)["']/i,
    /content=["']([^"']+)["'][^>]*name=["']twitter:image["']/i,
  ];

  for (const pattern of patternsByPriority) {
    const m = s.match(pattern);
    if (m) return String(m[1] || "").trim();
  }

  return "";
}
|
||||||
|
|
||||||
/**
 * Fetch an HTML page through `ctx.http` so the wrapper's pacing/throttle applies.
 *
 * Uses `fetchTextWithRetry` when the wrapper has it; otherwise falls back to
 * `fetchWithRetry` and coerces the response body to a string.
 *
 * @param {object} ctx - Scan context; reads `ctx.http` and `ctx.store.ua`.
 * @param {string} label - Label forwarded to the HTTP wrapper.
 * @param {string} url - Page URL.
 * @returns {Promise<object>} The `fetchTextWithRetry` result as-is, or
 *   `{ status, text, bytes, ms }` built from the `fetchWithRetry` result.
 * @throws {Error} When `ctx.http` offers neither method.
 */
async function tudorFetchHtml(ctx, label, url) {
  const http = ctx?.http;
  const makeOptions = () => ({
    method: "GET",
    headers: {
      Accept: "text/html,application/xhtml+xml",
      Referer: `${BASE}/`,
    },
  });

  // Preferred path: the wrapper already returns text.
  if (http?.fetchTextWithRetry) {
    return await http.fetchTextWithRetry(url, label, ctx.store.ua, makeOptions());
  }

  if (!http?.fetchWithRetry) {
    throw new Error("No HTML fetch method available on ctx.http (need fetchTextWithRetry or fetchWithRetry).");
  }

  // Best-effort fallback: generic fetch, then coerce whatever body field exists.
  const res = await http.fetchWithRetry(url, label, ctx.store.ua, makeOptions());
  const payload = res?.text ?? res?.body ?? res?.data ?? "";

  let text = "";
  if (typeof payload === "string") {
    text = payload;
  } else if (Buffer.isBuffer(payload)) {
    text = payload.toString("utf8");
  } else if (payload && typeof payload === "object" && typeof payload.toString === "function") {
    text = payload.toString();
  }

  return { status: res?.status, text, bytes: res?.bytes, ms: res?.ms };
}
|
||||||
|
|
||||||
/**
 * Scrape SKU, image and displayed price from a product page, memoized per URL
 * on `ctx._tudorHtmlCache`.
 *
 * This is best-effort: any thrown error, non-200 status or empty body yields
 * `null`, and that `null` is cached as well.
 *
 * @param {object} ctx - Scan context; `ctx._tudorHtmlCache` is created on first use.
 * @param {string} url - Product page URL (also the cache key).
 * @returns {Promise<?{sku: *, img: *, priceNum: *}>} Parsed details or `null`.
 */
async function tudorDetailFromProductPage(ctx, url) {
  if (!ctx._tudorHtmlCache) {
    ctx._tudorHtmlCache = new Map();
  }
  if (ctx._tudorHtmlCache.has(url)) {
    return ctx._tudorHtmlCache.get(url);
  }

  let detail = null;
  try {
    const page = await tudorFetchHtml(ctx, `tudor:html:${ctx.cat.key}`, url);
    const usable = page?.status === 200 && typeof page?.text === "string" && page.text.length > 0;

    if (usable) {
      const html = page.text;
      const sku = normalizeTudorSku(parseSkuFromHtml(html));
      const img = normalizeAbsUrl(parseOgImageFromHtml(html));
      const priceNum = parseDisplayPriceFromHtml(html);
      detail = { sku, img, priceNum };
    }
  } catch {
    // Deliberately swallowed: a failed detail page must not abort the scan.
    detail = null;
  }

  ctx._tudorHtmlCache.set(url, detail);
  return detail;
}
|
||||||
|
|
||||||
|
|
||||||
/* ---------------- item builder (fast, no extra calls) ---------------- */
|
/* ---------------- item builder (fast, no extra calls) ---------------- */
|
||||||
|
|
||||||
/**
 * Build a scan item from a GraphQL product record without making extra requests.
 *
 * @param {?object} p - Product record from the products query.
 * @param {object} ctx - Scan context, forwarded to `tudorProductUrl`.
 * @returns {?object} `{ name, price, url, sku, img, _skuProbe, _variants }`, or
 *   `null` when the product is missing, has no name/slug, or its picked
 *   variant has a quantity of zero or less.
 */
function tudorItemFromProductFast(p, ctx) {
  if (!p) return null;

  const name = cleanText(p?.name || "");
  const slug = String(p?.slug || "").trim();
  if (!(name && slug)) return null;

  const variant = tudorPickVariant(p);
  const outOfStock = variant && Number(variant?.quantity) <= 0;
  if (outOfStock) return null; // only keep in-stock

  const url = tudorProductUrl(ctx, slug);

  // Fast-path price is best-effort; the repair pass may override it for multi-variant products.
  const price = money(variant?.price ?? p?.priceFrom ?? p?.priceTo);

  const skuRaw = String(variant?.sku || "").trim() || pickAnySkuFromProduct(p);
  const sku = normalizeTudorSku(skuRaw);

  const imageCandidate = firstNonEmptyStr(
    variant?.image,
    p?.gulpImages,
    p?.posImages,
    p?.customImages,
    p?.imageIds,
  );
  const img = normalizeAbsUrl(imageCandidate);

  // Lightweight variant snapshot so the repair pass can match an HTML SKU to an exact variant price.
  const toSnapshot = (entry) => ({
    sku: String(entry?.sku || "").trim(),
    price: entry?.price,
    retailPrice: entry?.retailPrice,
    quantity: entry?.quantity,
  });
  const variants = Array.isArray(p?.variants) ? p.variants.map((entry) => toSnapshot(entry)) : [];

  return { name, price, url, sku, img, _skuProbe: skuRaw, _variants: variants };
}
|
||||||
|
|
||||||
/* ---------------- repair (second pass, budgeted) ---------------- */
|
/* ---------------- repair (second pass, budgeted) ---------------- */
|
||||||
|
|
||||||
/**
 * Second-pass repair of a scan item: fills in a real SKU, an image and a more
 * precise price. Mutates `it` in place and returns it.
 *
 * The product page is fetched when the SKU is synthetic, or when two or more
 * variants are in stock (the fast path may have priced the wrong variant).
 *
 * @param {object} ctx - Scan context.
 * @param {object} it - Item as built by `tudorItemFromProductFast`.
 * @returns {Promise<object>} The same `it` object.
 */
async function tudorRepairItem(ctx, it) {
  const snapshot = Array.isArray(it._variants) ? it._variants : [];
  const inStockVariants = snapshot.filter((variant) => Number(variant?.quantity) > 0);
  const needsHtml = isSyntheticSku(it.sku) || inStockVariants.length >= 2;

  // 1) HTML: fix the SKU if missing/synthetic, and fix the price for multi-variant URLs.
  if (needsHtml) {
    const detail = await tudorDetailFromProductPage(ctx, it.url);

    // Prefer a real SKU from the page.
    const pageSkuIsReal = detail?.sku && !isSyntheticSku(detail.sku);
    if (pageSkuIsReal) it.sku = detail.sku;

    // Fill the image only if it is still missing.
    if (!it.img && detail?.img) it.img = detail.img;

    // Price precision: an exact variant price matched by SKU wins,
    // otherwise fall back to the price displayed on the page.
    const pageSku = String(detail?.sku || "")
      .replace(/^id:/i, "")
      .trim();

    let matched;
    if (pageSku && inStockVariants.length) {
      matched = inStockVariants.find((variant) => String(variant?.sku || "").trim() === pageSku);
    }

    if (matched && Number.isFinite(Number(matched.price))) {
      it.price = money(matched.price);
    } else if (Number.isFinite(detail?.priceNum)) {
      it.price = money(detail.priceNum);
    }
  }

  // 2) Still no image: try the productsBySku lookup.
  if (!it.img) {
    const probe = String(it._skuProbe || "").trim();
    if (probe) {
      const extra = await supplementImageFromSku(ctx, probe);
      if (extra?.img) it.img = extra.img;
    }
  }

  // Final fallback only after the repair attempts, for stability.
  if (isSyntheticSku(it.sku)) {
    it.sku = normalizeCspc(it.url) || "";
  }

  return it;
}
|
||||||
|
|
||||||
|
|
||||||
/* ---------------- scanner ---------------- */
|
/* ---------------- scanner ---------------- */
|
||||||
|
|
||||||
/**
 * Scan one Tudor House category: page through the products query, build items,
 * repair incomplete ones within budgets, merge into the previous DB, write the
 * DB file and append the results to `report`.
 *
 * @param {object} ctx - Scan context (config, logger, http, store, cat, dbFile, ...).
 * @param {?object} prevDb - Previous DB; `prevDb.byUrl` is consulted to seed SKU and image.
 * @param {object} report - Report accumulator; `categories` and `totals` are updated.
 * @returns {Promise<void>}
 */
async function scanCategoryTudor(ctx, prevDb, report) {
  const HARD_PAGE_CAP = 500;
  const t0 = Date.now();
  const discovered = new Map();

  // A null/undefined or non-numeric maxPages means "no explicit limit" and uses the hard cap.
  // Without this guard an undefined value turned into NaN, the loop below never
  // ran, and an empty discovery set was merged into the DB.
  const cfgMaxPages = ctx.config.maxPages;
  const requestedPages = cfgMaxPages == null ? HARD_PAGE_CAP : Math.min(Number(cfgMaxPages), HARD_PAGE_CAP);
  const maxPages = Number.isFinite(requestedPages) ? requestedPages : HARD_PAGE_CAP;

  let cursor = null;
  let done = 0;

  const needsDetail = [];

  for (let page = 1; page <= maxPages; page++) {
    const tPage = Date.now();

    const prod = await fetchProductsPage(ctx, cursor);
    const arr = Array.isArray(prod?.items) ? prod.items : [];

    let kept = 0;
    for (const p of arr) {
      const it = tudorItemFromProductFast(p, ctx);
      if (!it) continue;

      // Seed from the cached DB to avoid repeating detail HTML fetches.
      const prev = prevDb?.byUrl?.get(it.url) || null;
      if (prev) {
        it.sku = pickBetterSku(it.sku, prev.sku);
        if (!it.img && prev.img) it.img = prev.img;
      }

      // Queue only; detail calls are not made inline.
      if (isSyntheticSku(it.sku) || !it.img) needsDetail.push(it);

      discovered.set(it.url, it);
      kept++;
    }

    done++;

    const ms = Date.now() - tPage;
    ctx.logger.ok(
      `${ctx.catPrefixOut} | Page ${pageStr(page, maxPages)} | 200 | items=${padLeft(
        kept,
        3,
      )} | bytes=${kbStr(0)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(ms)}`,
    );

    cursor = prod?.nextPageCursor || null;
    if (!cursor || !arr.length) break;
  }

  // Second pass: repair with budgets.
  const htmlBudget = Number.isFinite(ctx.config.tudorHtmlBudget)
    ? ctx.config.tudorHtmlBudget
    : DETAIL_HTML_BUDGET_DEFAULT;

  const gqlBudget = Number.isFinite(ctx.config.tudorGqlBudget)
    ? ctx.config.tudorGqlBudget
    : DETAIL_GQL_BUDGET_DEFAULT;

  let htmlUsed = 0;
  let gqlUsed = 0;

  for (const it of needsDetail) {
    const wantsHtml = isSyntheticSku(it.sku);
    const wantsGql = !it.img && String(it._skuProbe || "").trim();

    // Enforce caps: skip only when every kind of work this item wants is over budget.
    // NOTE(review): an item wanting both kinds still runs when only one budget is
    // exhausted, so a counter can end up above its budget. Confirm this is intended.
    if (wantsHtml && htmlUsed >= htmlBudget && (!wantsGql || gqlUsed >= gqlBudget)) continue;
    if (wantsGql && gqlUsed >= gqlBudget && (!wantsHtml || htmlUsed >= htmlBudget)) continue;

    // Count budgets pessimistically (before knowing whether a request is really made).
    if (wantsHtml) htmlUsed++;
    if (wantsGql) gqlUsed++;

    await tudorRepairItem(ctx, it);
    discovered.set(it.url, it);
  }

  ctx.logger.ok(
    `${ctx.catPrefixOut} | Unique products: ${discovered.size} | detail(html=${htmlUsed}/${htmlBudget}, gql=${gqlUsed}/${gqlBudget})`,
  );

  const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, {
    storeLabel: ctx.store.name,
  });

  const dbObj = buildDbObject(ctx, merged);
  writeJsonAtomic(ctx.dbFile, dbObj);

  const elapsed = Date.now() - t0;

  report.categories.push({
    store: ctx.store.name,
    label: ctx.cat.label,
    key: ctx.cat.key,
    dbFile: ctx.dbFile,
    scannedPages: done,
    discoveredUnique: discovered.size,
    newCount: newItems.length,
    updatedCount: updatedItems.length,
    removedCount: removedItems.length,
    restoredCount: restoredItems.length,
    elapsedMs: elapsed,
  });

  report.totals.newCount += newItems.length;
  report.totals.updatedCount += updatedItems.length;
  report.totals.removedCount += removedItems.length;
  report.totals.restoredCount += restoredItems.length;

  addCategoryResultToReport(
    report,
    ctx.store.name,
    ctx.cat.label,
    newItems,
    updatedItems,
    removedItems,
    restoredItems,
  );
}
|
||||||
|
|
||||||
/* ---------------- store ---------------- */
|
/* ---------------- store ---------------- */
|
||||||
|
|
||||||
/**
 * Build the Tudor House store definition.
 *
 * @param {string} defaultUa - User-agent string stored as `ua` on the result.
 * @returns {object} Store descriptor with `key`, `name`, `host`, `ua`,
 *   `scanCategory` and the list of spirit `categories`.
 */
function createStore(defaultUa) {
  // All categories live under "spirits"; the sub-slug doubles as the category key.
  const spiritsCategory = (slug, label) => ({
    key: slug,
    label,
    startUrl: `${BASE}/${STORE_ID}/category/spirits/${slug}`,
    tudorRootSlug: "spirits",
    tudorSubSlug: slug,
    tudorAllTags: ["spirits", slug],
  });

  return {
    key: "tudor",
    name: "Tudor House",
    host: HOST,
    ua: defaultUa,
    scanCategory: scanCategoryTudor,
    categories: [
      spiritsCategory("rum", "Rum"),
      spiritsCategory("whiskey-scotch", "Whiskey / Scotch"),
      spiritsCategory("scotch-selections", "Scotch Selections"),
    ],
  };
}
|
||||||
|
|
||||||
module.exports = { createStore };
|
module.exports = { createStore };
|
||||||
|
|
|
||||||
|
|
@ -5,169 +5,170 @@ const { normalizeCspc } = require("../utils/sku");
|
||||||
const { normalizeBaseUrl } = require("../utils/url");
|
const { normalizeBaseUrl } = require("../utils/url");
|
||||||
|
|
||||||
/**
 * Turn a possibly relative or protocol-relative URL into an absolute one.
 *
 * @param {*} raw - URL-like value; falsy values yield `""`.
 * @returns {string} `""` for empty input; `https:` prepended for `//host/...`;
 *   the input unchanged when it already starts with http(s)://; otherwise the
 *   value resolved against `https://vesselliquor.com/` (or returned as-is if
 *   resolution throws).
 */
function normalizeAbsUrl(raw) {
  const value = String(raw || "").trim();

  if (value === "") return "";
  if (value.startsWith("//")) return `https:${value}`;

  const alreadyAbsolute = /^https?:\/\//i.test(value);
  if (alreadyAbsolute) return value;

  let resolved = value;
  try {
    resolved = new URL(value, "https://vesselliquor.com/").toString();
  } catch {
    // Unresolvable input is handed back untouched.
    resolved = value;
  }
  return resolved;
}
|
||||||
|
|
||||||
// Strip noisy Shopify/tracking params so URLs stay stable.
|
// Strip noisy Shopify/tracking params so URLs stay stable.
|
||||||
// Keep only "variant" since it can represent a distinct product configuration.
|
// Keep only "variant" since it can represent a distinct product configuration.
|
||||||
/**
 * Canonicalize a Shopify product URL: drop the fragment, drop every query
 * parameter except `variant`, and strip trailing slashes from the path.
 *
 * @param {*} rawUrl - Absolute URL; anything unparsable is returned stringified.
 * @returns {string} The normalized URL, or `String(rawUrl || "")` on parse failure.
 */
function normalizeShopifyProductUrl(rawUrl) {
  const input = String(rawUrl || "");

  try {
    const parsed = new URL(input);
    parsed.hash = "";

    // Snapshot the keys first: deleting while iterating the live list would skip entries.
    const paramNames = Array.from(parsed.searchParams.keys());
    paramNames
      .filter((paramName) => paramName !== "variant")
      .forEach((paramName) => parsed.searchParams.delete(paramName));

    const remaining = Array.from(parsed.searchParams.keys());
    if (remaining.length === 0) parsed.search = "";

    if (parsed.pathname.length > 1) {
      parsed.pathname = parsed.pathname.replace(/\/+$/, "");
    }

    return parsed.toString();
  } catch {
    return input;
  }
}
|
||||||
|
|
||||||
/**
 * Build the listing URL for a given page number.
 *
 * @param {string} baseUrl - Category base URL; passed through `normalizeBaseUrl`.
 * @param {number} pageNum - Page number; values `<= 1` produce a URL without `page`.
 * @returns {string} The page URL with no fragment.
 */
function makeVesselPageUrl(baseUrl, pageNum) {
  const target = new URL(normalizeBaseUrl(baseUrl));
  target.hash = "";

  const params = target.searchParams;
  if (pageNum <= 1) {
    params.delete("page");
  } else {
    params.set("page", String(pageNum));
  }

  const query = params.toString();
  target.search = query ? `?${query}` : "";

  return target.toString();
}
|
||||||
|
|
||||||
/**
 * Heuristic stock check on a product-card HTML block.
 *
 * @param {*} block - Card HTML; falsy values are treated as an empty string.
 * @returns {boolean} `false` when the block mentions "sold out", "sold-out",
 *   "out of stock" or carries `data-available="false"` (case-insensitive);
 *   `true` otherwise.
 */
function vesselLooksInStock(block) {
  const text = String(block || "").toLowerCase();

  const soldOutPhrases = ["sold out", "sold-out", "out of stock"];
  const mentionsSoldOut = soldOutPhrases.some((phrase) => text.includes(phrase));
  const markedUnavailable = /\bdata-available=["']false["']/.test(text);

  return !(mentionsSoldOut || markedUnavailable);
}
|
||||||
|
|
||||||
/**
 * Extract the current price from a product-card HTML block.
 *
 * The last `<sale-price>` element containing a dollar amount wins. Otherwise
 * the last dollar amount inside `<price-list>` is used, after removing
 * `<compare-at-price>` (the crossed-out price).
 *
 * Amounts with thousands separators (e.g. `$1,299.99`) are recognized. The
 * previous pattern stopped at the comma and returned `$1`. Whitespace and
 * commas are stripped from the result, so the returned shape stays
 * `$<digits>[.<2 digits>]`.
 *
 * @param {*} block - Card HTML; falsy values are treated as an empty string.
 * @returns {string} Price such as `"$45.99"` or `"$1299.99"`, or `""` if none found.
 */
function vesselExtractPrice(block) {
  const s = String(block || "");

  // Digits are either comma-grouped thousands or a plain run; cents are optional.
  const PRICE_SOURCE = "\\$\\s*(?:\\d{1,3}(?:,\\d{3})+|\\d+)(?:\\.\\d{2})?";
  const tidy = (rawPrice) => rawPrice.replace(/[\s,]+/g, "");

  const saleTags = [...s.matchAll(/<sale-price\b[^>]*>([\s\S]*?)<\/sale-price>/gi)];
  for (let i = saleTags.length - 1; i >= 0; i--) {
    const txt = cleanText(decodeHtml(saleTags[i][1] || ""));
    const m = txt.match(new RegExp(PRICE_SOURCE));
    if (m) return tidy(m[0]);
  }

  // Fallback: read price-list but ignore compare-at (crossed-out).
  const withoutCompare = s.replace(/<compare-at-price\b[^>]*>[\s\S]*?<\/compare-at-price>/gi, "");
  const pl = withoutCompare.match(/<price-list\b[^>]*>([\s\S]*?)<\/price-list>/i);
  if (pl) {
    const txt = cleanText(decodeHtml(pl[1] || ""));
    const all = [...txt.matchAll(new RegExp(PRICE_SOURCE, "g"))];
    if (all.length) return tidy(all[all.length - 1][0]);
  }

  return "";
}
|
||||||
|
|
||||||
/**
 * Derive a SKU for a product card from its image URL, falling back to the card HTML.
 *
 * Resolution order:
 *   1. whatever `normalizeCspc(imgUrl)` returns, if non-empty
 *   2. a numeric image filename (1-11 digits, jpg/jpeg/png/webp) => `id:<digits>`
 *   3. a numeric `/cdn/shop/products|files/<digits>.<ext>` path in the block => `id:<digits>`
 *
 * @param {*} imgUrl - Image URL of the card.
 * @param {*} block - Card HTML.
 * @returns {string} The SKU, or `""` when nothing could be derived.
 */
function vesselExtractSkuFromImgOrBlock(imgUrl, block) {
  const fromCspc = normalizeCspc(imgUrl) || "";
  if (fromCspc) return fromCspc;

  let fileMatch = null;
  try {
    const { pathname } = new URL(String(imgUrl || ""));
    fileMatch = pathname.match(/\/(\d{1,11})\.(?:jpe?g|png|webp)$/i);
  } catch {
    // imgUrl is not a parsable absolute URL; fall through to the block scan.
  }
  if (fileMatch && fileMatch[1]) return `id:${fileMatch[1]}`;

  const html = String(block || "");
  const cdnMatch = html.match(/\/cdn\/shop\/(?:products|files)\/(\d{1,11})\.(?:jpe?g|png|webp)/i);
  return cdnMatch && cdnMatch[1] ? `id:${cdnMatch[1]}` : "";
}
|
||||||
|
|
||||||
/**
 * Convert one Vessel <product-card> HTML fragment into a product item.
 *
 * @param {string} block - Raw HTML of a single product card.
 * @param {string} base - Base URL used to resolve relative links and images.
 * @returns {{name: string, price: string, url: string, sku: string, img: string} | null}
 *   The parsed item, or null when the card fails the vesselLooksInStock()
 *   check, has no usable link, or has no name.
 */
function vesselCardToItem(block, base) {
  if (!vesselLooksInStock(block)) return null;

  // The first anchor with an href is taken as the product link.
  const anchor = block.match(/<a\b[^>]*href=["']([^"']+)["'][^>]*>/i);
  const rawHref = anchor ? anchor[1] : "";
  if (!rawHref) return null;

  let url;
  try {
    const absolute = new URL(decodeHtml(rawHref), base).toString();
    url = normalizeShopifyProductUrl(absolute);
  } catch {
    // An href that cannot be resolved/normalized makes the card unusable.
    return null;
  }

  // Title link text is preferred; the image alt text is the fallback.
  const titlePatterns = [
    /product-card__title[\s\S]*?<a\b[^>]*>([\s\S]*?)<\/a>/i,
    /<img\b[^>]*\balt=["']([^"']+)["']/i,
  ];
  let titleHit = null;
  for (const pattern of titlePatterns) {
    titleHit = block.match(pattern);
    if (titleHit) break;
  }

  const name = cleanText(decodeHtml(titleHit ? titleHit[1] : ""));
  if (!name) return null;

  const img = normalizeAbsUrl(extractFirstImgUrl(block, base));
  const price = vesselExtractPrice(block);

  // Prefer numeric filename SKU like 67424.jpg (works for 5-digit too)
  const sku = vesselExtractSkuFromImgOrBlock(img, block);

  return { name, price, url, sku, img };
}
|
||||||
|
|
||||||
/**
 * Parse a Vessel listing page into product items, de-duplicated by URL.
 *
 * The page is cut at every "<product-card" opening; each piece is handed to
 * vesselCardToItem(). When two cards share a URL the later one replaces the
 * earlier one, keeping the earlier position in the result.
 *
 * @param {string} html - Listing page HTML.
 * @param {object} [ctx] - Scan context; ctx.store.host overrides the default host.
 * @returns {Array<object>} Parsed items (empty when the page has no cards).
 */
function parseProductsVessel(html, ctx) {
  const page = String(html || "");
  const host = (ctx && ctx.store && ctx.store.host) || "vesselliquor.com";
  const base = `https://${host}/`;

  // Everything before the first card opening is page chrome; drop it.
  const [, ...cardBodies] = page.split(/<product-card\b/i);
  if (cardBodies.length === 0) return [];

  const byUrl = new Map();
  for (const body of cardBodies) {
    const item = vesselCardToItem("<product-card" + body, base);
    if (item) byUrl.set(item.url, item);
  }
  return [...byUrl.values()];
}
|
||||||
|
|
||||||
/**
 * Build the store definition for Vessel Liquor.
 *
 * @param {string} defaultUa - User-Agent string stored on the definition as `ua`.
 * @returns {object} Store descriptor: identity fields, the listing parser,
 *   the page-URL builder, and the categories to scan.
 */
function createStore(defaultUa) {
  // Both categories use the same discovery settings.
  const discovery = { discoveryStartPage: 20, discoveryStep: 10 };

  const categories = [
    {
      key: "whisky",
      label: "Whisky",
      startUrl: "https://vesselliquor.com/collections/whisky?sort_by=title-ascending&filter.v.availability=1",
      ...discovery,
    },
    {
      key: "rum-cane-spirit",
      label: "Rum / Cane Spirit",
      startUrl:
        "https://vesselliquor.com/collections/rum-cane-spirit?sort_by=title-ascending&filter.v.availability=1",
      ...discovery,
    },
  ];

  return {
    key: "vessel",
    name: "Vessel Liquor",
    host: "vesselliquor.com",
    ua: defaultUa,

    parseProducts: parseProductsVessel,
    makePageUrl: makeVesselPageUrl, // Shopify ?page=N (preserves filter/sort params)

    categories,
  };
}
|
||||||
|
|
||||||
module.exports = { createStore, parseProductsVessel };
|
module.exports = { createStore, parseProductsVessel };
|
||||||
|
|
|
||||||
|
|
@ -9,20 +9,20 @@ const { buildDbObject, writeJsonAtomic } = require("../tracker/db");
|
||||||
const { addCategoryResultToReport } = require("../tracker/report");
|
const { addCategoryResultToReport } = require("../tracker/report");
|
||||||
|
|
||||||
/**
 * Format a byte count for a fixed-width log column.
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} humanBytes(bytes), left-padded with spaces to 8 characters.
 */
function kbStr(bytes) {
  const label = humanBytes(bytes);
  return label.padStart(8, " ");
}
|
||||||
/**
 * Format a duration in milliseconds as seconds for a fixed-width log column.
 *
 * Durations that round to under 10s show one decimal ("1.2s"); longer ones
 * show whole seconds ("12s"). Non-finite input is treated as 0.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} The label, left-padded with spaces to 7 characters.
 */
function secStr(ms) {
  const seconds = Number.isFinite(ms) ? ms / 1000 : 0;
  const tenths = Math.round(seconds * 10) / 10;

  let label;
  if (tenths < 10) {
    label = `${tenths.toFixed(1)}s`;
  } else {
    label = `${Math.round(seconds)}s`;
  }
  return label.padStart(7, " ");
}
|
||||||
/**
 * Format a "current/total" page counter for log lines.
 *
 * @param {number} i - Current page number.
 * @param {number} total - Total number of pages.
 * @returns {string} `i` passed through padLeft() with the digit count of
 *   `total` as width, followed by "/" and `total`.
 */
function pageStr(i, total) {
  const width = String(total).length;
  const current = padLeft(i, width);
  return `${current}/${total}`;
}
|
||||||
/**
 * Format progress as a whole-number percentage for log lines.
 *
 * @param {number} done - Units completed.
 * @param {number} total - Units overall; a falsy total yields 0%.
 * @returns {string} The floored percentage passed through padLeft(…, 3), plus "%".
 */
function pctStr(done, total) {
  let pct = 0;
  if (total) {
    pct = Math.floor((done / total) * 100);
  }
  return `${padLeft(pct, 3)}%`;
}
|
||||||
|
|
||||||
const BASE = "https://shop.vintagespirits.ca";
|
const BASE = "https://shop.vintagespirits.ca";
|
||||||
|
|
@ -30,228 +30,250 @@ const SHOP_ID = "679-320"; // from your curl; can be made dynamic later
|
||||||
const IMG_BASE = "https://s.barnetnetwork.com/img/m/";
|
const IMG_BASE = "https://s.barnetnetwork.com/img/m/";
|
||||||
|
|
||||||
/**
 * Pick the price to display for an API product and format it as "$12.34".
 *
 * Candidates are tried in the order sale_price, net_price, regular_price;
 * the first one that converts to a finite number greater than zero wins.
 *
 * @param {object} it - Raw product record from the API (may be null/undefined).
 * @returns {string} Price with two decimals and a leading "$", or "" when no
 *   candidate is usable.
 */
function asMoneyFromApi(it) {
  // prefer explicit sale price when present
  const candidates = [it?.sale_price, it?.net_price, it?.regular_price];

  for (const raw of candidates) {
    const amount = Number(raw);
    if (Number.isFinite(amount) && amount > 0) {
      return `$${amount.toFixed(2)}`;
    }
  }
  return "";
}
|
||||||
|
|
||||||
/**
 * Resolve the image reference of an API product to an absolute URL.
 *
 * @param {object} it - Raw product record from the API; `image` is read.
 * @returns {string} "" when there is no image; the value unchanged when it is
 *   already http(s); "https:" + value for protocol-relative references;
 *   otherwise IMG_BASE followed by the value with leading slashes removed.
 */
function imgUrlFromApi(it) {
  const ref = String(it?.image || "").trim();
  if (ref === "") return "";

  const alreadyAbsolute = /^https?:\/\//i.test(ref);
  if (alreadyAbsolute) return ref;

  if (ref.startsWith("//")) return `https:${ref}`;

  // API provides "custom/goods/..."
  const relative = ref.replace(/^\/+/, "");
  return `${IMG_BASE}${relative}`;
}
|
||||||
|
|
||||||
/**
 * Convert one raw API product record into a tracker item.
 *
 * @param {object} it - Raw product record from the API.
 * @returns {{name: string, price: string, url: string, sku: string, img: string} | null}
 *   The item, or null when the record is missing, not available for sale,
 *   has a finite on-hand quantity of zero or less, or lacks a url/description.
 */
function vintageItemFromApi(it) {
  if (!it) return null;

  // stock gate
  if (!it.available_for_sale) return null;
  const quantity = Number(it.on_hand);
  const knownEmpty = Number.isFinite(quantity) && quantity <= 0;
  if (knownEmpty) return null;

  const url = String(it.url || "").trim();
  const name = String(it.description || "").trim();
  if (url === "" || name === "") return null;

  return {
    name,
    price: asMoneyFromApi(it),
    url,
    sku: normalizeCspc(it.cspcid || ""),
    img: imgUrlFromApi(it),
  };
}
|
||||||
|
|
||||||
/**
 * Build the products-API URL for one page of a category.
 *
 * @param {object} cat - Category definition; `vsCategory` and `vsSubCategory` are sent as filters.
 * @param {number} page - Page number, sent as the `p` parameter.
 * @returns {string} Absolute request URL including all query parameters.
 */
function makeApiUrl(cat, page) {
  const endpoint = new URL(`${BASE}/api/shop/${SHOP_ID}/products`);

  // Parameter names and fixed values are reproduced exactly as-is
  // (including the "varital_name" spelling).
  const query = [
    ["p", String(page)],
    ["show_on_web", "true"],
    ["sort_by", "desc"],
    ["category", cat.vsCategory], // e.g. "40 SPIRITS"
    ["sub_category", cat.vsSubCategory], // e.g. "RUM"
    ["varital_name", ""],
    ["no_item_found", "No item found."],
    ["avail_for_sale", "false"],
    // Random integer so every request URL is distinct.
    ["_dc", String(Math.floor(Math.random() * 1e10))],
  ];
  for (const [key, value] of query) {
    endpoint.searchParams.set(key, value);
  }

  return endpoint.toString();
}
|
||||||
|
|
||||||
/**
 * Fetch one page of the products API for the category in `ctx`.
 *
 * @param {object} ctx - Scan context; uses ctx.cat, ctx.store.ua and ctx.http.fetchJsonWithRetry.
 * @param {number} page - Page number to request.
 * @returns {Promise<object>} Whatever ctx.http.fetchJsonWithRetry resolves to.
 */
async function fetchVintagePage(ctx, page) {
  const requestUrl = makeApiUrl(ctx.cat, page);
  const requestTag = `vintage:api:${ctx.cat.key}:p${page}`;

  const options = {
    method: "GET",
    headers: {
      Accept: "*/*",
      Referer: ctx.cat.startUrl,
      Origin: BASE,
    },
    // Cookies are switched on here. The original author's note says they were
    // not required in testing and are meant for 403/empty responses.
    cookies: true,
  };

  return await ctx.http.fetchJsonWithRetry(requestUrl, requestTag, ctx.store.ua, options);
}
|
||||||
|
|
||||||
/**
 * Scan one category through the products API and record the result.
 *
 * Flow: fetch page 1 to learn the page count, fetch the remaining pages via
 * parallelMapStaggered, de-duplicate items by URL, merge them into the
 * previous DB, write the DB file, and append the outcome to `report`.
 *
 * If the first fetch fails, a warning is logged and an empty discovery set
 * goes through the same merge/write/report steps (scannedPages is 1).
 *
 * @param {object} ctx - Scan context (store, cat, config, logger, http, dbFile, catPrefixOut).
 * @param {object} prevDb - Previously stored DB, passed to mergeDiscoveredIntoDb.
 * @param {object} report - Run report; `categories` and `totals` are mutated.
 * @returns {Promise<void>}
 */
async function scanCategoryVintageApi(ctx, prevDb, report) {
  const startedAt = Date.now();

  // Shared tail of both outcomes: merge, persist, then update the report.
  const commit = (found, scannedPages, logSummary) => {
    const outcome = mergeDiscoveredIntoDb(prevDb, found, {
      storeLabel: ctx.store.name,
    });
    const { merged, newItems, updatedItems, removedItems, restoredItems } = outcome;

    const dbObj = buildDbObject(ctx, merged);
    writeJsonAtomic(ctx.dbFile, dbObj);

    const elapsed = Date.now() - startedAt;
    if (logSummary) {
      ctx.logger.ok(
        `${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`,
      );
    }

    report.categories.push({
      store: ctx.store.name,
      label: ctx.cat.label,
      key: ctx.cat.key,
      dbFile: ctx.dbFile,
      scannedPages,
      discoveredUnique: found.size,
      newCount: newItems.length,
      updatedCount: updatedItems.length,
      removedCount: removedItems.length,
      restoredCount: restoredItems.length,
      elapsedMs: elapsed,
    });
    report.totals.newCount += newItems.length;
    report.totals.updatedCount += updatedItems.length;
    report.totals.removedCount += removedItems.length;
    report.totals.restoredCount += restoredItems.length;

    addCategoryResultToReport(
      report,
      ctx.store.name,
      ctx.cat.label,
      newItems,
      updatedItems,
      removedItems,
      restoredItems,
    );
  };

  let first;
  try {
    first = await fetchVintagePage(ctx, 1);
  } catch (e) {
    ctx.logger.warn(`${ctx.catPrefixOut} | Vintage API fetch failed: ${e?.message || e}`);
    // NOTE(review): an empty discovery set is merged on failure; depending on
    // mergeDiscoveredIntoDb this may report known items as removed — confirm intended.
    commit(new Map(), 1, false);
    return;
  }

  const totalPages = Math.max(1, Number(first?.json?.paginator?.pages) || 1);
  // A null maxPages means "no cap".
  const scanPages = ctx.config.maxPages === null ? totalPages : Math.min(ctx.config.maxPages, totalPages);

  const capNote = scanPages !== totalPages ? ` (cap from ${totalPages})` : "";
  ctx.logger.ok(`${ctx.catPrefixOut} | Pages: ${scanPages}${capNote}`);

  const pages = Array.from({ length: scanPages }, (_, i) => i + 1);

  let donePages = 0;

  const perPageItems = await require("../utils/async").parallelMapStaggered(
    pages,
    ctx.config.concurrency,
    ctx.config.staggerMs,
    async (page, idx) => {
      // Page 1 was already fetched above; reuse it.
      const r = page === 1 ? first : await fetchVintagePage(ctx, page);
      const rawItems = Array.isArray(r?.json?.items) ? r.json.items : [];

      const items = rawItems.map((raw) => vintageItemFromApi(raw)).filter(Boolean);

      donePages++;
      const columns = [
        `${ctx.catPrefixOut}`,
        `Page ${pageStr(idx + 1, pages.length)}`,
        `${String(r.status || "").padEnd(3)}`,
        `${pctStr(donePages, pages.length)}`,
        `items=${padLeft(items.length, 3)}`,
        `bytes=${kbStr(r.bytes)}`,
        `${padRight(ctx.http.inflightStr(), 11)}`,
        `${secStr(r.ms)}`,
      ];
      ctx.logger.ok(columns.join(" | "));

      return items;
    },
  );

  // De-duplicate by URL; the last occurrence wins.
  const discovered = new Map();
  let dups = 0;
  for (const pageItems of perPageItems) {
    for (const item of pageItems) {
      if (discovered.has(item.url)) dups++;
      discovered.set(item.url, item);
    }
  }

  ctx.logger.ok(
    `${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}${dups ? ` (${dups} dups)` : ""}`,
  );

  commit(discovered, scanPages, true);
}
|
||||||
|
|
||||||
/**
 * Build the store definition for Vintage Spirits.
 *
 * This store is scanned through its products API (scanCategory) rather than
 * by parsing listing HTML.
 *
 * @param {string} defaultUa - User-Agent string stored on the definition as `ua`.
 * @returns {object} Store descriptor with identity fields, the category
 *   scanner and the categories to scan.
 */
function createStore(defaultUa) {
  // Every tracked sub-category lives under the same top-level API category.
  const vsCategory = "40 SPIRITS";

  const categories = [
    {
      key: "whisky-whiskey",
      label: "Whisky & Whiskey",
      startUrl: "https://shop.vintagespirits.ca/products?category=40+SPIRITS&sub_category=WHISKY+%26+WHISKEY",
      vsCategory,
      vsSubCategory: "WHISKY & WHISKEY",
    },
    {
      key: "single-malt-whisky",
      label: "Single Malt Whisky",
      startUrl: "https://shop.vintagespirits.ca/products?category=40+SPIRITS&sub_category=SINGLE+MALT+WHISKY",
      vsCategory,
      vsSubCategory: "SINGLE MALT WHISKY",
    },
    {
      key: "rum",
      label: "Rum",
      startUrl: "https://shop.vintagespirits.ca/products?category=40+SPIRITS&sub_category=RUM",
      vsCategory,
      vsSubCategory: "RUM",
    },
  ];

  return {
    key: "vintage",
    name: "Vintage Spirits",
    host: "shop.vintagespirits.ca",
    ua: defaultUa,
    scanCategory: scanCategoryVintageApi,
    categories,
  };
}
|
||||||
|
|
||||||
module.exports = { createStore };
|
module.exports = { createStore };
|
||||||
|
|
|
||||||
|
|
@ -6,120 +6,116 @@ const { makePageUrlShopifyQueryPage } = require("../utils/url");
|
||||||
const { needsSkuDetail, pickBetterSku, normalizeCspc } = require("../utils/sku");
|
const { needsSkuDetail, pickBetterSku, normalizeCspc } = require("../utils/sku");
|
||||||
|
|
||||||
/**
 * Pull a six-digit SKU from the end of a product URL or href.
 *
 * Matches "-<6 digits>" at the end of the string, optionally followed by a
 * trailing slash and/or a query string or fragment, e.g.
 * /products/<handle>-123456 or /collections/.../products/<handle>-123456.
 *
 * @param {string} hrefOrUrl - Absolute URL or relative href.
 * @returns {string} The six digits, or "" when the pattern is absent.
 */
function extractSkuFromUrlOrHref(hrefOrUrl) {
  const text = String(hrefOrUrl || "");
  const hit = /-(\d{6})(?:\/)?(?:[?#].*)?$/.exec(text);
  if (hit === null) return "";
  return hit[1];
}
|
||||||
|
|
||||||
/**
 * Pull a six-digit SKU out of a Willow Park product card's markup.
 *
 * @param {string} block - Raw HTML of the product card.
 * @returns {string} The six digits, or "" when neither pattern matches.
 */
function extractSkuFromWillowBlock(block) {
  const markup = String(block || "");

  const patterns = [
    // Image filename pattern:
    // /products/710296-Zaya-Gran-Reserva-16-Year_160x.png
    /\/products\/(\d{6})[-_]/i,
    // Generic fallback: six digits, then "-" or "_", then a letter.
    /\b(\d{6})[-_][A-Za-z]/,
  ];

  for (const pattern of patterns) {
    const hit = markup.match(pattern);
    if (hit) return hit[1];
  }
  return "";
}
|
||||||
|
|
||||||
/**
 * Reduce a Willow Park product URL to a canonical form.
 *
 * Drops the query string and fragment, and rewrites
 * /collections/<x>/products/<handle> to /products/<handle>.
 *
 * @param {string} raw - Absolute URL.
 * @returns {string} The canonical URL; when `raw` cannot be parsed as a URL,
 *   `raw` as a string ("" for falsy input).
 */
function canonicalizeWillowUrl(raw) {
  let parsed;
  try {
    parsed = new URL(String(raw));
  } catch {
    // Not a parseable absolute URL: hand back the input as text.
    return String(raw || "");
  }

  parsed.search = "";
  parsed.hash = "";

  const viaCollection = /^\/collections\/[^/]+\/products\/([^/]+)\/?$/i.exec(parsed.pathname);
  if (viaCollection) {
    parsed.pathname = `/products/${viaCollection[1]}`;
  }
  return parsed.toString();
}
|
||||||
|
|
||||||
// Prefer exact decimal from visually-hidden spans.
|
// Prefer exact decimal from visually-hidden spans.
|
||||||
// Fallback: reconstruct from $39<sup>99</sup>.
|
// Fallback: reconstruct from $39<sup>99</sup>.
|
||||||
/**
 * Extract the displayed price from a Willow Park product card.
 *
 * Tried in order:
 *   1. the visually-hidden span after "grid-product__price--current";
 *   2. the first visually-hidden span holding a "$x.xx" amount;
 *   3. a "$39<sup>99</sup>" split price, rebuilt as "$39.99" (commas removed);
 *   4. the first "$" amount anywhere in the card.
 *
 * @param {string} block - Raw HTML of the product card.
 * @returns {string} Price text with whitespace removed, or "" when none is found.
 */
function extractWillowCardPrice(block) {
  const markup = String(block || "");
  const compact = (text) => text.replace(/\s+/g, "");

  const hiddenSpanPatterns = [
    /grid-product__price--current[\s\S]*?<span\b[^>]*class=["']visually-hidden["'][^>]*>\s*(\$\s*[\d,]+\.\d{2})\s*<\/span>/i,
    /<span\b[^>]*class=["']visually-hidden["'][^>]*>\s*(\$\s*[\d,]+\.\d{2})\s*<\/span>/i,
  ];
  for (const pattern of hiddenSpanPatterns) {
    const exact = markup.match(pattern)?.[1];
    if (exact) return compact(exact);
  }

  const split = markup.match(/\$\s*([\d,]+)\s*<sup>\s*(\d{2})\s*<\/sup>/i);
  if (split) {
    const dollars = split[1].replace(/,/g, "");
    return `$${dollars}.${split[2]}`;
  }

  const loose = markup.match(/\$\s*[\d,]+(?:\.\d{2})?/);
  if (!loose) return "";
  return compact(loose[0]);
}
|
||||||
|
|
||||||
/**
 * Parse a Willow Park listing page into product items, de-duplicated by URL.
 *
 * The page is cut at each opening <div> whose class list contains
 * "grid-item" followed by "grid-product"; each slice runs to the next such
 * opening (or the end of the page) and is parsed as one card.
 *
 * @param {string} html - Listing page HTML.
 * @param {object} [ctx] - Scan context; ctx.store.host overrides the default host.
 * @param {string} [finalUrl] - Accepted for the parser signature; not used here.
 * @returns {Array<{name: string, price: string, url: string, sku: string, img: string, pid: string}>}
 */
function parseProductsWillowPark(html, ctx, finalUrl) {
  const page = String(html || "");
  const host = (ctx && ctx.store && ctx.store.host) || "www.willowpark.net";
  const base = `https://${host}/`;

  const cardOpenings = page.matchAll(
    /<div\b[^>]*class=["'][^"']*\bgrid-item\b[^"']*\bgrid-product\b[^"']*["'][^>]*>/gi,
  );
  const offsets = [];
  for (const opening of cardOpenings) {
    if (typeof opening.index === "number") offsets.push(opening.index);
  }

  const byUrl = new Map();
  offsets.forEach((start, position) => {
    const end = position + 1 < offsets.length ? offsets[position + 1] : page.length;
    const block = page.slice(start, end);

    // Prefer the collection-scoped product link, then any product link.
    const scopedHref = block.match(/<a\b[^>]*href=["']([^"']*\/collections\/[^"']*\/products\/[^"']+)["']/i)?.[1];
    const href = scopedHref || block.match(/<a\b[^>]*href=["']([^"']*\/products\/[^"']+)["']/i)?.[1];
    if (!href) return;

    let absolute;
    try {
      absolute = new URL(decodeHtml(href), base).toString();
    } catch {
      return;
    }
    const url = canonicalizeWillowUrl(absolute);

    const titleHtml =
      block.match(/<div\b[^>]*class=["'][^"']*\bgrid-product__title\b[^"']*["'][^>]*>([\s\S]*?)<\/div>/i)?.[1] ||
      "";
    const name = cleanText(decodeHtml(stripTags(titleHtml)));
    if (!name) return;

    const price = extractWillowCardPrice(block);
    const img = extractFirstImgUrl(block, base);
    const pid = block.match(/\bdata-product-id=["'](\d+)["']/i)?.[1] || "";

    // SKU sources, most specific first: raw href, canonical URL, card markup.
    const sku = extractSkuFromUrlOrHref(href) || extractSkuFromUrlOrHref(url) || extractSkuFromWillowBlock(block);

    // A later card with the same URL replaces the earlier one.
    byUrl.set(url, { name, price, url, sku, img, pid });
  });

  return [...byUrl.values()];
}
|
||||||
|
|
||||||
/**
 * Tell whether a Willow Park listing page is an "empty collection" page.
 *
 * @param {string} html - Listing page HTML.
 * @returns {boolean} True when any of the known empty-page markers is present.
 */
function willowIsEmptyListingPage(html) {
  const page = String(html || "");
  const emptyMarkers = [
    /Sorry,\s+there are no products in this collection\./i,
    /No products found/i,
    /collection--empty\b/i,
  ];
  return emptyMarkers.some((marker) => marker.test(page));
}
|
||||||
|
|
||||||
/* ---------------- Storefront GraphQL (token extracted from HTML) ---------------- */
|
/* ---------------- Storefront GraphQL (token extracted from HTML) ---------------- */
|
||||||
|
|
@ -137,102 +133,99 @@ query ($id: ID!) @inContext(country: CA) {
|
||||||
`;
|
`;
|
||||||
|
|
||||||
/**
 * Choose the most useful variant SKU of a Storefront API product.
 *
 * Only variants with a non-blank SKU are considered. Preference order:
 * a variant with quantityAvailable > 0, then one flagged availableForSale,
 * then any variant at all.
 *
 * @param {object} product - Product node; variants are read from product.variants.nodes.
 * @returns {string} The trimmed SKU, or "" when no variant has one.
 */
function pickBestVariantSku(product) {
  const variants = Array.isArray(product?.variants?.nodes) ? product.variants.nodes : [];
  if (variants.length === 0) return "";

  const skuOf = (variant) => String(variant?.sku || "").trim();

  // Preference tiers, strictest first.
  const tiers = [
    (variant) => Number(variant?.quantityAvailable) > 0,
    (variant) => Boolean(variant?.availableForSale),
    () => true,
  ];

  for (const qualifies of tiers) {
    const chosen = variants.find((variant) => qualifies(variant) && skuOf(variant));
    if (chosen) return skuOf(chosen);
  }
  return "";
}
|
||||||
|
|
||||||
/**
 * Pulls a storefront access token out of page HTML.
 *
 * Two sources are tried in order:
 *   1. the JSON body of `<script id="shopify-features">`, field `accessToken`;
 *   2. the `content` attribute of `<meta name="shopify-checkout-api-token">`
 *      (either attribute order).
 *
 * @param {string} html - Raw page HTML; falsy values are treated as an empty string.
 * @returns {string} The trimmed token, or "" when neither source yields one.
 */
function extractStorefrontTokenFromHtml(html) {
  const s = String(html || "");

  // 1) script#shopify-features JSON: {"accessToken":"..."}
  const featuresJson = s.match(/<script[^>]+id=["']shopify-features["'][^>]*>([\s\S]*?)<\/script>/i)?.[1];
  if (featuresJson) {
    try {
      const token = String(JSON.parse(featuresJson)?.accessToken || "").trim();
      if (token) return token;
    } catch {
      // Unparseable JSON is not fatal: fall through to the meta tag below.
    }
  }

  // 2) meta name="shopify-checkout-api-token", with name before or after content.
  const metaToken =
    s.match(/<meta[^>]+name=["']shopify-checkout-api-token["'][^>]+content=["']([^"']+)["']/i)?.[1] ??
    s.match(/<meta[^>]+content=["']([^"']+)["'][^>]+name=["']shopify-checkout-api-token["']/i)?.[1];
  return String(metaToken || "").trim();
}
|
||||||
|
|
||||||
/**
 * Returns the storefront token for this run, fetching the Willow Park homepage
 * and scraping it the first time it is needed.
 *
 * The token is memoized on `ctx._willowStorefrontToken`; a falsy cached value
 * (including the "" written after a rejected request) triggers a fresh fetch.
 *
 * @param {object} ctx - Scan context providing `http.fetchTextWithRetry` and `store.ua`.
 * @returns {Promise<string>} The storefront token.
 * @throws {Error} When the homepage HTML contains no recognizable token.
 */
async function willowGetStorefrontToken(ctx) {
  if (ctx._willowStorefrontToken) return ctx._willowStorefrontToken;

  const r = await ctx.http.fetchTextWithRetry("https://www.willowpark.net/", "willow:token", ctx.store.ua);
  const token = extractStorefrontTokenFromHtml(r?.text || "");
  if (!token) throw new Error("Willow Park: could not find storefront token in homepage HTML");

  ctx._willowStorefrontToken = token;
  return token;
}
|
||||||
|
|
||||||
/**
 * Sends one GraphQL request to the Willow Park storefront endpoint.
 *
 * @param {object} ctx - Scan context providing `http.fetchJsonWithRetry` and `store.ua`.
 * @param {string} label - Label passed through to the HTTP helper for this request.
 * @param {string} query - GraphQL query text.
 * @param {object} variables - GraphQL variables, serialized into the JSON body.
 * @returns {Promise<object>} Whatever `fetchJsonWithRetry` resolves with.
 */
async function willowGql(ctx, label, query, variables) {
  const token = await willowGetStorefrontToken(ctx);

  const r = await ctx.http.fetchJsonWithRetry(WILLOW_STOREFRONT_GQL_URL, label, ctx.store.ua, {
    method: "POST",
    headers: {
      Accept: "application/json",
      "content-type": "application/json",
      Origin: "https://www.willowpark.net",
      Referer: "https://www.willowpark.net/",
      "x-shopify-storefront-access-token": token,
    },
    body: JSON.stringify({ query, variables }),
  });

  // If token is rejected, clear so a future attempt re-fetches it once.
  if (r?.status === 401 || r?.status === 403) ctx._willowStorefrontToken = "";
  return r;
}
|
||||||
|
|
||||||
/**
 * Normalizes a SKU string returned by the storefront GraphQL API.
 *
 * If GQL returns a numeric SKU that isn't 6 digits, namespace it as id:<NUM>.
 * Keep 6-digit CSPC as-is. For non-numeric / already-namespaced formats, return as-is.
 *
 * @param {string} rawSku - SKU as returned by GQL; falsy values are treated as "".
 * @returns {string} The normalized SKU, or "" for blank input.
 */
function normalizeWillowGqlSku(rawSku) {
  const s = String(rawSku || "").trim();
  if (!s) return "";

  const cspc = normalizeCspc(s);
  if (cspc) return cspc; // 6-digit wins

  // Already carries a recognized namespace prefix: leave untouched.
  if (/^id:/i.test(s) || /^upc:/i.test(s) || /^u:/i.test(s)) return s;

  if (/^\d+$/.test(s)) return `id:${s}`;
  return s;
}
|
||||||
|
|
||||||
|
|
||||||
/**
 * Looks up a product's SKU through the storefront GraphQL API, keyed by product id.
 *
 * Results, including the empty string produced by a failed or non-200 lookup,
 * are memoized in `ctx._willowSkuCacheByPid`, so each id is queried at most once
 * per context.
 *
 * @param {object} ctx - Scan context; gains a `_willowSkuCacheByPid` Map on first use.
 * @param {string|number} pid - Numeric Shopify product id; blank values short-circuit.
 * @returns {Promise<string>} Normalized SKU, or "" when none could be determined.
 */
async function willowFetchSkuByPid(ctx, pid) {
  const id = String(pid || "").trim();
  if (!id) return "";

  if (!ctx._willowSkuCacheByPid) ctx._willowSkuCacheByPid = new Map();
  const cache = ctx._willowSkuCacheByPid;
  if (cache.has(id)) return cache.get(id);

  const gid = `gid://shopify/Product/${id}`;
  let sku = "";

  try {
    const r = await willowGql(ctx, `willow:gql:pid:${id}`, PRODUCT_BY_ID_QUERY, { id: gid });
    if (r?.status === 200) sku = normalizeWillowGqlSku(pickBestVariantSku(r?.json?.data?.product));
  } catch {
    // Best-effort lookup: any failure leaves the SKU empty.
    sku = "";
  }

  cache.set(id, sku);
  return sku;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -240,58 +233,58 @@ async function willowFetchSkuByPid(ctx, pid) {
|
||||||
* Budgeted to avoid exploding requests.
|
* Budgeted to avoid exploding requests.
|
||||||
*/
|
*/
|
||||||
async function willowRepairDiscoveredItems(ctx, discovered, prevDb) {
  // Mutates `discovered` (a Map keyed by URL) in place, filling in SKUs that
  // `needsSkuDetail` flags as still needing a lookup.
  // At most `ctx.config.willowparkGqlBudget` lookups run per call (default 200).
  const budget = Number.isFinite(ctx?.config?.willowparkGqlBudget) ? ctx.config.willowparkGqlBudget : 200;
  let used = 0;

  for (const [url, it] of discovered.entries()) {
    if (!it) continue;

    // Seed from prev DB so we don't repair repeatedly if we already learned a good SKU.
    const prev = prevDb?.byUrl?.get(url);
    if (prev) it.sku = pickBetterSku(it.sku, prev.sku);

    if (!needsSkuDetail(it.sku)) continue;
    if (used >= budget) break;

    const repaired = await willowFetchSkuByPid(ctx, it.pid);
    if (repaired) it.sku = pickBetterSku(repaired, it.sku);

    discovered.set(url, it);
    used++;
  }

  ctx.logger.ok(`${ctx.catPrefixOut} | Willow SKU repair (GQL): used=${used}/${budget}`);
}
|
||||||
|
|
||||||
/**
 * Builds the Willow Park store definition: identity fields, the parsing and
 * paging hooks, and the categories to scan.
 *
 * @param {string} defaultUa - User-Agent string stored on the definition as `ua`.
 * @returns {object} The store definition object.
 */
function createStore(defaultUa) {
  return {
    key: "willowpark",
    name: "Willow Park",
    host: "www.willowpark.net",
    ua: defaultUa,

    parseProducts: parseProductsWillowPark,
    makePageUrl: makePageUrlShopifyQueryPage,
    isEmptyListingPage: willowIsEmptyListingPage,

    // Hook called by scanner (add 1-line call in scanner before merge)
    repairDiscoveredItems: willowRepairDiscoveredItems,

    categories: [
      {
        key: "scotch",
        label: "Scotch",
        startUrl: "https://www.willowpark.net/collections/scotch?filter.v.availability=1",
        discoveryStartPage: 5,
      },
      {
        key: "rum",
        label: "Rum",
        startUrl: "https://www.willowpark.net/collections/rum?filter.v.availability=1",
        discoveryStartPage: 3,
      },
    ],
  };
}
|
||||||
|
|
||||||
// Public surface of this module: the store factory and the listing parser.
module.exports = { createStore, parseProductsWillowPark };
|
||||||
|
|
|
||||||
|
|
@ -16,84 +16,86 @@ const STATUS_W = 4;
|
||||||
// Width, in characters, of the progress column (see progCell).
const PROG_W = 4;
|
||||||
|
|
||||||
/**
 * Formats a byte count via `humanBytes`, left-padded with spaces to at least 8 characters.
 *
 * @param {number} bytes - Byte count handed to `humanBytes`.
 * @returns {string} The padded size string.
 */
function kbStr(bytes) {
  return humanBytes(bytes).padStart(8, " ");
}
|
||||||
|
|
||||||
/**
 * Formats a millisecond duration as seconds, left-padded with spaces to at least 7 characters.
 *
 * Durations that round to less than 10.0 s are shown with one decimal ("1.2s");
 * anything longer is shown as whole seconds ("13s"). Non-finite input is treated as 0.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} The padded duration string.
 */
function secStr(ms) {
  const seconds = Number.isFinite(ms) ? ms / 1000 : 0;
  const tenths = Math.round(seconds * 10) / 10;
  const label = tenths < 10 ? `${tenths.toFixed(1)}s` : `${Math.round(seconds)}s`;
  return label.padStart(7, " ");
}
|
||||||
|
|
||||||
/**
 * Formats `done / total` as a whole-number percentage, rounded down.
 *
 * @param {number} done - Completed count.
 * @param {number} total - Total count; a falsy total yields 0%.
 * @returns {string} The percentage, passed through `padLeft(pct, 3)`, followed by "%".
 */
function pctStr(done, total) {
  const pct = total ? Math.floor((done / total) * 100) : 0;
  return `${padLeft(pct, 3)}%`;
}
|
||||||
|
|
||||||
/**
 * Formats a page counter as "i/total", padding `i` to the digit width of `total`.
 *
 * @param {number} i - Current page number.
 * @param {number} total - Total number of pages.
 * @returns {string} The "i/total" string.
 */
function pageStr(i, total) {
  const leftW = String(total).length;
  return `${padLeft(i, leftW)}/${total}`;
}
|
||||||
|
|
||||||
/**
 * Renders the action column of a progress line via `padRightV` at width ACTION_W.
 *
 * @param {*} s - Action text; coerced with `String`.
 * @returns {string} The padded cell.
 */
function actionCell(s) {
  return padRightV(String(s), ACTION_W);
}
|
||||||
|
|
||||||
/**
 * Renders the status column of a progress line: padded to STATUS_W, colored
 * green when `okBool` is truthy and yellow otherwise.
 *
 * @param {object} logger - Logger providing `color(text, code)` and the `C` palette.
 * @param {string} statusRaw - Status text; a falsy value yields an uncolored padded blank.
 * @param {boolean} okBool - Selects the green palette entry over the yellow one.
 * @returns {string} The padded, possibly colored cell.
 */
function statusCell(logger, statusRaw, okBool) {
  const cell = padRightV(String(statusRaw || ""), STATUS_W);
  if (!statusRaw) return cell;

  const tint = okBool ? logger.C.green : logger.C.yellow;
  return logger.color(cell, tint);
}
|
||||||
|
|
||||||
/**
 * Renders the progress column of a progress line via `padLeftV` at width PROG_W.
 *
 * @param {*} v - Progress text; null/undefined is shown as "----".
 * @returns {string} The padded cell.
 */
function progCell(v) {
  const raw = String(v ?? "----");
  return padLeftV(raw, PROG_W);
}
|
||||||
|
|
||||||
/**
 * Emits one pipe-separated progress row through `logger.ok`:
 * category prefix | action | status | progress | free-form remainder.
 *
 * @param {object} logger - Logger providing `ok`, plus what `statusCell` needs.
 * @param {object} ctx - Category context; `ctx.catPrefixOut` starts the row.
 * @param {string} action - Text for the action column.
 * @param {string} statusRaw - Text for the status column.
 * @param {boolean} statusOk - Whether the status is rendered as a success.
 * @param {*} progVal - Value for the progress column.
 * @param {string} rest - Trailing text appended verbatim.
 * @returns {void}
 */
function logProgressLine(logger, ctx, action, statusRaw, statusOk, progVal, rest) {
  logger.ok(
    `${ctx.catPrefixOut} | ${actionCell(action)} | ${statusCell(logger, statusRaw, statusOk)} | ${progCell(progVal)} | ${rest}`,
  );
}
|
||||||
|
|
||||||
/**
 * Builds the helpers that render the aligned "store | category" prefix used on log lines.
 *
 * Column widths are the longest store name and longest category label across
 * `stores`, each with a minimum of 1.
 *
 * @param {object[]} stores - Store definitions with `name` and optional `categories[].label`.
 * @param {object} logger - Logger providing `bold(text)`.
 * @returns {{catPrefixRaw: Function, catPrefixOut: Function, width: number, catW: number}}
 *   `catPrefixRaw(store, cat)` gives the plain prefix, `catPrefixOut(store, cat)` the bold
 *   one; `width` is the store-column width and `catW` the category-column width.
 */
function makeCatPrefixers(stores, logger) {
  const storeW = Math.max(...stores.map((s) => String(s.name || "").length), 1);
  const catW = Math.max(...stores.flatMap((s) => (s.categories || []).map((c) => String(c.label || "").length)), 1);

  function catPrefixRaw(store, cat) {
    return `${padRight(String(store.name || ""), storeW)} | ${padRight(String(cat.label || ""), catW)}`;
  }

  function catPrefixOut(store, cat) {
    return logger.bold(catPrefixRaw(store, cat));
  }

  return { catPrefixRaw, catPrefixOut, width: storeW, catW };
}
|
||||||
|
|
||||||
/**
 * Assembles the per-category context object used by the scan.
 *
 * @param {object} store - Store definition; `store.key` forms part of the DB name.
 * @param {object} cat - Category definition; uses `cat.key` and `cat.startUrl`.
 * @param {Function} catPrefixOutFn - Called as `(store, cat)` to produce the log prefix.
 * @param {object} config - Run configuration; `config.dbDir` is passed to `dbPathFor`.
 * @returns {{store: object, cat: object, baseUrl: *, dbFile: *, catPrefixOut: *}}
 */
function buildCategoryContext(store, cat, catPrefixOutFn, config) {
  const baseUrl = normalizeBaseUrl(cat.startUrl);
  const dbFile = dbPathFor(`${store.key}__${cat.key}`, baseUrl, config.dbDir);
  return {
    store,
    cat,
    baseUrl,
    dbFile,
    catPrefixOut: catPrefixOutFn(store, cat),
  };
}
|
||||||
|
|
||||||
/**
 * Reads the category's previous database from `ctx.dbFile` and logs its entry count.
 *
 * @param {object} logger - Logger providing `ok` and `dim`.
 * @param {object} ctx - Category context providing `dbFile` and `catPrefixOut`.
 * @returns {object} The value returned by `readDb`; its `byUrl.size` is read for the log line.
 */
function loadCategoryDb(logger, ctx) {
  const prevDb = readDb(ctx.dbFile);
  logger.ok(`${ctx.catPrefixOut} | DB loaded: ${padLeft(prevDb.byUrl.size, 5)} | ${logger.dim(ctx.dbFile)}`);
  return prevDb;
}
|
||||||
|
|
||||||
/**
 * Applies the category's optional `allowUrl` filter to a parsed item.
 *
 * @param {object} ctx - Category context; `ctx.cat.allowUrl` is the optional filter.
 * @param {string} finalUrl - URL of the page the item was parsed from.
 * @param {object} item - The parsed item.
 * @returns {*} `true` when no filter function is configured; otherwise the filter's
 *   own return value, uncoerced, from `allowUrl(item, ctx, finalUrl)`.
 */
function shouldTrackItem(ctx, finalUrl, item) {
  const allow = ctx?.cat?.allowUrl;
  if (typeof allow !== "function") return true;
  return allow(item, ctx, finalUrl);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -104,285 +106,309 @@ function shouldTrackItem(ctx, finalUrl, item) {
|
||||||
* inside links that often have "page-numbers" class, but works even without it.
|
* inside links that often have "page-numbers" class, but works even without it.
|
||||||
*/
|
*/
|
||||||
function extractTotalPagesFromPaginationHtml(html) {
  // Scans every href in the HTML for a page number in one of three URL shapes
  // and returns the largest one found.
  const s = String(html || "");

  const pageNumberPatterns = [
    /href=["'][^"']*\/page\/(\d+)\/[^"']*["']/gi, // /page/23/
    /href=["'][^"']*[?&]paged=(\d+)[^"']*["']/gi, // ?paged=23
    /href=["'][^"']*[?&]page=(\d+)[^"']*["']/gi, // Shopify: ?page=23
  ];

  let max = 0;
  for (const pattern of pageNumberPatterns) {
    for (const m of s.matchAll(pattern)) {
      const n = Number(m[1]);
      if (Number.isFinite(n) && n > max) max = n;
    }
  }

  // A lone "page 1" link is not evidence of pagination, so only a maximum above 1
  // is reported; 0 tells the caller nothing usable was found.
  return max > 1 ? max : 0;
}
|
||||||
|
|
||||||
/**
 * Fetches a listing page and counts the products the store's parser finds on it.
 *
 * Any failure (fetch error or parser exception) is reported as an empty page
 * rather than thrown, so discovery probing can treat it as a miss.
 *
 * @param {object} ctx - Category context providing `http`, `config` and `store`.
 * @param {string} url - Listing page URL to fetch.
 * @returns {Promise<{ok: boolean, items: number}>} `ok` is true when at least one item was parsed.
 */
async function pageHasProducts(ctx, url) {
  const { http, config } = ctx;
  try {
    const { text, finalUrl } = await http.fetchTextWithRetry(url, "discover", ctx.store.ua);

    if (typeof ctx.store.isEmptyListingPage === "function") {
      if (ctx.store.isEmptyListingPage(text, ctx, url)) return { ok: false, items: 0 };
    }

    const parser = ctx.store.parseProducts || config.defaultParseProducts;
    // Pass the post-redirect URL, as the other parser call sites in this file do.
    const items = parser(text, ctx, finalUrl).length;
    return { ok: items > 0, items };
  } catch {
    // Best-effort probe: an unreachable or unparseable page counts as "no products".
    return { ok: false, items: 0 };
  }
}
|
||||||
|
|
||||||
/**
 * Probes one listing page during page-count discovery and logs the outcome.
 *
 * @param {object} ctx - Category context; `ctx.cat.discoveryDelayMs`, when finite and
 *   positive, is slept before the request.
 * @param {string} baseUrl - Category base URL from which the page URL is built.
 * @param {number} pageNum - Page number to probe.
 * @param {object} state - Discovery state, used only to render the progress column.
 * @returns {Promise<{ok: boolean, items: number}>} The result of `pageHasProducts`.
 */
async function probePage(ctx, baseUrl, pageNum, state) {
  const url = makePageUrlForCtx(ctx, baseUrl, pageNum);

  const delay = Number.isFinite(ctx?.cat?.discoveryDelayMs) ? ctx.cat.discoveryDelayMs : 0;
  if (delay > 0) await sleep(delay);

  // Time only the probe itself, not the optional delay above.
  const t0 = Date.now();
  const r = await pageHasProducts(ctx, url);
  const ms = Date.now() - t0;

  const prog = discoverProg(state);

  logProgressLine(
    ctx.logger,
    ctx,
    `Discover probe page=${padLeftV(pageNum, 4)}`,
    r.ok ? "OK" : "MISS",
    Boolean(r.ok),
    prog,
    `items=${padLeftV(r.items, 3)} | bytes=${padLeftV("", 8)} | ${padRightV(ctx.http.inflightStr(), 11)} | ${secStr(ms)}`,
  );

  return r;
}
|
||||||
|
|
||||||
/**
 * Renders discovery progress as a percentage for the progress column.
 *
 * Outside the binary-search phase progress is reported as zero. During it,
 * progress is the share of the initial search span that has been eliminated.
 *
 * @param {object} state - Discovery state with `phase`, `loOk`, `hiMiss`, `binInitialSpan`.
 * @returns {string} The percentage text.
 */
function discoverProg(state) {
  // NOTE(review): the other branches yield 4-character cells; confirm this
  // literal's leading padding matches the real file.
  if (!state || state.phase !== "binary") return " 0%";

  const span = Math.max(1, state.hiMiss - state.loOk);
  const initial = Math.max(1, state.binInitialSpan);
  if (initial <= 1) return "100%";

  // A span of 1 means loOk and hiMiss are adjacent, i.e. the search is finished.
  const remaining = Math.max(0, span - 1);
  const total = Math.max(1, initial - 1);
  const pct = Math.max(0, Math.min(100, Math.floor(((total - remaining) / total) * 100)));
  return `${padLeft(pct, 3)}%`;
}
|
||||||
|
|
||||||
/**
 * Binary-searches for the last page that still has products.
 *
 * @param {object} ctx - Category context passed through to `probePage`.
 * @param {string} baseUrl - Category base URL.
 * @param {number} loOk - A page number treated as having products.
 * @param {number} hiMiss - A page number treated as having none.
 * @param {object} state - Discovery state; updated in place so progress logging
 *   reflects the current bounds.
 * @returns {Promise<number>} The highest page number found to have products.
 */
async function binaryFindLastOk(ctx, baseUrl, loOk, hiMiss, state) {
  state.phase = "binary";
  state.loOk = loOk;
  state.hiMiss = hiMiss;
  state.binInitialSpan = Math.max(1, hiMiss - loOk);

  while (hiMiss - loOk > 1) {
    const mid = loOk + Math.floor((hiMiss - loOk) / 2);

    // Publish the bounds before probing: probePage reads them for its progress cell.
    state.loOk = loOk;
    state.hiMiss = hiMiss;

    const pm = await probePage(ctx, baseUrl, mid, state);
    if (pm.ok) loOk = mid;
    else hiMiss = mid;
  }

  state.loOk = loOk;
  state.hiMiss = hiMiss;
  return loOk;
}
|
||||||
|
|
||||||
/**
 * Determines how many listing pages a category has.
 *
 * Strategy:
 *   1. Fetch page 1 once; if it is empty or has no parseable items, report 1 page.
 *   2. Read the largest page number out of page 1's pagination links and trust it,
 *      unless it claims 2 or fewer pages while page 1 holds 40 or more items.
 *   3. Otherwise probe `guess`, then keep stepping forward by `step` until a page
 *      misses, and binary-search between the last hit and that miss.
 *
 * @param {object} ctx - Category context providing `http`, `store`, `cat`, `config`, `logger`.
 * @param {string} baseUrl - Category base URL.
 * @param {number} guess - First page to probe when pagination cannot be trusted;
 *   values below 2 (or non-finite) are raised to 2.
 * @param {number} step - Forward stride between probes; non-finite or non-positive
 *   values fall back to 1 so the loop always advances.
 * @returns {Promise<number>} The discovered page count (at least 1).
 */
async function discoverTotalPagesFast(ctx, baseUrl, guess, step) {
  const state = { phase: "pre", loOk: 1, hiMiss: 2, binInitialSpan: 0 };

  // Fetch page 1 ONCE and try to extract total pages from pagination.
  const url1 = makePageUrlForCtx(ctx, baseUrl, 1);
  const t0 = Date.now();
  const { text: html1, ms, finalUrl } = await ctx.http.fetchTextWithRetry(url1, "discover", ctx.store.ua);
  const pMs = Date.now() - t0;

  if (typeof ctx.store.isEmptyListingPage === "function") {
    if (ctx.store.isEmptyListingPage(html1, ctx, url1)) {
      ctx.logger.warn(
        `${ctx.store.name} | ${ctx.cat.label} | Page 1 did not look like a listing. Defaulting to 1.`,
      );
      return 1;
    }
  }

  const parser = ctx.store.parseProducts || ctx.config.defaultParseProducts;
  const items1 = parser(html1, ctx, finalUrl).length;

  logProgressLine(
    ctx.logger,
    ctx,
    `Discover probe page=${padLeftV(1, 4)}`,
    items1 > 0 ? "OK" : "MISS",
    items1 > 0,
    discoverProg(state),
    `items=${padLeftV(items1, 3)} | bytes=${padLeftV("", 8)} | ${padRightV(ctx.http.inflightStr(), 11)} | ${secStr(ms || pMs)}`,
  );

  if (items1 <= 0) {
    ctx.logger.warn(`${ctx.store.name} | ${ctx.cat.label} | Page 1 did not look like a listing. Defaulting to 1.`);
    return 1;
  }

  const extracted = extractTotalPagesFromPaginationHtml(html1);

  // Shopify collections with filters often lie about pagination.
  // If page 1 looks full, don't trust a tiny extracted count.
  if (extracted && extracted >= 1) {
    const looksTruncated = extracted <= 2 && items1 >= 40; // Shopify default page size ≈ 48

    if (!looksTruncated) {
      ctx.logger.ok(`${ctx.catPrefixOut} | Total pages (from pagination): ${extracted}`);
      return extracted;
    }

    ctx.logger.warn(
      `${ctx.catPrefixOut} | Pagination says ${extracted} but page looks full; falling back to probe`,
    );
  }

  // Fallback to probing if pagination parse fails.
  // Sanitize the inputs first: a NaN guess or a zero/negative/NaN step would
  // otherwise probe a nonsense page or never advance.
  const firstProbe = Number.isFinite(guess) ? Math.max(2, guess) : 2;
  const stride = Number.isFinite(step) && step > 0 ? step : 1;

  const pg = await probePage(ctx, baseUrl, firstProbe, state);
  if (!pg.ok) return await binaryFindLastOk(ctx, baseUrl, 1, firstProbe, state);

  let lastOk = firstProbe;
  while (true) {
    const probe = lastOk + stride;
    const pr = await probePage(ctx, baseUrl, probe, state);
    if (!pr.ok) return await binaryFindLastOk(ctx, baseUrl, lastOk, probe, state);
    lastOk = probe;

    // Safety cap: stop stepping forward if the site never reports an empty page.
    if (lastOk > 5000) {
      ctx.logger.warn(
        `${ctx.store.name} | ${ctx.cat.label} | Discovery hit safety cap at ${lastOk}. Using that as total pages.`,
      );
      return lastOk;
    }
  }
}
|
||||||
|
|
||||||
async function discoverAndScanCategory(ctx, prevDb, report) {
|
async function discoverAndScanCategory(ctx, prevDb, report) {
|
||||||
const { logger, config } = ctx;
|
const { logger, config } = ctx;
|
||||||
|
|
||||||
if (typeof ctx.store.scanCategory === "function") {
|
if (typeof ctx.store.scanCategory === "function") {
|
||||||
await ctx.store.scanCategory(ctx, prevDb, report);
|
await ctx.store.scanCategory(ctx, prevDb, report);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const t0 = Date.now();
|
const t0 = Date.now();
|
||||||
|
|
||||||
const guess = Number.isFinite(ctx.cat.discoveryStartPage) ? ctx.cat.discoveryStartPage : config.discoveryGuess;
|
const guess = Number.isFinite(ctx.cat.discoveryStartPage) ? ctx.cat.discoveryStartPage : config.discoveryGuess;
|
||||||
const step = Number.isFinite(ctx.cat.discoveryStep) ? ctx.cat.discoveryStep : config.discoveryStep;
|
const step = Number.isFinite(ctx.cat.discoveryStep) ? ctx.cat.discoveryStep : config.discoveryStep;
|
||||||
|
|
||||||
const totalPages = await discoverTotalPagesFast(ctx, ctx.baseUrl, guess, step);
|
const totalPages = await discoverTotalPagesFast(ctx, ctx.baseUrl, guess, step);
|
||||||
const scanPages = config.maxPages === null ? totalPages : Math.min(config.maxPages, totalPages);
|
const scanPages = config.maxPages === null ? totalPages : Math.min(config.maxPages, totalPages);
|
||||||
|
|
||||||
logger.ok(`${ctx.catPrefixOut} | Pages: ${scanPages}${scanPages !== totalPages ? ` (cap from ${totalPages})` : ""}`);
|
logger.ok(
|
||||||
|
`${ctx.catPrefixOut} | Pages: ${scanPages}${scanPages !== totalPages ? ` (cap from ${totalPages})` : ""}`,
|
||||||
|
);
|
||||||
|
|
||||||
const pages = [];
|
const pages = [];
|
||||||
for (let p = 1; p <= scanPages; p++) pages.push(makePageUrlForCtx(ctx, ctx.baseUrl, p));
|
for (let p = 1; p <= scanPages; p++) pages.push(makePageUrlForCtx(ctx, ctx.baseUrl, p));
|
||||||
|
|
||||||
let donePages = 0;
|
let donePages = 0;
|
||||||
|
|
||||||
const pageConc = Number.isFinite(ctx.cat.pageConcurrency) ? ctx.cat.pageConcurrency : config.concurrency;
|
const pageConc = Number.isFinite(ctx.cat.pageConcurrency) ? ctx.cat.pageConcurrency : config.concurrency;
|
||||||
const pageStagger = Number.isFinite(ctx.cat.pageStaggerMs) ? ctx.cat.pageStaggerMs : config.staggerMs;
|
const pageStagger = Number.isFinite(ctx.cat.pageStaggerMs) ? ctx.cat.pageStaggerMs : config.staggerMs;
|
||||||
|
|
||||||
const perPageItems = await parallelMapStaggered(pages, pageConc, pageStagger, async (pageUrl, idx) => {
|
const perPageItems = await parallelMapStaggered(pages, pageConc, pageStagger, async (pageUrl, idx) => {
|
||||||
const pnum = idx + 1;
|
const pnum = idx + 1;
|
||||||
|
|
||||||
const { text: html, ms, bytes, status, finalUrl } = await ctx.http.fetchTextWithRetry(
|
const {
|
||||||
pageUrl,
|
text: html,
|
||||||
`page:${ctx.store.key}:${ctx.cat.key}:${pnum}`,
|
ms,
|
||||||
ctx.store.ua
|
bytes,
|
||||||
);
|
status,
|
||||||
|
finalUrl,
|
||||||
|
} = await ctx.http.fetchTextWithRetry(pageUrl, `page:${ctx.store.key}:${ctx.cat.key}:${pnum}`, ctx.store.ua);
|
||||||
|
|
||||||
const parser = ctx.store.parseProducts || config.defaultParseProducts;
|
const parser = ctx.store.parseProducts || config.defaultParseProducts;
|
||||||
const itemsRaw = parser(html, ctx, finalUrl);
|
const itemsRaw = parser(html, ctx, finalUrl);
|
||||||
|
|
||||||
const items = [];
|
const items = [];
|
||||||
for (const it of itemsRaw) {
|
for (const it of itemsRaw) {
|
||||||
if (shouldTrackItem(ctx, finalUrl, it)) items.push(it);
|
if (shouldTrackItem(ctx, finalUrl, it)) items.push(it);
|
||||||
}
|
}
|
||||||
|
|
||||||
donePages++;
|
donePages++;
|
||||||
logProgressLine(
|
logProgressLine(
|
||||||
logger,
|
logger,
|
||||||
ctx,
|
ctx,
|
||||||
`Page ${pageStr(pnum, pages.length)}`,
|
`Page ${pageStr(pnum, pages.length)}`,
|
||||||
status ? String(status) : "",
|
status ? String(status) : "",
|
||||||
status >= 200 && status < 400,
|
status >= 200 && status < 400,
|
||||||
pctStr(donePages, pages.length),
|
pctStr(donePages, pages.length),
|
||||||
`items=${padLeft(items.length, 3)} | bytes=${kbStr(bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(ms)}`
|
`items=${padLeft(items.length, 3)} | bytes=${kbStr(bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(ms)}`,
|
||||||
);
|
);
|
||||||
|
|
||||||
return items;
|
return items;
|
||||||
});
|
});
|
||||||
|
|
||||||
const discovered = new Map();
|
const discovered = new Map();
|
||||||
let dups = 0;
|
let dups = 0;
|
||||||
for (const arr of perPageItems) {
|
for (const arr of perPageItems) {
|
||||||
for (const it of arr) {
|
for (const it of arr) {
|
||||||
if (discovered.has(it.url)) dups++;
|
if (discovered.has(it.url)) dups++;
|
||||||
discovered.set(it.url, it);
|
discovered.set(it.url, it);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (typeof ctx.store.repairDiscoveredItems === "function") {
|
if (typeof ctx.store.repairDiscoveredItems === "function") {
|
||||||
await ctx.store.repairDiscoveredItems(ctx, discovered, prevDb);
|
await ctx.store.repairDiscoveredItems(ctx, discovered, prevDb);
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}${dups ? ` (${dups} dups)` : ""}`);
|
logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}${dups ? ` (${dups} dups)` : ""}`);
|
||||||
|
|
||||||
const { merged, newItems, updatedItems, removedItems, restoredItems, metaChangedItems } =
|
const { merged, newItems, updatedItems, removedItems, restoredItems, metaChangedItems } = mergeDiscoveredIntoDb(
|
||||||
mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel: ctx.store.name });
|
prevDb,
|
||||||
|
discovered,
|
||||||
|
{ storeLabel: ctx.store.name },
|
||||||
|
);
|
||||||
|
|
||||||
const dbObj = buildDbObject(ctx, merged);
|
const dbObj = buildDbObject(ctx, merged);
|
||||||
writeJsonAtomic(ctx.dbFile, dbObj);
|
writeJsonAtomic(ctx.dbFile, dbObj);
|
||||||
|
|
||||||
logger.ok(`${ctx.catPrefixOut} | DB saved: ${logger.dim(ctx.dbFile)} (${dbObj.count} items)`);
|
logger.ok(`${ctx.catPrefixOut} | DB saved: ${logger.dim(ctx.dbFile)} (${dbObj.count} items)`);
|
||||||
|
|
||||||
const elapsed = Date.now() - t0;
|
const elapsed = Date.now() - t0;
|
||||||
logger.ok(
|
logger.ok(
|
||||||
`${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`
|
`${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`,
|
||||||
);
|
);
|
||||||
|
|
||||||
report.categories.push({
|
report.categories.push({
|
||||||
store: ctx.store.name,
|
store: ctx.store.name,
|
||||||
label: ctx.cat.label,
|
label: ctx.cat.label,
|
||||||
key: ctx.cat.key,
|
key: ctx.cat.key,
|
||||||
dbFile: ctx.dbFile,
|
dbFile: ctx.dbFile,
|
||||||
scannedPages: scanPages,
|
scannedPages: scanPages,
|
||||||
discoveredUnique: discovered.size,
|
discoveredUnique: discovered.size,
|
||||||
newCount: newItems.length,
|
newCount: newItems.length,
|
||||||
updatedCount: updatedItems.length,
|
updatedCount: updatedItems.length,
|
||||||
removedCount: removedItems.length,
|
removedCount: removedItems.length,
|
||||||
restoredCount: restoredItems.length,
|
restoredCount: restoredItems.length,
|
||||||
metaChangedCount: metaChangedItems.length,
|
metaChangedCount: metaChangedItems.length,
|
||||||
elapsedMs: elapsed,
|
elapsedMs: elapsed,
|
||||||
});
|
});
|
||||||
report.totals.newCount += newItems.length;
|
report.totals.newCount += newItems.length;
|
||||||
report.totals.updatedCount += updatedItems.length;
|
report.totals.updatedCount += updatedItems.length;
|
||||||
report.totals.removedCount += removedItems.length;
|
report.totals.removedCount += removedItems.length;
|
||||||
report.totals.restoredCount += restoredItems.length;
|
report.totals.restoredCount += restoredItems.length;
|
||||||
report.totals.metaChangedCount += metaChangedItems.length;
|
report.totals.metaChangedCount += metaChangedItems.length;
|
||||||
|
|
||||||
addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems);
|
addCategoryResultToReport(
|
||||||
|
report,
|
||||||
|
ctx.store.name,
|
||||||
|
ctx.cat.label,
|
||||||
|
newItems,
|
||||||
|
updatedItems,
|
||||||
|
removedItems,
|
||||||
|
restoredItems,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = { makeCatPrefixers, buildCategoryContext, loadCategoryDb, discoverAndScanCategory };
|
module.exports = { makeCatPrefixers, buildCategoryContext, loadCategoryDb, discoverAndScanCategory };
|
||||||
|
|
|
||||||
|
|
@ -8,87 +8,87 @@ const { normalizeSkuKey } = require("../utils/sku");
|
||||||
const { priceToNumber } = require("../utils/price");
|
const { priceToNumber } = require("../utils/price");
|
||||||
|
|
||||||
/**
 * Create `dir`, including any missing parent directories.
 *
 * Uses `fs.mkdirSync` with `recursive: true`, so calling it on a directory
 * that already exists is not an error.
 *
 * @param {string} dir - Directory path to create.
 * @returns {void}
 */
function ensureDir(dir) {
	fs.mkdirSync(dir, { recursive: true });
}
|
||||||
|
|
||||||
/**
 * Build the path of the JSON DB file for a category, creating `dbDir` if needed.
 *
 * The file name has the form `<safeKey>__<hash>.json`, where:
 *  - `safeKey` is `key` with every run of characters outside `[a-zA-Z0-9_-]`
 *    replaced by a single "-";
 *  - `hash` is the first 8 hex characters of the SHA-1 digest of `baseUrl`.
 *
 * @param {string} key - Category key used as the readable part of the file name.
 * @param {string} baseUrl - Source URL; only its hash ends up in the file name.
 * @param {string} dbDir - Directory that holds the DB files (created as a side effect).
 * @returns {string} Path of the DB file inside `dbDir`.
 */
function dbPathFor(key, baseUrl, dbDir) {
	ensureDir(dbDir);
	const hash = crypto.createHash("sha1").update(String(baseUrl)).digest("hex").slice(0, 8);
	const safeKey = String(key).replace(/[^a-zA-Z0-9_-]+/g, "-");
	return path.join(dbDir, `${safeKey}__${hash}.json`);
}
|
||||||
|
|
||||||
/**
 * Load a category DB file into a Map keyed by item URL.
 *
 * Only entries of `obj.items` whose `url` is a string starting with "http" are kept.
 * Each kept entry is normalized to `{ name, price, sku, url, img, removed }`:
 * the text fields are coerced to strings (empty string when missing), `img` falls
 * back to the `image` or `thumb` field and is trimmed, and `removed` is coerced
 * to a boolean.
 *
 * This is deliberately best-effort: a missing file or invalid JSON yields an
 * empty map instead of throwing.
 *
 * @param {string} file - Path of the JSON DB file.
 * @returns {{ byUrl: Map<string, object> }} Items indexed by URL.
 */
function readDb(file) {
	const byUrl = new Map();
	try {
		const txt = fs.readFileSync(file, "utf8");
		const obj = JSON.parse(txt);
		if (obj && Array.isArray(obj.items)) {
			for (const it of obj.items) {
				if (it && typeof it.url === "string" && it.url.startsWith("http")) {
					byUrl.set(it.url, {
						name: String(it.name || ""),
						price: String(it.price || ""),
						sku: String(it.sku || ""),
						url: it.url,
						img: String(it.img || it.image || it.thumb || "").trim(),
						removed: Boolean(it.removed),
					});
				}
			}
		}
	} catch {
		// ignore missing or parse errors
	}
	return { byUrl };
}
|
||||||
|
|
||||||
/**
 * Write `obj` as pretty-printed JSON (2-space indent, trailing newline) to `file`.
 *
 * The content is first written to `<file>.tmp` and then renamed over `file`, so
 * `file` itself is never opened for writing. The parent directory is created
 * if it does not exist.
 *
 * If writing or renaming fails, the temporary file is removed (best effort) and
 * the original error is rethrown, so a failed run does not leave a stale
 * `<file>.tmp` behind.
 *
 * @param {string} file - Destination path.
 * @param {*} obj - Value to serialize with `JSON.stringify`.
 * @returns {void}
 * @throws Whatever `JSON.stringify`, `fs.writeFileSync` or `fs.renameSync` throws.
 */
function writeJsonAtomic(file, obj) {
	ensureDir(path.dirname(file));
	const tmp = `${file}.tmp`;
	try {
		fs.writeFileSync(tmp, JSON.stringify(obj, null, 2) + "\n", "utf8");
		fs.renameSync(tmp, file);
	} catch (err) {
		try {
			fs.unlinkSync(tmp);
		} catch {
			// temp file was never created or is already gone; nothing to clean up
		}
		throw err;
	}
}
|
||||||
|
|
||||||
/**
 * Build the serializable DB object for one store category.
 *
 * Items are taken from `merged`, sorted by `name` with `localeCompare`, and reduced
 * to `{ name, price, sku, url, img, removed }`. `count` is the number of entries
 * in `merged` (removed items included).
 *
 * @param {object} ctx - Category context; reads `ctx.store.host`, `ctx.store.name`,
 *   `ctx.cat.key`, `ctx.cat.label` and `ctx.baseUrl`.
 * @param {Map<string, object>} merged - Items keyed by URL.
 * @returns {object} DB object (format `version: 6`) ready to be written as JSON.
 */
function buildDbObject(ctx, merged) {
	// Label used only for SKU normalization below; falls back to the host when the store has no name.
	const storeLabel = ctx?.store?.name || ctx?.store?.host || "";

	return {
		version: 6,
		store: ctx.store.host,
		storeLabel: ctx.store.name,
		category: ctx.cat.key,
		categoryLabel: ctx.cat.label,
		source: ctx.baseUrl,
		updatedAt: new Date().toISOString(),
		count: merged.size,
		// Spread into a fresh array first so sorting does not touch the Map's own values order.
		items: [...merged.values()]
			.sort((a, b) => (a.name || "").localeCompare(b.name || ""))
			.map((it) => ({
				name: it.name,
				price: it.price || "",
				// IMPORTANT: keep real 6-digit when present; otherwise store stable u:hash(store|url)
				sku: normalizeSkuKey(it.sku, { storeLabel, url: it.url }) || "",
				url: it.url,
				img: String(it.img || "").trim(),
				removed: Boolean(it.removed),
			})),
	};
}
|
||||||
|
|
||||||
/**
 * List the `.json` files directly inside `dbDir` (no recursion).
 *
 * Only regular files are returned; directories and other entry types are skipped
 * even if their name ends in ".json". If the directory cannot be read, the
 * result is an empty array rather than an error.
 *
 * @param {string} dbDir - Directory to scan.
 * @returns {string[]} Paths built as `path.join(dbDir, name)`.
 */
function listDbFiles(dbDir) {
	const out = [];
	try {
		for (const ent of fs.readdirSync(dbDir, { withFileTypes: true })) {
			if (!ent.isFile()) continue;
			const name = ent.name || "";
			if (!name.endsWith(".json")) continue;
			out.push(path.join(dbDir, name));
		}
	} catch {
		// ignore
	}
	return out;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -96,42 +96,43 @@ function listDbFiles(dbDir) {
|
||||||
* but DB rows remain raw/mined skuKey.
|
* but DB rows remain raw/mined skuKey.
|
||||||
*/
|
*/
|
||||||
function buildCheapestSkuIndexFromAllDbs(dbDir, { skuMap } = {}) {
	// Scans every DB file returned by listDbFiles(dbDir) and returns a Map from
	// canonical SKU to the lowest positive price seen and the store offering it.
	// When `skuMap.canonicalSku` is a function it is used to canonicalize each
	// skuKey; otherwise the normalized skuKey itself is the map key.
	const cheapest = new Map(); // canonSku -> { storeLabel, priceNum }

	for (const file of listDbFiles(dbDir)) {
		try {
			const obj = JSON.parse(fs.readFileSync(file, "utf8"));
			const storeLabel = String(obj?.storeLabel || obj?.store || "");
			const items = Array.isArray(obj?.items) ? obj.items : [];

			for (const it of items) {
				// Removed items no longer count as an available offer.
				if (it?.removed) continue;

				const skuKey = normalizeSkuKey(it?.sku || "", { storeLabel, url: it?.url || "" });
				if (!skuKey) continue;

				const canon =
					skuMap && typeof skuMap.canonicalSku === "function" ? skuMap.canonicalSku(skuKey) : skuKey;

				const p = priceToNumber(it?.price || "");
				if (!Number.isFinite(p) || p <= 0) continue;

				// Strictly-lower wins, so on equal prices the first store seen is kept.
				const prev = cheapest.get(canon);
				if (!prev || p < prev.priceNum) cheapest.set(canon, { storeLabel, priceNum: p });
			}
		} catch {
			// ignore parse errors
			// (entries already recorded from this file before the failure are kept)
		}
	}

	return cheapest;
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
ensureDir,
|
ensureDir,
|
||||||
dbPathFor,
|
dbPathFor,
|
||||||
readDb,
|
readDb,
|
||||||
writeJsonAtomic,
|
writeJsonAtomic,
|
||||||
buildDbObject,
|
buildDbObject,
|
||||||
listDbFiles,
|
listDbFiles,
|
||||||
buildCheapestSkuIndexFromAllDbs,
|
buildCheapestSkuIndexFromAllDbs,
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -5,207 +5,206 @@ const { normalizeSkuKey, normalizeCspc, pickBetterSku } = require("../utils/sku"
|
||||||
const { normPrice } = require("../utils/price");
|
const { normPrice } = require("../utils/price");
|
||||||
|
|
||||||
/**
 * Normalize an image URL value for storage.
 *
 * Returns the trimmed string, or "" when the value is empty, is a `data:` URI,
 * or still contains an unexpanded `{width}` placeholder (literal or
 * percent-encoded as `%7Bwidth%7D`).
 *
 * @param {*} v - Raw image value; falsy values are treated as "".
 * @returns {string} Usable image URL, or "" when there is none.
 */
function normImg(v) {
	const s = String(v || "").trim();
	if (!s) return "";
	if (/^data:/i.test(s)) return "";
	if (/%7Bwidth%7D|\{width\}/i.test(s)) return ""; // drop Shopify width-template URLs
	return s;
}
|
||||||
|
|
||||||
|
|
||||||
/**
 * Read the store label recorded on a DB object.
 *
 * Prefers `prevDb.storeLabel`, falls back to `prevDb.store`, and returns ""
 * when neither is set or `prevDb` itself is null/undefined.
 *
 * @param {object|null|undefined} prevDb - DB object to inspect.
 * @returns {string} Trimmed label, possibly empty.
 */
function dbStoreLabel(prevDb) {
	return String(prevDb?.storeLabel || prevDb?.store || "").trim();
}
|
||||||
|
|
||||||
/**
 * Merge the items discovered in the current run into the previous DB snapshot
 * and classify what changed.
 *
 * Matching is done by URL first. When a discovered URL is unknown, the item is
 * matched against previously active items by its non-synthetic skuKey (keys not
 * starting with "u:"), which lets a product keep its history when its URL changes.
 *
 * Classification of each discovered item:
 *  - no match at all                      -> `newItems`
 *  - same URL, previously removed         -> `restoredItems`
 *  - price differs from the previous row  -> `updatedItems`
 *  - only name/sku/img/URL differs        -> `metaChangedItems`
 * Previously active items that were neither discovered nor matched by skuKey are
 * flagged `removed: true` in `merged` and reported in `removedItems`.
 *
 * @param {{ byUrl: Map<string, object> }} prevDb - Previous snapshot; not mutated
 *   (`merged` starts as a copy of `prevDb.byUrl`).
 * @param {Map<string, object>} discovered - Items found this run, keyed by URL.
 * @param {{ storeLabel?: string }} [options] - `storeLabel` is used when
 *   normalizing SKUs; falls back to the label recorded on `prevDb`.
 * @returns {{ merged: Map<string, object>, newItems: object[], updatedItems: object[],
 *   removedItems: object[], restoredItems: object[], metaChangedItems: object[] }}
 * @throws {Error} When no store label can be determined.
 */
function mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel } = {}) {
	const effectiveStoreLabel = String(storeLabel || dbStoreLabel(prevDb)).trim();
	if (!effectiveStoreLabel) {
		throw new Error(
			"mergeDiscoveredIntoDb: missing storeLabel; refusing to generate synthetic SKUs with fallback 'store'",
		);
	}

	// All SKU normalization in this merge goes through the same store label.
	function normalizeSkuForDb(raw, url) {
		return normalizeSkuKey(raw, { storeLabel: effectiveStoreLabel, url });
	}

	const merged = new Map(prevDb.byUrl);

	const newItems = [];
	const updatedItems = [];
	const removedItems = [];
	const restoredItems = [];
	const metaChangedItems = [];

	// Choose a deterministic "best" record among dup active SKU rows.
	// Prefer: more complete fields, then lexicographically smallest URL.
	function scoreItem(it) {
		if (!it) return 0;
		const name = String(it.name || "").trim();
		const price = String(it.price || "").trim();
		const url = String(it.url || "").trim();
		const img = String(it.img || "").trim();
		return (name ? 1 : 0) + (price ? 1 : 0) + (url ? 1 : 0) + (img ? 1 : 0);
	}

	function pickBetter({ url: urlA, item: a }, { url: urlB, item: b }) {
		const sa = scoreItem(a);
		const sb = scoreItem(b);
		if (sa !== sb) return sa > sb ? { url: urlA, item: a } : { url: urlB, item: b };
		// tie-breaker: stable + deterministic
		return String(urlA || "") <= String(urlB || "") ? { url: urlA, item: a } : { url: urlB, item: b };
	}

	// Index active items by non-synthetic skuKey (CSPC / id:* / upc:* / etc).
	// Also track *all* urls per skuKey to cleanup dupes.
	const prevBySkuKey = new Map(); // skuKey -> { url, item } (best)
	const prevUrlsBySkuKey = new Map(); // skuKey -> Set(urls)

	for (const [url, it] of prevDb.byUrl.entries()) {
		if (!it || it.removed) continue;

		const skuKey = normalizeSkuForDb(it.sku, url);
		if (!skuKey || /^u:/i.test(skuKey)) continue;

		let set = prevUrlsBySkuKey.get(skuKey);
		if (!set) prevUrlsBySkuKey.set(skuKey, (set = new Set()));
		set.add(url);

		const cur = prevBySkuKey.get(skuKey);
		const next = { url, item: it };
		if (!cur) prevBySkuKey.set(skuKey, next);
		else prevBySkuKey.set(skuKey, pickBetter(cur, next));
	}

	const matchedPrevUrls = new Set(); // old URLs we "found" via skuKey even if URL changed

	for (const [url, nowRaw] of discovered.entries()) {
		let prev = prevDb.byUrl.get(url);
		let prevUrlForThisItem = url;

		// URL not found in previous DB: try to match by non-synthetic skuKey.
		if (!prev) {
			const nowSkuKey = normalizeSkuForDb(nowRaw.sku, url);
			if (nowSkuKey && !/^u:/i.test(nowSkuKey)) {
				const hit = prevBySkuKey.get(nowSkuKey);
				if (hit && hit.url && hit.url !== url) {
					prev = hit.item;
					prevUrlForThisItem = hit.url;

					// Mark ALL prior URLs for this skuKey as matched, so we don't later "remove" them.
					const allOld = prevUrlsBySkuKey.get(nowSkuKey);
					if (allOld) {
						for (const u of allOld) matchedPrevUrls.add(u);
					} else {
						matchedPrevUrls.add(hit.url);
					}

					// Cleanup: remove any existing active duplicates for this skuKey from the merged map.
					// We'll re-add the chosen record at the new URL below.
					// NOTE(review): an old URL that is itself present in `discovered` this run is
					// also deleted here, and is only written back if its own record changed and it
					// is processed after this one. Confirm that dropping it is intended.
					if (allOld) {
						for (const u of allOld) {
							if (u !== url && merged.has(u)) merged.delete(u);
						}
					} else {
						if (merged.has(hit.url)) merged.delete(hit.url);
					}
				}
			}
		}

		// Truly new (no URL match, no skuKey match)
		if (!prev) {
			const nowSku = normalizeSkuForDb(nowRaw.sku, url);
			const now = {
				...nowRaw,
				sku: nowSku,
				img: normImg(nowRaw.img),
				removed: false,
			};
			newItems.push(now);
			merged.set(url, now);
			continue;
		}

		// If the previous record was removed and we found it by the SAME URL, keep current behavior (restored).
		if (prevUrlForThisItem === url && prev.removed) {
			const prevSku = normalizeSkuForDb(prev.sku, prev.url);
			const rawNowSku = normalizeSkuForDb(nowRaw.sku, url);
			const nowSku = pickBetterSku(rawNowSku, prevSku);

			const now = {
				...nowRaw,
				sku: nowSku,
				// Keep the previous image when this run did not yield a usable one.
				img: normImg(nowRaw.img) || normImg(prev.img),
				removed: false,
			};

			restoredItems.push({
				url,
				name: now.name || prev.name || "",
				price: now.price || prev.price || "",
				sku: now.sku || "",
			});

			merged.set(url, now);
			continue;
		}

		// Update-in-place (or URL-move-with-skuKey): update DB, report price changes normally.
		const prevPrice = normPrice(prev.price);
		const nowPrice = normPrice(nowRaw.price);

		const prevSku = normalizeSkuForDb(prev.sku, prev.url);
		const rawNowSku = normalizeSkuForDb(nowRaw.sku, url);
		const nowSku = pickBetterSku(rawNowSku, prevSku);

		const prevImg = normImg(prev.img);
		let nowImg = normImg(nowRaw.img);
		if (!nowImg) nowImg = prevImg;

		const nameChanged = String(prev.name || "") !== String(nowRaw.name || "");
		const priceChanged = prevPrice !== nowPrice;
		const skuChanged = prevSku !== nowSku;
		const imgChanged = prevImg !== nowImg;

		// Only rewrite the row when something actually differs (a URL move always counts).
		if (nameChanged || priceChanged || skuChanged || imgChanged || prevUrlForThisItem !== url) {
			merged.set(url, { ...nowRaw, sku: nowSku, img: nowImg, removed: false });
		}

		if (priceChanged) {
			updatedItems.push({
				url,
				name: nowRaw.name || prev.name || "",
				sku: nowSku || "",
				oldPrice: prev.price || "",
				newPrice: nowRaw.price || "",
			});
		} else if (nameChanged || skuChanged || imgChanged || prevUrlForThisItem !== url) {
			// Count non-price changes (SKU upgrades, name/img changes, or URL moves) as meaningful.
			metaChangedItems.push({
				url,
				name: nowRaw.name || prev.name || "",
				sku: nowSku || "",
			});
		}
	}

	for (const [url, prev] of prevDb.byUrl.entries()) {
		if (discovered.has(url)) continue;
		if (matchedPrevUrls.has(url)) continue; // de-dupe URL changes for skuKey items (and cleanup dupes)
		if (!prev.removed) {
			const removed = { ...prev, removed: true };
			merged.set(url, removed);
			removedItems.push({
				url,
				name: prev.name || "",
				price: prev.price || "",
				// NOTE(review): uses normalizeCspc here, whereas new/restored/updated rows report
				// the normalized skuKey. Confirm the difference is intended.
				sku: normalizeCspc(prev.sku) || "",
			});
		}
	}

	return { merged, newItems, updatedItems, removedItems, restoredItems, metaChangedItems };
}
|
||||||
|
|
||||||
module.exports = { mergeDiscoveredIntoDb };
|
module.exports = { mergeDiscoveredIntoDb };
|
||||||
|
|
|
||||||
|
|
@ -8,247 +8,279 @@ const { buildCheapestSkuIndexFromAllDbs } = require("./db");
|
||||||
const { loadSkuMap } = require("../utils/sku_map");
|
const { loadSkuMap } = require("../utils/sku_map");
|
||||||
|
|
||||||
/**
 * Format a duration in milliseconds as a right-aligned seconds string of width 7.
 *
 * Durations that round to less than 10.0 seconds are shown with one decimal
 * ("1.2s"); longer ones are rounded to whole seconds ("12s"). Non-finite input
 * is treated as 0.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Text left-padded with spaces to at least 7 characters.
 */
function secStr(ms) {
	const s = Number.isFinite(ms) ? ms / 1000 : 0;
	// Decide on the value rounded to tenths, so 9.96s prints as "10s" rather than "10.0s".
	const tenths = Math.round(s * 10) / 10;
	let out;
	if (tenths < 10) out = `${tenths.toFixed(1)}s`;
	else out = `${Math.round(s)}s`;
	return out.padStart(7, " ");
}
|
||||||
|
|
||||||
/**
 * Create an empty run report.
 *
 * `startedAt` is set to the current time; all counters start at 0 and all lists
 * start empty. Every call returns fresh arrays and objects.
 *
 * @returns {{ startedAt: Date, categories: object[], totals: object,
 *   newItems: object[], updatedItems: object[], removedItems: object[], restoredItems: object[] }}
 */
function createReport() {
	return {
		startedAt: new Date(),
		categories: [],
		totals: { newCount: 0, updatedCount: 0, removedCount: 0, restoredCount: 0, metaChangedCount: 0 },
		newItems: [],
		updatedItems: [],
		removedItems: [],
		restoredItems: [],
	};
}
|
||||||
|
|
||||||
/**
 * Append one category's item-level results to the run report.
 *
 * Every appended row is tagged with `catLabel` = "<storeName> | <catLabel>".
 * New, restored and removed rows carry `{ catLabel, name, price, sku, url }`;
 * updated rows carry `{ catLabel, name, sku, oldPrice, newPrice, url }`.
 * Missing `price`/`sku` values are stored as "".
 *
 * @param {object} report - Report to mutate; its `newItems`, `restoredItems`,
 *   `updatedItems` and `removedItems` arrays are appended to.
 * @param {string} storeName - Store display name.
 * @param {string} catLabel - Category display label.
 * @param {object[]} newItems - Items first seen in this run.
 * @param {object[]} updatedItems - Items whose price changed.
 * @param {object[]} removedItems - Items no longer found.
 * @param {object[]} restoredItems - Previously removed items that reappeared.
 * @returns {void}
 */
function addCategoryResultToReport(report, storeName, catLabel, newItems, updatedItems, removedItems, restoredItems) {
	const reportCatLabel = `${storeName} | ${catLabel}`;

	// Shared row shape for the new / restored / removed lists.
	const toRow = (it) => ({
		catLabel: reportCatLabel,
		name: it.name,
		price: it.price || "",
		sku: it.sku || "",
		url: it.url,
	});

	for (const it of newItems) report.newItems.push(toRow(it));

	for (const it of restoredItems) report.restoredItems.push(toRow(it));

	for (const u of updatedItems) {
		report.updatedItems.push({
			catLabel: reportCatLabel,
			name: u.name,
			sku: u.sku || "",
			oldPrice: u.oldPrice,
			newPrice: u.newPrice,
			url: u.url,
		});
	}

	for (const it of removedItems) report.removedItems.push(toRow(it));
}
|
||||||
|
|
||||||
/**
 * Render the end-of-run report as one printable string.
 *
 * Sections, in order: totals line, per-category table, new listings,
 * restored listings, removed listings, price changes.
 *
 * @param {object} report - Accumulated run report. Reads `startedAt`, `categories`,
 *   `totals`, `newItems`, `restoredItems`, `removedItems` and `updatedItems`.
 *   The report is not modified; item lists are sorted on copies.
 * @param {object} [opts]
 * @param {string} [opts.dbDir] - Forwarded to loadSkuMap / buildCheapestSkuIndexFromAllDbs.
 * @param {boolean} [opts.colorize] - Emit ANSI colors; defaults to whether stdout is a TTY.
 * @returns {string} The report text; every line ends with "\n".
 */
function renderFinalReport(report, { dbDir, colorize = Boolean(process.stdout && process.stdout.isTTY) } = {}) {
  const paint = (s, code) => color(s, code, colorize);

  // Load mapping for comparisons only
  const skuMap = loadSkuMap({ dbDir });

  // Cheapest index is keyed by canonical sku (mapped)
  const cheapestSku = buildCheapestSkuIndexFromAllDbs(dbDir, { skuMap });

  const endedAt = new Date();
  const durMs = endedAt - report.startedAt;

  const storesSet = new Set(report.categories.map((c) => c.store));
  const totalUnique = report.categories.reduce(
    (acc, c) => acc + (Number.isFinite(c.discoveredUnique) ? c.discoveredUnique : 0),
    0,
  );

  let out = "";
  const ln = (s = "") => {
    out += String(s) + "\n";
  };

  ln("");
  ln(paint("========== REPORT ==========", C.bold));
  ln(
    paint("[OK] ", C.green) +
      `Totals | Stores=${storesSet.size} | Categories=${report.categories.length} | Unique=${totalUnique} | New=${report.totals.newCount} | Restored=${report.totals.restoredCount} | Removed=${report.totals.removedCount} | PriceChanges=${report.totals.updatedCount} | Runtime=${secStr(
        durMs,
      )}`,
  );
  ln("");

  ln(paint("Per-category summary:", C.bold));
  const rows = report.categories.map((c) => ({
    cat: `${c.store} | ${c.label}`,
    pages: c.scannedPages,
    uniq: c.discoveredUnique,
    newC: c.newCount,
    resC: c.restoredCount,
    remC: c.removedCount,
    updC: c.updatedCount,
    ms: c.elapsedMs,
  }));

  // Category column: at least 8 wide, at most 48.
  const catW = Math.min(48, Math.max(...rows.map((r) => r.cat.length), 8));
  ln(
    `${padRight("Store | Category", catW)} ${padLeft("Pages", 5)} ${padLeft("Unique", 6)} ${padLeft("New", 4)} ${padLeft("Res", 4)} ${padLeft("Rem", 4)} ${padLeft("Upd", 4)} ${padLeft("Sec", 7)}`,
  );
  ln(`${"-".repeat(catW)} ----- ------ ---- ---- ---- ---- -------`);
  for (const r of rows) {
    ln(
      `${padRight(r.cat, catW)} ${padLeft(r.pages, 5)} ${padLeft(r.uniq, 6)} ${padLeft(r.newC, 4)} ${padLeft(r.resC, 4)} ${padLeft(r.remC, 4)} ${padLeft(r.updC, 4)} ${secStr(r.ms)}`,
    );
  }
  ln("");

  // Width of the "store | category" label column shared by all item sections (minimum 16).
  const reportLabelW = [report.newItems, report.restoredItems, report.updatedItems, report.removedItems].reduce(
    (widest, items) => items.reduce((w, x) => Math.max(w, x.catLabel.length), widest),
    16,
  );

  // Sort a copy so rendering never reorders the caller's report arrays.
  const byCatThenName = (items) =>
    [...items].sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name));

  // The store is the part of "store | category" before the first " | ".
  function storeFromCatLabel(catLabel) {
    return String(catLabel || "").split(" | ")[0] || "";
  }

  function skuInline(sku) {
    const s = normalizeCspc(sku);
    return s ? paint(` ${s}`, C.gray) : "";
  }

  // Key used to look an item up in the cheapest-sku index ("" when no sku key can be derived).
  function canonicalKeyForReportItem(catLabel, skuRaw, url) {
    const storeLabel = storeFromCatLabel(catLabel);
    const skuKey = normalizeSkuKey(skuRaw, { storeLabel, url });
    if (!skuKey) return "";
    return skuMap && typeof skuMap.canonicalSku === "function" ? skuMap.canonicalSku(skuKey) : skuKey;
  }

  // " (Cheaper at X)" when another store's indexed price is strictly lower than the current one.
  function cheaperAtInline(catLabel, skuRaw, url, currentPriceStr) {
    const canon = canonicalKeyForReportItem(catLabel, skuRaw, url);
    if (!canon) return "";

    const best = cheapestSku.get(canon);
    if (!best || !best.storeLabel) return "";

    const curStore = storeFromCatLabel(catLabel);
    if (!curStore || best.storeLabel === curStore) return "";

    const curP = priceToNumber(currentPriceStr);
    if (!Number.isFinite(curP)) return "";
    // A missing/NaN indexed price must not be reported as cheaper.
    if (!Number.isFinite(best.priceNum) || best.priceNum >= curP) return "";

    return paint(` (Cheaper at ${best.storeLabel})`, C.gray);
  }

  // " (Available at X)" when the index points at a different store than the item's own.
  function availableAtInline(catLabel, skuRaw, url) {
    const canon = canonicalKeyForReportItem(catLabel, skuRaw, url);
    if (!canon) return "";

    const best = cheapestSku.get(canon);
    if (!best || !best.storeLabel) return "";

    const curStore = storeFromCatLabel(catLabel);
    if (curStore && best.storeLabel === curStore) return "";

    return paint(` (Available at ${best.storeLabel})`, C.gray);
  }

  // Shared renderer for the NEW / RESTORED / REMOVED sections.
  function renderListing({ title, items, marker, accent, tagFor }) {
    if (!items.length) {
      ln(paint(`${title} (0)`, C.bold));
      ln("");
      return;
    }

    ln(paint(`${title} (${items.length})`, C.bold + accent));
    for (const it of byCatThenName(items)) {
      const price = it.price ? paint(it.price, C.cyan) : paint("(no price)", C.gray);
      const sku = String(it.sku || "");
      const tag = tagFor(it, sku);
      ln(
        `${paint(marker, accent)} ${padRight(it.catLabel, reportLabelW)} | ${paint(it.name, C.bold)}${skuInline(sku)} ${price}${tag}`,
      );
      ln(` ${paint(it.url, C.dim)}`);
    }
    ln("");
  }

  const cheaperTag = (it, sku) => cheaperAtInline(it.catLabel, sku, it.url, it.price || "");
  const availableTag = (it, sku) => availableAtInline(it.catLabel, sku, it.url);

  renderListing({ title: "NEW LISTINGS", items: report.newItems, marker: "+", accent: C.green, tagFor: cheaperTag });
  renderListing({ title: "RESTORED", items: report.restoredItems, marker: "R", accent: C.green, tagFor: cheaperTag });
  renderListing({ title: "REMOVED", items: report.removedItems, marker: "-", accent: C.yellow, tagFor: availableTag });

  if (report.updatedItems.length) {
    ln(paint(`PRICE CHANGES (${report.updatedItems.length})`, C.bold + C.cyan));

    for (const u of byCatThenName(report.updatedItems)) {
      const oldRaw = u.oldPrice || "";
      const newRaw = u.newPrice || "";

      const oldN = priceToNumber(oldRaw);
      const newN = priceToNumber(newRaw);

      const oldP = oldRaw ? paint(oldRaw, C.yellow) : paint("(no price)", C.gray);

      let newP = newRaw ? newRaw : "(no price)";
      let offTag = "";

      if (Number.isFinite(oldN) && Number.isFinite(newN)) {
        if (newN > oldN) {
          newP = paint(newP, C.red); // increase
        } else if (newN < oldN) {
          newP = paint(newP, C.green); // decrease
          const pct = salePctOff(oldRaw, newRaw);
          if (pct !== null) offTag = " " + paint(`[${pct}% Off]`, C.green);
        } else {
          newP = paint(newP, C.cyan);
        }
      } else {
        // At least one side is not a parseable price; no direction can be shown.
        newP = paint(newP, C.cyan);
      }

      const sku = String(u.sku || "");
      const cheapTag = cheaperAtInline(u.catLabel, sku, u.url, newRaw || "");

      ln(
        `${paint("~", C.cyan)} ${padRight(u.catLabel, reportLabelW)} | ${paint(u.name, C.bold)}${skuInline(sku)} ${oldP} ${paint("->", C.gray)} ${newP}${offTag}${cheapTag}`,
      );
      ln(` ${paint(u.url, C.dim)}`);
    }

    ln("");
  } else {
    ln(paint("PRICE CHANGES (0)", C.bold));
    ln("");
  }

  ln(paint("======== END REPORT ========", C.bold));

  return out;
}
|
||||||
|
|
||||||
module.exports = { createReport, addCategoryResultToReport, renderFinalReport };
|
module.exports = { createReport, addCategoryResultToReport, renderFinalReport };
|
||||||
|
|
|
||||||
|
|
@ -3,96 +3,89 @@
|
||||||
const { createReport } = require("./report");
|
const { createReport } = require("./report");
|
||||||
const { setTimeout: sleep } = require("timers/promises");
|
const { setTimeout: sleep } = require("timers/promises");
|
||||||
|
|
||||||
const {
|
const { makeCatPrefixers, buildCategoryContext, loadCategoryDb, discoverAndScanCategory } = require("./category_scan");
|
||||||
makeCatPrefixers,
|
|
||||||
buildCategoryContext,
|
|
||||||
loadCategoryDb,
|
|
||||||
discoverAndScanCategory,
|
|
||||||
} = require("./category_scan");
|
|
||||||
|
|
||||||
// Some sites will intermittently 403/429. We don't want a single category/store
|
// Some sites will intermittently 403/429. We don't want a single category/store
|
||||||
// to abort the entire run. Log and continue.
|
// to abort the entire run. Log and continue.
|
||||||
/**
 * Turn any thrown value into a loggable string.
 *
 * @param {*} e - The caught value (Error, string, or anything else).
 * @returns {string} "Unknown error" for falsy values, the string itself for strings,
 *   the stack when present, otherwise the message or the stringified value.
 */
function formatErr(e) {
  if (!e) return "Unknown error";
  if (typeof e === "string") return e;
  if (e.stack) return e.stack;
  // Error-like objects without a stack would otherwise print "[object Object]".
  if (typeof e.message === "string" && e.message) return e.message;
  return String(e);
}
|
||||||
|
|
||||||
/**
 * Scan every category of every store and collect the results into one report.
 *
 * Categories run on up to `config.categoryConcurrency` workers, but two categories
 * that share a host never run at the same time. A failing category is logged
 * with `logger.warn` and does not abort the run.
 *
 * @param {Array<object>} stores - Store definitions; each has a `categories` array.
 * @param {object} deps
 * @param {object} deps.config - Run configuration (concurrency, retries, paging limits, ...).
 * @param {object} deps.logger - Logger exposing `info` and `warn`.
 * @param {object} deps.http - HTTP client handed to each category context.
 * @returns {Promise<object>} The report created by createReport().
 */
async function runAllStores(stores, { config, logger, http }) {
  const report = createReport();
  const { catPrefixOut } = makeCatPrefixers(stores, logger);

  logger.info(`Debug=on`);
  logger.info(
    `Concurrency=${config.concurrency} StaggerMs=${config.staggerMs} Retries=${config.maxRetries} TimeoutMs=${config.timeoutMs}`,
  );
  logger.info(`DiscoveryGuess=${config.discoveryGuess} DiscoveryStep=${config.discoveryStep}`);
  logger.info(`MaxPages=${config.maxPages === null ? "none" : config.maxPages}`);
  logger.info(`CategoryConcurrency=${config.categoryConcurrency}`);

  const workItems = [];
  for (const store of stores) {
    for (const cat of store.categories) {
      const baseCtx = buildCategoryContext(store, cat, catPrefixOut, config);
      const ctx = { ...baseCtx, config, logger, http };
      const prevDb = loadCategoryDb(logger, ctx);
      workItems.push({ ctx, prevDb });
    }
  }

  // Host-level serialization: never run two categories from the same host concurrently.
  const maxWorkers = Math.min(config.categoryConcurrency, workItems.length);
  const queue = workItems.slice();
  const inflightHosts = new Set();

  // Serialization key for a work item; "" when the store has neither host nor key.
  const hostOf = (w) => String(w?.ctx?.store?.host || w?.ctx?.store?.key || "");

  async function runOne(w) {
    try {
      await discoverAndScanCategory(w.ctx, w.prevDb, report);
    } catch (e) {
      const storeName = w?.ctx?.store?.name || w?.ctx?.store?.host || "unknown-store";
      const catLabel = w?.ctx?.cat?.label || w?.ctx?.cat?.key || "unknown-category";

      // Keep it loud in logs, but do not fail the entire run.
      logger.warn(`Category failed (continuing): ${storeName} | ${catLabel}\n${formatErr(e)}`);
    }
  }

  async function worker() {
    while (queue.length > 0) {
      // Pick next item whose host isn't currently running. Items without a host
      // cannot be serialized, so they are always eligible; previously they were
      // never picked and the workers polled forever.
      const idx = queue.findIndex((w) => {
        const host = hostOf(w);
        return !host || !inflightHosts.has(host);
      });

      if (idx === -1) {
        // Nothing available right now; wait a bit.
        await sleep(50);
        continue;
      }

      const w = queue.splice(idx, 1)[0];
      const host = hostOf(w);

      if (host) inflightHosts.add(host);
      try {
        await runOne(w);
      } finally {
        if (host) inflightHosts.delete(host);
      }
    }
  }

  const workers = [];
  for (let i = 0; i < maxWorkers; i++) workers.push(worker());
  await Promise.all(workers);

  return report;
}
|
||||||
|
|
||||||
module.exports = { runAllStores };
|
module.exports = { runAllStores };
|
||||||
|
|
|
||||||
|
|
@ -1,19 +1,19 @@
|
||||||
"use strict";
|
"use strict";
|
||||||
|
|
||||||
// ANSI SGR escape sequences used to style terminal output.
const C = {
  reset: "\x1b[0m",
  dim: "\x1b[2m",
  bold: "\x1b[1m",
  red: "\x1b[31m",
  green: "\x1b[32m",
  yellow: "\x1b[33m",
  cyan: "\x1b[36m",
  gray: "\x1b[90m",
};
|
||||||
|
|
||||||
/**
 * Wrap a value in an ANSI style sequence followed by a reset.
 *
 * @param {*} s - Value to print; always converted with String().
 * @param {string} code - One or more concatenated escape sequences from `C`.
 * @param {boolean} enabled - When falsy the text is returned without any escapes.
 * @returns {string} The styled (or plain) text.
 */
function color(s, code, enabled) {
  if (!enabled) return String(s);
  return String(code || "") + String(s) + C.reset;
}
|
||||||
|
|
||||||
module.exports = { C, color };
|
module.exports = { C, color };
|
||||||
|
|
|
||||||
|
|
@ -1,86 +1,86 @@
|
||||||
"use strict";
|
"use strict";
|
||||||
|
|
||||||
/**
 * Parse a base-10 integer and clamp it into [min, max].
 *
 * @param {*} v - Raw value, typically a CLI or environment string.
 * @param {number|null} def - Returned when `v` does not parse. With a `null`
 *   default, a missing (`null`/`undefined`) `v` yields `null`.
 * @param {number} min - Lower bound, inclusive.
 * @param {number} max - Upper bound, inclusive.
 * @returns {number|null} The clamped integer, or `def`.
 */
function clampInt(v, def, min, max) {
  if (def === null && (v === null || v === undefined)) return null;
  const n = Number.parseInt(v ?? "", 10);
  if (!Number.isFinite(n)) return def;
  return Math.max(min, Math.min(max, n));
}
|
||||||
|
|
||||||
/**
 * Parse command-line arguments.
 *
 * Recognized flags: `--debug`/`-d`, `--max-pages N`, `--concurrency N`,
 * `--stagger-ms N` (alias `--staggerMs`), `--guess N`, `--step N`,
 * `--data-dir DIR` (alias `--dataDir`), `--report-dir DIR` (alias `--reportDir`).
 * A value flag is honored only when followed by a value that does not start
 * with "-"; otherwise it is ignored. When `--max-pages` is absent, the first
 * all-digit positional argument greater than zero is used (capped at 5000).
 *
 * @param {string[]} argv - Arguments without the node/script prefix.
 * @returns {{maxPages: ?number, debug: boolean, concurrency: ?number, staggerMs: ?number,
 *   guess: ?number, step: ?number, dataDir: ?string, reportDir: ?string}}
 *   Options that were not supplied are `null`.
 */
function parseArgs(argv) {
  let debug = false;
  const opts = {
    maxPages: null,
    concurrency: null,
    staggerMs: null,
    guess: null,
    step: null,
    dataDir: null,
    reportDir: null,
  };

  const intFlag = (key, min, max) => (raw) => {
    opts[key] = clampInt(raw, null, min, max);
  };
  const strFlag = (key) => (raw) => {
    opts[key] = String(raw);
  };

  // Flags that consume the following argument as their value.
  const valueFlags = new Map([
    ["--max-pages", intFlag("maxPages", 1, 5000)],
    ["--concurrency", intFlag("concurrency", 1, 64)],
    ["--stagger-ms", intFlag("staggerMs", 0, 5000)],
    ["--staggerMs", intFlag("staggerMs", 0, 5000)],
    ["--guess", intFlag("guess", 1, 5000)],
    ["--step", intFlag("step", 1, 500)],
    ["--data-dir", strFlag("dataDir")],
    ["--dataDir", strFlag("dataDir")],
    ["--report-dir", strFlag("reportDir")],
    ["--reportDir", strFlag("reportDir")],
  ]);

  const positional = [];

  for (let i = 0; i < argv.length; i++) {
    const a = argv[i];

    if (a === "--debug" || a === "-d") {
      debug = true;
      continue;
    }

    const apply = valueFlags.get(a);
    const next = argv[i + 1];
    if (apply && next && !String(next).startsWith("-")) {
      apply(next);
      i++; // the value has been consumed
      continue;
    }

    if (!String(a).startsWith("-")) positional.push(a);
  }

  if (opts.maxPages === null) {
    const cand = positional.find((x) => /^\d+$/.test(String(x)));
    if (cand) {
      const n = Number.parseInt(cand, 10);
      if (Number.isFinite(n) && n > 0) opts.maxPages = Math.min(n, 5000);
    }
  }

  const { maxPages, concurrency, staggerMs, guess, step, dataDir, reportDir } = opts;
  return { maxPages, debug, concurrency, staggerMs, guess, step, dataDir, reportDir };
}
|
||||||
|
|
||||||
module.exports = { clampInt, parseArgs };
|
module.exports = { clampInt, parseArgs };
|
||||||
|
|
|
||||||
|
|
@ -3,24 +3,24 @@
|
||||||
const { setTimeout: sleep } = require("timers/promises");
|
const { setTimeout: sleep } = require("timers/promises");
|
||||||
|
|
||||||
/**
 * Map `fn` over `arr` with a bounded number of concurrent workers and a delay
 * between task starts.
 *
 * Worker k (1-based) waits `staggerMs * (k - 1)` before its first task, and
 * every task except index 0 is preceded by a further `staggerMs` pause.
 *
 * @param {Array} arr - Input items.
 * @param {number} concurrency - Maximum number of workers; values below 1 or
 *   non-numeric values are treated as 1 so that no item is silently skipped.
 * @param {number} staggerMs - Delay in milliseconds; 0 disables staggering.
 * @param {function(*, number): *} fn - Called with (item, index); may be async.
 * @returns {Promise<Array>} Results in input order. Rejects if any `fn` call rejects.
 */
async function parallelMapStaggered(arr, concurrency, staggerMs, fn) {
  const out = new Array(arr.length);
  let next = 0; // index of the next unclaimed item, shared by all workers

  async function worker(workerId) {
    if (staggerMs > 0 && workerId > 1) await sleep(staggerMs * (workerId - 1));
    while (true) {
      const i = next++;
      if (i >= arr.length) return;
      if (staggerMs > 0 && i > 0) await sleep(staggerMs);
      out[i] = await fn(arr[i], i);
    }
  }

  // Always run at least one worker when there is work to do.
  const requested = Number(concurrency);
  const limit = requested >= 1 ? Math.ceil(requested) : 1;
  const w = Math.min(limit, arr.length);

  const workers = [];
  for (let i = 0; i < w; i++) workers.push(worker(i + 1));
  await Promise.all(workers);
  return out;
}
|
||||||
|
|
||||||
module.exports = { parallelMapStaggered };
|
module.exports = { parallelMapStaggered };
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,12 @@
|
||||||
"use strict";
|
"use strict";
|
||||||
|
|
||||||
/**
 * Format a byte count for display.
 *
 * @param {number} n - Number of bytes.
 * @returns {string} "0B" for non-finite or non-positive input, "<n>B" below 1024,
 *   otherwise kilobytes or megabytes with one decimal ("1.5KB", "2.0MB").
 */
function humanBytes(n) {
  if (!Number.isFinite(n) || n <= 0) return "0B";
  if (n < 1024) return `${n}B`;
  const kb = n / 1024;
  if (kb < 1024) return `${kb.toFixed(1)}KB`;
  const mb = kb / 1024;
  return `${mb.toFixed(1)}MB`;
}
|
||||||
|
|
||||||
module.exports = { humanBytes };
|
module.exports = { humanBytes };
|
||||||
|
|
|
||||||
|
|
@ -1,141 +1,137 @@
|
||||||
"use strict";
|
"use strict";
|
||||||
|
|
||||||
/**
 * Remove everything that looks like an HTML tag (`<...>`), leaving the text
 * between tags untouched.
 *
 * @param {*} s - Input; converted with String().
 * @returns {string} The input without tags.
 */
function stripTags(s) {
  return String(s).replace(/<[^>]*>/g, "");
}
|
||||||
|
|
||||||
/**
 * Reduce an HTML fragment to single-spaced plain text.
 *
 * Tags become a space (so adjacent elements do not run together), whitespace
 * runs collapse to one space, and the result is trimmed.
 *
 * @param {*} s - Input; converted with String().
 * @returns {string} The cleaned text.
 */
function cleanText(s) {
  return String(s)
    .replace(/<[^>]+>/g, " ")
    .replace(/\s+/g, " ")
    .trim();
}
|
||||||
|
|
||||||
/**
 * Decode the HTML character references this project cares about.
 *
 * Handles decimal (`&#65;`) and hexadecimal (`&#x41;`) references plus the
 * named entities amp, quot, apos, lt, gt, nbsp (decoded to a plain space),
 * laquo and raquo. Anything else is left as written.
 *
 * Decoding is a single pass, so text is never decoded twice: `&amp;lt;`
 * yields `&lt;`, not `<`.
 *
 * @param {*} s - Input; converted with String().
 * @returns {string} The decoded text.
 */
function decodeHtml(s) {
  const named = new Map([
    ["amp", "&"],
    ["quot", '"'],
    ["apos", "'"],
    ["lt", "<"],
    ["gt", ">"],
    ["nbsp", " "],
    ["laquo", "\u00AB"],
    ["raquo", "\u00BB"],
  ]);

  // fromCodePoint (unlike fromCharCode) handles astral code points such as emoji;
  // values outside the Unicode range are left undecoded instead of throwing.
  const fromCode = (match, digits, radix) => {
    const cp = Number.parseInt(digits, radix);
    return Number.isInteger(cp) && cp >= 0 && cp <= 0x10ffff ? String.fromCodePoint(cp) : match;
  };

  return String(s).replace(/&(?:#(\d+)|#x([0-9a-fA-F]+)|([a-z]+));/g, (match, dec, hex, name) => {
    if (dec !== undefined) return fromCode(match, dec, 10);
    if (hex !== undefined) return fromCode(match, hex, 16);
    return named.has(name) ? named.get(name) : match;
  });
}
|
||||||
|
|
||||||
/**
 * Escape regular-expression metacharacters so the text can be embedded in a
 * RegExp pattern and match literally.
 *
 * @param {*} s - Input; converted with String().
 * @returns {string} The input with each of . * + ? ^ $ { } ( ) | [ ] \ backslash-escaped.
 */
function escapeRe(s) {
  return String(s).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
|
||||||
|
|
||||||
/**
 * Read the value of one attribute out of a raw HTML tag string.
 *
 * Supports double-quoted, single-quoted and unquoted values; the attribute
 * name is matched case-insensitively. The name must not be preceded by a
 * word character, `-`, `:` or `.`, so looking up `src` does not match the
 * tail of `data-src` (a plain `\b` would, because `-` is a non-word char).
 *
 * @param {string} html - Tag markup (or any HTML fragment) to search.
 * @param {string} attrName - Attribute name to look for.
 * @returns {string} The first matching value, or "" when not present.
 */
function extractHtmlAttr(html, attrName) {
  const re = new RegExp(
    `(?<![\\w:.-])${escapeRe(attrName)}\\s*=\\s*(?:"([^"]*)"|'([^']*)'|([^\\s>]+))`,
    "i"
  );
  const m = re.exec(html);
  if (!m) return "";
  // Exactly one of the three capture groups participates in a match.
  return m[1] ?? m[2] ?? m[3] ?? "";
}
|
||||||
|
|
||||||
/**
 * Return the URL of the first candidate in a `srcset` attribute value.
 *
 * The value is split on commas, the first candidate's leading
 * whitespace-delimited token is taken as the URL, and one surrounding quote
 * character on either end is stripped.
 *
 * @param {*} srcset - Raw srcset value; falsy values are treated as "".
 * @returns {string} The first URL, or "" when the input is empty.
 */
function pickFirstUrlFromSrcset(srcset) {
  const s = String(srcset || "").trim();
  if (!s) return "";
  const first = (s.split(",")[0] || "").trim();
  const url = (first.split(/\s+/)[0] || "").trim();
  return url.replace(/^["']|["']$/g, "");
}
|
||||||
|
|
||||||
/**
 * Resolve a possibly relative or protocol-relative URL to an absolute one.
 *
 * Protocol-relative inputs (`//host/path`) are given an `https:` scheme.
 * The result is then parsed with `new URL`, relative to `baseUrl` when one
 * is supplied. If parsing fails, the (scheme-fixed) input is returned as-is.
 *
 * @param {*} raw - Candidate URL; falsy values are treated as "".
 * @param {string} [baseUrl] - Base used to resolve relative URLs.
 * @returns {string} Absolute URL when parseable, otherwise the trimmed input; "" for empty input.
 */
function normalizeMaybeRelativeUrl(raw, baseUrl) {
  const r = String(raw || "").trim();
  if (!r) return "";
  const u = r.startsWith("//") ? `https:${r}` : r;
  try {
    return baseUrl ? new URL(u, baseUrl).toString() : new URL(u).toString();
  } catch {
    // Unparseable: hand back what we have rather than dropping it.
    return u;
  }
}
|
||||||
|
|
||||||
/**
 * Replace a `{width}` (or URL-encoded `%7Bwidth%7D`) placeholder in an image
 * URL with a concrete pixel width.
 *
 * The width defaults to 400. When the tag carries a JSON `data-widths`
 * array, 400 is preferred, then 360, then the first listed width. Only
 * positive finite numbers from that array are considered, so a malformed
 * entry can never be injected into the URL.
 *
 * @param {*} url - URL that may contain a width placeholder.
 * @param {string} tag - Raw `<img ...>` markup the URL came from.
 * @returns {string} URL with every placeholder replaced; unchanged when there is none.
 */
function resolveShopifyWidthPlaceholder(url, tag) {
  const s = String(url || "");
  if (!/%7Bwidth%7D|\{width\}/i.test(s)) return s;

  // Pick a reasonable width from data-widths if available.
  let w = 400;
  const dw = extractHtmlAttr(tag, "data-widths");
  if (dw) {
    try {
      const parsed = JSON.parse(dw);
      const widths = (Array.isArray(parsed) ? parsed : [])
        .map(Number)
        .filter((n) => Number.isFinite(n) && n > 0);
      if (widths.length) {
        if (widths.includes(400)) w = 400;
        else if (widths.includes(360)) w = 360;
        else w = widths[0];
      }
    } catch {
      // data-widths is not valid JSON: keep the default width.
    }
  }

  return s
    .replace(/_%7Bwidth%7D(x)/gi, `_${w}$1`)
    .replace(/_\{width\}(x)/gi, `_${w}$1`)
    .replace(/%7Bwidth%7D/gi, String(w))
    .replace(/\{width\}/gi, String(w));
}
|
||||||
|
|
||||||
/**
 * Find the first `<img>` tag in an HTML fragment and return a usable
 * absolute image URL for it.
 *
 * Lazy-loading attributes are preferred over `src`. Inline `data:` URIs are
 * skipped. Non-srcset attributes holding a `{width}` template are skipped on
 * the first pass so that a concrete srcset URL can win; if nothing else is
 * found, a second pass accepts the template from `data-src`/`src` and forces
 * a width into it.
 *
 * @param {*} html - HTML fragment to search; falsy values are treated as "".
 * @param {string} [baseUrl] - Base used to resolve relative URLs.
 * @returns {string} Absolute image URL, or "" when none can be determined.
 */
function extractFirstImgUrl(html, baseUrl) {
  const s = String(html || "");
  const m = s.match(/<img\b[^>]*>/i);
  if (!m) return "";

  const tag = m[0];

  // Ordered by preference: lazy-load attributes first, plain src last.
  const attrs = ["data-src", "data-lazy-src", "data-original", "data-srcset", "srcset", "src"];

  for (const a of attrs) {
    let v = extractHtmlAttr(tag, a);
    if (!v) continue;

    v = decodeHtml(String(v)).trim();
    if (!v) continue;

    const isSrcset = a.toLowerCase().includes("srcset");
    if (isSrcset) v = pickFirstUrlFromSrcset(v);
    v = String(v || "").trim();
    if (!v) continue;

    if (/^data:/i.test(v)) continue;

    // If this attr is a template URL, prefer trying srcset next.
    if (!isSrcset && /%7Bwidth%7D|\{width\}/i.test(v)) continue;

    let abs = normalizeMaybeRelativeUrl(v, baseUrl);
    abs = resolveShopifyWidthPlaceholder(abs, tag);
    if (abs) return abs;
  }

  // Fallback: accept template URLs but force a width.
  for (const a of ["data-src", "src"]) {
    let v = extractHtmlAttr(tag, a);
    if (!v) continue;
    v = decodeHtml(String(v)).trim();
    if (!v || /^data:/i.test(v)) continue;
    let abs = normalizeMaybeRelativeUrl(v, baseUrl);
    abs = resolveShopifyWidthPlaceholder(abs, tag);
    if (abs) return abs;
  }

  return "";
}
|
||||||
|
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
stripTags,
|
stripTags,
|
||||||
cleanText,
|
cleanText,
|
||||||
decodeHtml,
|
decodeHtml,
|
||||||
escapeRe,
|
escapeRe,
|
||||||
extractHtmlAttr,
|
extractHtmlAttr,
|
||||||
extractFirstImgUrl,
|
extractFirstImgUrl,
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -1,21 +1,23 @@
|
||||||
"use strict";
|
"use strict";
|
||||||
|
|
||||||
/**
 * Normalize a price string for comparison by removing all whitespace.
 *
 * @param {*} p - Price text; falsy values are treated as "".
 * @returns {string} The price with leading, trailing and inner whitespace removed.
 */
function normPrice(p) {
  return String(p || "")
    .trim()
    .replace(/\s+/g, "");
}
|
||||||
|
|
||||||
/**
 * Parse a displayed price (e.g. "$1,299.99") into a number.
 *
 * Everything except digits and "." is discarded before parsing. Input with
 * no digits at all yields NaN; without that check `Number("")` would report
 * a missing price as 0.
 *
 * @param {*} p - Price text or number; null/undefined are treated as "".
 * @returns {number} The numeric price, or NaN when it cannot be parsed.
 */
function priceToNumber(p) {
  const digits = String(p ?? "").replace(/[^0-9.]/g, "");
  if (!/\d/.test(digits)) return NaN;
  const n = Number(digits);
  return Number.isFinite(n) ? n : NaN;
}
|
||||||
|
|
||||||
/**
 * Compute the whole-number percentage discount between two price strings.
 *
 * @param {*} oldPriceStr - Previous (higher) price text.
 * @param {*} newPriceStr - Current price text.
 * @returns {number|null} Rounded percent off, or null when either price is
 *   unparseable, the old price is not positive, or the price did not drop.
 */
function salePctOff(oldPriceStr, newPriceStr) {
  const oldN = priceToNumber(oldPriceStr);
  const newN = priceToNumber(newPriceStr);
  if (!Number.isFinite(oldN) || !Number.isFinite(newN) || oldN <= 0) return null;
  if (newN >= oldN) return null;
  return Math.round(((oldN - newN) / oldN) * 100);
}
|
||||||
|
|
||||||
module.exports = { normPrice, priceToNumber, salePctOff };
|
module.exports = { normPrice, priceToNumber, salePctOff };
|
||||||
|
|
|
||||||
117
src/utils/sku.js
117
src/utils/sku.js
|
|
@ -2,72 +2,71 @@
|
||||||
"use strict";
|
"use strict";
|
||||||
|
|
||||||
/**
 * 32-bit FNV-1a hash of a string, computed over its UTF-16 code units.
 *
 * The output feeds persisted synthetic SKU keys, so the algorithm must stay
 * exactly as is.
 *
 * @param {string} str - Text to hash.
 * @returns {string} Hash as 8 lowercase, zero-padded hex digits.
 */
function fnv1a32(str) {
  let h = 0x811c9dc5; // FNV offset basis
  for (let i = 0; i < str.length; i++) {
    h ^= str.charCodeAt(i);
    h = Math.imul(h, 0x01000193); // FNV prime, 32-bit wrapping multiply
  }
  // >>> 0 reinterprets the signed 32-bit result as unsigned.
  return (h >>> 0).toString(16).padStart(8, "0");
}
|
||||||
|
|
||||||
/**
 * Convert a short numeric id (1-6 digits) into a 6-digit, zero-padded CSPC code.
 *
 * @param {*} idDigits - Id text or number; falsy values are treated as "".
 * @returns {string} The padded 6-digit code, or "" when the input is not 1-6 digits.
 */
function idToCspc6(idDigits) {
  const s = String(idDigits || "").trim();
  if (!/^\d{1,6}$/.test(s)) return "";
  return s.padStart(6, "0");
}
|
||||||
|
|
||||||
|
|
||||||
/**
 * Extract the first standalone 6-digit run (a CSPC code) from a value.
 *
 * @param {*} v - Value to search; null/undefined are treated as "".
 * @returns {string} The 6-digit code, or "" when there is none.
 */
function normalizeCspc(v) {
  const m = String(v ?? "").match(/\b(\d{6})\b/);
  return m ? m[1] : "";
}
|
||||||
|
|
||||||
/**
 * Extract the first standalone run of 12-14 digits (a UPC/EAN/GTIN) from a value.
 *
 * @param {*} v - Value to search; null/undefined are treated as "".
 * @returns {string} The digit run, or "" when there is none.
 */
function normalizeUpcDigits(v) {
  const m = String(v ?? "").match(/\b(\d{12,14})\b/);
  return m ? m[1] : "";
}
|
||||||
|
|
||||||
// CHANGE: allow 1-11 digits so BCL 3-digit ids like id:141 are preserved
|
// CHANGE: allow 1-11 digits so BCL 3-digit ids like id:141 are preserved
|
||||||
/**
 * Extract the first standalone run of 1-11 digits from a value.
 *
 * The lower bound is 1 so that short ids such as "id:141" are kept.
 *
 * @param {*} v - Value to search; null/undefined are treated as "".
 * @returns {string} The digit run, or "" when there is none.
 */
function normalizeIdDigits(v) {
  const m = String(v ?? "").match(/\b(\d{1,11})\b/);
  return m ? m[1] : "";
}
|
||||||
|
|
||||||
// IMPORTANT: keep old behavior exactly (no lowercasing, no url canonicalization)
|
// IMPORTANT: keep old behavior exactly (no lowercasing, no url canonicalization)
|
||||||
/**
 * Build a synthetic `u:<hash>` SKU key for a product that has no real SKU.
 *
 * The hash input is `<storeLabel>|<url>` exactly as given (no lowercasing,
 * no URL canonicalization) so previously generated keys remain stable.
 *
 * @param {{storeLabel?: string, url?: string}} params - Store label
 *   (defaults to "store") and product URL.
 * @returns {string} `u:` followed by the 8-hex-digit hash, or "" when url is empty.
 */
function makeSyntheticSkuKey({ storeLabel, url }) {
  const store = String(storeLabel || "store");
  const u = String(url || "");
  if (!u) return "";
  return `u:${fnv1a32(`${store}|${u}`)}`;
}
|
||||||
|
|
||||||
/* ---------------- NEW: SKU quality helpers ---------------- */
|
/* ---------------- NEW: SKU quality helpers ---------------- */
|
||||||
|
|
||||||
/**
 * Score how trustworthy a SKU key is, for choosing between two candidates.
 *
 * @param {*} v - SKU key; null/undefined are treated as "".
 * @returns {number} 0 = missing or synthetic (`u:`), 1 = some other explicit
 *   string, 2 = tagged `upc:` / `id:`, 3 = contains a 6-digit CSPC.
 */
function skuQuality(v) {
  const s = String(v ?? "").trim();
  if (!s) return 0; // missing
  if (/^u:/i.test(s)) return 0; // synthetic
  if (normalizeCspc(s)) return 3; // best (6-digit CSPC)
  if (/^upc:/i.test(s)) return 2;
  if (/^id:/i.test(s)) return 2;
  return 1; // explicit non-synthetic string
}
|
||||||
|
|
||||||
// Prefer higher quality; on ties keep existing (stable) value
|
// Prefer higher quality; on ties keep existing (stable) value
|
||||||
/**
 * Choose the higher-quality of a newly scraped SKU and the stored one.
 *
 * On a quality tie the existing value wins so stored keys stay stable; the
 * new value is used only if the old one is empty.
 *
 * @param {*} newSku - Freshly observed SKU key.
 * @param {*} oldSku - Previously stored SKU key.
 * @returns {string} The preferred (trimmed) SKU key, possibly "".
 */
function pickBetterSku(newSku, oldSku) {
  const a = String(newSku ?? "").trim();
  const b = String(oldSku ?? "").trim();
  const qa = skuQuality(a);
  const qb = skuQuality(b);
  if (qa > qb) return a;
  if (qb > qa) return b;
  return b || a;
}
|
||||||
|
|
||||||
// Only fetch product pages when missing/synthetic
|
// Only fetch product pages when missing/synthetic
|
||||||
/**
 * Tell whether a product page still needs to be fetched to find a real SKU.
 *
 * @param {*} sku - Current SKU key; null/undefined are treated as "".
 * @returns {boolean} True when the SKU is missing or synthetic (`u:` prefix).
 */
function needsSkuDetail(sku) {
  const s = String(sku ?? "").trim();
  return !s || /^u:/i.test(s);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -79,34 +78,34 @@ function needsSkuDetail(sku) {
|
||||||
* - else => u:<fnv(store|url)> (old recipe)
|
* - else => u:<fnv(store|url)> (old recipe)
|
||||||
*/
|
*/
|
||||||
// Normalize a raw SKU value into the canonical key format.
// v: raw SKU value (null/undefined are treated as "").
// { storeLabel, url }: used only to build the synthetic fallback key.
// Returns the normalized key, or "" when nothing can be derived.
function normalizeSkuKey(v, { storeLabel, url } = {}) {
  const raw = String(v ?? "").trim();

  // A standalone 6-digit run anywhere in the value wins outright.
  const cspc = normalizeCspc(raw);
  if (cspc) return cspc;

  // Only if explicitly tagged, so legacy behavior doesn't change.
  if (/^upc:/i.test(raw)) {
    const upc = normalizeUpcDigits(raw);
    return upc ? `upc:${upc}` : "";
  }
  if (/^id:/i.test(raw)) {
    const id = normalizeIdDigits(raw);
    if (!id) return "";
    // Ids of up to 6 digits are promoted to a zero-padded CSPC.
    const padded = idToCspc6(id);
    return padded ? padded : `id:${id}`;
  }

  // Already-synthetic keys pass through unchanged.
  if (raw.startsWith("u:")) return raw;

  const syn = makeSyntheticSkuKey({ storeLabel, url });
  return syn || "";
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
normalizeCspc,
|
normalizeCspc,
|
||||||
normalizeSkuKey,
|
normalizeSkuKey,
|
||||||
makeSyntheticSkuKey,
|
makeSyntheticSkuKey,
|
||||||
skuQuality,
|
skuQuality,
|
||||||
pickBetterSku,
|
pickBetterSku,
|
||||||
needsSkuDetail,
|
needsSkuDetail,
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -6,203 +6,200 @@ const path = require("path");
|
||||||
/* ---------------- Union-Find (undirected grouping) ---------------- */
|
/* ---------------- Union-Find (undirected grouping) ---------------- */
|
||||||
|
|
||||||
/**
 * Disjoint-set (union-find) over string keys, with union by rank and path
 * compression. Keys are trimmed; empty keys are ignored.
 */
class DSU {
  constructor() {
    this.parent = new Map(); // key -> parent key (roots point at themselves)
    this.rank = new Map(); // root key -> rank used to keep trees shallow
  }

  /** Register `x` as its own singleton set if it has not been seen yet. */
  _add(x) {
    if (!this.parent.has(x)) {
      this.parent.set(x, x);
      this.rank.set(x, 0);
    }
  }

  /**
   * Return the representative of the set containing `x`, adding `x` first
   * if it is new.
   *
   * @param {*} x - Key; falsy values are treated as "".
   * @returns {string} The root key, or "" for an empty key.
   */
  find(x) {
    const key = String(x || "").trim();
    if (!key) return "";
    this._add(key);

    // Walk up to the root.
    let root = key;
    while (this.parent.get(root) !== root) root = this.parent.get(root);

    // Path compression: point every node on the walked path at the root.
    let cur = key;
    while (cur !== root) {
      const next = this.parent.get(cur);
      this.parent.set(cur, root);
      cur = next;
    }
    return root;
  }

  /**
   * Merge the sets containing `a` and `b`. A no-op when either key is empty
   * or both are already in the same set.
   *
   * @param {*} a - First key.
   * @param {*} b - Second key.
   */
  union(a, b) {
    const ka = String(a || "").trim();
    const kb = String(b || "").trim();
    if (!ka || !kb || ka === kb) return;

    const ra = this.find(ka);
    const rb = this.find(kb);
    if (!ra || !rb || ra === rb) return;

    const rka = this.rank.get(ra) || 0;
    const rkb = this.rank.get(rb) || 0;

    // Attach the shallower tree under the deeper one.
    if (rka < rkb) this.parent.set(ra, rb);
    else if (rkb < rka) this.parent.set(rb, ra);
    else {
      this.parent.set(rb, ra);
      this.rank.set(ra, rka + 1);
    }
  }
}
|
||||||
|
|
||||||
/**
 * Tell whether a SKU key is a synthetic ("unknown") `u:` key.
 *
 * @param {*} k - SKU key; falsy values are treated as "". Not trimmed.
 * @returns {boolean} True when the key starts with lowercase `u:`.
 */
function isUnknownSkuKey(k) {
  return String(k || "").startsWith("u:");
}
|
||||||
|
|
||||||
/**
 * Tell whether a SKU key consists only of digits (after trimming).
 *
 * @param {*} k - SKU key; falsy values are treated as "".
 * @returns {boolean} True for a non-empty all-digit key.
 */
function isNumericSku(k) {
  return /^\d+$/.test(String(k || "").trim());
}
|
||||||
|
|
||||||
/**
 * Tell whether a SKU key is a UPC: either tagged `upc:` or, for legacy
 * data, a bare run of 12-14 digits.
 *
 * @param {*} k - SKU key; falsy values are treated as "".
 * @returns {boolean} True when the key is a UPC.
 */
function isUpcSku(k) {
  const s = String(k || "").trim();
  if (s.startsWith("upc:")) return true;
  return /^\d{12,14}$/.test(s); // keep legacy support
}
|
||||||
|
|
||||||
|
|
||||||
/**
 * Comparator that orders SKU keys from most to least preferred as a
 * canonical representative.
 *
 * Order: real keys before synthetic `u:` keys; non-UPC keys before UPCs;
 * two all-digit keys by numeric value; otherwise plain string order.
 *
 * @param {*} a - First SKU key.
 * @param {*} b - Second SKU key.
 * @returns {number} -1, 0 or 1, suitable for Array.prototype.sort.
 */
function compareSku(a, b) {
  a = String(a || "").trim();
  b = String(b || "").trim();
  if (a === b) return 0;

  const au = isUnknownSkuKey(a);
  const bu = isUnknownSkuKey(b);
  if (au !== bu) return au ? 1 : -1; // real first

  const aUpc = isUpcSku(a);
  const bUpc = isUpcSku(b);
  if (aUpc !== bUpc) return aUpc ? 1 : -1; // UPCs after other "real" keys

  const an = isNumericSku(a);
  const bn = isNumericSku(b);
  if (an && bn) {
    const na = Number(a);
    const nb = Number(b);
    // Equal numeric values (e.g. "00100" vs "100") fall through to string order.
    if (Number.isFinite(na) && Number.isFinite(nb) && na !== nb) return na < nb ? -1 : 1;
  }

  return a < b ? -1 : 1;
}
|
||||||
|
|
||||||
/* ---------------- File discovery ---------------- */
|
/* ---------------- File discovery ---------------- */
|
||||||
|
|
||||||
/**
 * Read and parse a JSON file, best-effort.
 *
 * @param {string} file - Path of the file to read (UTF-8).
 * @returns {*} The parsed value, or null when the file cannot be read or parsed.
 */
function tryReadJson(file) {
  try {
    const txt = fs.readFileSync(file, "utf8");
    return JSON.parse(txt);
  } catch {
    // Missing or malformed file is an expected case for callers.
    return null;
  }
}
|
||||||
|
|
||||||
/**
 * List the default locations to probe for `sku_links.json`, in priority order.
 *
 * @param {string} [dbDir] - Database directory; when given, its parent
 *   directory is checked first.
 * @returns {string[]} Candidate file paths (existence is not checked here).
 */
function defaultSkuLinksCandidates(dbDir) {
  const out = [];

  // 1) next to db dir: <dbDir>/../sku_links.json (common when dbDir is .../data/db)
  if (dbDir) {
    out.push(path.join(dbDir, "..", "sku_links.json"));
  }

  // 2) repo root conventional location
  out.push(path.join(process.cwd(), "data", "sku_links.json"));

  // 3) common worktree location
  out.push(path.join(process.cwd(), ".worktrees", "data", "data", "sku_links.json"));

  return out;
}
|
||||||
|
|
||||||
/**
 * Decide which `sku_links.json` file to load.
 *
 * Priority: the SPIRIT_TRACKER_SKU_LINKS environment variable, then an
 * explicit `mappingFile`, then the first default candidate that exists on
 * disk. The env and explicit paths are returned without an existence check.
 *
 * @param {{dbDir?: string, mappingFile?: string}} [opts] - Lookup options.
 * @returns {string} The chosen path, or "" when nothing is found.
 */
function findSkuLinksFile({ dbDir, mappingFile } = {}) {
  // env override
  const env = String(process.env.SPIRIT_TRACKER_SKU_LINKS || "").trim();
  if (env) return env;

  if (mappingFile) return mappingFile;

  for (const f of defaultSkuLinksCandidates(dbDir)) {
    if (!f) continue;
    try {
      if (fs.existsSync(f)) return f;
    } catch {
      // ignore: an unreadable candidate simply doesn't count
    }
  }

  return "";
}
|
||||||
|
|
||||||
/**
 * Normalize a SKU key from the links file: an `id:` key with 1-6 digits is
 * rewritten to its zero-padded 6-digit form; anything else is only trimmed.
 *
 * @param {*} k - SKU key; falsy values are treated as "".
 * @returns {string} The normalized key.
 */
function normalizeImplicitSkuKey(k) {
  const s = String(k || "").trim();
  const m = s.match(/^id:(\d{1,6})$/i);
  if (m) return String(m[1]).padStart(6, "0");
  return s;
}
|
||||||
|
|
||||||
/* ---------------- Public API ---------------- */
|
/* ---------------- Public API ---------------- */
|
||||||
|
|
||||||
/**
 * Build a SKU canonicalizer from a list of `{ fromSku, toSku }` links.
 *
 * Links are treated as undirected edges; every connected group of SKUs maps
 * to a single representative, chosen as the first member in compareSku order.
 *
 * @param {Array<{fromSku?: string, toSku?: string}>} links - Link records;
 *   non-array input is treated as empty, and records missing either end are skipped.
 * @returns {{canonicalSku: function(*): string, _canonBySku: Map<string, string>}}
 *   `canonicalSku(sku)` returns the group representative, or the normalized
 *   input when the SKU is not linked; `_canonBySku` is the underlying lookup map.
 */
function buildSkuMapFromLinksArray(links) {
  const dsu = new DSU();
  const all = new Set();

  for (const link of Array.isArray(links) ? links : []) {
    const a = normalizeImplicitSkuKey(link?.fromSku);
    const b = normalizeImplicitSkuKey(link?.toSku);
    if (!a || !b) continue;

    all.add(a);
    all.add(b);

    // undirected union => hardened vs A->B->C and cycles
    dsu.union(a, b);
  }

  // root -> Set(members)
  const byRoot = new Map();
  for (const sku of all) {
    const root = dsu.find(sku);
    if (!root) continue;
    let members = byRoot.get(root);
    if (!members) byRoot.set(root, (members = new Set()));
    members.add(sku);
  }

  // sku -> canonical rep (the best-ranked member of its group)
  const canonBySku = new Map();
  for (const [root, members] of byRoot.entries()) {
    const rep = Array.from(members).sort(compareSku)[0] || root;
    for (const sku of members) canonBySku.set(sku, rep);
  }

  function canonicalSku(sku) {
    const s = normalizeImplicitSkuKey(sku);
    if (!s) return s;
    return canonBySku.get(s) || s;
  }

  return { canonicalSku, _canonBySku: canonBySku };
}
|
||||||
|
|
||||||
/**
 * Locate and load the SKU links file and build the canonicalizer from it.
 *
 * A missing or unreadable file, or one without a `links` array, yields an
 * empty map whose canonicalSku only normalizes its input.
 *
 * @param {{dbDir?: string, mappingFile?: string}} [opts] - Passed to findSkuLinksFile.
 * @returns {{canonicalSku: function(*): string, _canonBySku: Map<string, string>}}
 *   The result of buildSkuMapFromLinksArray.
 */
function loadSkuMap({ dbDir, mappingFile } = {}) {
  const file = findSkuLinksFile({ dbDir, mappingFile });
  if (!file) {
    return buildSkuMapFromLinksArray([]);
  }

  const obj = tryReadJson(file);
  const links = Array.isArray(obj?.links) ? obj.links : [];
  return buildSkuMapFromLinksArray(links);
}
|
||||||
|
|
||||||
module.exports = { loadSkuMap };
|
module.exports = { loadSkuMap };
|
||||||
|
|
|
||||||
|
|
@ -1,29 +1,29 @@
|
||||||
"use strict";
|
"use strict";
|
||||||
|
|
||||||
/**
 * Pad a value with trailing spaces to at least `n` characters.
 *
 * @param {*} s - Value to pad; coerced with String().
 * @param {number} n - Minimum resulting length.
 * @returns {string} The padded string (unchanged when already long enough).
 */
function padRight(s, n) {
  return String(s).padEnd(n, " ");
}
|
||||||
|
|
||||||
/**
 * Pad a value with leading spaces to at least `n` characters.
 *
 * @param {*} s - Value to pad; coerced with String().
 * @param {number} n - Minimum resulting length.
 * @returns {string} The padded string (unchanged when already long enough).
 */
function padLeft(s, n) {
  return String(s).padStart(n, " ");
}
|
||||||
|
|
||||||
/**
 * Remove ANSI SGR (color/style) escape sequences, i.e. `ESC [ ... m`.
 *
 * @param {*} s - Value to strip; coerced with String().
 * @returns {string} The text without SGR sequences.
 */
function stripAnsi(s) {
  return String(s).replace(/\x1b\[[0-9;]*m/g, "");
}
|
||||||
|
|
||||||
/**
 * Pad with trailing spaces to a *visible* width of at least `n`, ignoring
 * ANSI color codes when measuring.
 *
 * @param {*} s - Value to pad; coerced with String().
 * @param {number} n - Minimum visible width.
 * @returns {string} The padded string, escape sequences preserved.
 */
function padRightV(s, n) {
  s = String(s);
  const w = stripAnsi(s).length;
  return w >= n ? s : s + " ".repeat(n - w);
}
|
||||||
|
|
||||||
/**
 * Pad with leading spaces to a *visible* width of at least `n`, ignoring
 * ANSI color codes when measuring.
 *
 * @param {*} s - Value to pad; coerced with String().
 * @param {number} n - Minimum visible width.
 * @returns {string} The padded string, escape sequences preserved.
 */
function padLeftV(s, n) {
  s = String(s);
  const w = stripAnsi(s).length;
  return w >= n ? s : " ".repeat(n - w) + s;
}
|
||||||
|
|
||||||
module.exports = { padRight, padLeft, stripAnsi, padRightV, padLeftV };
|
module.exports = { padRight, padLeft, stripAnsi, padRightV, padLeftV };
|
||||||
|
|
|
||||||
|
|
@ -3,11 +3,11 @@
|
||||||
const { cleanText, decodeHtml } = require("./html");
|
const { cleanText, decodeHtml } = require("./html");
|
||||||
|
|
||||||
/**
 * Reduce a scraped product name to a plain, comparable form.
 *
 * Entities are decoded and tags removed, quote-like characters are dropped,
 * and anything other than letters, digits, whitespace and `- & ( ) . , /`
 * is removed before whitespace is collapsed.
 *
 * @param {*} s - Raw name; falsy values are treated as "".
 * @returns {string} The sanitized name.
 */
function sanitizeName(s) {
  return cleanText(decodeHtml(String(s || "")))
    .replace(/['"’“”`´]/g, "")
    .replace(/[^\p{L}\p{N}\s\-&().,/]/gu, "")
    .replace(/\s+/g, " ")
    .trim();
}
|
||||||
|
|
||||||
module.exports = { sanitizeName };
|
module.exports = { sanitizeName };
|
||||||
|
|
|
||||||
|
|
@ -1,16 +1,19 @@
|
||||||
"use strict";
|
"use strict";
|
||||||
|
|
||||||
/**
 * Format a date's local time of day as `HH:MM:SS.mmm`.
 *
 * @param {Date} [d] - Date to format; defaults to now.
 * @returns {string} Zero-padded local time with milliseconds.
 */
function ts(d = new Date()) {
  const pad = (value, width) => String(value).padStart(width, "0");
  const h = pad(d.getHours(), 2);
  const m = pad(d.getMinutes(), 2);
  const s = pad(d.getSeconds(), 2);
  const ms = pad(d.getMilliseconds(), 3);
  return `${h}:${m}:${s}.${ms}`;
}
|
||||||
|
|
||||||
/**
 * Produce a UTC ISO-8601 timestamp that is safe to use in a file name:
 * colons become dashes and milliseconds are dropped.
 *
 * @param {Date} [d] - Date to format; defaults to now.
 * @returns {string} For example `2026-01-16T21-27-01Z`.
 */
function isoTimestampFileSafe(d = new Date()) {
  return d
    .toISOString()
    .replace(/:/g, "-")
    .replace(/\.\d{3}Z$/, "Z");
}
|
||||||
|
|
||||||
module.exports = { ts, isoTimestampFileSafe };
|
module.exports = { ts, isoTimestampFileSafe };
|
||||||
|
|
|
||||||
|
|
@ -1,50 +1,56 @@
|
||||||
"use strict";
|
"use strict";
|
||||||
|
|
||||||
/**
 * Reduce a listing URL to its page-1 base form.
 *
 * Drops the fragment and the `page` query parameter, makes sure the path
 * ends with "/", and strips a trailing `/page/<n>/` path segment.
 *
 * @param {string} startUrl - Absolute URL of a listing page.
 * @returns {string} The normalized URL, or `startUrl` unchanged when it cannot be parsed.
 */
function normalizeBaseUrl(startUrl) {
  try {
    const u = new URL(startUrl);
    u.hash = "";
    if (u.searchParams && u.searchParams.has("page")) u.searchParams.delete("page");
    // Re-serialize the query so an emptied query string also loses its "?".
    u.search = u.searchParams.toString() ? `?${u.searchParams.toString()}` : "";

    if (!u.pathname.endsWith("/")) u.pathname += "/";
    u.pathname = u.pathname.replace(/\/page\/\d+\/?$/, "/");
    return u.toString();
  } catch {
    // Not a parseable absolute URL: leave it to the caller.
    return startUrl;
  }
}
|
||||||
|
|
||||||
/**
 * Build the URL of page `pageNum` using path-style pagination (`.../page/<n>/`).
 *
 * Page 1 (or lower) is the normalized base URL. For later pages any existing
 * `/page/<n>/` suffix is replaced and the fragment is dropped; the query
 * string is kept.
 *
 * @param {string} baseUrl - Absolute listing URL.
 * @param {number} pageNum - 1-based page number.
 * @returns {string} URL of the requested page.
 * @throws {TypeError} When `pageNum > 1` and `baseUrl` is not a valid absolute URL.
 */
function makePageUrl(baseUrl, pageNum) {
  if (pageNum <= 1) return normalizeBaseUrl(baseUrl);
  const u = new URL(baseUrl);
  if (!u.pathname.endsWith("/")) u.pathname += "/";
  u.pathname = u.pathname.replace(/\/page\/\d+\/?$/, "/");
  u.pathname = u.pathname + `page/${pageNum}/`;
  u.hash = "";
  return u.toString();
}
|
||||||
|
|
||||||
/**
 * Build a page URL using the store's own `makePageUrl` when it defines one,
 * falling back to the default path-style makePageUrl otherwise.
 *
 * @param {{store?: {makePageUrl?: Function}}} ctx - Scrape context.
 * @param {string} baseUrl - Absolute listing URL.
 * @param {number} pageNum - 1-based page number.
 * @returns {string} URL of the requested page.
 */
function makePageUrlForCtx(ctx, baseUrl, pageNum) {
  const fn = ctx?.store?.makePageUrl;
  return typeof fn === "function" ? fn(baseUrl, pageNum) : makePageUrl(baseUrl, pageNum);
}
|
||||||
|
|
||||||
/**
 * Build a page URL for sites that paginate with a query parameter.
 *
 * The base URL is normalized first, then `paramName` is set to the page
 * number (page numbers of 1 or lower are written as "1").
 *
 * @param {string} baseUrl - Absolute listing URL.
 * @param {string} paramName - Name of the pagination query parameter.
 * @param {number} pageNum - 1-based page number.
 * @returns {string} URL of the requested page.
 */
function makePageUrlQueryParam(baseUrl, paramName, pageNum) {
  const u = new URL(normalizeBaseUrl(baseUrl));
  u.hash = "";
  if (pageNum <= 1) u.searchParams.set(paramName, "1");
  else u.searchParams.set(paramName, String(pageNum));
  u.search = `?${u.searchParams.toString()}`;
  return u.toString();
}
|
||||||
|
|
||||||
/**
 * Build a page URL for Shopify-style pagination (`?page=<n>`).
 *
 * The base URL is normalized first; page numbers below 1 are clamped to 1.
 *
 * @param {string} baseUrl - Absolute collection URL.
 * @param {number} pageNum - 1-based page number.
 * @returns {string} URL of the requested page.
 */
function makePageUrlShopifyQueryPage(baseUrl, pageNum) {
  const u = new URL(normalizeBaseUrl(baseUrl));
  u.hash = "";
  u.searchParams.set("page", String(Math.max(1, pageNum)));
  u.search = `?${u.searchParams.toString()}`;
  return u.toString();
}
|
||||||
|
|
||||||
module.exports = { normalizeBaseUrl, makePageUrl, makePageUrlForCtx, makePageUrlQueryParam, makePageUrlShopifyQueryPage };
|
module.exports = {
|
||||||
|
normalizeBaseUrl,
|
||||||
|
makePageUrl,
|
||||||
|
makePageUrlForCtx,
|
||||||
|
makePageUrlQueryParam,
|
||||||
|
makePageUrlShopifyQueryPage,
|
||||||
|
};
|
||||||
|
|
|
||||||
|
|
@ -8,50 +8,50 @@ const { decodeHtml, stripTags, cleanText } = require("./html");
|
||||||
* - Else uses the normal price bdi/span content.
|
* - Else uses the normal price bdi/span content.
|
||||||
*/
|
*/
|
||||||
function extractPriceFromTmbBlock(block) {
  const span = matchFirstPriceSpan(block);
  if (!span) return "";

  // When <ins> elements are present, only the last one is searched.
  const insMatches = [...span.matchAll(/<ins\b[^>]*>([\s\S]*?)<\/ins>/gi)];
  const scope = insMatches.length ? insMatches[insMatches.length - 1][1] : span;

  // Preferred source: the text of the last <bdi> element inside the scope.
  const bdis = [...scope.matchAll(/<bdi\b[^>]*>([\s\S]*?)<\/bdi>/gi)];
  if (bdis.length) {
    const raw = cleanText(decodeHtml(stripTags(bdis[bdis.length - 1][1]))).replace(/\s+/g, "");
    if (raw) return raw.startsWith("$") ? raw : `$${raw}`;
  }

  // Fallback 1: currency symbol taken from its dedicated element plus the first number in the text.
  const sym = scope.match(/woocommerce-Price-currencySymbol[^>]*>\s*([^<\s]+)/i);
  const text = cleanText(decodeHtml(stripTags(scope)));
  const num = text.match(/(\d+(?:\.\d{2})?)/);
  if (sym && num) return `${sym[1].trim()}${num[1]}`;

  // Fallback 2: a literal "$<number>" anywhere in the stripped text.
  const m = text.match(/\$\s*\d+(?:\.\d{2})?/);
  return m ? m[0].replace(/\s+/g, "") : "";
}
|
||||||
|
|
||||||
/**
 * Return the inner HTML of the first `<span>` whose class list contains `price`.
 *
 * Nested `<span>` elements are balanced by counting opening and closing tags, so
 * the returned slice ends at the closing tag that matches the price span itself.
 *
 * @param {string} html - HTML fragment to search.
 * @returns {string} Inner HTML of the price span, or "" when no such span exists
 *   or its closing tag cannot be found.
 */
function matchFirstPriceSpan(html) {
  const re = /<span\b[^>]*class=["'][^"']*\bprice\b[^"']*["'][^>]*>/i;
  const m = re.exec(html);
  if (!m) return "";
  const start = m.index + m[0].length;

  let i = start;
  let depth = 1;
  while (i < html.length) {
    const nextOpen = html.indexOf("<span", i);
    const nextClose = html.indexOf("</span>", i);
    if (nextClose === -1) break;

    if (nextOpen !== -1 && nextOpen < nextClose) {
      // A nested span opens before the next close: go one level deeper.
      depth++;
      i = nextOpen + 5; // length of "<span"
      continue;
    }
    depth--;
    if (depth === 0) return html.slice(start, nextClose);
    i = nextClose + 7; // length of "</span>"
  }
  return "";
}
|
||||||
|
|
||||||
module.exports = { extractPriceFromTmbBlock };
|
||||||
|
|
|
||||||
|
|
@ -22,320 +22,310 @@ const path = require("path");
|
||||||
/* ---------------- helpers ---------------- */
|
/* ---------------- helpers ---------------- */
|
||||||
|
|
||||||
/**
 * Create a directory, including missing parents; a no-op when it already exists.
 *
 * @param {string} dir - Directory path to create.
 */
function ensureDir(dir) {
  fs.mkdirSync(dir, { recursive: true });
}
|
||||||
|
|
||||||
/**
 * Read and parse a JSON file.
 *
 * @param {string} p - Path of the file to read (decoded as UTF-8).
 * @returns {*} The parsed value, or null when the file cannot be read or parsed.
 */
function readJson(p) {
  try {
    return JSON.parse(fs.readFileSync(p, "utf8"));
  } catch {
    // Best-effort by design: callers treat null as "skip this file".
    return null;
  }
}
|
||||||
|
|
||||||
/**
 * List the JSON database files directly inside `<cwd>/data/db`.
 *
 * @returns {string[]} Absolute paths of regular files whose name ends in `.json`;
 *   an empty array when the directory cannot be read.
 */
function listDbFiles() {
  const dir = path.join(process.cwd(), "data", "db");
  try {
    return fs
      .readdirSync(dir, { withFileTypes: true })
      .filter((e) => e.isFile() && e.name.endsWith(".json"))
      .map((e) => path.join(dir, e.name));
  } catch {
    return [];
  }
}
|
||||||
|
|
||||||
/**
 * Convert a price value such as "$1,234.50" to a number.
 *
 * Every character other than digits and "." is discarded before parsing.
 *
 * @param {*} v - Price string or number; null/undefined are treated as empty.
 * @returns {number|null} The parsed number, or null when no digits remain or the
 *   remaining text is not a finite number (e.g. "1.2.3").
 */
function priceToNumber(v) {
  const s = String(v ?? "").replace(/[^0-9.]/g, "");
  // Number("") is 0, which would report a missing price as a real price of 0.
  if (s === "") return null;
  const n = Number(s);
  return Number.isFinite(n) ? n : null;
}
|
||||||
|
|
||||||
/**
 * Tell whether a raw SKU contains a standalone run of exactly six digits.
 *
 * @param {*} s - Raw SKU value; falsy values are treated as "".
 * @returns {boolean} True when a six-digit group delimited by word boundaries is present.
 */
function hasRealSku6(s) {
  return /\b\d{6}\b/.test(String(s || ""));
}
|
||||||
|
|
||||||
/**
 * Tell whether a SKU key carries the `u:` prefix.
 *
 * @param {*} k - SKU key; falsy values are treated as "".
 * @returns {boolean} True when the key starts with "u:".
 */
function isSyntheticSkuKey(k) {
  return String(k || "").startsWith("u:");
}
|
||||||
|
|
||||||
/**
 * Derive the lowercase store key from a database file path.
 *
 * For a file named `<store>__<rest>.json` (where `<store>` has no underscore) the
 * key is `<store>`; otherwise it is the file name without its `.json` extension.
 *
 * @param {string} abs - Path of the database file.
 * @returns {string} Lowercased store key.
 */
function storeKeyFromDbPath(abs) {
  const base = path.basename(abs);
  const m = base.match(/^([^_]+)__.+\.json$/i);
  const k = m ? m[1] : base.replace(/\.json$/i, "");
  return String(k || "").toLowerCase();
}
|
||||||
|
|
||||||
/* ---------------- sku helpers ---------------- */
|
/* ---------------- sku helpers ---------------- */
|
||||||
|
|
||||||
/**
 * Load the project's SKU map from `<cwd>/src/utils/sku_map`, if available.
 *
 * @returns {*} Whatever `loadSkuMap({ dbDir: "<cwd>/data/db" })` returns, or null
 *   when the module cannot be loaded or loading throws.
 */
function loadSkuMapOrNull() {
  try {
    // eslint-disable-next-line node/no-missing-require
    const { loadSkuMap } = require(path.join(process.cwd(), "src/utils/sku_map"));
    return loadSkuMap({ dbDir: path.join(process.cwd(), "data/db") });
  } catch {
    // The SKU map is optional; callers fall back to un-mapped keys.
    return null;
  }
}
|
||||||
|
|
||||||
/**
 * Normalize a raw SKU into a key, using the project's `normalizeSkuKey` when it
 * can be loaded from `<cwd>/src/utils/sku`.
 *
 * If loading or calling that helper throws, a local fallback is used: the first
 * standalone six-digit group of `skuRaw`, else `u:<storeLabel>:<url>` when a URL
 * is given, else "".
 *
 * @param {object} params
 * @param {*} params.skuRaw - Raw SKU value from the listing.
 * @param {string} params.storeLabel - Store label, used for URL-based keys.
 * @param {string} [params.url] - Listing URL, used when no six-digit SKU is found.
 * @returns {string} The SKU key, or "" when none can be derived.
 */
function normalizeSkuKeyOrEmpty({ skuRaw, storeLabel, url }) {
  try {
    // eslint-disable-next-line node/no-missing-require
    const { normalizeSkuKey } = require(path.join(process.cwd(), "src/utils/sku"));
    const k = normalizeSkuKey(skuRaw, { storeLabel, url });
    return k ? String(k) : "";
  } catch {
    const m = String(skuRaw ?? "").match(/\b(\d{6})\b/);
    if (m) return m[1];
    if (url) return `u:${storeLabel}:${url}`;
    return "";
  }
}
|
||||||
|
|
||||||
/**
 * Map a SKU key to its canonical SKU.
 *
 * @param {string} k - SKU key; a falsy key yields "".
 * @param {object|null} skuMap - Optional map exposing `canonicalSku(key)`.
 * @returns {string} The mapped SKU as a string, or `k` itself when there is no
 *   usable map or the map returns a falsy value.
 */
function canonicalize(k, skuMap) {
  if (!k) return "";
  if (skuMap && typeof skuMap.canonicalSku === "function") {
    return String(skuMap.canonicalSku(k) || k);
  }
  return k;
}
|
||||||
|
|
||||||
/* ---------------- grouping ---------------- */
|
/* ---------------- grouping ---------------- */
|
||||||
|
|
||||||
// Store keys that belong to the "bc" group; every other store falls into "ab".
const BC_STORE_KEYS = new Set(["gull", "strath", "bcl", "legacy", "legacyliquor", "tudor", "vessel", "vintage", "arc"]);

/**
 * Decide whether a store belongs to the requested group.
 *
 * @param {string} group - "bc", "ab", or anything else (treated as "all").
 * @param {string} storeKey - Store key; compared case-insensitively.
 * @returns {boolean} For "bc": the key is in BC_STORE_KEYS; for "ab": it is not;
 *   for any other group: always true.
 */
function groupAllowsStore(group, storeKey) {
  const k = String(storeKey || "").toLowerCase();
  if (group === "bc") return BC_STORE_KEYS.has(k);
  if (group === "ab") return !BC_STORE_KEYS.has(k);
  return true; // all
}
|
||||||
|
|
||||||
/* ---------------- args ---------------- */
|
/* ---------------- args ---------------- */
|
||||||
|
|
||||||
/**
 * Parse command-line arguments for the report.
 *
 * Recognized flags: `--top N`, `--min-stores N`, `--require-all`,
 * `--group all|bc|ab`, `--out PATH`. Unknown arguments are ignored.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{top: number, minStores: number, requireAll: boolean, group: string, out: string}}
 *   Parsed options. `top` and `minStores` fall back to 50 and 2 when the value is
 *   missing, non-numeric or 0; an unrecognized group becomes "all".
 */
function parseArgs(argv) {
  const out = { top: 50, minStores: 2, requireAll: false, group: "all", out: "" };
  for (let i = 0; i < argv.length; i++) {
    const a = argv[i];
    if (a === "--top" && argv[i + 1]) out.top = Number(argv[++i]) || 50;
    else if (a === "--min-stores" && argv[i + 1]) out.minStores = Number(argv[++i]) || 2;
    else if (a === "--require-all") out.requireAll = true;
    else if (a === "--group" && argv[i + 1]) out.group = String(argv[++i] || "all").toLowerCase();
    else if (a === "--out" && argv[i + 1]) out.out = String(argv[++i] || "");
  }
  if (out.group !== "all" && out.group !== "bc" && out.group !== "ab") out.group = "all";
  return out;
}
|
||||||
|
|
||||||
/* ---------------- main ---------------- */
|
/* ---------------- main ---------------- */
|
||||||
|
|
||||||
/**
 * Build the "common listings" report.
 *
 * Scans every JSON file in `<cwd>/data/db`, groups the live (non-removed) items
 * by canonical SKU across the stores allowed by `--group`, and writes a JSON
 * report (by default `reports/common_listings.json`) with the SKUs carried by
 * the most stores. Sets `process.exitCode = 2` when no database file is found.
 */
function main() {
  const args = parseArgs(process.argv.slice(2));
  const repoRoot = process.cwd();
  const reportsDir = path.join(repoRoot, "reports");
  ensureDir(reportsDir);

  const outPath = args.out ? path.join(repoRoot, args.out) : path.join(reportsDir, "common_listings.json");
  ensureDir(path.dirname(outPath));

  const dbFiles = listDbFiles();
  if (!dbFiles.length) {
    console.error("No DB files found");
    process.exitCode = 2;
    return;
  }

  const skuMap = loadSkuMapOrNull();
  console.log(`[debug] skuMap: ${skuMap ? "loaded" : "missing"}`);
  console.log(`[debug] scanning ${dbFiles.length} db files`);

  const storeToCanon = new Map(); // storeKey -> Set(canonSku)
  const canonAgg = new Map(); // canonSku -> { stores:Set, listings:[], cheapest, storeMin:Map }

  let liveRows = 0;
  let removedRows = 0;

  for (const abs of dbFiles.sort()) {
    const obj = readJson(abs);
    if (!obj) continue;

    const storeLabel = String(obj.storeLabel || obj.store || "").trim();
    if (!storeLabel) continue;

    const storeKey = storeKeyFromDbPath(abs);
    if (!groupAllowsStore(args.group, storeKey)) continue;

    if (!storeToCanon.has(storeKey)) {
      storeToCanon.set(storeKey, new Set());
    }

    const rel = path.relative(repoRoot, abs).replace(/\\/g, "/");
    const items = Array.isArray(obj.items) ? obj.items : [];

    console.log(`[debug] ${rel} storeKey="${storeKey}" storeLabel="${storeLabel}" items=${items.length}`);

    for (const it of items) {
      if (!it) continue;
      if (it.removed) {
        removedRows++;
        continue;
      }
      liveRows++;

      const skuKey = normalizeSkuKeyOrEmpty({
        skuRaw: it.sku,
        storeLabel,
        url: it.url,
      });
      if (!skuKey) continue;

      const canonSku = canonicalize(skuKey, skuMap);
      if (!canonSku) continue;

      storeToCanon.get(storeKey).add(canonSku);

      let agg = canonAgg.get(canonSku);
      if (!agg) {
        agg = { stores: new Set(), listings: [], cheapest: null, storeMin: new Map() };
        canonAgg.set(canonSku, agg);
      }

      agg.stores.add(storeKey);

      // Track the lowest live price seen per store for this canonical SKU.
      const priceNum = priceToNumber(it.price);
      if (priceNum !== null) {
        const prev = agg.storeMin.get(storeKey);
        if (prev === undefined || priceNum < prev) agg.storeMin.set(storeKey, priceNum);
      }

      const listing = {
        canonSku,
        skuKey,
        skuRaw: String(it.sku || ""),
        name: String(it.name || ""),
        price: String(it.price || ""),
        priceNum,
        url: String(it.url || ""),
        storeKey,
        storeLabel,
        categoryLabel: String(obj.categoryLabel || obj.category || ""),
        dbFile: rel,
        hasRealSku6: hasRealSku6(it.sku) && !isSyntheticSkuKey(skuKey),
      };

      agg.listings.push(listing);

      if (priceNum !== null) {
        if (!agg.cheapest || priceNum < agg.cheapest.priceNum) {
          agg.cheapest = { priceNum, item: listing };
        }
      }
    }
  }

  const stores = [...storeToCanon.keys()].sort();
  const storeCount = stores.length;

  console.log(`[debug] group="${args.group}" stores(${storeCount}): ${stores.join(", ")}`);
  console.log(`[debug] liveRows=${liveRows} removedRows=${removedRows} canonSkus=${canonAgg.size}`);

  // Representative listing: cheapest one with a real six-digit SKU, else the
  // overall cheapest, else the first listing seen.
  function pickRepresentative(agg) {
    const preferred = agg.listings
      .filter((l) => l.hasRealSku6)
      .sort((a, b) => (a.priceNum ?? Infinity) - (b.priceNum ?? Infinity));

    if (preferred.length) return preferred[0];
    if (agg.cheapest) return agg.cheapest.item;
    return agg.listings[0] || null;
  }

  const rows = [];

  for (const [canonSku, agg] of canonAgg.entries()) {
    const rep = pickRepresentative(agg);
    const missingStores = stores.filter((s) => !agg.stores.has(s));

    const storePrices = {};
    for (const s of stores) {
      const p = agg.storeMin.get(s);
      if (Number.isFinite(p)) storePrices[s] = p;
    }

    rows.push({
      canonSku,
      storeCount: agg.stores.size,
      stores: [...agg.stores].sort(),
      missingStores,
      storePrices, // { [storeKey]: number } min live price per store
      representative: rep
        ? {
            name: rep.name,
            price: rep.price,
            priceNum: rep.priceNum,
            storeKey: rep.storeKey,
            storeLabel: rep.storeLabel,
            skuRaw: rep.skuRaw,
            skuKey: rep.skuKey,
            url: rep.url,
            categoryLabel: rep.categoryLabel,
            dbFile: rep.dbFile,
          }
        : null,
      cheapest: agg.cheapest
        ? {
            price: agg.cheapest.item.price,
            priceNum: agg.cheapest.priceNum,
            storeKey: agg.cheapest.item.storeKey,
            url: agg.cheapest.item.url,
          }
        : null,
    });
  }

  // Stable-ish sort: storeCount desc, then canonSku asc (stable diffs over time)
  rows.sort((a, b) => {
    if (b.storeCount !== a.storeCount) return b.storeCount - a.storeCount;
    return String(a.canonSku).localeCompare(String(b.canonSku));
  });

  const filtered = args.requireAll
    ? rows.filter((r) => r.storeCount === storeCount)
    : rows.filter((r) => r.storeCount >= args.minStores);

  const top = filtered.slice(0, args.top);

  const payload = {
    generatedAt: new Date().toISOString(),
    args: {
      top: args.top,
      minStores: args.minStores,
      requireAll: args.requireAll,
      group: args.group,
      out: path.relative(repoRoot, outPath).replace(/\\/g, "/"),
    },
    storeCount,
    stores,
    totals: {
      liveRows,
      removedRows,
      canonSkus: canonAgg.size,
      outputCount: top.length,
    },
    rows: top,
  };

  fs.writeFileSync(outPath, JSON.stringify(payload, null, 2) + "\n", "utf8");
  console.log(`Wrote ${path.relative(repoRoot, outPath)} (${top.length} rows)`);
}
|
||||||
|
|
||||||
// Script entry point.
main();
|
||||||
|
|
|
||||||
|
|
@ -30,278 +30,278 @@ const fs = require("fs");
|
||||||
const path = require("path");
|
const path = require("path");
|
||||||
|
|
||||||
/**
 * Run `git` synchronously with the given arguments.
 *
 * @param {string[]} args - Arguments passed to the `git` executable.
 * @returns {string} Standard output decoded as UTF-8, with trailing whitespace removed.
 * @throws {Error} Whatever `execFileSync` throws (e.g. git missing or a non-zero exit).
 */
function runGit(args) {
  return execFileSync("git", args, { encoding: "utf8" }).trimEnd();
}
|
||||||
|
|
||||||
/**
 * Read a JSON file as it existed at a given commit, via `git show <sha>:<path>`.
 *
 * @param {string} sha - Commit (or other revision) to read from.
 * @param {string} filePath - Repository-relative path of the file.
 * @returns {*} The parsed JSON value, or null when git fails or the content is not valid JSON.
 */
function gitShowJson(sha, filePath) {
  try {
    const txt = execFileSync("git", ["show", `${sha}:${filePath}`], {
      encoding: "utf8",
      // stderr is piped (not inherited) so git's error output is not printed.
      stdio: ["ignore", "pipe", "pipe"],
    });
    return JSON.parse(txt);
  } catch {
    return null;
  }
}
|
||||||
|
|
||||||
/**
 * Check whether a file exists at a given commit, via `git cat-file -e <sha>:<path>`.
 *
 * @param {string} sha - Commit (or other revision); a falsy value yields false without running git.
 * @param {string} filePath - Repository-relative path of the file.
 * @returns {boolean} True when git exits successfully, false otherwise.
 */
function gitFileExistsAtSha(sha, filePath) {
  if (!sha) return false;
  try {
    execFileSync("git", ["cat-file", "-e", `${sha}:${filePath}`], {
      stdio: ["ignore", "ignore", "ignore"],
    });
    return true;
  } catch {
    return false;
  }
}
|
||||||
|
|
||||||
/**
 * Read and parse a JSON file from disk.
 *
 * @param {string} filePath - Path of the file to read (decoded as UTF-8).
 * @returns {*} The parsed value, or null when the file cannot be read or parsed.
 */
function readJson(filePath) {
  try {
    return JSON.parse(fs.readFileSync(filePath, "utf8"));
  } catch {
    return null;
  }
}
|
||||||
|
|
||||||
/**
 * Create a directory, including missing parents; a no-op when it already exists.
 *
 * @param {string} dir - Directory path to create.
 */
function ensureDir(dir) {
  fs.mkdirSync(dir, { recursive: true });
}
|
||||||
|
|
||||||
/**
 * Convert a price value such as "$1,234.50" to a number.
 *
 * Every character other than digits and "." is discarded before parsing.
 *
 * @param {*} v - Price string or number; null/undefined are treated as empty.
 * @returns {number|null} The parsed number, or null when no digits remain or the
 *   remaining text is not a finite number (e.g. "1.2.3").
 */
function priceToNumber(v) {
  const s = String(v ?? "").replace(/[^0-9.]/g, "");
  // Number("") is 0, which would make a missing price look like a price of 0
  // (and a missing new price look like a 100% discount).
  if (s === "") return null;
  const n = Number(s);
  return Number.isFinite(n) ? n : null;
}
|
||||||
|
|
||||||
/**
 * Compute the percentage discount from an old price to a new price.
 *
 * @param {*} oldStr - Previous price (parsed with `priceToNumber`).
 * @param {*} newStr - Current price (parsed with `priceToNumber`).
 * @returns {number|null} Rounded whole-number percentage; 0 when the new price is
 *   not lower; null when either price cannot be parsed or the old price is <= 0.
 */
function pctOff(oldStr, newStr) {
  const a = priceToNumber(oldStr);
  const b = priceToNumber(newStr);
  if (a === null || b === null) return null;
  if (a <= 0) return null; // avoid dividing by zero
  if (b >= a) return 0;
  return Math.round(((a - b) / a) * 100);
}
|
||||||
|
|
||||||
/**
 * Escape text for safe inclusion in HTML content and double-quoted attributes.
 *
 * @param {*} s - Value to escape; null/undefined become "".
 * @returns {string} The text with `&`, `<`, `>` and `"` replaced by entities.
 */
function htmlEscape(s) {
  return String(s ?? "")
    .replace(/&/g, "&amp;") // must run first so later entities are not double-escaped
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;");
}
|
||||||
|
|
||||||
/**
 * Normalize a string into a compact lowercase token.
 *
 * The value is lowercased and trimmed, then every character other than word
 * characters and `:`, `.`, `/`, `-` is removed (which also removes whitespace).
 *
 * @param {*} s - Value to normalize; falsy values are treated as "".
 * @returns {string} The normalized token.
 */
function normToken(s) {
  return String(s || "")
    .toLowerCase()
    .trim()
    .replace(/\s+/g, " ")
    .replace(/[^\w:./-]+/g, "");
}
|
||||||
|
|
||||||
/**
 * Return the first parent of a commit.
 *
 * @param {string} headSha - Commit whose parent is wanted.
 * @returns {string} The first parent's hash, or "" when the commit has no parent
 *   or git fails.
 */
function getFirstParentSha(headSha) {
  try {
    // Output format: "<commit> <parent1> <parent2> ..."
    const out = runGit(["rev-list", "--parents", "-n", "1", headSha]);
    const parts = out.split(/\s+/).filter(Boolean);
    return parts.length >= 2 ? parts[1] : "";
  } catch {
    return "";
  }
}
|
||||||
|
|
||||||
/**
 * List the JSON files under `data/db` that differ between two commits.
 *
 * @param {string} fromSha - Base revision.
 * @param {string} toSha - Target revision.
 * @returns {string[]} Paths reported by `git diff --name-only` that end in `.json`;
 *   an empty array when git fails.
 */
function listChangedDbFiles(fromSha, toSha) {
  try {
    const out = runGit(["diff", "--name-only", fromSha, toSha, "--", "data/db"]);
    return out
      .split(/\r?\n/)
      .map((s) => s.trim())
      .filter((s) => s && s.endsWith(".json"));
  } catch {
    return [];
  }
}
|
||||||
|
|
||||||
/**
 * List the JSON database files directly inside `<cwd>/data/db`.
 *
 * @returns {string[]} Paths of the form `data/db/<name>.json`, always joined with
 *   forward slashes; an empty array when the directory cannot be read.
 */
function listDbFilesOnDisk() {
  const dir = path.join(process.cwd(), "data", "db");
  try {
    return fs
      .readdirSync(dir, { withFileTypes: true })
      .filter((e) => e.isFile() && e.name.endsWith(".json"))
      .map((e) => path.posix.join("data/db", e.name));
  } catch {
    return [];
  }
}
|
||||||
|
|
||||||
// We reuse your existing canonical SKU mapping logic.
|
// We reuse your existing canonical SKU mapping logic.
|
||||||
/**
 * Load the project's SKU map from `<cwd>/src/utils/sku_map`, if available.
 *
 * @returns {*} Whatever `loadSkuMap({ dbDir: "<cwd>/data/db" })` returns, or null
 *   when the module cannot be loaded or loading throws.
 */
function loadSkuMapOrNull() {
  try {
    // eslint-disable-next-line node/no-missing-require
    const { loadSkuMap } = require(path.join(process.cwd(), "src", "utils", "sku_map"));
    return loadSkuMap({ dbDir: path.join(process.cwd(), "data", "db") });
  } catch {
    return null;
  }
}
|
||||||
|
|
||||||
/**
 * Normalize a raw SKU into a key, using the project's `normalizeSkuKey` when it
 * can be loaded from `<cwd>/src/utils/sku`.
 *
 * If loading or calling that helper throws, a local fallback is used: the first
 * standalone six-digit group of `skuRaw`, else a `u:<store>:<url>` key built from
 * `normToken`-normalized parts when a URL is given, else "".
 *
 * @param {object} params
 * @param {*} params.skuRaw - Raw SKU value from the listing.
 * @param {string} params.storeLabel - Store label, used for URL-based keys.
 * @param {string} [params.url] - Listing URL, used when no six-digit SKU is found.
 * @returns {string} The SKU key, or "" when none can be derived.
 */
function normalizeSkuKeyOrEmpty({ skuRaw, storeLabel, url }) {
  try {
    // eslint-disable-next-line node/no-missing-require
    const { normalizeSkuKey } = require(path.join(process.cwd(), "src", "utils", "sku"));
    const k = normalizeSkuKey(skuRaw, { storeLabel, url });
    return k ? String(k) : "";
  } catch {
    const m = String(skuRaw ?? "").match(/\b(\d{6})\b/);
    if (m) return m[1];
    if (url) return `u:${normToken(storeLabel)}:${normToken(url)}`;
    return "";
  }
}
|
||||||
|
|
||||||
/**
 * Map a SKU key to its canonical SKU.
 *
 * @param {string} skuKey - SKU key; a falsy key yields "".
 * @param {object|null} skuMap - Optional map exposing `canonicalSku(key)`.
 * @returns {string} The mapped SKU as a string, or `skuKey` itself when there is
 *   no usable map or the map returns a falsy value.
 */
function canonicalize(skuKey, skuMap) {
  if (!skuKey) return "";
  if (skuMap && typeof skuMap.canonicalSku === "function") return String(skuMap.canonicalSku(skuKey) || skuKey);
  return skuKey;
}
|
||||||
|
|
||||||
/**
 * Index the items of one database object by canonical SKU.
 *
 * @param {object|null} obj - Parsed database file; reads `storeLabel`/`store`,
 *   `categoryLabel`/`category` and `items`. A missing object yields an empty map.
 * @param {object|null} skuMap - Optional SKU map passed to `canonicalize`.
 * @param {object} options
 * @param {boolean} options.includeRemoved - When false, items flagged `removed` are skipped.
 * @returns {Map<string, object>} canonSku -> normalized item. When several items
 *   share a canonical SKU, the last one in `items` wins.
 */
function mapDbItems(obj, skuMap, { includeRemoved }) {
  const storeLabel = String(obj?.storeLabel || obj?.store || "");
  const categoryLabel = String(obj?.categoryLabel || obj?.category || "");
  const items = Array.isArray(obj?.items) ? obj.items : [];

  const m = new Map(); // canonSku -> item (for this store+category db)
  for (const it of items) {
    if (!it) continue;
    const removed = Boolean(it.removed);
    if (!includeRemoved && removed) continue;

    const skuKey = normalizeSkuKeyOrEmpty({ skuRaw: it.sku, storeLabel, url: it.url });
    const canon = canonicalize(skuKey, skuMap);
    if (!canon) continue; // no usable key: the item cannot be tracked

    m.set(canon, {
      canonSku: canon,
      skuRaw: String(it.sku || ""),
      name: String(it.name || ""),
      price: String(it.price || ""),
      url: String(it.url || ""),
      img: String(it.img || it.image || it.thumb || ""),
      removed,
      storeLabel,
      categoryLabel,
    });
  }
  return m;
}
|
||||||
|
|
||||||
/**
 * Diff two snapshots of the same DB file.
 *
 * - `newItems`: live items in `nextObj` whose canonical SKU was absent from
 *   `prevObj` entirely (removed items included, so a re-listed item is not new).
 * - `priceDown`: live items that were also live before and whose parsed price
 *   is strictly lower; each carries `oldPrice`, `newPrice` and `pct`.
 *
 * @param {object | null} prevObj DB object before the change
 * @param {object | null} nextObj DB object after the change
 * @param {*} skuMap passed through to mapDbItems()
 * @returns {{ newItems: object[], priceDown: object[] }}
 */
function diffDb(prevObj, nextObj, skuMap) {
  // Only three indexes are needed; a "next, including removed" index was
  // previously built here but never read.
  const prevAll = mapDbItems(prevObj, skuMap, { includeRemoved: true });
  const prevLive = mapDbItems(prevObj, skuMap, { includeRemoved: false });
  const nextLive = mapDbItems(nextObj, skuMap, { includeRemoved: false });

  const newItems = [];
  const priceDown = [];

  for (const [canon, now] of nextLive.entries()) {
    // Unknown in the previous snapshot (even as removed) => brand new.
    // Such an item cannot be in prevLive either, so skip the price check.
    if (!prevAll.get(canon)) {
      newItems.push(now);
      continue;
    }

    const was = prevLive.get(canon);
    if (!was) continue; // previously removed: no meaningful old price

    const oldPrice = String(was.price || "");
    const newPrice = String(now.price || "");
    if (oldPrice === newPrice) continue;

    const oldNum = priceToNumber(oldPrice);
    const newNum = priceToNumber(newPrice);
    if (oldNum === null || newNum === null) continue;
    if (newNum >= oldNum) continue;

    priceDown.push({
      ...now,
      oldPrice,
      newPrice,
      pct: pctOff(oldPrice, newPrice),
    });
  }

  return { newItems, priceDown };
}
|
||||||
|
|
||||||
/**
 * Build lookup indexes over the live items of every DB file returned by
 * listDbFilesOnDisk().
 *
 * Files that cannot be read, or that carry no store label, are skipped.
 *
 * @param {*} skuMap passed through to mapDbItems()
 * @returns {{
 *   availability: Map<string, Set<string>>,
 *   cheapest: Map<string, { priceNum: number, stores: Set<string>, example: object }>,
 *   byStoreCanon: Map<string, Map<string, object>>
 * }}
 */
function buildCurrentIndexes(skuMap) {
  const availability = new Map(); // canonSku -> Set(storeLabel)
  const cheapest = new Map(); // canonSku -> { priceNum, stores:Set, example:{name,url,img,categoryLabel} }
  const byStoreCanon = new Map(); // storeLabel -> Map(canonSku -> item)

  for (const file of listDbFilesOnDisk()) {
    const db = readJson(file);
    if (!db) continue;

    const storeLabel = String(db.storeLabel || db.store || "");
    if (!storeLabel) continue;

    const live = mapDbItems(db, skuMap, { includeRemoved: false });

    let storeIndex = byStoreCanon.get(storeLabel);
    if (!storeIndex) {
      storeIndex = new Map();
      byStoreCanon.set(storeLabel, storeIndex);
    }

    for (const item of live.values()) {
      const sku = item.canonSku;

      let sellers = availability.get(sku);
      if (!sellers) {
        sellers = new Set();
        availability.set(sku, sellers);
      }
      sellers.add(storeLabel);

      storeIndex.set(sku, item);

      // Items without a parseable price still count for availability,
      // but cannot take part in the cheapest-price index.
      const priceNum = priceToNumber(item.price);
      if (priceNum === null) continue;

      const best = cheapest.get(sku);
      if (!best || priceNum < best.priceNum) {
        // First price seen, or a strictly lower one: start a fresh record.
        cheapest.set(sku, {
          priceNum,
          stores: new Set([storeLabel]),
          example: { name: item.name, url: item.url, img: item.img, categoryLabel: item.categoryLabel },
        });
      } else if (priceNum === best.priceNum) {
        // Tie: this store shares the lowest price.
        best.stores.add(storeLabel);
      }
    }
  }

  return { availability, cheapest, byStoreCanon };
}
|
||||||
|
|
||||||
function renderHtml({ title, subtitle, uniqueNews, bigSales, commitUrl, pagesUrl }) {
|
function renderHtml({ title, subtitle, uniqueNews, bigSales, commitUrl, pagesUrl }) {
|
||||||
const now = new Date().toISOString();
|
const now = new Date().toISOString();
|
||||||
|
|
||||||
function section(titleText, rowsHtml) {
|
function section(titleText, rowsHtml) {
|
||||||
return `
|
return `
|
||||||
<div style="margin:16px 0 6px 0;font-weight:700;font-size:16px">${htmlEscape(titleText)}</div>
|
<div style="margin:16px 0 6px 0;font-weight:700;font-size:16px">${htmlEscape(titleText)}</div>
|
||||||
${rowsHtml || `<div style="color:#666">None</div>`}
|
${rowsHtml || `<div style="color:#666">None</div>`}
|
||||||
`;
|
`;
|
||||||
}
|
}
|
||||||
|
|
||||||
function card(it, extraHtml) {
|
function card(it, extraHtml) {
|
||||||
const img = it.img
|
const img = it.img
|
||||||
? `<img src="${htmlEscape(it.img)}" width="84" height="84" style="object-fit:contain;border-radius:8px;border:1px solid #eee;background:#fff" />`
|
? `<img src="${htmlEscape(it.img)}" width="84" height="84" style="object-fit:contain;border-radius:8px;border:1px solid #eee;background:#fff" />`
|
||||||
: "";
|
: "";
|
||||||
const name = htmlEscape(it.name || "");
|
const name = htmlEscape(it.name || "");
|
||||||
const store = htmlEscape(it.storeLabel || "");
|
const store = htmlEscape(it.storeLabel || "");
|
||||||
const cat = htmlEscape(it.categoryLabel || "");
|
const cat = htmlEscape(it.categoryLabel || "");
|
||||||
const price = htmlEscape(it.price || "");
|
const price = htmlEscape(it.price || "");
|
||||||
const url = htmlEscape(it.url || "");
|
const url = htmlEscape(it.url || "");
|
||||||
return `
|
return `
|
||||||
<table role="presentation" width="100%" cellpadding="0" cellspacing="0" style="border:1px solid #eee;border-radius:12px;margin:10px 0">
|
<table role="presentation" width="100%" cellpadding="0" cellspacing="0" style="border:1px solid #eee;border-radius:12px;margin:10px 0">
|
||||||
<tr>
|
<tr>
|
||||||
<td style="padding:12px;vertical-align:top;width:96px">${img || ""}</td>
|
<td style="padding:12px;vertical-align:top;width:96px">${img || ""}</td>
|
||||||
|
|
@ -315,15 +315,15 @@ function renderHtml({ title, subtitle, uniqueNews, bigSales, commitUrl, pagesUrl
|
||||||
</tr>
|
</tr>
|
||||||
</table>
|
</table>
|
||||||
`;
|
`;
|
||||||
}
|
}
|
||||||
|
|
||||||
const uniqueHtml = uniqueNews.map((it) => card(it)).join("");
|
const uniqueHtml = uniqueNews.map((it) => card(it)).join("");
|
||||||
const salesHtml = bigSales
|
const salesHtml = bigSales
|
||||||
.map((it) => {
|
.map((it) => {
|
||||||
const pct = Number.isFinite(it.pct) ? it.pct : null;
|
const pct = Number.isFinite(it.pct) ? it.pct : null;
|
||||||
const oldP = htmlEscape(it.oldPrice || "");
|
const oldP = htmlEscape(it.oldPrice || "");
|
||||||
const newP = htmlEscape(it.newPrice || "");
|
const newP = htmlEscape(it.newPrice || "");
|
||||||
const extra = `
|
const extra = `
|
||||||
<div style="margin-top:6px;font-size:13px">
|
<div style="margin-top:6px;font-size:13px">
|
||||||
<span style="color:#b00020;text-decoration:line-through">${oldP}</span>
|
<span style="color:#b00020;text-decoration:line-through">${oldP}</span>
|
||||||
<span style="margin:0 6px;color:#666">→</span>
|
<span style="margin:0 6px;color:#666">→</span>
|
||||||
|
|
@ -331,11 +331,11 @@ function renderHtml({ title, subtitle, uniqueNews, bigSales, commitUrl, pagesUrl
|
||||||
${pct !== null ? `<span style="margin-left:8px;color:#137333;font-weight:700">(${pct}% off)</span>` : ""}
|
${pct !== null ? `<span style="margin-left:8px;color:#137333;font-weight:700">(${pct}% off)</span>` : ""}
|
||||||
</div>
|
</div>
|
||||||
`;
|
`;
|
||||||
return card({ ...it, price: "" }, extra);
|
return card({ ...it, price: "" }, extra);
|
||||||
})
|
})
|
||||||
.join("");
|
.join("");
|
||||||
|
|
||||||
const links = `
|
const links = `
|
||||||
<div style="margin-top:10px;font-size:12px;color:#666">
|
<div style="margin-top:10px;font-size:12px;color:#666">
|
||||||
${commitUrl ? `Commit: <a href="${htmlEscape(commitUrl)}" style="color:#0b57d0;text-decoration:none">${htmlEscape(commitUrl)}</a><br/>` : ""}
|
${commitUrl ? `Commit: <a href="${htmlEscape(commitUrl)}" style="color:#0b57d0;text-decoration:none">${htmlEscape(commitUrl)}</a><br/>` : ""}
|
||||||
${pagesUrl ? `Visualizer: <a href="${htmlEscape(pagesUrl)}" style="color:#0b57d0;text-decoration:none">${htmlEscape(pagesUrl)}</a>` : ""}
|
${pagesUrl ? `Visualizer: <a href="${htmlEscape(pagesUrl)}" style="color:#0b57d0;text-decoration:none">${htmlEscape(pagesUrl)}</a>` : ""}
|
||||||
|
|
@ -343,7 +343,7 @@ function renderHtml({ title, subtitle, uniqueNews, bigSales, commitUrl, pagesUrl
|
||||||
</div>
|
</div>
|
||||||
`;
|
`;
|
||||||
|
|
||||||
return `<!doctype html>
|
return `<!doctype html>
|
||||||
<html>
|
<html>
|
||||||
<head>
|
<head>
|
||||||
<meta charset="utf-8" />
|
<meta charset="utf-8" />
|
||||||
|
|
@ -365,137 +365,137 @@ function renderHtml({ title, subtitle, uniqueNews, bigSales, commitUrl, pagesUrl
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Append step outputs to the file named by the GITHUB_OUTPUT environment
 * variable. Does nothing when that variable is unset or empty.
 *
 * Single-line values are written as `name=value`. A value containing a line
 * break would corrupt that format, so it is written in the delimiter form
 * (`name<<DELIM`, value lines, `DELIM`) with a delimiter that does not occur
 * anywhere in the value.
 *
 * @param {Record<string, *>} kv output names mapped to values (stringified)
 * @returns {void}
 */
function writeGithubOutput(kv) {
  const outPath = process.env.GITHUB_OUTPUT;
  if (!outPath) return;

  const lines = [];
  for (const [k, v] of Object.entries(kv)) {
    const text = String(v);
    if (!/[\r\n]/.test(text)) {
      lines.push(`${k}=${text}`);
      continue;
    }

    // Grow the delimiter until it cannot be confused with the payload.
    let delimiter = "EOF";
    while (text.includes(delimiter)) delimiter += "_EOF";
    lines.push(`${k}<<${delimiter}`, text, delimiter);
  }
  fs.appendFileSync(outPath, lines.join("\n") + "\n", "utf8");
}
|
||||||
|
|
||||||
/**
 * Entry point of the alert builder.
 *
 * Diffs every DB file changed between HEAD and its first parent, keeps
 *   - new items that are currently available at exactly one store, and
 *   - price drops of at least 20% that are now the lowest current price,
 * then writes alert.html, alert_subject.txt and alert_should_send.txt under
 * <cwd>/reports and reports the result through writeGithubOutput().
 * When there is no parent commit or no changed DB file, only the
 * "should send = 0" marker is written.
 */
function main() {
  const reportsDir = path.join(process.cwd(), "reports");
  ensureDir(reportsDir);

  const sendPath = path.join(reportsDir, "alert_should_send.txt");

  // Shared early exit: record "do not send" and stop.
  const writeNoAlert = () => {
    fs.writeFileSync(sendPath, "0\n", "utf8");
    writeGithubOutput({ should_send: 0 });
  };

  const headSha = runGit(["rev-parse", "HEAD"]);
  const parentSha = getFirstParentSha(headSha);
  if (!parentSha) {
    writeNoAlert();
    return;
  }

  const skuMap = loadSkuMapOrNull();

  const changed = listChangedDbFiles(parentSha, headSha);
  if (!changed.length) {
    writeNoAlert();
    return;
  }

  const { availability, cheapest, byStoreCanon } = buildCurrentIndexes(skuMap);

  // Prefer the item as it currently sits on disk; fall back to the diffed item.
  const currentListing = (it) => byStoreCanon.get(it.storeLabel)?.get(it.canonSku) || it;

  // Keep the first entry seen for each canonSku + store pair.
  const dedupe = (arr) => {
    const firstByKey = new Map();
    for (const it of arr) {
      const key = `${it.canonSku}|${it.storeLabel}`;
      if (!firstByKey.has(key)) firstByKey.set(key, it);
    }
    return [...firstByKey.values()];
  };

  const uniqueNews = [];
  const bigSales = [];

  for (const file of changed) {
    const existedBefore = gitFileExistsAtSha(parentSha, file);
    const existsNow = gitFileExistsAtSha(headSha, file);

    // NEW FEATURE: if this DB file is brand new, ignore its "new items" for alert.
    const isBrandNewFile = !existedBefore && existsNow;
    if (isBrandNewFile) continue;

    const prevObj = gitShowJson(parentSha, file);
    const nextObj = gitShowJson(headSha, file);
    if (!prevObj && !nextObj) continue;

    const { newItems, priceDown } = diffDb(prevObj, nextObj, skuMap);

    for (const it of newItems) {
      // Only alert when this store is the single store carrying the item.
      const stores = availability.get(it.canonSku);
      if (!stores || stores.size !== 1 || !stores.has(it.storeLabel)) continue;

      uniqueNews.push(currentListing(it));
    }

    for (const it of priceDown) {
      const { pct } = it;
      if (!Number.isFinite(pct) || pct < 20) continue;

      const best = cheapest.get(it.canonSku);
      if (!best) continue;

      const newN = priceToNumber(it.newPrice);
      if (newN === null) continue;

      // The dropped price must be the current lowest, and held by this store.
      if (best.priceNum !== newN) continue;
      if (!best.stores.has(it.storeLabel)) continue;

      bigSales.push({
        ...currentListing(it),
        oldPrice: it.oldPrice,
        newPrice: it.newPrice,
        pct,
      });
    }
  }

  const uniqueFinal = dedupe(uniqueNews).sort((a, b) => (a.name || "").localeCompare(b.name || ""));
  const salesFinal = dedupe(bigSales).sort((a, b) => (b.pct || 0) - (a.pct || 0));

  const shouldSend = uniqueFinal.length > 0 || salesFinal.length > 0;
  const sendFlag = shouldSend ? 1 : 0;

  let subject = `Spirit Tracker: (no alert)`;
  if (shouldSend) {
    subject = `Spirit Tracker: ${uniqueFinal.length} unique new · ${salesFinal.length} big sales`;
  }

  const ghRepo = process.env.GITHUB_REPOSITORY || "";
  const ghUrl = process.env.GITHUB_SERVER_URL || "https://github.com";
  const commitUrl = ghRepo ? `${ghUrl}/${ghRepo}/commit/${headSha}` : "";
  const pagesUrl = process.env.PAGES_URL || "";

  const html = renderHtml({
    title: "Spirit Tracker Alert",
    subtitle: subject,
    uniqueNews: uniqueFinal,
    bigSales: salesFinal,
    commitUrl,
    pagesUrl,
  });

  const htmlPath = path.join(reportsDir, "alert.html");
  const subjPath = path.join(reportsDir, "alert_subject.txt");

  fs.writeFileSync(htmlPath, html, "utf8");
  fs.writeFileSync(subjPath, subject + "\n", "utf8");
  fs.writeFileSync(sendPath, `${sendFlag}\n`, "utf8");

  writeGithubOutput({
    should_send: sendFlag,
    subject,
    html_path: htmlPath,
  });
}
|
||||||
|
|
||||||
main();
|
main();
|
||||||
|
|
|
||||||
|
|
@ -6,134 +6,134 @@ const fs = require("fs");
|
||||||
const path = require("path");
|
const path = require("path");
|
||||||
|
|
||||||
/**
 * Run `git` with the given arguments and return its stdout as a UTF-8 string
 * with trailing whitespace removed. Errors from execFileSync propagate.
 *
 * @param {string[]} args
 * @returns {string}
 */
function runGit(args) {
  const stdout = execFileSync("git", args, { encoding: "utf8" });
  return stdout.trimEnd();
}
|
||||||
|
|
||||||
/**
 * List the regular `*.json` files directly inside `dbDir`.
 *
 * @param {string} dbDir
 * @returns {string[]} paths joined onto `dbDir`; [] if anything throws
 */
function listDbFiles(dbDir) {
  try {
    const jsonPaths = [];
    for (const entry of fs.readdirSync(dbDir, { withFileTypes: true })) {
      if (!entry.isFile()) continue;
      if (!entry.name.endsWith(".json")) continue;
      jsonPaths.push(path.join(dbDir, entry.name));
    }
    return jsonPaths;
  } catch {
    // Best effort: any failure is treated as "no files".
    return [];
  }
}
|
||||||
|
|
||||||
/**
 * List the common-listings report files directly inside `reportsDir`: regular
 * files ending in ".json" whose name matches `common_listings_*_top<N>.json`
 * (name match is case-insensitive).
 *
 * @param {string} reportsDir
 * @returns {string[]} paths joined onto `reportsDir`; [] if anything throws
 */
function listCommonListingReportFiles(reportsDir) {
  try {
    const reportPaths = [];
    for (const entry of fs.readdirSync(reportsDir, { withFileTypes: true })) {
      const isJsonFile = entry.isFile() && entry.name.endsWith(".json");
      if (!isJsonFile) continue;

      const { name } = entry;
      if (/^common_listings_.*_top\d+\.json$/i.test(name)) {
        reportPaths.push(path.join(reportsDir, name));
      }
    }
    return reportPaths;
  } catch {
    // Best effort: any failure is treated as "no reports".
    return [];
  }
}
|
||||||
|
|
||||||
/**
 * Extract the leading `YYYY-MM-DD` part of a timestamp string.
 *
 * @param {*} iso value to inspect (null/undefined are treated as "")
 * @returns {string} the date part, or "" when the value does not start with one
 */
function dateOnly(iso) {
  const text = String(iso ?? "");
  const found = /^(\d{4}-\d{2}-\d{2})/.exec(text);
  if (found === null) return "";
  return found[1];
}
|
||||||
|
|
||||||
/**
 * Build the commit-history payload for a set of repo-relative files.
 *
 * For each file, reads up to `maxRawPerFile` commits from `git log`, collapses
 * them to one commit per calendar date (the newest commit of that date), and
 * keeps the newest `maxDaysPerFile` dates, ordered oldest -> newest.
 * Files whose `git log` call throws are left out of the payload.
 *
 * The caller's `relFiles` array is not modified.
 *
 * @param {object} options
 * @param {string} options.repoRoot accepted for interface compatibility; not read here
 * @param {string[]} options.relFiles repo-relative paths to look up
 * @param {number} options.maxRawPerFile max commits requested from git per file
 * @param {number} options.maxDaysPerFile max per-date entries kept per file
 * @returns {{ generatedAt: string, branch: string, files: Record<string, Array<{ sha: string, date: string, ts: string }>> }}
 */
function buildCommitPayloadForFiles({ repoRoot, relFiles, maxRawPerFile, maxDaysPerFile }) {
  const payload = {
    generatedAt: new Date().toISOString(),
    branch: "data",
    files: {},
  };

  // Sort a copy: Array.prototype.sort works in place and would reorder the
  // array owned by the caller.
  const sortedFiles = [...relFiles].sort();

  for (const rel of sortedFiles) {
    let txt = "";
    try {
      // %H = sha, %cI = committer date strict ISO 8601 (includes time + tz)
      txt = runGit(["log", "--format=%H %cI", `-${maxRawPerFile}`, "--", rel]);
    } catch {
      continue;
    }

    const lines = txt
      .split(/\r?\n/)
      .map((s) => s.trim())
      .filter(Boolean);

    // git log is newest -> oldest.
    // Keep the FIRST commit we see for each date (that is the most recent commit for that date).
    const byDate = new Map(); // date -> { sha, date, ts }
    for (const line of lines) {
      const m = line.match(/^([0-9a-f]{7,40})\s+(.+)$/i);
      if (!m) continue;

      const [, sha, ts] = m;
      const d = dateOnly(ts);
      if (!d) continue;

      if (!byDate.has(d)) byDate.set(d, { sha, date: d, ts });
    }

    // Convert to oldest -> newest
    let arr = [...byDate.values()].reverse();

    // Keep only the newest maxDaysPerFile entries (still oldest -> newest)
    if (arr.length > maxDaysPerFile) {
      arr = arr.slice(arr.length - maxDaysPerFile);
    }

    payload.files[rel] = arr;
  }

  return payload;
}
|
||||||
|
|
||||||
/**
 * Entry point: writes two commit-index files under <cwd>/viz/data.
 *
 *   - db_commits.json: history of every JSON file in data/db
 *   - common_listings_commits.json: history of the common-listings reports
 *
 * Each index is produced by buildCommitPayloadForFiles() and a one-line
 * summary is printed to stdout after each file is written.
 */
function main() {
  const repoRoot = process.cwd();
  const outDir = path.join(repoRoot, "viz", "data");
  fs.mkdirSync(outDir, { recursive: true });

  // We want the viz to show ONE point per day (the most recent run that day),
  // so multiple commits per day are collapsed to the newest commit for that date.
  //
  // With multiple runs/day we also want to keep a long-ish daily history.
  // Raw commits per day could be ~4, so grab a larger raw window and then collapse.
  const MAX_RAW_PER_FILE = 2400; // ~600 days @ 4 runs/day
  const MAX_DAYS_PER_FILE = 600; // daily points kept after collapsing

  // Build one payload, write it as pretty JSON and report what was written.
  const writeCommitIndex = (outFile, relFiles) => {
    const payload = buildCommitPayloadForFiles({
      repoRoot,
      relFiles,
      maxRawPerFile: MAX_RAW_PER_FILE,
      maxDaysPerFile: MAX_DAYS_PER_FILE,
    });
    fs.writeFileSync(outFile, JSON.stringify(payload, null, 2) + "\n", "utf8");
    process.stdout.write(`Wrote ${outFile} (${Object.keys(payload.files).length} files)\n`);
  };

  // ---- Existing output (UNCHANGED): db_commits.json ----
  const dbDir = path.join(repoRoot, "data", "db");
  const dbFiles = listDbFiles(dbDir).map((abs) => path.posix.join("data/db", path.basename(abs)));
  writeCommitIndex(path.join(outDir, "db_commits.json"), dbFiles);

  // ---- New output: common listings report commits ----
  const reportsDir = path.join(repoRoot, "reports");
  const reportFilesRel = listCommonListingReportFiles(reportsDir).map((abs) =>
    path.posix.join("reports", path.basename(abs)),
  );
  writeCommitIndex(path.join(outDir, "common_listings_commits.json"), reportFilesRel);
}
|
||||||
|
|
||||||
main();
|
main();
|
||||||
|
|
|
||||||
|
|
@ -6,233 +6,224 @@ const path = require("path");
|
||||||
const { execFileSync } = require("child_process");
|
const { execFileSync } = require("child_process");
|
||||||
|
|
||||||
/**
 * Create `dir`, including any missing parent directories.
 *
 * @param {string} dir
 * @returns {void}
 */
function ensureDir(dir) {
  const options = { recursive: true };
  fs.mkdirSync(dir, options);
}
|
||||||
|
|
||||||
/**
 * List the regular `*.json` files directly inside `dir`.
 *
 * @param {string} dir
 * @returns {string[]} paths joined onto `dir`; whatever was collected before
 *   an error (normally []) if reading the directory fails
 */
function listJsonFiles(dir) {
  const found = [];
  try {
    fs.readdirSync(dir, { withFileTypes: true }).forEach((entry) => {
      const isJson = entry.isFile() && String(entry.name || "").endsWith(".json");
      if (isJson) found.push(path.join(dir, entry.name));
    });
  } catch {
    // ignore
  }
  return found;
}
|
||||||
|
|
||||||
/**
 * Read and parse a UTF-8 JSON file.
 *
 * @param {string} file
 * @returns {*} the parsed value, or null when reading or parsing fails
 */
function readJson(file) {
  let parsed = null;
  try {
    const text = fs.readFileSync(file, "utf8");
    parsed = JSON.parse(text);
  } catch {
    parsed = null;
  }
  return parsed;
}
|
||||||
|
|
||||||
/**
 * Load <repoRoot>/viz/data/db_commits.json.
 *
 * @param {string} repoRoot
 * @returns {*} the parsed JSON, or null when the file is missing or invalid
 */
function readDbCommitsOrNull(repoRoot) {
  const commitsFile = path.join(repoRoot, "viz", "data", "db_commits.json");
  let commits = null;
  try {
    const text = fs.readFileSync(commitsFile, "utf8");
    commits = JSON.parse(text);
  } catch {
    commits = null;
  }
  return commits;
}
|
||||||
|
|
||||||
/**
 * Read `filePath` as it existed at commit `sha` (via `git show <sha>:<path>`)
 * and parse it as JSON.
 *
 * @param {string} sha - Commit (or any revision) to read from.
 * @param {string} filePath - Repository-relative path of the file.
 * @returns {*} The parsed JSON value, or `null` on any failure (git error,
 *   path absent at that revision, invalid JSON).
 */
function gitShowJson(sha, filePath) {
  try {
    const txt = execFileSync("git", ["show", `${sha}:${filePath}`], {
      encoding: "utf8",
      stdio: ["ignore", "pipe", "pipe"], // silence git fatal spam
      // The execFileSync default cap on stdout is 1 MiB; a larger db file would
      // throw ENOBUFS and be silently reported as `null` by the catch below.
      maxBuffer: 64 * 1024 * 1024,
    });
    return JSON.parse(txt);
  } catch {
    return null;
  }
}
|
||||||
|
|
||||||
/**
 * Extract a six-digit product code from `v`.
 *
 * @param {*} v - Raw SKU value; `null`/`undefined` are treated as empty.
 * @returns {string} The first standalone run of exactly six digits
 *   (delimited by word boundaries), or `""` if there is none.
 */
function normalizeCspc(v) {
  const text = String(v ?? "");
  const hit = /\b(\d{6})\b/.exec(text);
  if (hit === null) return "";
  return hit[1];
}
|
||||||
|
|
||||||
/**
 * 32-bit FNV-1a hash of `str`, computed over its UTF-16 code units.
 *
 * @param {string} str - Text to hash.
 * @returns {string} The hash as eight lowercase hex digits.
 */
function fnv1a32(str) {
  const FNV_PRIME = 0x01000193;
  let hash = 0x811c9dc5; // FNV offset basis
  let idx = 0;
  while (idx < str.length) {
    // XOR in the code unit, then multiply with 32-bit wraparound.
    hash = Math.imul(hash ^ str.charCodeAt(idx), FNV_PRIME);
    idx += 1;
  }
  const unsigned = hash >>> 0;
  return unsigned.toString(16).padStart(8, "0");
}
|
||||||
|
|
||||||
/**
 * Build a stand-in SKU for an item that has no real product code.
 *
 * @param {string} storeLabel - Store name; falls back to "store" when empty.
 * @param {string} url - Item URL.
 * @returns {string} `"u:"` followed by the FNV-1a hash of `"<store>|<url>"`,
 *   or `""` when there is no URL to hash.
 */
function makeSyntheticSku(storeLabel, url) {
  const storeName = String(storeLabel || "store");
  const link = String(url || "");
  if (link === "") return "";
  const seed = `${storeName}|${link}`;
  return `u:${fnv1a32(seed)}`;
}
|
||||||
|
|
||||||
/**
 * Choose the key used to identify an item: its real six-digit code when
 * `it.sku` contains one, otherwise a synthetic key derived from the store
 * label and the item's URL (which may be `""` if the item has no URL).
 */
function keySkuForItem(it, storeLabel) {
  const cspc = normalizeCspc(it?.sku);
  return cspc ? cspc : makeSyntheticSku(storeLabel, it?.url);
}
|
||||||
|
|
||||||
/**
 * Determine when each wanted SKU key first showed up as a live (non-removed)
 * row in one store/category db file, by replaying that file's commits.
 *
 * @param {object} args
 * @param {string} args.repoRoot - Repository root (not used by this function).
 * @param {string} args.relDbFile - Repo-relative path of the db file.
 * @param {string} args.storeLabel - Fallback store label when a historical
 *   snapshot does not carry its own.
 * @param {Iterable<string>} args.wantSkuKeys - Keys to resolve.
 * @param {Array<{sha: string, ts: string}>|null} args.commitsArr - Commits
 *   touching the file, oldest -> newest (from db_commits.json).
 * @param {string} args.nowIso - Timestamp used for keys with no history.
 * @returns {Map<string, string>} skuKey -> first-seen ISO timestamp.
 */
function computeFirstSeenForDbFile({ repoRoot, relDbFile, storeLabel, wantSkuKeys, commitsArr, nowIso }) {
  const firstSeen = new Map();
  const wanted = new Set(wantSkuKeys);
  const allResolved = () => firstSeen.size >= wanted.size;

  // Without usable history the loop below never runs and every key is
  // stamped with `nowIso` by the final fill.
  const history = Array.isArray(commitsArr) ? commitsArr : [];

  for (const commit of history) {
    const sha = String(commit?.sha || "");
    const ts = String(commit?.ts || "");
    if (!sha || !ts) continue;

    const snapshot = gitShowJson(sha, relDbFile);
    const rows = Array.isArray(snapshot?.items) ? snapshot.items : [];
    const snapshotLabel = String(snapshot?.storeLabel || snapshot?.store || storeLabel || "");

    for (const row of rows) {
      // Only live rows count: we want the first time the item existed LIVE.
      if (!row || row.removed) continue;

      const key = keySkuForItem(row, snapshotLabel);
      if (!key || !wanted.has(key) || firstSeen.has(key)) continue;

      firstSeen.set(key, ts);
      if (allResolved()) break;
    }

    if (allResolved()) break;
  }

  // Anything never seen historically -> new today
  for (const key of wanted) {
    if (!firstSeen.has(key)) firstSeen.set(key, nowIso);
  }

  return firstSeen;
}
|
||||||
|
|
||||||
/**
 * Build `viz/data/index.json`: one flat row per item found in every
 * `data/db/*.json` file (removed rows included), each annotated with its
 * store/category metadata and the first time it was seen live.
 * Writes the file and prints a one-line summary to stdout.
 */
function main() {
  const repoRoot = path.resolve(__dirname, "..");
  const dbDir = path.join(repoRoot, "data", "db");
  const outDir = path.join(repoRoot, "viz", "data");
  const outFile = path.join(outDir, "index.json");

  ensureDir(outDir);

  const nowIso = new Date().toISOString();
  const commitsManifest = readDbCommitsOrNull(repoRoot);

  const rows = [];
  let liveCount = 0;

  // Normalise an optional field to a trimmed string.
  const text = (value) => String(value || "").trim();

  for (const file of listJsonFiles(dbDir)) {
    const db = readJson(file);
    if (!db) continue;

    const store = String(db.store || "");
    const storeLabel = String(db.storeLabel || store || "");
    const category = String(db.category || "");
    const categoryLabel = String(db.categoryLabel || "");
    const source = String(db.source || "");
    const updatedAt = String(db.updatedAt || "");

    // Repo-relative path with forward slashes, e.g. data/db/foo.json
    const dbFile = path.relative(repoRoot, file).replace(/\\/g, "/");

    const entries = (Array.isArray(db.items) ? db.items : []).filter(Boolean);

    // Build want keys from CURRENT file contents (includes removed rows too)
    const wantSkuKeys = entries.map((it) => keySkuForItem(it, storeLabel)).filter(Boolean);

    const firstSeenByKey = computeFirstSeenForDbFile({
      repoRoot,
      relDbFile: dbFile,
      storeLabel,
      wantSkuKeys,
      commitsArr: commitsManifest?.files?.[dbFile] || null,
      nowIso,
    });

    for (const it of entries) {
      const removed = Boolean(it.removed);
      if (!removed) liveCount += 1;

      const skuKey = keySkuForItem(it, storeLabel);
      let firstSeenAt = nowIso;
      if (skuKey) firstSeenAt = String(firstSeenByKey.get(skuKey) || nowIso);

      rows.push({
        sku: text(it.sku),
        name: text(it.name),
        price: text(it.price),
        url: text(it.url),
        img: text(it.img || it.image || it.thumb),
        removed, // additive: allows viz to show history / removed-only items
        store,
        storeLabel,
        category,
        categoryLabel,
        source,
        updatedAt,
        firstSeenAt, // first time this item appeared LIVE in this store/category db file (or now)
        dbFile,
      });
    }
  }

  // Deterministic ordering so regenerated output diffs cleanly.
  const sortKey = (row) => `${row.sku}|${row.storeLabel}|${row.removed ? 1 : 0}|${row.name}|${row.url}`;
  rows.sort((a, b) => sortKey(a).localeCompare(sortKey(b)));

  const outObj = {
    generatedAt: nowIso,
    // Additive metadata. Old readers can ignore.
    includesRemoved: true,
    count: rows.length,
    countLive: liveCount,
    items: rows,
  };

  fs.writeFileSync(outFile, JSON.stringify(outObj, null, 2) + "\n", "utf8");
  process.stdout.write(`Wrote ${path.relative(repoRoot, outFile)} (${rows.length} rows)\n`);
}
|
||||||
|
|
||||||
// Expose main() so other scripts can trigger the build programmatically.
module.exports = { main };

// Run the build right away when this file is executed directly.
if (require.main === module) main();
|
||||||
|
|
|
||||||
|
|
@ -6,474 +6,467 @@ const fs = require("fs");
|
||||||
const path = require("path");
|
const path = require("path");
|
||||||
|
|
||||||
/**
 * Run `git` with the given arguments and return its stdout.
 *
 * @param {string[]} args - Arguments passed to the git executable.
 * @returns {string} Stdout decoded as UTF-8 with trailing whitespace removed.
 * @throws If git cannot be started or exits with a non-zero status.
 */
function runGit(args) {
  const stdout = execFileSync("git", args, { encoding: "utf8" });
  return stdout.trimEnd();
}
|
||||||
|
|
||||||
/**
 * Load the JSON content of `filePath` as of revision `sha`, using
 * `git show <sha>:<path>`.
 *
 * @param {string} sha - Revision to read from.
 * @param {string} filePath - Repository-relative file path.
 * @returns {*} Parsed JSON, or `null` if git fails, the path does not exist
 *   at that revision, or the content is not valid JSON.
 */
function gitShowJson(sha, filePath) {
  try {
    const txt = execFileSync("git", ["show", `${sha}:${filePath}`], {
      encoding: "utf8",
      stdio: ["ignore", "pipe", "pipe"], // silence git fatal spam
      // Raise the 1 MiB execFileSync default: exceeding it throws ENOBUFS,
      // which this function would otherwise swallow and report as `null`.
      maxBuffer: 64 * 1024 * 1024,
    });
    return JSON.parse(txt);
  } catch {
    return null;
  }
}
|
||||||
|
|
||||||
/**
 * Check whether `filePath` exists at revision `sha`, using
 * `git cat-file -e <sha>:<path>` with all output discarded.
 *
 * @param {string} sha - Revision to inspect; an empty value yields `false`.
 * @param {string} filePath - Repository-relative file path.
 * @returns {boolean} `true` only when git exits successfully.
 */
function gitFileExistsAtSha(sha, filePath) {
  if (!sha) return false;

  const objectSpec = `${sha}:${filePath}`;
  let exists = true;
  try {
    execFileSync("git", ["cat-file", "-e", objectSpec], {
      stdio: ["ignore", "ignore", "ignore"],
    });
  } catch {
    exists = false;
  }
  return exists;
}
|
||||||
|
|
||||||
/**
 * List every file path under `dirRel` at revision `sha`
 * (`git ls-tree -r --name-only`).
 *
 * @returns {string[]} Trimmed, non-empty path lines; `[]` if git fails.
 */
function gitListTreeFiles(sha, dirRel) {
  let listing;
  try {
    listing = runGit(["ls-tree", "-r", "--name-only", sha, dirRel]);
  } catch {
    return [];
  }

  const names = [];
  for (const rawLine of listing.split(/\r?\n/)) {
    const name = rawLine.trim();
    if (name) names.push(name);
  }
  return names;
}
|
||||||
|
|
||||||
/**
 * Parse the UTF-8 JSON file at `filePath`.
 *
 * @returns {*} The parsed value, or `null` when the file is missing,
 *   unreadable, or malformed.
 */
function readJsonFileOrNull(filePath) {
  let raw;
  try {
    raw = fs.readFileSync(filePath, "utf8");
    return JSON.parse(raw);
  } catch {
    return null;
  }
}
|
||||||
|
|
||||||
/**
 * Pull the first standalone six-digit code out of `v`.
 * Returns `""` when `v` is nullish or contains no such code.
 */
function normalizeCspc(v) {
  const SIX_DIGITS = /\b(\d{6})\b/;
  const found = SIX_DIGITS.exec(String(v ?? ""));
  if (!found) {
    return "";
  }
  const [, code] = found;
  return code;
}
|
||||||
|
|
||||||
/**
 * Convert a price value to a trimmed string; `null`/`undefined` become `""`.
 */
function normPriceStr(p) {
  const raw = p ?? "";
  return String(raw).trim();
}
|
||||||
|
|
||||||
/**
 * Parse a display price such as "$1,299.99" into a number.
 *
 * Everything except digits and "." is stripped before parsing.
 *
 * @param {*} v - Raw price value; `null`/`undefined` are treated as empty.
 * @returns {number|null} The numeric price, or `null` when no number can be
 *   recovered (empty input, no digits, or an unparseable remainder such as
 *   "1.2.3").
 */
function priceToNumber(v) {
  const s = String(v ?? "").replace(/[^0-9.]/g, "");
  // Number("") is 0, so without this guard a missing or non-numeric price
  // ("", "N/A") would be reported as a real price of zero.
  if (s === "") return null;
  const n = Number(s);
  return Number.isFinite(n) ? n : null;
}
|
||||||
|
|
||||||
/**
 * Return the leading `YYYY-MM-DD` portion of an ISO-style timestamp,
 * or `""` when the value does not start with one.
 */
function dateOnly(iso) {
  const text = String(iso ?? "");
  const hit = /^(\d{4}-\d{2}-\d{2})/.exec(text);
  if (hit) {
    return hit[1];
  }
  return "";
}
|
||||||
|
|
||||||
/**
 * FNV-1a (32-bit) hash over the UTF-16 code units of `str`,
 * returned as an eight-character, zero-padded lowercase hex string.
 */
function fnv1a32(str) {
  let acc = 0x811c9dc5; // FNV offset basis
  for (let pos = 0, end = str.length; pos < end; pos += 1) {
    acc ^= str.charCodeAt(pos);
    acc = Math.imul(acc, 0x01000193); // multiply by the FNV prime, wrapping at 32 bits
  }
  const hex = (acc >>> 0).toString(16);
  return hex.padStart(8, "0");
}
|
||||||
|
|
||||||
/**
 * Derive a synthetic SKU ("u:" + 8 hex digits) from the store label and URL
 * for items that have no real product code. The store label defaults to
 * "store" when empty; an empty URL produces `""`.
 */
function makeSyntheticSku(storeLabel, url) {
  const label = String(storeLabel || "store");
  const href = String(url || "");
  if (href.length === 0) {
    return "";
  }
  const digest = fnv1a32(`${label}|${href}`);
  return `u:${digest}`;
}
|
||||||
|
|
||||||
/**
 * Identify an item by its real six-digit code when `it.sku` contains one;
 * otherwise fall back to a synthetic key built from the store label and
 * the item's URL.
 */
function keySkuForItem(it, storeLabel) {
  const realCode = normalizeCspc(it?.sku);
  if (realCode) {
    return realCode;
  }
  const synthetic = makeSyntheticSku(storeLabel, it?.url);
  return synthetic;
}
|
||||||
|
|
||||||
/**
 * Index a db object's items by SKU key, keeping one record per key.
 *
 * When several rows share a key: a live row beats a removed one; otherwise
 * the row with more of name/price/url filled in wins, and ties keep the
 * earlier row.
 *
 * @param {object|null} obj - Db file contents (`items`, `storeLabel`/`store`).
 * @param {{includeRemoved?: boolean}} [options] - Whether removed rows are
 *   eligible at all (default: they are skipped).
 * @returns {Map<string, {sku: string, name: string, price: string, url: string, removed: boolean}>}
 */
function mapBySku(obj, { includeRemoved } = { includeRemoved: false }) {
  const bySku = new Map();
  const rows = Array.isArray(obj?.items) ? obj.items : [];
  const storeLabel = String(obj?.storeLabel || obj?.store || "");

  // How many of the descriptive fields a record has filled in.
  const completeness = (rec) => [rec.name, rec.price, rec.url].filter(Boolean).length;

  for (const row of rows) {
    if (!row) continue;

    const sku = keySkuForItem(row, storeLabel);
    if (!sku) continue;

    const removed = Boolean(row.removed);
    if (removed && !includeRemoved) continue;

    const candidate = {
      sku,
      name: String(row.name || ""),
      price: String(row.price || ""),
      url: String(row.url || ""),
      removed,
    };

    const existing = bySku.get(sku);
    let shouldReplace;
    if (!existing) {
      shouldReplace = true;
    } else if (existing.removed !== candidate.removed) {
      // Prefer the non-removed record if both exist.
      shouldReplace = existing.removed;
    } else {
      // Same status: keep the more complete one, deterministically.
      shouldReplace = completeness(candidate) > completeness(existing);
    }

    if (shouldReplace) bySku.set(sku, candidate);
  }

  return bySku;
}
|
||||||
|
|
||||||
/**
 * Compare two snapshots of a db file and classify what changed.
 *
 * @param {object|null} prevObj - Earlier snapshot (may be null).
 * @param {object|null} nextObj - Later snapshot (may be null).
 * @returns {{newItems: object[], restoredItems: object[], removedItems: object[], priceChanges: object[]}}
 *   - newItems: live now, SKU absent from the earlier snapshot entirely
 *   - restoredItems: live now, present only as removed before
 *   - removedItems: live before, now missing or marked removed
 *   - priceChanges: live in both with a different price string; `kind` is
 *     "price_down"/"price_up" when both prices parse to numbers that differ,
 *     otherwise "price_change"
 */
function diffDb(prevObj, nextObj) {
  const prevAll = mapBySku(prevObj, { includeRemoved: true });
  const nextAll = mapBySku(nextObj, { includeRemoved: true });
  const prevLive = mapBySku(prevObj, { includeRemoved: false });
  const nextLive = mapBySku(nextObj, { includeRemoved: false });

  const newItems = [];
  const restoredItems = [];
  const removedItems = [];
  const priceChanges = [];

  // Decide the direction of a price move from the parsed numbers.
  const classify = (oldNum, newNum) => {
    if (oldNum === null || newNum === null) return "price_change";
    if (newNum < oldNum) return "price_down";
    if (newNum > oldNum) return "price_up";
    return "price_change";
  };

  for (const [sku, now] of nextLive) {
    const had = prevAll.get(sku);
    if (!had) {
      newItems.push({ ...now });
      continue;
    }
    if (had.removed) {
      restoredItems.push({ ...now });
      continue;
    }

    // Live on both sides: look for a price difference.
    const was = prevLive.get(sku);
    if (!was) continue;

    const oldPrice = normPriceStr(was.price);
    const newPrice = normPriceStr(now.price);
    if (oldPrice === newPrice) continue;

    priceChanges.push({
      kind: classify(priceToNumber(oldPrice), priceToNumber(newPrice)),
      sku,
      name: now.name || was.name || "",
      oldPrice,
      newPrice,
      url: now.url || was.url || "",
    });
  }

  for (const [sku, was] of prevLive) {
    const nxt = nextAll.get(sku);
    const gone = !nxt || nxt.removed;
    if (gone) removedItems.push({ ...was });
  }

  return { newItems, restoredItems, removedItems, priceChanges };
}
|
||||||
|
|
||||||
/**
 * Resolve the current HEAD commit (`git rev-parse --verify HEAD`).
 *
 * @returns {string} The git output, or `""` when the command fails.
 */
function getHeadShaOrEmpty() {
  let sha = "";
  try {
    sha = runGit(["rev-parse", "--verify", "HEAD"]);
  } catch {
    sha = "";
  }
  return sha;
}
|
||||||
|
|
||||||
/**
 * Look up the first parent of `sha` via `git rev-list --parents -n 1`.
 *
 * @returns {string} The second whitespace-separated token of git's output
 *   (the first parent), or `""` if there is none or git fails.
 */
function firstParentSha(sha) {
  try {
    const line = runGit(["rev-list", "--parents", "-n", "1", sha]);
    // Output is "<sha> <parent1> <parent2> ..."; take the token after the sha.
    const [, parent = ""] = line.split(/\s+/).filter(Boolean);
    return parent;
  } catch {
    return "";
  }
}
|
||||||
|
|
||||||
/**
 * List the `data/db` files that differ between two points.
 *
 * @param {string} fromSha - Starting revision; empty means "no baseline".
 * @param {string} toSha - Ending revision, or the literal "WORKTREE" for the
 *   files currently on disk.
 * @returns {string[]} Repo-relative paths. With no baseline this is every
 *   file at `toSha` (or every `*.json` in `<cwd>/data/db` for the worktree);
 *   otherwise the output of `git diff --name-only`. Any failure yields `[]`.
 */
function listChangedDbFiles(fromSha, toSha) {
  const DB_DIR = "data/db";
  const toWorktree = toSha === "WORKTREE";

  // Split command output into trimmed, non-empty lines.
  const nonEmptyLines = (text) =>
    text
      .split(/\r?\n/)
      .map((s) => s.trim())
      .filter(Boolean);

  if (!fromSha) {
    if (toSha && !toWorktree) {
      return gitListTreeFiles(toSha, DB_DIR);
    }
    if (toWorktree) {
      try {
        const entries = fs.readdirSync(path.join(process.cwd(), "data", "db"), { withFileTypes: true });
        return entries
          .filter((e) => e.isFile() && e.name.endsWith(".json"))
          .map((e) => path.posix.join(DB_DIR, e.name));
      } catch {
        return [];
      }
    }
  }

  try {
    // Diff against the working tree when no end revision is given to git.
    const revisions = toWorktree ? [fromSha] : [fromSha, toSha];
    return nonEmptyLines(runGit(["diff", "--name-only", ...revisions, "--", DB_DIR]));
  } catch {
    return [];
  }
}
|
||||||
|
|
||||||
/**
 * Collect the commits that touched `data/db` since `sinceIso`.
 *
 * @param {string} sinceIso - Lower bound passed to `git log --since`.
 * @returns {Array<{sha: string, ts: string, date: string}>} Commits in the
 *   reverse of git's log order (i.e. oldest first), where `ts` is the
 *   committer date and `date` its YYYY-MM-DD prefix. `[]` if git fails.
 */
function logDbCommitsSince(sinceIso) {
  try {
    const raw = runGit(["log", `--since=${sinceIso}`, "--format=%H %cI", "--", "data/db"]);
    const commits = [];

    for (const rawLine of raw.split(/\r?\n/)) {
      const line = rawLine.trim();
      if (!line) continue;

      // Each line is "<sha> <committer date>".
      const parsed = /^([0-9a-f]{7,40})\s+(.+)$/i.exec(line);
      if (!parsed) continue;

      const [, sha, ts] = parsed;
      commits.push({ sha, ts, date: dateOnly(ts) });
    }

    // git log lists newest first; callers want chronological order.
    return commits.reverse();
  } catch {
    return [];
  }
}
|
||||||
|
|
||||||
function main() {
|
function main() {
|
||||||
const repoRoot = process.cwd();
|
const repoRoot = process.cwd();
|
||||||
const outDir = path.join(repoRoot, "viz", "data");
|
const outDir = path.join(repoRoot, "viz", "data");
|
||||||
const outFile = path.join(outDir, "recent.json");
|
const outFile = path.join(outDir, "recent.json");
|
||||||
fs.mkdirSync(outDir, { recursive: true });
|
fs.mkdirSync(outDir, { recursive: true });
|
||||||
|
|
||||||
const windowDays = Math.max(1, Number(process.env.RECENT_DAYS || 7));
|
const windowDays = Math.max(1, Number(process.env.RECENT_DAYS || 7));
|
||||||
const maxItems = Math.max(1, Number(process.env.RECENT_MAX_ITEMS || 5000));
|
const maxItems = Math.max(1, Number(process.env.RECENT_MAX_ITEMS || 5000));
|
||||||
|
|
||||||
const now = new Date();
|
const now = new Date();
|
||||||
const since = new Date(now.getTime() - windowDays * 24 * 3600 * 1000);
|
const since = new Date(now.getTime() - windowDays * 24 * 3600 * 1000);
|
||||||
const sinceIso = since.toISOString();
|
const sinceIso = since.toISOString();
|
||||||
|
|
||||||
const headSha = getHeadShaOrEmpty();
|
const headSha = getHeadShaOrEmpty();
|
||||||
const items = [];
|
const items = [];
|
||||||
|
|
||||||
const commits = headSha ? logDbCommitsSince(sinceIso) : [];
|
const commits = headSha ? logDbCommitsSince(sinceIso) : [];
|
||||||
const pairs = [];
|
const pairs = [];
|
||||||
|
|
||||||
if (commits.length) {
|
if (commits.length) {
|
||||||
const first = commits[0];
|
const first = commits[0];
|
||||||
const parent = firstParentSha(first.sha);
|
const parent = firstParentSha(first.sha);
|
||||||
pairs.push({
|
pairs.push({
|
||||||
fromSha: parent || "",
|
fromSha: parent || "",
|
||||||
toSha: first.sha,
|
toSha: first.sha,
|
||||||
ts: first.ts,
|
ts: first.ts,
|
||||||
date: first.date,
|
date: first.date,
|
||||||
});
|
});
|
||||||
|
|
||||||
for (let i = 1; i < commits.length; i++) {
|
for (let i = 1; i < commits.length; i++) {
|
||||||
pairs.push({
|
pairs.push({
|
||||||
fromSha: commits[i - 1].sha,
|
fromSha: commits[i - 1].sha,
|
||||||
toSha: commits[i].sha,
|
toSha: commits[i].sha,
|
||||||
ts: commits[i].ts,
|
ts: commits[i].ts,
|
||||||
date: commits[i].date,
|
date: commits[i].date,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (headSha) {
|
if (headSha) {
|
||||||
pairs.push({
|
pairs.push({
|
||||||
fromSha: headSha,
|
fromSha: headSha,
|
||||||
toSha: "WORKTREE",
|
toSha: "WORKTREE",
|
||||||
ts: now.toISOString(),
|
ts: now.toISOString(),
|
||||||
date: dateOnly(now.toISOString()),
|
date: dateOnly(now.toISOString()),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function isSmwsBottle(storeLabel, it) {
|
function isSmwsBottle(storeLabel, it) {
|
||||||
const hay = [
|
const hay = [storeLabel, it?.name, it?.url]
|
||||||
storeLabel,
|
.map((x) => String(x || ""))
|
||||||
it?.name,
|
.join(" | ")
|
||||||
it?.url,
|
.toLowerCase();
|
||||||
]
|
return hay.includes("smws") || hay.includes("scotch malt whisky society");
|
||||||
.map((x) => String(x || ""))
|
}
|
||||||
.join(" | ")
|
|
||||||
.toLowerCase();
|
|
||||||
return hay.includes("smws") || hay.includes("scotch malt whisky society");
|
|
||||||
}
|
|
||||||
|
|
||||||
for (const p of pairs) {
|
for (const p of pairs) {
|
||||||
const fromSha = p.fromSha;
|
const fromSha = p.fromSha;
|
||||||
const toSha = p.toSha;
|
const toSha = p.toSha;
|
||||||
const ts = p.ts;
|
const ts = p.ts;
|
||||||
const d = p.date;
|
const d = p.date;
|
||||||
|
|
||||||
const files = listChangedDbFiles(fromSha, toSha);
|
const files = listChangedDbFiles(fromSha, toSha);
|
||||||
if (!files.length) continue;
|
if (!files.length) continue;
|
||||||
|
|
||||||
for (const file of files) {
|
for (const file of files) {
|
||||||
let prevObj = null;
|
let prevObj = null;
|
||||||
let nextObj = null;
|
let nextObj = null;
|
||||||
|
|
||||||
if (toSha === "WORKTREE") {
|
if (toSha === "WORKTREE") {
|
||||||
prevObj = fromSha ? gitShowJson(fromSha, file) : null;
|
prevObj = fromSha ? gitShowJson(fromSha, file) : null;
|
||||||
nextObj = readJsonFileOrNull(path.join(repoRoot, file));
|
nextObj = readJsonFileOrNull(path.join(repoRoot, file));
|
||||||
} else {
|
} else {
|
||||||
prevObj = fromSha ? gitShowJson(fromSha, file) : null;
|
prevObj = fromSha ? gitShowJson(fromSha, file) : null;
|
||||||
nextObj = gitShowJson(toSha, file);
|
nextObj = gitShowJson(toSha, file);
|
||||||
}
|
}
|
||||||
|
|
||||||
const nextExists =
|
const nextExists =
|
||||||
toSha === "WORKTREE"
|
toSha === "WORKTREE" ? fs.existsSync(path.join(repoRoot, file)) : gitFileExistsAtSha(toSha, file);
|
||||||
? fs.existsSync(path.join(repoRoot, file))
|
if (!nextExists) continue;
|
||||||
: gitFileExistsAtSha(toSha, file);
|
|
||||||
if (!nextExists) continue;
|
|
||||||
|
|
||||||
if (!prevObj && !nextObj) continue;
|
if (!prevObj && !nextObj) continue;
|
||||||
|
|
||||||
const storeLabel = String(
|
const storeLabel = String(
|
||||||
nextObj?.storeLabel ||
|
nextObj?.storeLabel || nextObj?.store || prevObj?.storeLabel || prevObj?.store || "",
|
||||||
nextObj?.store ||
|
);
|
||||||
prevObj?.storeLabel ||
|
const categoryLabel = String(
|
||||||
prevObj?.store ||
|
nextObj?.categoryLabel || nextObj?.category || prevObj?.categoryLabel || prevObj?.category || "",
|
||||||
""
|
);
|
||||||
);
|
|
||||||
const categoryLabel = String(
|
|
||||||
nextObj?.categoryLabel ||
|
|
||||||
nextObj?.category ||
|
|
||||||
prevObj?.categoryLabel ||
|
|
||||||
prevObj?.category ||
|
|
||||||
""
|
|
||||||
);
|
|
||||||
|
|
||||||
const isNewStoreFile =
|
const isNewStoreFile =
|
||||||
Boolean(fromSha) &&
|
Boolean(fromSha) &&
|
||||||
!gitFileExistsAtSha(fromSha, file) &&
|
!gitFileExistsAtSha(fromSha, file) &&
|
||||||
(toSha === "WORKTREE"
|
(toSha === "WORKTREE" ? fs.existsSync(path.join(repoRoot, file)) : gitFileExistsAtSha(toSha, file));
|
||||||
? fs.existsSync(path.join(repoRoot, file))
|
|
||||||
: gitFileExistsAtSha(toSha, file));
|
|
||||||
|
|
||||||
let { newItems, restoredItems, removedItems, priceChanges } = diffDb(
|
let { newItems, restoredItems, removedItems, priceChanges } = diffDb(prevObj, nextObj);
|
||||||
prevObj,
|
|
||||||
nextObj
|
|
||||||
);
|
|
||||||
|
|
||||||
if (isNewStoreFile) {
|
if (isNewStoreFile) {
|
||||||
newItems = [];
|
newItems = [];
|
||||||
restoredItems = [];
|
restoredItems = [];
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const it of newItems) {
|
for (const it of newItems) {
|
||||||
if (isSmwsBottle(storeLabel, it)) continue;
|
if (isSmwsBottle(storeLabel, it)) continue;
|
||||||
items.push({
|
items.push({
|
||||||
ts,
|
ts,
|
||||||
date: d,
|
date: d,
|
||||||
fromSha: fromSha || "",
|
fromSha: fromSha || "",
|
||||||
toSha,
|
toSha,
|
||||||
kind: "new",
|
kind: "new",
|
||||||
sku: it.sku,
|
sku: it.sku,
|
||||||
name: it.name,
|
name: it.name,
|
||||||
storeLabel,
|
storeLabel,
|
||||||
categoryLabel,
|
categoryLabel,
|
||||||
price: normPriceStr(it.price),
|
price: normPriceStr(it.price),
|
||||||
url: it.url,
|
url: it.url,
|
||||||
dbFile: file,
|
dbFile: file,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const it of restoredItems) {
|
for (const it of restoredItems) {
|
||||||
items.push({
|
items.push({
|
||||||
ts,
|
ts,
|
||||||
date: d,
|
date: d,
|
||||||
fromSha: fromSha || "",
|
fromSha: fromSha || "",
|
||||||
toSha,
|
toSha,
|
||||||
kind: "restored",
|
kind: "restored",
|
||||||
sku: it.sku,
|
sku: it.sku,
|
||||||
name: it.name,
|
name: it.name,
|
||||||
storeLabel,
|
storeLabel,
|
||||||
categoryLabel,
|
categoryLabel,
|
||||||
price: normPriceStr(it.price),
|
price: normPriceStr(it.price),
|
||||||
url: it.url,
|
url: it.url,
|
||||||
dbFile: file,
|
dbFile: file,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const it of removedItems) {
|
for (const it of removedItems) {
|
||||||
items.push({
|
items.push({
|
||||||
ts,
|
ts,
|
||||||
date: d,
|
date: d,
|
||||||
fromSha: fromSha || "",
|
fromSha: fromSha || "",
|
||||||
toSha,
|
toSha,
|
||||||
kind: "removed",
|
kind: "removed",
|
||||||
sku: it.sku,
|
sku: it.sku,
|
||||||
name: it.name,
|
name: it.name,
|
||||||
storeLabel,
|
storeLabel,
|
||||||
categoryLabel,
|
categoryLabel,
|
||||||
price: normPriceStr(it.price),
|
price: normPriceStr(it.price),
|
||||||
url: it.url,
|
url: it.url,
|
||||||
dbFile: file,
|
dbFile: file,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const u of priceChanges) {
|
for (const u of priceChanges) {
|
||||||
items.push({
|
items.push({
|
||||||
ts,
|
ts,
|
||||||
date: d,
|
date: d,
|
||||||
fromSha: fromSha || "",
|
fromSha: fromSha || "",
|
||||||
toSha,
|
toSha,
|
||||||
kind: u.kind,
|
kind: u.kind,
|
||||||
sku: u.sku,
|
sku: u.sku,
|
||||||
name: u.name,
|
name: u.name,
|
||||||
storeLabel,
|
storeLabel,
|
||||||
categoryLabel,
|
categoryLabel,
|
||||||
oldPrice: normPriceStr(u.oldPrice),
|
oldPrice: normPriceStr(u.oldPrice),
|
||||||
newPrice: normPriceStr(u.newPrice),
|
newPrice: normPriceStr(u.newPrice),
|
||||||
url: u.url,
|
url: u.url,
|
||||||
dbFile: file,
|
dbFile: file,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
items.sort((a, b) => String(b.ts).localeCompare(String(a.ts)));
|
items.sort((a, b) => String(b.ts).localeCompare(String(a.ts)));
|
||||||
|
|
||||||
const trimmed = items.slice(0, maxItems);
|
const trimmed = items.slice(0, maxItems);
|
||||||
|
|
||||||
const payload = {
|
const payload = {
|
||||||
generatedAt: now.toISOString(),
|
generatedAt: now.toISOString(),
|
||||||
windowDays,
|
windowDays,
|
||||||
since: sinceIso,
|
since: sinceIso,
|
||||||
headSha,
|
headSha,
|
||||||
count: trimmed.length,
|
count: trimmed.length,
|
||||||
items: trimmed,
|
items: trimmed,
|
||||||
};
|
};
|
||||||
|
|
||||||
fs.writeFileSync(outFile, JSON.stringify(payload, null, 2) + "\n", "utf8");
|
fs.writeFileSync(outFile, JSON.stringify(payload, null, 2) + "\n", "utf8");
|
||||||
process.stdout.write(`Wrote ${outFile} (${trimmed.length} items)\n`);
|
process.stdout.write(`Wrote ${outFile} (${trimmed.length} items)\n`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Script entry point: invoke the main() routine defined above.
main();
|
main();
|
||||||
|
|
|
||||||
|
|
@ -8,24 +8,24 @@ const DB_DIR = path.join(__dirname, "../data/db");
|
||||||
const LINKS_FILE = path.join(__dirname, "../data/sku_links.json");
|
const LINKS_FILE = path.join(__dirname, "../data/sku_links.json");
|
||||||
|
|
||||||
/**
 * Canonicalize a SKU key.
 *
 * The input is stringified (falsy values become "") and trimmed. A key of the
 * form `id:<1-6 digits>` (case-insensitive prefix) is reduced to its digits,
 * left-padded with zeros to six characters; any other key is returned trimmed.
 *
 * @param {*} k - raw SKU key
 * @returns {string} normalized key (may be empty)
 */
function normalizeImplicitSkuKey(k) {
  const trimmed = String(k || "").trim();
  const idMatch = /^id:(\d{1,6})$/i.exec(trimmed);
  return idMatch ? idMatch[1].padStart(6, "0") : trimmed;
}
|
||||||
|
|
||||||
// Gather every SKU found in the db JSON files, keyed by its normalized form.
const validSkus = new Set();

const dbFileNames = fs.readdirSync(DB_DIR).filter((name) => name.endsWith(".json"));
for (const name of dbFileNames) {
  const parsed = JSON.parse(fs.readFileSync(path.join(DB_DIR, name), "utf8"));
  // Files without an items array contribute nothing.
  if (!Array.isArray(parsed.items)) continue;

  parsed.items
    .filter((entry) => entry && entry.sku)
    .map((entry) => normalizeImplicitSkuKey(entry.sku))
    .filter(Boolean)
    .forEach((key) => validSkus.add(key));
}
|
||||||
|
|
||||||
// load links
|
// load links
|
||||||
|
|
@ -40,40 +40,40 @@ const seen = new Set(); // dedupe after normalization
|
||||||
const nextLinks = [];

const existingLinks = Array.isArray(linksData.links) ? linksData.links : [];
for (const link of existingLinks) {
  const from = normalizeImplicitSkuKey(link?.fromSku);
  const to = normalizeImplicitSkuKey(link?.toSku);

  // A link with a blank endpoint is counted as missing.
  if (!(from && to)) {
    prunedMissing++;
    continue;
  }

  // Both ends normalize to the same key (id:1234 <-> 001234 etc):
  // the link is implicit now, so drop it.
  if (from === to) {
    prunedAuto++;
    continue;
  }

  // Keep a link only when BOTH normalized SKUs exist in the db.
  const bothKnown = validSkus.has(from) && validSkus.has(to);
  if (!bothKnown) {
    prunedMissing++;
    continue;
  }

  // Links are undirected: order the pair so A|B and B|A share one dedupe key.
  const pairKey = from < to ? `${from}|${to}` : `${to}|${from}`;
  if (seen.has(pairKey)) {
    prunedDup++;
    continue;
  }
  seen.add(pairKey);

  // Carry over datestamps/metadata untouched; only the SKUs are rewritten.
  nextLinks.push({ ...link, fromSku: from, toSku: to });
}

linksData.links = nextLinks;
|
||||||
|
|
|
||||||
|
|
@ -12,307 +12,340 @@ const { priceToNumber, salePctOff, normPrice } = require("../src/utils/price");
|
||||||
const { isoTimestampFileSafe } = require("../src/utils/time");
|
const { isoTimestampFileSafe } = require("../src/utils/time");
|
||||||
|
|
||||||
/**
 * Run `git` with the given argument list and return its stdout as UTF-8 text
 * with trailing whitespace removed. Errors from execFileSync propagate.
 *
 * @param {string[]} args - arguments passed to the git executable
 * @returns {string}
 */
function runGit(args) {
  const stdout = execFileSync("git", args, { encoding: "utf8" });
  return stdout.trimEnd();
}
|
||||||
|
|
||||||
/**
 * Read a file's contents at a given revision via `git show <sha>:<filePath>`.
 *
 * @param {string} sha - revision to read from
 * @param {string} filePath - path passed to git after the colon
 * @returns {string|null} file text, or null when the git command fails
 */
function gitShowText(sha, filePath) {
  let text = null;
  try {
    text = execFileSync("git", ["show", `${sha}:${filePath}`], { encoding: "utf8" });
  } catch {
    // Best effort: any failure of the git call is reported as null.
  }
  return text;
}
|
||||||
|
|
||||||
/**
 * List the files git tracks under `dbDirRel` at revision `sha`
 * (`git ls-tree -r --name-only`).
 *
 * @param {string} sha - revision to inspect
 * @param {string} dbDirRel - directory path handed to ls-tree
 * @returns {Set<string>} trimmed, non-empty path names in listing order
 */
function gitListDbFiles(sha, dbDirRel) {
  const listing = runGit(["ls-tree", "-r", "--name-only", sha, dbDirRel]);

  const paths = new Set();
  for (const rawLine of listing.split(/\r?\n/)) {
    const name = rawLine.trim();
    if (name) paths.add(name);
  }
  return paths;
}
|
||||||
|
|
||||||
/**
 * Parse JSON text without throwing.
 *
 * @param {string|null|undefined} txt - JSON source text
 * @returns {*} the parsed value, or null when `txt` is null/undefined or not valid JSON
 */
function parseJsonOrNull(txt) {
  if (txt === null || txt === undefined) return null;

  let parsed;
  try {
    parsed = JSON.parse(txt);
  } catch {
    return null;
  }
  return parsed;
}
|
||||||
|
|
||||||
/**
 * Index a db object's items by URL.
 *
 * Only entries whose `url` is a string starting with "http" are kept. Each
 * kept entry is reduced to { name, price, sku, url, removed } with name/price/
 * sku stringified (falsy values become "") and `removed` coerced to a boolean.
 * When several entries share a URL, the last one wins.
 *
 * @param {{items?: Array<object>}|null|undefined} obj - parsed db file
 * @returns {Map<string, {name: string, price: string, sku: string, url: string, removed: boolean}>}
 */
function mapItemsByUrl(obj) {
  const entries = Array.isArray(obj?.items) ? obj.items : [];
  const byUrl = new Map();

  entries.forEach((entry) => {
    const hasHttpUrl = Boolean(entry) && typeof entry.url === "string" && entry.url.startsWith("http");
    if (!hasHttpUrl) return;

    byUrl.set(entry.url, {
      name: String(entry.name || ""),
      price: String(entry.price || ""),
      sku: String(entry.sku || ""),
      url: entry.url,
      removed: Boolean(entry.removed),
    });
  });

  return byUrl;
}
|
||||||
|
|
||||||
/**
 * Compare two snapshots of one db file, matching items by URL.
 *
 * An item counts as "live" when it is present and not flagged `removed`.
 *   - new:      absent before, live now
 *   - restored: present but removed before, live now
 *   - removed:  live before, absent or removed now
 *   - updated:  live in both, with differing normPrice() results
 *
 * @param {object|null} prevObj - earlier snapshot (null when the file did not exist)
 * @param {object|null} nextObj - later snapshot (null when the file does not exist)
 * @returns {{newItems: object[], restoredItems: object[], removedItems: object[], updatedItems: object[]}}
 */
function buildDiffForDb(prevObj, nextObj) {
  const before = mapItemsByUrl(prevObj);
  const after = mapItemsByUrl(nextObj);

  const newItems = [];
  const restoredItems = [];
  const removedItems = [];
  const updatedItems = [];

  // Walk earlier URLs first, then URLs that only appear in the later snapshot.
  const allUrls = new Set([...before.keys(), ...after.keys()]);
  for (const url of allUrls) {
    const was = before.get(url);
    const now = after.get(url);

    const wasLive = Boolean(was) && !was.removed;
    const nowLive = Boolean(now) && !now.removed;

    if (nowLive && !wasLive) {
      // Live now but not before: brand new if never seen, otherwise restored.
      (was ? restoredItems : newItems).push({ ...now });
    } else if (wasLive && !nowLive) {
      removedItems.push({ ...was });
    } else if (wasLive && nowLive) {
      const oldNorm = normPrice(was.price);
      const newNorm = normPrice(now.price);
      if (oldNorm === newNorm) continue;

      updatedItems.push({
        name: now.name || was.name || "",
        sku: normalizeCspc(now.sku || was.sku || ""),
        oldPrice: was.price || "",
        newPrice: now.price || "",
        url,
      });
    }
  }

  return { newItems, restoredItems, removedItems, updatedItems };
}
|
||||||
|
|
||||||
/**
 * Parse the command-line arguments of the diff-report script.
 *
 * Accepted forms:
 *   - an argument not starting with "-" is positional; the first two
 *     positionals become fromSha and toSha
 *   - `--no-color` / `--color` are recorded in `flags` as "no-color" / "color"
 *   - `--db-dir <value>` and `--out <value>` (value must not start with "-")
 *   - `--db-dir=<value>` and `--out=<value>`
 *   - any other dash argument (including a value option with no value) is
 *     recorded in `flags` verbatim
 *
 * @param {string[]} argv - arguments after the script name
 * @returns {{fromSha: string, toSha: string, dbDir: string, outFile: string, flags: Set<string>}}
 *   dbDir defaults to "data/db"; fromSha, toSha and outFile default to "".
 */
function parseArgs(argv) {
  const valueOptions = new Set(["--db-dir", "--out"]);

  const flags = new Set();
  const kv = new Map();
  const positional = [];

  for (let i = 0; i < argv.length; i++) {
    const a = argv[i];
    if (!a.startsWith("-")) {
      positional.push(a);
      continue;
    }
    if (a === "--no-color") {
      flags.add("no-color");
      continue;
    }
    if (a === "--color") {
      flags.add("color");
      continue;
    }

    // `--opt=value` form: previously this fell through to `flags` and the
    // value was silently ignored.
    const eq = a.indexOf("=");
    if (eq > 0 && valueOptions.has(a.slice(0, eq))) {
      kv.set(a.slice(0, eq), a.slice(eq + 1));
      continue;
    }

    // `--opt value` form: only consume the next argument when it is not
    // itself an option.
    const next = argv[i + 1];
    if (valueOptions.has(a) && next && !next.startsWith("-")) {
      kv.set(a, next);
      i++;
      continue;
    }

    flags.add(a);
  }

  const fromSha = positional[0] || "";
  const toSha = positional[1] || "";
  const dbDir = kv.get("--db-dir") || "data/db";
  const outFile = kv.get("--out") || "";

  return { fromSha, toSha, dbDir, outFile, flags };
}
|
||||||
|
|
||||||
/**
 * Render a diff report as text.
 *
 * Layout: banner, from/to line, totals line, per-category summary table, then
 * one section each for NEW, RESTORED, REMOVED and PRICE CHANGES (a section is
 * omitted when its list is empty), and a closing banner. Items inside a
 * section are ordered by `catLabel + name`.
 *
 * The item lists on `diffReport` are sorted as copies, so the caller's arrays
 * keep their original order.
 *
 * @param {object} diffReport - { categories, totals, newItems, restoredItems, removedItems, updatedItems }
 * @param {object} options
 * @param {string} options.fromSha - revision shown as the "From" side
 * @param {string} options.toSha - revision shown as the "to" side
 * @param {boolean} options.colorize - forwarded to color() for every painted fragment
 * @returns {string} the report, every line terminated by "\n"
 */
function renderDiffReport(diffReport, { fromSha, toSha, colorize }) {
  const paint = (s, code) => color(s, code, colorize);

  let out = "";
  const ln = (s = "") => {
    out += String(s) + "\n";
  };

  // Array.prototype.sort works in place; sort a copy so rendering has no
  // side effect on the report object.
  const sortedForDisplay = (list) =>
    [...list].sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name));

  const { totals, categories: rows } = diffReport;

  ln(paint("========== DIFF REPORT ==========", C.bold));
  ln(`${paint("From", C.bold)} ${fromSha} ${paint("to", C.bold)} ${toSha}`);
  ln(
    `${paint("Totals", C.bold)} | Categories=${rows.length} | New=${totals.newCount} | Restored=${totals.restoredCount} | Removed=${totals.removedCount} | PriceChanges=${totals.updatedCount}`,
  );
  ln("");

  // Category column: at least 12 wide, at most 56.
  const catW = Math.min(56, Math.max(...rows.map((r) => r.catLabel.length), 12));

  ln(paint("Per-category summary:", C.bold));
  ln(
    `${padRight("Store | Category", catW)} ${padLeft("New", 4)} ${padLeft("Res", 4)} ${padLeft("Rem", 4)} ${padLeft("Upd", 4)}`,
  );
  ln(`${"-".repeat(catW)} ---- ---- ---- ----`);
  for (const r of rows) {
    ln(
      `${padRight(r.catLabel, catW)} ${padLeft(r.newCount, 4)} ${padLeft(r.restoredCount, 4)} ${padLeft(r.removedCount, 4)} ${padLeft(r.updatedCount, 4)}`,
    );
  }
  ln("");

  // One shared label width so all four sections line up.
  const labelW = Math.max(
    16,
    ...diffReport.newItems.map((x) => x.catLabel.length),
    ...diffReport.restoredItems.map((x) => x.catLabel.length),
    ...diffReport.removedItems.map((x) => x.catLabel.length),
    ...diffReport.updatedItems.map((x) => x.catLabel.length),
  );

  const skuInline = (sku) => {
    const s = normalizeCspc(sku);
    return s ? paint(` ${s}`, C.gray) : "";
  };

  // NEW / RESTORED / REMOVED share one layout; only title, marker and colors differ.
  const renderItemSection = (title, titleCode, marker, markerCode, list) => {
    if (!list.length) return;

    ln(paint(`${title} (${list.length})`, titleCode));
    for (const it of sortedForDisplay(list)) {
      const price = it.price ? paint(it.price, C.cyan) : paint("(no price)", C.gray);
      ln(
        `${paint(marker, markerCode)} ${padRight(it.catLabel, labelW)} | ${paint(it.name, C.bold)}${skuInline(it.sku)} ${price}`,
      );
      ln(` ${paint(it.url, C.dim)}`);
    }
    ln("");
  };

  renderItemSection("NEW", C.bold + C.green, "+", C.green, diffReport.newItems);
  renderItemSection("RESTORED", C.bold + C.green, "R", C.green, diffReport.restoredItems);
  renderItemSection("REMOVED", C.bold + C.yellow, "-", C.yellow, diffReport.removedItems);

  if (diffReport.updatedItems.length) {
    ln(paint(`PRICE CHANGES (${diffReport.updatedItems.length})`, C.bold + C.cyan));

    for (const u of sortedForDisplay(diffReport.updatedItems)) {
      const oldRaw = u.oldPrice || "";
      const newRaw = u.newPrice || "";

      const oldN = priceToNumber(oldRaw);
      const newN = priceToNumber(newRaw);

      const oldP = oldRaw ? paint(oldRaw, C.yellow) : paint("(no price)", C.gray);

      let newP = newRaw ? newRaw : "(no price)";
      let offTag = "";

      // Red for an increase, green (plus a percent-off tag when available)
      // for a decrease, cyan when equal or when either price is not numeric.
      if (Number.isFinite(oldN) && Number.isFinite(newN)) {
        if (newN > oldN) {
          newP = paint(newP, C.red);
        } else if (newN < oldN) {
          newP = paint(newP, C.green);
          const pct = salePctOff(oldRaw, newRaw);
          if (pct !== null) offTag = " " + paint(`[${pct}% Off]`, C.green);
        } else {
          newP = paint(newP, C.cyan);
        }
      } else {
        newP = paint(newP, C.cyan);
      }

      ln(
        `${paint("~", C.cyan)} ${padRight(u.catLabel, labelW)} | ${paint(u.name, C.bold)}${skuInline(u.sku)} ${oldP} ${paint("->", C.gray)} ${newP}${offTag}`,
      );
      ln(` ${paint(u.url, C.dim)}`);
    }

    ln("");
  }

  ln(paint("======== END DIFF REPORT ========", C.bold));

  return out;
}
|
||||||
|
|
||||||
/**
 * CLI entry point: diff the db files between two git revisions, print the
 * report to stdout, and optionally write an uncolored copy to a file.
 *
 * Sets process.exitCode to 2 and prints usage when either revision is missing.
 * Color is off with --no-color, on with --color, and otherwise follows
 * whether stdout is a TTY.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const { fromSha, toSha, dbDir, outFile, flags } = parseArgs(process.argv.slice(2));

  if (!fromSha || !toSha) {
    console.error(
      `Usage: ${path.basename(process.argv[1])} <fromSha> <toSha> [--db-dir data/db] [--out reports/<file>.txt] [--no-color]`,
    );
    process.exitCode = 2;
    return;
  }

  // If user provides short SHAs, git accepts them.

  // parseArgs records --color, but it was never consulted here; honor it so
  // color can be forced when stdout is not a TTY. --no-color keeps precedence.
  let colorize = Boolean(process.stdout && process.stdout.isTTY);
  if (flags.has("color")) colorize = true;
  if (flags.has("no-color")) colorize = false;

  // Consider every db file that exists at either revision.
  const filesA = gitListDbFiles(fromSha, dbDir);
  const filesB = gitListDbFiles(toSha, dbDir);
  const files = new Set([...filesA, ...filesB]);

  const diffReport = {
    categories: [],
    totals: { newCount: 0, updatedCount: 0, removedCount: 0, restoredCount: 0 },
    newItems: [],
    restoredItems: [],
    removedItems: [],
    updatedItems: [],
  };

  for (const file of [...files].sort()) {
    // A file missing (or unparsable) at a revision yields null for that side.
    const prevObj = parseJsonOrNull(gitShowText(fromSha, file));
    const nextObj = parseJsonOrNull(gitShowText(toSha, file));

    const storeLabel = String(
      nextObj?.storeLabel || prevObj?.storeLabel || nextObj?.store || prevObj?.store || "?",
    );
    const catLabel = String(
      nextObj?.categoryLabel ||
        prevObj?.categoryLabel ||
        nextObj?.category ||
        prevObj?.category ||
        path.basename(file),
    );
    const catLabelFull = `${storeLabel} | ${catLabel}`;

    const { newItems, restoredItems, removedItems, updatedItems } = buildDiffForDb(prevObj, nextObj);

    diffReport.categories.push({
      catLabel: catLabelFull,
      newCount: newItems.length,
      restoredCount: restoredItems.length,
      removedCount: removedItems.length,
      updatedCount: updatedItems.length,
    });

    diffReport.totals.newCount += newItems.length;
    diffReport.totals.restoredCount += restoredItems.length;
    diffReport.totals.removedCount += removedItems.length;
    diffReport.totals.updatedCount += updatedItems.length;

    for (const it of newItems) diffReport.newItems.push({ catLabel: catLabelFull, ...it });
    for (const it of restoredItems) diffReport.restoredItems.push({ catLabel: catLabelFull, ...it });
    for (const it of removedItems) diffReport.removedItems.push({ catLabel: catLabelFull, ...it });
    for (const u of updatedItems) diffReport.updatedItems.push({ catLabel: catLabelFull, ...u });
  }

  const reportText = renderDiffReport(diffReport, { fromSha, toSha, colorize });
  process.stdout.write(reportText);

  const outPath = outFile ? (path.isAbsolute(outFile) ? outFile : path.join(process.cwd(), outFile)) : "";

  if (outPath) {
    // The file copy is always uncolored; reuse the stdout text when it already is.
    const plainText = colorize ? renderDiffReport(diffReport, { fromSha, toSha, colorize: false }) : reportText;
    fs.mkdirSync(path.dirname(outPath), { recursive: true });
    fs.writeFileSync(outPath, plainText, "utf8");
  }
}
|
||||||
|
|
||||||
// Run the CLI; on rejection print the stack (or the stringified error when
// there is no stack) and mark the process as failed.
main().catch((err) => {
  console.error(err?.stack || String(err));
  process.exitCode = 1;
});
|
||||||
|
|
|
||||||
|
|
@ -13,31 +13,31 @@ const includeLinked = process.argv.includes("--include-linked");
|
||||||
// Collect every SKU that appears on either end of a link, unless linked
// items were requested explicitly.
const linkedSkus = new Set();
if (!includeLinked && fs.existsSync(LINKS_FILE)) {
  const linksDoc = JSON.parse(fs.readFileSync(LINKS_FILE, "utf8"));
  for (const link of linksDoc.links) {
    linkedSkus.add(String(link.fromSku));
    linkedSkus.add(String(link.toSku));
  }
}

for (const file of fs.readdirSync(DB_DIR)) {
  const skipFile =
    !file.endsWith(".json") ||
    (!includeKegNCork && file.startsWith("kegncork__")) ||
    (!includeCoop && file.startsWith("coop__"));
  if (skipFile) continue;

  const data = JSON.parse(fs.readFileSync(path.join(DB_DIR, file), "utf8"));
  if (!Array.isArray(data.items)) continue;

  // Print the URL of each item that is explicitly not removed, has a "u:" SKU
  // and a URL, and is not already linked (unless linked items are included).
  for (const item of data.items) {
    const { sku, url, removed } = item;
    if (removed !== false) continue;
    if (typeof sku !== "string" || !sku.startsWith("u:")) continue;
    if (!url) continue;
    if (!includeLinked && linkedSkus.has(sku)) continue;
    console.log(url);
  }
}
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -4,12 +4,12 @@ import path from "node:path";
|
||||||
import { execFileSync, execSync } from "node:child_process";
|
||||||
|
|
||||||
/**
 * Print a fatal error message to stderr and terminate the process with exit code 1.
 *
 * @param {string} msg - Message to print.
 */
function die(msg) {
  console.error(msg);
  process.exit(1);
}
|
||||||
|
|
||||||
/**
 * Run a command line synchronously and return its captured stdout with surrounding
 * whitespace trimmed.
 *
 * NOTE: `cmd` is handed to execSync as a single string, so it is interpreted by a shell.
 * Do not interpolate untrusted text into it.
 *
 * @param {string} cmd - Full command line to execute.
 * @returns {string} Trimmed stdout of the command.
 */
function sh(cmd) {
  const stdout = execSync(cmd, { stdio: "pipe", encoding: "utf8" });
  return stdout.trim();
}
|
||||||
|
|
||||||
// Raw issue text supplied through the environment; empty when the variable is unset.
const ISSUE_BODY = process.env.ISSUE_BODY || "";
|
||||||
|
|
@ -20,16 +20,14 @@ const REPO = process.env.REPO || "";
|
||||||
if (!ISSUE_NUMBER) die("Missing ISSUE_NUMBER");
if (!REPO) die("Missing REPO");

// The payload is JSON wrapped between BEGIN/END HTML comment markers inside the issue body.
// The capture is lazy so it stops at the first END marker.
const m = ISSUE_BODY.match(/<!--\s*stviz-sku-edits:BEGIN\s*-->\s*([\s\S]*?)\s*<!--\s*stviz-sku-edits:END\s*-->/);
if (!m) die("No stviz payload found in issue body.");

let payload;
try {
  payload = JSON.parse(m[1]);
} catch (e) {
  die(`Invalid JSON payload: ${e?.message || e}`);
}

// Only the v1 schema is understood; anything else is rejected outright.
if (payload?.schema !== "stviz-sku-edits-v1") die("Unsupported payload schema.");
|
||||||
|
|
@ -38,259 +36,246 @@ const linksIn = Array.isArray(payload?.links) ? payload.links : [];
|
||||||
// Ignore pairs requested by the payload; anything that is not an array is treated as "none".
const ignoresIn = Array.isArray(payload?.ignores) ? payload.ignores : [];
|
||||||
|
|
||||||
/**
 * Normalise a SKU value to a trimmed string.
 * Any falsy input (null, undefined, "", 0, false) becomes the empty string.
 *
 * @param {*} s - Raw SKU value.
 * @returns {string} Trimmed SKU, or "" for falsy input.
 */
function normSku(s) {
  return String(s || "").trim();
}
|
||||||
|
|
||||||
/**
 * Build the directional dedupe key "from→to" for a SKU link.
 *
 * @param {*} a - Source SKU.
 * @param {*} b - Target SKU.
 * @returns {string} The key, or "" when either SKU is empty or both are identical.
 */
function linkKeyFrom(a, b) {
  const from = normSku(a);
  const to = normSku(b);
  return from && to && from !== to ? `${from}→${to}` : "";
}
|
||||||
|
|
||||||
/**
 * Dedupe key for a link object of the form { fromSku, toSku }.
 *
 * @param {{fromSku?: *, toSku?: *}|null|undefined} x - Link object; may be nullish.
 * @returns {string} Result of linkKeyFrom for the object's two SKUs.
 */
function linkKey(x) {
  return linkKeyFrom(x?.fromSku, x?.toSku);
}
|
||||||
|
|
||||||
/**
 * Build an order-independent key "lo|hi" for an unordered SKU pair.
 * The smaller SKU (by `<` string comparison) always comes first, so (a, b) and (b, a)
 * produce the same key.
 *
 * @param {*} a - First SKU.
 * @param {*} b - Second SKU.
 * @returns {string} The key, or "" when either SKU is empty or both are identical.
 */
function pairKey(a, b) {
  const x = normSku(a);
  const y = normSku(b);
  if (!x || !y || x === y) return "";
  return x < y ? `${x}|${y}` : `${y}|${x}`;
}
|
||||||
|
|
||||||
/* ---------------- Minimal, merge-friendly JSON array insertion ---------------- */
|
/* ---------------- Minimal, merge-friendly JSON array insertion ---------------- */
|
||||||
|
|
||||||
/**
 * Locate the `[ ... ]` span of `"propName": [ ... ]` in JSON source text without parsing it.
 *
 * The property must start a line (optionally indented). Brackets inside string literals
 * are ignored, and nested arrays are balanced.
 *
 * @param {string} src - JSON source text.
 * @param {string} propName - Property name, matched literally.
 * @returns {{start: number, open: number, close: number, end: number, fieldIndent: string}|null}
 *   `start` is the index where the match begins, `open`/`close` are the indices of the
 *   array's brackets, `end` is `close + 1`, and `fieldIndent` is the whitespace before
 *   the property. Returns null when the property or its closing bracket is not found.
 */
function findJsonArraySpan(src, propName) {
  // Escape regex metacharacters so names such as "a.b" or "a(b" are matched literally.
  const escapedName = String(propName).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const re = new RegExp(`(^[ \\t]*)"${escapedName}"\\s*:\\s*\\[`, "m");
  const mm = src.match(re);
  if (!mm) return null;

  const fieldIndent = mm[1] || "";
  const at = mm.index || 0;
  // The pattern ends with the opening bracket, so its position follows from the match length
  // (searching for "[" from `at` could hit a bracket inside the property name).
  const open = at + mm[0].length - 1;

  // scan to matching ']'
  let depth = 0;
  let inStr = false;
  let esc = false;

  for (let i = open; i < src.length; i++) {
    const ch = src[i];

    if (inStr) {
      if (esc) esc = false; // the escaped character itself is skipped
      else if (ch === "\\") esc = true;
      else if (ch === '"') inStr = false;
      continue;
    }

    if (ch === '"') inStr = true;
    else if (ch === "[") depth++;
    else if (ch === "]" && --depth === 0) {
      return { start: at, open, close: i, end: i + 1, fieldIndent };
    }
  }

  return null;
}
|
||||||
|
|
||||||
/**
 * Split the text between an array's '[' and ']' into the raw text of each top-level
 * JSON object, preserving each object's original formatting.
 *
 * Whitespace, commas and anything that is not an object are skipped. Braces inside string
 * literals (at any depth, including top-level string elements) are ignored. An object that
 * is never closed is dropped.
 *
 * @param {string} arrayInnerText - Text between '[' and ']'.
 * @returns {string[]} Exact source text of each object, in order.
 */
function splitArrayObjectBlocks(arrayInnerText) {
  const s = arrayInnerText;
  const blocks = [];

  let start = -1; // index of the '{' that opened the current top-level object
  let depth = 0; // brace nesting depth
  let inStr = false;
  let esc = false;

  for (let i = 0; i < s.length; i++) {
    const ch = s[i];

    if (inStr) {
      if (esc) esc = false;
      else if (ch === "\\") esc = true;
      else if (ch === '"') inStr = false;
      continue;
    }

    if (ch === '"') {
      inStr = true;
    } else if (ch === "{") {
      if (depth === 0) start = i;
      depth++;
    } else if (ch === "}" && depth > 0) {
      depth--;
      if (depth === 0) {
        blocks.push(s.slice(start, i + 1)); // include '}'
        start = -1;
      }
    }
  }

  return blocks;
}
|
||||||
|
|
||||||
/**
 * Infer the indentation used for the '{' lines inside an array.
 *
 * @param {string} arrayInnerText - Text between the array's '[' and ']'.
 * @param {string} fieldIndent - Indentation of the property that holds the array.
 * @returns {string} Indentation of the first '{' that starts a line; when there is none
 *   (for example an empty array), `fieldIndent` plus two spaces.
 */
function detectItemIndent(arrayInnerText, fieldIndent) {
  const m = arrayInnerText.match(/\n([ \t]*)\{/);
  if (m) return m[1];
  // Two spaces per level, matching the JSON.stringify(..., 2) layout used to seed the file.
  return fieldIndent + "  ";
}
|
||||||
|
|
||||||
/**
 * Render a link ({ fromSku, toSku }) or ignore ({ skuA, skuB }) object as a multi-line
 * block laid out the way JSON.stringify(..., 2) lays out objects inside an array.
 *
 * @param {string} objIndent - Indentation placed before the opening and closing braces.
 * @param {object} obj - Object carrying fromSku/toSku or skuA/skuB.
 * @returns {string} The formatted block without a trailing newline; `{}` at `objIndent`
 *   when neither pair of fields is fully populated.
 */
function makePrettyObjBlock(objIndent, obj) {
  const outer = objIndent;
  // Properties sit one level (two spaces) deeper than the braces.
  const inner = objIndent + "  ";

  const renderPair = (keyA, valueA, keyB, valueB) =>
    [
      `${outer}{`,
      `${inner}"${keyA}": ${JSON.stringify(valueA)},`,
      `${inner}"${keyB}": ${JSON.stringify(valueB)}`,
      `${outer}}`,
    ].join("\n");

  const fromSku = normSku(obj?.fromSku);
  const toSku = normSku(obj?.toSku);
  if (fromSku && toSku) return renderPair("fromSku", fromSku, "toSku", toSku);

  const skuA = normSku(obj?.skuA);
  const skuB = normSku(obj?.skuB);
  if (skuA && skuB) return renderPair("skuA", skuA, "skuB", skuB);

  return `${outer}{}`;
}
|
||||||
|
|
||||||
/**
 * Append new entries to the `"propName": [ ... ]` array inside JSON source text without
 * reformatting anything that is already there.
 *
 * Existing entries are parsed only to build a dedupe set. Incoming entries are normalised,
 * dropped when their key is empty or already present, sorted by key, and appended after
 * the last existing entry.
 *
 * @param {object} args
 * @param {string} args.src - Full JSON source text.
 * @param {string} args.propName - Name of the array property to extend.
 * @param {Array} args.incoming - Candidate entries.
 * @param {(o: object) => string} args.keyFn - Dedupe key of an entry; "" means invalid.
 * @param {(o: object) => object} args.normalizeFn - Canonical form of an incoming entry.
 * @returns {string} Updated source text, or `src` unchanged when nothing is added.
 */
function applyInsertionsToArrayText({ src, propName, incoming, keyFn, normalizeFn }) {
  const span = findJsonArraySpan(src, propName);
  if (!span) die(`Could not find "${propName}" array in ${filePath}`);

  const before = src.slice(0, span.open + 1); // includes '['
  const inner = src.slice(span.open + 1, span.close); // between [ and ]
  const after = src.slice(span.close); // starts with ']'

  const itemIndent = detectItemIndent(inner, span.fieldIndent);

  // Parse existing objects to build a dedupe set (does NOT modify inner text)
  const seen = new Set();
  for (const raw of splitArrayObjectBlocks(inner)) {
    try {
      const k = keyFn(JSON.parse(raw));
      if (k) seen.add(k);
    } catch {
      // ignore unparsable blocks for dedupe purposes
    }
  }

  // Keep each accepted entry together with its key so the key is computed only once.
  const toAdd = [];
  for (const x of incoming) {
    const obj = normalizeFn(x);
    const key = keyFn(obj);
    if (!key || seen.has(key)) continue;
    seen.add(key);
    toAdd.push({ obj, key });
  }

  if (!toAdd.length) return src;

  // Deterministic order for new items only (doesn't reorder existing)
  const addBlocks = toAdd
    .sort((a, b) => String(a.key).localeCompare(String(b.key)))
    .map((x) => makePrettyObjBlock(itemIndent, x.obj));

  // Split the existing content into everything up to the last non-whitespace character
  // and the trailing whitespace that precedes ']'.
  const body = inner.trimEnd();
  const tail = inner.slice(body.length);

  let newInner;
  if (body === "") {
    // "links": [] -> pretty multiline
    newInner = "\n" + addBlocks.join(",\n") + "\n" + span.fieldIndent;
  } else {
    // Keep existing whitespace EXACTLY; append before trailing whitespace
    newInner = body + ",\n" + addBlocks.join(",\n") + tail;
  }

  return before + newInner + after;
}
|
||||||
|
|
||||||
/* ---------------- Apply edits ---------------- */
|
/* ---------------- Apply edits ---------------- */
|
||||||
|
|
||||||
// Location of the SKU link database, relative to the current working directory.
const filePath = path.join("data", "sku_links.json");

/**
 * Create the SKU link file, including its parent directory, when it does not exist yet.
 * An existing file is left untouched.
 */
function ensureFileExists() {
  if (fs.existsSync(filePath)) return;
  fs.mkdirSync(path.dirname(filePath), { recursive: true });
  // Create with stable formatting; generatedAt intentionally blank (we do not mutate it later)
  const seed = { generatedAt: "", links: [], ignores: [] };
  fs.writeFileSync(filePath, `${JSON.stringify(seed, null, 2)}\n`, "utf8");
}

ensureFileExists();
|
||||||
|
|
@ -301,42 +286,42 @@ let text = fs.readFileSync(filePath, "utf8");
|
||||||
// Also: do NOT re-stringify entire JSON; we only surgically insert into arrays.
|
// Also: do NOT re-stringify entire JSON; we only surgically insert into arrays.
|
||||||
|
|
||||||
// Canonical form of an incoming link: trimmed string SKUs.
const normalizeLink = (o) => ({ fromSku: normSku(o?.fromSku), toSku: normSku(o?.toSku) });

// Canonical form of an incoming ignore pair: trimmed SKUs in sorted order, or two empty
// strings when the pair is invalid (pairKey returns ""). The ordering is computed from the
// SKUs themselves rather than by splitting the key on "|", which would corrupt a SKU that
// contains that character.
const normalizeIgnore = (o) => {
  const a = normSku(o?.skuA);
  const b = normSku(o?.skuB);
  if (!pairKey(a, b)) return { skuA: "", skuB: "" };
  return a < b ? { skuA: a, skuB: b } : { skuA: b, skuB: a };
};

const normLinksIn = linksIn.map((x) => normalizeLink(x));
const normIgnoresIn = ignoresIn.map((x) => normalizeIgnore(x));

// Insert links (sorted by from→to)
text = applyInsertionsToArrayText({
  src: text,
  propName: "links",
  incoming: normLinksIn,
  keyFn: (o) => linkKeyFrom(o?.fromSku, o?.toSku),
  normalizeFn: normalizeLink,
});

// Insert ignores (sorted by canonical pair)
text = applyInsertionsToArrayText({
  src: text,
  propName: "ignores",
  incoming: normIgnoresIn,
  keyFn: (o) => pairKey(o?.skuA, o?.skuB),
  normalizeFn: normalizeIgnore,
});

fs.writeFileSync(filePath, text, "utf8");
|
||||||
|
|
@ -345,10 +330,10 @@ fs.writeFileSync(filePath, text, "utf8");
|
||||||
|
|
||||||
// Ensure git identity is set for commit (Actions runners often lack it)
try {
  sh(`git config user.name "github-actions[bot]"`);
  sh(`git config user.email "41898282+github-actions[bot]@users.noreply.github.com"`);
} catch (e) {
  // Best effort: keep going, but leave a trace in the log instead of swallowing the failure.
  console.warn(`Could not set git identity: ${e?.message || e}`);
}

// ISO timestamp with ":" and "." replaced by "-".
const ts = new Date().toISOString().replace(/[:.]/g, "-");
|
||||||
|
|
@ -360,8 +345,8 @@ sh(`git add "${filePath}"`);
|
||||||
// If no diffs (all edits were duplicates), don't create PR or close issue.
// `git status --porcelain` prints nothing for a path without changes.
const diff = sh(`git status --porcelain "${filePath}"`);
if (!diff) {
  console.log("No changes to commit (all edits already present). Leaving issue open.");
  process.exit(0);
}

sh(`git commit -m "stviz: apply sku edits (issue #${ISSUE_NUMBER})"`);
|
||||||
|
|
@ -371,21 +356,20 @@ const prTitle = `STVIZ: SKU link updates (issue #${ISSUE_NUMBER})`;
|
||||||
// NOTE: ISSUE_TITLE is user-supplied text; this string must never be spliced into a shell
// command line without escaping.
const prBody = `Automated PR created from issue #${ISSUE_NUMBER}: ${ISSUE_TITLE}`;
|
||||||
|
|
||||||
/**
 * Pull the first pull-request URL (".../pull/<number>...") out of command output.
 * Terminates the process via die() when no such URL is present.
 *
 * @param {*} out - Captured output of `gh pr create`.
 * @returns {string} The matched URL, including any non-whitespace characters that follow
 *   the PR number.
 */
function extractPrUrl(out) {
  // gh pr create usually prints the PR URL to stdout; be robust in case extra text appears.
  const found = String(out || "").match(/https?:\/\/\S+\/pull\/\d+\S*/);
  if (!found) die(`Could not find PR URL in gh output:\n${out}`);
  return found[0];
}
|
||||||
|
|
||||||
// Create PR and capture URL/number without relying on unsupported flags.
// gh is invoked through execFileSync with an argument array, i.e. without a shell: prBody
// embeds the issue title, which is user-controlled, and interpolating it into a shell
// command line would allow command injection.
const gh = (...args) => execFileSync("gh", ["-R", REPO, ...args], { stdio: "pipe", encoding: "utf8" }).trim();

const prCreateOut = gh("pr", "create", "--base", "data", "--head", branch, "--title", prTitle, "--body", prBody);
const prUrl = extractPrUrl(prCreateOut);

const prNumber = gh("pr", "view", prUrl, "--json", "number", "--jq", ".number");

gh(
  "issue",
  "close",
  String(ISSUE_NUMBER),
  "-c",
  `Processed by STVIZ automation. Opened PR #${prNumber}: ${prUrl}`,
);
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@
|
||||||
const { main } = require("./src/main");
|
const { main } = require("./src/main");
|
||||||
|
|
||||||
// Entry point: run main() and turn any rejection into a logged error plus a failing exit status.
main().catch((e) => {
  // Prefer the stack trace when there is one; fall back to a string form for non-Error rejections.
  const msg = e && e.stack ? e.stack : String(e);
  console.error(msg);
  // Record the failure via exitCode instead of calling process.exit().
  process.exitCode = 1;
});
|
||||||
|
|
|
||||||
144
viz/app/api.js
144
viz/app/api.js
|
|
@ -1,62 +1,64 @@
|
||||||
/**
 * Fetch a URL with the HTTP cache bypassed and parse the response body as JSON.
 *
 * @param {string} url - URL to request.
 * @returns {Promise<*>} Parsed JSON body.
 * @throws {Error} "HTTP <status> for <url>" when the response status is not ok.
 */
export async function fetchJson(url) {
  const res = await fetch(url, { cache: "no-store" });
  if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`);
  return res.json();
}
|
||||||
|
|
||||||
/**
 * Fetch a URL with the HTTP cache bypassed and return the response body as text.
 *
 * @param {string} url - URL to request.
 * @returns {Promise<string>} Response body.
 * @throws {Error} "HTTP <status> for <url>" when the response status is not ok.
 */
export async function fetchText(url) {
  const res = await fetch(url, { cache: "no-store" });
  if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`);
  return res.text();
}
|
||||||
|
|
||||||
/**
 * Guess the GitHub owner and repository from the page location.
 *
 * On a `<owner>.github.io` host the owner is the subdomain and the repo is the first path
 * segment (or `<owner>.github.io` when the path has no segments). On any other host a
 * fixed default owner/repo is returned.
 *
 * @returns {{owner: string, repo: string}}
 */
export function inferGithubOwnerRepo() {
  const host = location.hostname || "";
  const pagesHost = host.match(/^([a-z0-9-]+)\.github\.io$/i);
  if (!pagesHost) return { owner: "brennanwilkes", repo: "spirit-tracker" };

  const owner = pagesHost[1];
  const [firstSegment] = (location.pathname || "/").split("/").filter(Boolean);
  // filter(Boolean) removed empty segments, so a defined first segment is never "".
  const repo = firstSegment !== undefined ? firstSegment : `${owner}.github.io`;
  return { owner, repo };
}
|
||||||
|
|
||||||
/**
 * Report whether the page is served over http(s) from 127.0.0.1 or localhost.
 *
 * @returns {boolean}
 */
export function isLocalWriteMode() {
  const host = String(location.hostname || "").toLowerCase();
  const isHttp = location.protocol === "http:" || location.protocol === "https:";
  const isLoopbackHost = host === "127.0.0.1" || host === "localhost";
  return isHttp && isLoopbackHost;
}
|
||||||
|
|
||||||
/* ---- Local disk-backed SKU link API (only on viz/serve.js) ---- */
|
/* ---- Local disk-backed SKU link API (only on viz/serve.js) ---- */
|
||||||
|
|
||||||
/**
 * Read SKU links and ignores from the `/__stviz/sku-links` endpoint.
 *
 * @returns {Promise<{links: Array, ignores: Array}>} Both fields are always arrays; a
 *   missing or non-array field in the response becomes [].
 * @throws {Error} "HTTP <status>" when the response status is not ok.
 */
export async function apiReadSkuMetaFromLocalServer() {
  const res = await fetch("/__stviz/sku-links", { cache: "no-store" });
  if (!res.ok) throw new Error(`HTTP ${res.status}`);
  const body = await res.json();
  const links = Array.isArray(body?.links) ? body.links : [];
  const ignores = Array.isArray(body?.ignores) ? body.ignores : [];
  return { links, ignores };
}
|
||||||
|
|
||||||
/**
 * POST a SKU link to the `/__stviz/sku-links` endpoint as JSON.
 *
 * @param {string} fromSku - Source SKU.
 * @param {string} toSku - Target SKU.
 * @returns {Promise<*>} Parsed JSON response body.
 * @throws {Error} "HTTP <status>" when the response status is not ok.
 */
export async function apiWriteSkuLink(fromSku, toSku) {
  const request = {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify({ fromSku, toSku }),
  };
  const res = await fetch("/__stviz/sku-links", request);
  if (!res.ok) throw new Error(`HTTP ${res.status}`);
  return res.json();
}
|
||||||
|
|
||||||
/**
 * POST a SKU ignore pair to the `/__stviz/sku-ignores` endpoint as JSON.
 *
 * @param {string} skuA - First SKU of the pair.
 * @param {string} skuB - Second SKU of the pair.
 * @returns {Promise<*>} Parsed JSON response body.
 * @throws {Error} "HTTP <status>" when the response status is not ok.
 */
export async function apiWriteSkuIgnore(skuA, skuB) {
  const request = {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify({ skuA, skuB }),
  };
  const res = await fetch("/__stviz/sku-ignores", request);
  if (!res.ok) throw new Error(`HTTP ${res.status}`);
  return res.json();
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -65,50 +67,50 @@ export async function apiWriteSkuIgnore(skuA, skuB) {
|
||||||
* - On local server: reads via /__stviz/sku-links (disk)
|
* - On local server: reads via /__stviz/sku-links (disk)
|
||||||
*/
|
*/
|
||||||
export async function loadSkuMetaBestEffort() {
  // Coerce whatever a source returned into the { links, ignores } shape with array fields.
  const toMeta = (j) => ({
    links: Array.isArray(j?.links) ? j.links : [],
    ignores: Array.isArray(j?.ignores) ? j.ignores : [],
  });

  // Sources are tried in order; the first one that does not throw wins.
  const sources = [
    // 1) GitHub Pages / static deploy inside viz/
    async () => toMeta(await fetchJson("./data/sku_links.json")),
    // 2) alternate static path (in case you later serve viz under a subpath)
    async () => toMeta(await fetchJson("/data/sku_links.json")),
    // 3) Local server API (disk)
    () => apiReadSkuMetaFromLocalServer(),
  ];

  for (const load of sources) {
    try {
      return await load();
    } catch {
      // Deliberate best effort: a failing source just means "try the next one".
    }
  }

  return { links: [], ignores: [] };
}
|
||||||
|
|
||||||
/* ---- GitHub history helpers ---- */
|
/* ---- GitHub history helpers ---- */
|
||||||
|
|
||||||
/**
 * List commits that touched `path` on `branch` through the GitHub REST commits endpoint.
 *
 * Pages of 100 commits are requested one after another until a page comes back short (or
 * is not an array) or `maxPages` pages have been fetched.
 *
 * @param {object} args
 * @param {string} args.owner - Repository owner.
 * @param {string} args.repo - Repository name.
 * @param {string} args.branch - Branch name or SHA to start listing from.
 * @param {string} args.path - Repository-relative file path (unencoded).
 * @param {number} [args.maxPages=2] - Upper bound on the number of pages requested.
 * @returns {Promise<Array>} Commit objects in the order the API returned them.
 * @throws {Error} Whatever fetchJson throws for a failed request.
 */
export async function githubListCommits({ owner, repo, branch, path, maxPages = 2 }) {
  const perPage = 100;
  const base = `https://api.github.com/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}/commits`;
  const query = `sha=${encodeURIComponent(branch)}&path=${encodeURIComponent(path)}&per_page=${perPage}`;

  const commits = [];
  for (let page = 1; page <= maxPages; page++) {
    const batch = await fetchJson(`${base}?${query}&page=${page}`);
    if (!Array.isArray(batch)) break;
    commits.push(...batch);
    // A short page means there is nothing further to fetch.
    if (batch.length < perPage) break;
  }
  return commits;
}
|
||||||
|
|
||||||
/**
 * Fetch a JSON file from raw.githubusercontent.com at a specific commit.
 *
 * Each `/`-separated segment of `path` is percent-encoded individually so
 * that file names containing spaces, `#`, `?` and similar characters still
 * produce a valid URL while the directory separators are kept.
 * NOTE(review): callers are assumed to pass an un-encoded repository path;
 * an already percent-encoded path would be encoded twice.
 *
 * @param {{owner: string, repo: string, sha: string, path: string}} args
 * @returns {Promise<*>} The parsed JSON document.
 * @throws {SyntaxError} When the fetched text is not valid JSON; failures
 *   from `fetchText` propagate unchanged.
 */
export async function githubFetchFileAtSha({ owner, repo, sha, path }) {
  const encodedPath = String(path).split("/").map(encodeURIComponent).join("/");
  const raw = `https://raw.githubusercontent.com/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}/${encodeURIComponent(sha)}/${encodedPath}`;
  const txt = await fetchText(raw);
  return JSON.parse(txt);
}
|
||||||
|
|
|
||||||
|
|
@ -3,106 +3,106 @@ import { parsePriceToNumber, keySkuForRow, normSearchText } from "./sku.js";
|
||||||
|
|
||||||
// Build one row per *canonical* SKU (after applying sku map) + combined searchable text
/**
 * Collapse listing rows into one aggregate per canonical SKU.
 *
 * Each aggregate carries: `sku`, `name` (first non-empty name seen), `img`,
 * cheapest price fields, `stores` (labels of non-removed rows), `storesEver`
 * (labels of all rows), `sampleUrl`, `searchText`, `storeCount`,
 * `storeCountEver` and `removedEverywhere`.
 *
 * Cheapest price: the first row of a SKU seeds the price fields so that a SKU
 * with no live priced row still shows something, but that seed is only
 * provisional. The first live (non-removed) row with a parsed price always
 * replaces it, and after that only cheaper live rows win. A removed row can
 * therefore never beat a live one.
 *
 * @param {Iterable<object>|null|undefined} listings Rows to aggregate; a
 *   null/undefined value is treated as empty.
 * @param {(sku: string) => string} [canonicalizeSkuFn] Maps a raw SKU key to
 *   its canonical key; identity when not a function.
 * @returns {object[]} Aggregates sorted by `name + sku` via `localeCompare`.
 */
export function aggregateBySku(listings, canonicalizeSkuFn) {
  const canon = typeof canonicalizeSkuFn === "function" ? canonicalizeSkuFn : (x) => x;

  const bySku = new Map();
  // Aggregates whose cheapest* fields already come from a live priced row.
  const hasLivePrice = new Set();

  for (const r of listings ?? []) {
    const rawSku = keySkuForRow(r);
    const sku = canon(rawSku);

    const name = String(r?.name || "");
    const url = String(r?.url || "");
    const storeLabel = String(r?.storeLabel || r?.store || "");
    const removed = Boolean(r?.removed);

    const img = normImg(r?.img || r?.image || r?.thumb || "");

    const pNum = parsePriceToNumber(r?.price);
    const pStr = String(r?.price || "");

    let agg = bySku.get(sku);
    if (!agg) {
      agg = {
        sku, // canonical sku
        name: name || "",
        img: "",
        // Provisional seed; superseded by the first live priced row (see below).
        cheapestPriceStr: pStr || "",
        cheapestPriceNum: pNum,
        cheapestStoreLabel: storeLabel || "",
        stores: new Set(), // LIVE stores only
        storesEver: new Set(), // live + removed presence (history)
        sampleUrl: url || "",
        _searchParts: [],
        searchText: "",

        _imgByName: new Map(),
        _imgAny: "",
      };
      bySku.set(sku, agg);
    }

    if (storeLabel) {
      agg.storesEver.add(storeLabel);
      if (!removed) agg.stores.add(storeLabel);
    }
    if (!agg.sampleUrl && url) agg.sampleUrl = url;

    // Keep first non-empty name, but keep thumbnail aligned to chosen name
    if (!agg.name && name) {
      agg.name = name;
      if (img) agg.img = img;
    } else if (agg.name && name === agg.name && img && !agg.img) {
      agg.img = img;
    }

    if (img) {
      if (!agg._imgAny) agg._imgAny = img;
      if (name) agg._imgByName.set(name, img);
    }

    // cheapest across LIVE rows only (so removed history doesn't "win")
    if (!removed && pNum !== null) {
      const firstLive = !hasLivePrice.has(agg);
      if (firstLive || agg.cheapestPriceNum === null || pNum < agg.cheapestPriceNum) {
        agg.cheapestPriceNum = pNum;
        agg.cheapestPriceStr = pStr || "";
        // On the first live price the label must not be inherited from the seed row.
        agg.cheapestStoreLabel = firstLive ? storeLabel : storeLabel || agg.cheapestStoreLabel;
        hasLivePrice.add(agg);
      }
    }

    // search parts: include canonical + raw sku so searching either works
    agg._searchParts.push(sku);
    if (rawSku && rawSku !== sku) agg._searchParts.push(rawSku);
    if (name) agg._searchParts.push(name);
    if (url) agg._searchParts.push(url);
    if (storeLabel) agg._searchParts.push(storeLabel);
    if (removed) agg._searchParts.push("removed");
  }

  const out = [...bySku.values()];

  for (const it of out) {
    // Thumbnail fallback: prefer an image seen with the chosen name, else any image.
    if (!it.img) {
      const m = it._imgByName;
      if (it.name && m && m.has(it.name)) it.img = m.get(it.name) || "";
      else it.img = it._imgAny || "";
    }

    delete it._imgByName;
    delete it._imgAny;

    it.storeCount = it.stores.size;
    it.storeCountEver = it.storesEver.size;
    it.removedEverywhere = it.storeCount === 0;

    it._searchParts.push(it.sku);
    it._searchParts.push(it.name || "");
    it._searchParts.push(it.sampleUrl || "");
    it._searchParts.push(it.cheapestStoreLabel || "");
    it.searchText = normSearchText(it._searchParts.join(" | "));
    delete it._searchParts;
  }

  out.sort((a, b) => (String(a.name) + a.sku).localeCompare(String(b.name) + b.sku));
  return out;
}
|
||||||
|
|
|
||||||
|
|
@ -1,59 +1,61 @@
|
||||||
/**
 * Escape a value for interpolation into HTML text or a quoted attribute.
 *
 * The five characters `&`, `<`, `>`, `"` and `'` are replaced by their HTML
 * entities; everything else passes through unchanged.
 *
 * @param {*} s Value to escape; null/undefined become the empty string, other
 *   values are converted with `String()`.
 * @returns {string} The escaped text.
 */
export function esc(s) {
  return String(s ?? "").replace(
    /[&<>"']/g,
    (c) => ({ "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" })[c],
  );
}
|
||||||
|
|
||||||
/**
 * Normalise an image URL candidate.
 *
 * @param {*} s Raw value; falsy values count as empty.
 * @returns {string} The trimmed value, or "" when it is blank or starts with
 *   `data:` (case-insensitive).
 */
export function normImg(s) {
  const trimmed = String(s || "").trim();
  const usable = trimmed !== "" && !/^data:/i.test(trimmed);
  return usable ? trimmed : "";
}
|
||||||
|
|
||||||
/**
 * Extract the leading `YYYY-MM-DD` portion of a timestamp string.
 *
 * @param {*} iso Value to inspect; null/undefined count as empty.
 * @returns {string} The ten-character date prefix, or "" when the value does
 *   not start with four digits, a dash, two digits, a dash and two digits.
 */
export function dateOnly(iso) {
  const text = String(iso ?? "");
  const hit = /^(\d{4}-\d{2}-\d{2})/.exec(text);
  return hit === null ? "" : hit[1];
}
|
||||||
|
|
||||||
/**
 * Format a timestamp as e.g. `January 15 12:00pm` in the America/Vancouver
 * time zone, after rounding the instant to the nearest hour.
 *
 * @param {*} iso Anything `new Date(String(iso))` can parse.
 * @returns {string} The formatted text, or "" for empty or unparseable input.
 */
export function prettyTs(iso) {
  const text = String(iso || "");
  if (!text) return "";

  const millis = new Date(text).getTime();
  if (!Number.isFinite(millis)) return "";

  // Round to nearest hour
  const HOUR_MS = 3600000;
  const rounded = new Date(Math.round(millis / HOUR_MS) * HOUR_MS);

  const formatter = new Intl.DateTimeFormat("en-US", {
    timeZone: "America/Vancouver",
    month: "long",
    day: "numeric",
    hour: "numeric",
    minute: "2-digit",
    hour12: true,
  });

  // Only these part types are used; literals and anything else are ignored.
  const field = new Map([
    ["month", ""],
    ["day", ""],
    ["hour", ""],
    ["minute", ""],
    ["dayPeriod", ""],
  ]);
  for (const { type, value } of formatter.formatToParts(rounded)) {
    if (field.has(type)) field.set(type, value);
  }

  const suffix = String(field.get("dayPeriod") || "").toLowerCase();
  return `${field.get("month")} ${field.get("day")} ${field.get("hour")}:${field.get("minute")}${suffix}`;
}
|
||||||
|
|
||||||
/**
 * Build the HTML for a thumbnail.
 *
 * @param {*} imgUrl Image URL candidate; it is passed through `normImg`.
 * @param {string} [cls="thumb"] CSS class for the `<img>` element.
 * @returns {string} An `<img>` tag with escaped `class` and `src`, or a
 *   `thumbPlaceholder` `<div>` when `normImg` yields an empty value.
 */
export function renderThumbHtml(imgUrl, cls = "thumb") {
  const src = normImg(imgUrl);
  if (src) {
    return `<img referrerpolicy="no-referrer" class="${esc(cls)}" src="${esc(src)}" alt="" loading="lazy" onerror="this.style.display='none'" />`;
  }
  return `<div class="thumbPlaceholder"></div>`;
}
|
||||||
|
|
|
||||||
1854
viz/app/item_page.js
1854
viz/app/item_page.js
File diff suppressed because it is too large
Load diff
|
|
@ -2,91 +2,88 @@
|
||||||
import { keySkuForRow } from "../sku.js";
|
import { keySkuForRow } from "../sku.js";
|
||||||
|
|
||||||
/**
 * Whether a SKU key is, after trimming, exactly six ASCII digits.
 *
 * @param {*} skuKey Key to test; falsy values count as empty.
 * @returns {boolean}
 */
function isRealSkuKey(skuKey) {
  const trimmed = String(skuKey || "").trim();
  return trimmed.length === 6 && !/\D/.test(trimmed);
}
|
||||||
|
|
||||||
/**
 * Whether a SKU key starts with the `upc:` or `id:` prefix (no trimming).
 *
 * @param {*} k Key to test; falsy values count as empty.
 * @returns {boolean}
 */
function isSoftSkuKey(k) {
  const key = String(k || "");
  return ["upc:", "id:"].some((prefix) => key.startsWith(prefix));
}
|
||||||
|
|
||||||
/**
 * Whether a SKU key, after trimming, starts with the `u:` prefix.
 *
 * @param {*} k Key to test; falsy values count as empty.
 * @returns {boolean}
 */
function isUnknownSkuKey2(k) {
  const key = String(k || "").trim();
  return key.slice(0, 2) === "u:";
}
|
||||||
|
|
||||||
/**
 * Whether a store label contains one of the substrings this module treats as
 * a "BC" store marker (case-insensitive).
 *
 * NOTE(review): the match is a plain substring test, so the short marker
 * "arc" also matches labels that merely contain those letters.
 *
 * @param {*} label Store label; falsy values count as empty.
 * @returns {boolean}
 */
function isBCStoreLabel(label) {
  const lowered = String(label || "").toLowerCase();
  const markers = ["bcl", "strath", "gull", "legacy", "tudor", "vessel", "arc", "vintagespirits"];
  return markers.some((marker) => lowered.includes(marker));
}
|
||||||
|
|
||||||
/**
 * Whether any row whose `keySkuForRow` key equals `skuKey` carries a store
 * label accepted by `isBCStoreLabel`.
 *
 * @param {Iterable<object>} allRows Listing rows to scan.
 * @param {string} skuKey Key compared with `===` against each row's key.
 * @returns {boolean} True on the first matching row, false otherwise.
 */
function skuIsBC(allRows, skuKey) {
  for (const row of allRows) {
    const sameSku = keySkuForRow(row) === skuKey;
    if (sameSku && isBCStoreLabel(String(row.storeLabel || row.store || ""))) return true;
  }
  return false;
}
|
||||||
|
|
||||||
/**
 * Whether a store label looks like an "AB" store: it contains "alberta",
 * "calgary" or "edmonton", or the standalone word "ab" (case-insensitive).
 *
 * @param {*} label Store label; falsy values count as empty.
 * @returns {boolean}
 */
function isABStoreLabel(label) {
  const lowered = String(label || "").toLowerCase();
  if (["alberta", "calgary", "edmonton"].some((word) => lowered.includes(word))) return true;
  return /\bab\b/.test(lowered);
}
|
||||||
|
|
||||||
/**
 * Whether any row whose `keySkuForRow` key equals `skuKey` carries a store
 * label accepted by `isABStoreLabel`.
 *
 * @param {Iterable<object>} allRows Listing rows to scan.
 * @param {string} skuKey Key compared with `===` against each row's key.
 * @returns {boolean} True on the first matching row, false otherwise.
 */
function skuIsAB(allRows, skuKey) {
  for (const row of allRows) {
    const sameSku = keySkuForRow(row) === skuKey;
    if (sameSku && isABStoreLabel(String(row.storeLabel || row.store || ""))) return true;
  }
  return false;
}
|
||||||
|
|
||||||
/**
 * Score how suitable a SKU key is as the canonical key of a group.
 *
 * Base tier: 1000 for a "real" key, 200 for a "soft" key, -1000 for an
 * "unknown" key, 100 for anything else. Then +25 when the key appears at an
 * AB store and -10 when it appears at a BC store.
 *
 * @param {Iterable<object>} allRows Rows scanned for store membership.
 * @param {*} skuKey Key to score; falsy values count as the empty string.
 * @returns {number} Higher is better.
 */
function scoreCanonical(allRows, skuKey) {
  const key = String(skuKey || "");

  const isReal = isRealSkuKey(key);
  const atAB = skuIsAB(allRows, key);
  const atBC = skuIsBC(allRows, key);
  const isSoft = isSoftSkuKey(key);
  const isUnknown = isUnknownSkuKey2(key);

  let tier;
  if (isReal) {
    tier = 1000;
  } else if (isSoft) {
    tier = 200;
  } else if (isUnknown) {
    tier = -1000;
  } else {
    tier = 100;
  }

  return tier + (atAB ? 25 : 0) - (atBC ? 10 : 0);
}
|
||||||
|
|
||||||
/**
 * Choose the preferred canonical key among candidate SKU keys.
 *
 * Candidates are trimmed and blank ones skipped. The highest
 * `scoreCanonical` wins; on an equal score the lexicographically smaller key
 * wins.
 *
 * @param {Iterable<object>} allRows Rows forwarded to `scoreCanonical`.
 * @param {Iterable<*>} skuKeys Candidate keys.
 * @returns {string} The chosen key, or "" when there is no usable candidate.
 */
export function pickPreferredCanonical(allRows, skuKeys) {
  let winner = "";
  let winnerScore = -Infinity;

  for (const candidate of skuKeys) {
    const key = String(candidate || "").trim();
    if (!key) continue;

    const score = scoreCanonical(allRows, key);
    const strictlyBetter = score > winnerScore;
    const winsTie = score === winnerScore && winner && key < winner;
    if (strictlyBetter || winsTie) {
      winnerScore = score;
      winner = key;
    }
  }

  return winner;
}
|
||||||
|
|
|
||||||
|
|
@ -1,78 +1,77 @@
|
||||||
// viz/app/linker/price.js
|
// viz/app/linker/price.js
|
||||||
/**
 * Build a function that scores how compatible two SKUs are by price.
 *
 * Every item with a positive `cheapestPriceNum` contributes its price to the
 * group of its canonical SKU (`rules.canonicalSku(sku)`, falling back to the
 * SKU itself). Each group keeps only its `kPerGroup` lowest prices, sorted
 * ascending.
 *
 * The returned function takes two SKUs, finds the smallest relative gap
 * `|a-b| / min(a,b)` between any price of one group and any price of the
 * other, and maps it to a multiplier:
 *   - gap <= 35%: 1.0 (no penalty)
 *   - 35%..50%:  linear from 1.00 down to 0.75
 *   - > 50%:     0.75 * (0.5 / gap), floored at 0.35
 * It returns 1.0 when either SKU is empty or has no recorded prices.
 *
 * @param {{allAgg?: Iterable<object>, rules?: {canonicalSku?: Function}, kPerGroup?: number}} args
 * @returns {(aSku: *, bSku: *) => number} Multiplier in [0.35, 1.0].
 */
export function buildPricePenaltyForPair({ allAgg, rules, kPerGroup = 6 }) {
  const toCanon = (skuKey) => String((rules && rules.canonicalSku && rules.canonicalSku(skuKey)) || skuKey);

  // canonSku -> sorted array of up to kPerGroup lowest prices
  const pricesByCanon = new Map();

  for (const item of allAgg || []) {
    if (!item) continue;

    const sku = String(item.sku || "");
    if (!sku) continue;

    const price = item.cheapestPriceNum;
    if (price == null || !(price > 0)) continue;

    const canon = toCanon(sku);
    if (!pricesByCanon.has(canon)) pricesByCanon.set(canon, []);
    const bucket = pricesByCanon.get(canon);

    // Insert after any equal prices so the bucket stays ascending, then cap it.
    const slot = bucket.findIndex((existing) => !(existing <= price));
    bucket.splice(slot === -1 ? bucket.length : slot, 0, price);
    if (bucket.length > kPerGroup) bucket.length = kPerGroup;
  }

  // min |a-b| / min(a,b) over all cross pairs; stops early once effectively equal.
  const smallestRelativeGap = (left, right) => {
    let smallest = Infinity;
    for (const x of left) {
      for (const y of right) {
        const rel = Math.abs(x - y) / Math.max(1e-9, Math.min(x, y));
        if (rel < smallest) smallest = rel;
        if (smallest <= 0.001) return smallest;
      }
    }
    return smallest;
  };

  // gap = 0.40 => 40% relative difference
  const multiplierForGap = (gap) => {
    if (!(gap >= 0) || gap <= 0.35) return 1.0;
    if (gap > 0.5) return Math.max(0.35, 0.75 * (0.5 / gap));

    const t = (gap - 0.35) / 0.15; // 0..1
    return 1.0 - 0.25 * t; // 1.00 -> 0.75
  };

  return function pricePenaltyForPair(aSku, bSku) {
    const left = String(aSku || "");
    const right = String(bSku || "");
    if (!left || !right) return 1.0;

    const leftCanon = toCanon(left);
    const rightCanon = toCanon(right);

    const leftPrices = pricesByCanon.get(leftCanon);
    const rightPrices = pricesByCanon.get(rightCanon);
    if (!leftPrices || !rightPrices || !leftPrices.length || !rightPrices.length) return 1.0;

    const gap = smallestRelativeGap(leftPrices, rightPrices);
    return isFinite(gap) ? multiplierForGap(gap) : 1.0;
  };
}
|
||||||
|
|
|
||||||
|
|
@ -3,267 +3,288 @@ import { tokenizeQuery, normSearchText } from "../sku.js";
|
||||||
|
|
||||||
// Ignore ultra-common / low-signal tokens in bottle names.
const SIM_STOP_TOKENS = new Set([
  // articles, conjunctions, prepositions
  "the", "a", "an", "and", "of", "to", "in", "for", "with",
  // age wording
  "year", "years", "yr", "yrs", "old",
  // generic category / bottling vocabulary
  "whisky", "whiskey", "scotch", "single", "malt", "cask", "finish",
  "edition", "release", "batch", "strength", "abv", "proof",
  "anniversary",
]);
|
||||||
|
|
||||||
// Standalone word "smws", any letter case.
const SMWS_WORD_RE = /\bsmws\b/i;
// Dotted numeric code: 1-3 digits, a dot, 1-4 digits.
const SMWS_CODE_RE = /\b(\d{1,3}\.\d{1,4})\b/;

/**
 * Extract the dotted numeric code (e.g. `12.34`) from a name that mentions
 * the word "smws".
 *
 * @param {*} name Product name; falsy values count as empty.
 * @returns {string} The first dotted code found, or "" when the name lacks
 *   the word "smws" or contains no such code.
 */
export function smwsKeyFromName(name) {
  const text = String(name || "");
  const code = SMWS_WORD_RE.test(text) ? SMWS_CODE_RE.exec(text) : null;
  return code === null ? "" : code[1];
}
|
||||||
|
|
||||||
// Digits followed by an English ordinal suffix, e.g. "12th".
const ORDINAL_RE = /^(\d+)(st|nd|rd|th)$/i;

/**
 * Reduce a token to its numeric key.
 *
 * @param {*} t Token; it is trimmed and lower-cased first, and falsy values
 *   count as empty.
 * @returns {string} The token itself when it is all digits, the digit part
 *   of an ordinal such as `12th`, otherwise "".
 */
export function numKey(t) {
  const token = String(t || "")
    .trim()
    .toLowerCase();
  if (token === "") return "";

  // Non-empty and free of non-digits means the token is purely numeric.
  if (!/\D/.test(token)) return token;

  const ordinal = ORDINAL_RE.exec(token);
  return ordinal ? ordinal[1] : "";
}
|
||||||
|
|
||||||
/**
 * Whether `numKey` yields a non-empty key for the token.
 *
 * @param {*} t Token to test.
 * @returns {boolean}
 */
function isNumberToken(t) {
  return Boolean(numKey(t));
}
|
||||||
|
|
||||||
/**
 * Pull a one- or two-digit age statement out of a name.
 *
 * Two patterns are tried in order over the whole text: a number followed by
 * `yr`/`yrs`/`year`/`years` (optionally preceded by "aged"), then a number
 * followed by `yo`.
 *
 * @param {*} normName Name text; falsy values count as empty.
 * @returns {string} The age without leading zeros, or "" when none is found.
 */
export function extractAgeFromText(normName) {
  const text = String(normName || "");
  if (!text) return "";

  const patterns = [
    /\b(?:aged\s*)?(\d{1,2})\s*(?:yr|yrs|year|years)\b/i,
    /\b(\d{1,2})\s*yo\b/i,
  ];

  for (const pattern of patterns) {
    const found = pattern.exec(text);
    if (found && found[1]) return String(parseInt(found[1], 10));
  }

  return "";
}
|
||||||
|
|
||||||
/**
 * Reduce a token list to the tokens that matter for name similarity.
 *
 * Per token, in order: trim and lower-case; drop blanks and tokens without a
 * letter or digit; drop inline volumes (`700ml`) and percentages (`46%`);
 * map spelling variants to one form; collapse numbers and ordinals to their
 * `numKey`; drop bare volume units, `abv` and `proof`; drop a number together
 * with the volume unit that directly follows it; drop non-numeric stop
 * tokens; drop duplicates. First-seen order is preserved.
 *
 * @param {*} tokens Token array; anything that is not an array counts as empty.
 * @returns {string[]} The surviving, de-duplicated tokens.
 */
export function filterSimTokens(tokens) {
  const SIM_EQUIV = new Map([
    ["years", "yr"],
    ["year", "yr"],
    ["yrs", "yr"],
    ["yr", "yr"],
    ["whiskey", "whisky"],
    ["whisky", "whisky"],
    ["bourbon", "bourbon"],
  ]);

  const VOL_UNIT = new Set(["ml", "l", "cl", "oz", "liter", "liters", "litre", "litres"]);
  const VOL_INLINE_RE = /^\d+(?:\.\d+)?(?:ml|l|cl|oz)$/i; // 700ml, 1.14l
  const PCT_INLINE_RE = /^\d+(?:\.\d+)?%$/; // 46%, 40.0%

  const lowered = (value) =>
    String(value || "")
      .trim()
      .toLowerCase();
  const canonical = (word) => SIM_EQUIV.get(word) || word;

  const list = Array.isArray(tokens) ? tokens : [];
  // A Set keeps first-insertion order, so it doubles as the de-duplicated output.
  const kept = new Set();

  let cursor = 0;
  while (cursor < list.length) {
    const here = cursor;
    cursor += 1;

    let tok = lowered(list[here]);
    if (!tok) continue;
    if (!/[a-z0-9]/i.test(tok)) continue;
    if (VOL_INLINE_RE.test(tok) || PCT_INLINE_RE.test(tok)) continue;

    tok = canonical(tok);
    tok = numKey(tok) || tok;

    if (VOL_UNIT.has(tok) || tok === "abv" || tok === "proof") continue;

    if (/^\d+(?:\.\d+)?$/.test(tok)) {
      const following = canonical(lowered(list[here + 1]));
      if (VOL_UNIT.has(following)) {
        cursor += 1; // the unit token is consumed together with its number
        continue;
      }
    }

    if (!isNumberToken(tok) && SIM_STOP_TOKENS.has(tok)) continue;

    kept.add(tok);
  }

  return [...kept];
}
|
||||||
|
|
||||||
/**
 * Penalise a pair of token lists whose numbers disagree.
 *
 * @param {Array} [aTokens] Tokens of the first name.
 * @param {Array} [bTokens] Tokens of the second name.
 * @returns {number} 1.0 when either side has no numeric token or the two
 *   sides share at least one numeric key; 0.28 when both have numbers but
 *   none in common.
 */
export function numberMismatchPenalty(aTokens, bTokens) {
  const numbersIn = (toks) => new Set((toks || []).map(numKey).filter(Boolean));

  const left = numbersIn(aTokens);
  const right = numbersIn(bTokens);
  if (left.size === 0 || right.size === 0) return 1.0;

  const shareANumber = [...left].some((n) => right.has(n));
  return shareANumber ? 1.0 : 0.28;
}
|
||||||
|
|
||||||
/**
 * Levenshtein edit distance between two strings, compared by UTF-16 code
 * unit, using a single rolling row of the DP table.
 *
 * @param {*} a First string; falsy values count as empty.
 * @param {*} b Second string; falsy values count as empty.
 * @returns {number} Minimum number of single-unit insertions, deletions and
 *   substitutions turning `a` into `b`.
 */
export function levenshtein(a, b) {
  const source = String(a || "");
  const target = String(b || "");
  const sourceLen = source.length;
  const targetLen = target.length;
  if (sourceLen === 0) return targetLen;
  if (targetLen === 0) return sourceLen;

  // row[c] holds the distance between the processed source prefix and target[0..c).
  const row = Array.from({ length: targetLen + 1 }, (_, col) => col);

  for (let r = 1; r <= sourceLen; r++) {
    const unit = source.charCodeAt(r - 1);
    let diagonal = row[0]; // value from the previous row, one column to the left
    row[0] = r;

    for (let c = 1; c <= targetLen; c++) {
      const above = row[c];
      const substitution = diagonal + (unit === target.charCodeAt(c - 1) ? 0 : 1);
      row[c] = Math.min(above + 1, row[c - 1] + 1, substitution);
      diagonal = above;
    }
  }

  return row[targetLen];
}
|
||||||
|
|
||||||
/**
 * F1-style overlap between two token lists after `filterSimTokens`.
 *
 * Recall is measured against the smaller filtered token set and precision
 * against the larger one; the result is their harmonic mean.
 *
 * @param {Array} [aTokens] Raw tokens of the first name.
 * @param {Array} [bTokens] Raw tokens of the second name.
 * @returns {number} A value in [0, 1]; 0 when either side filters to nothing.
 */
export function tokenContainmentScore(aTokens, bTokens) {
  const left = filterSimTokens(aTokens || []);
  const right = filterSimTokens(bTokens || []);
  if (!left.length || !right.length) return 0;

  const leftSet = new Set(left);
  const rightSet = new Set(right);
  const [smaller, larger] = leftSet.size <= rightSet.size ? [leftSet, rightSet] : [rightSet, leftSet];

  const shared = [...smaller].filter((tok) => larger.has(tok)).length;

  const recall = shared / Math.max(1, smaller.size);
  const precision = shared / Math.max(1, larger.size);
  return (2 * precision * recall) / Math.max(1e-9, precision + recall);
}
|
||||||
|
|
||||||
/**
 * Heuristic similarity score between two product names.
 *
 * The score combines: a first-token match, overlap of the remaining tokens,
 * a Levenshtein similarity of the normalized names, a number-mismatch
 * penalty, an age match/mismatch multiplier and a containment boost.
 * The result is an unbounded weight, not a value normalized to [0, 1].
 *
 * @param {string} aName - First raw name.
 * @param {string} bName - Second raw name.
 * @returns {number} 0 when either normalized name is empty or has no tokens
 *   left after filtering; otherwise the combined score.
 */
export function similarityScore(aName, bName) {
  const a = normSearchText(aName);
  const b = normSearchText(bName);
  if (!a || !b) return 0;

  // Age only matters when both names carry one.
  const aAge = extractAgeFromText(a);
  const bAge = extractAgeFromText(b);
  const ageBoth = !!(aAge && bAge);
  const ageMatch = ageBoth && aAge === bAge;
  const ageMismatch = ageBoth && aAge !== bAge;

  const aToksRaw = tokenizeQuery(a);
  const bToksRaw = tokenizeQuery(b);

  const aToks = filterSimTokens(aToksRaw);
  const bToks = filterSimTokens(bToksRaw);
  if (!aToks.length || !bToks.length) return 0;

  const contain = tokenContainmentScore(aToksRaw, bToksRaw);

  const aFirst = aToks[0] || "";
  const bFirst = bToks[0] || "";
  const firstMatch = aFirst && bFirst && aFirst === bFirst ? 1 : 0;

  // Overlap of everything after the first token, relative to the larger tail.
  const A = new Set(aToks.slice(1));
  const B = new Set(bToks.slice(1));
  let inter = 0;
  for (const w of A) if (B.has(w)) inter++;
  const denom = Math.max(1, Math.max(A.size, B.size));
  const overlapTail = inter / denom;

  const d = levenshtein(a, b);
  const maxLen = Math.max(1, Math.max(a.length, b.length));
  const levSim = 1 - d / maxLen;

  // Without a first-token match the tail overlap is damped by containment,
  // and damped further for short names with weak containment.
  let gate = firstMatch ? 1.0 : Math.min(0.8, 0.06 + 0.95 * contain);

  const smallN = Math.min(aToks.length, bToks.length);
  if (!firstMatch && smallN <= 3 && contain < 0.78) gate *= 0.18;

  const numGate = numberMismatchPenalty(aToks, bToks);

  let s = numGate * (firstMatch * 3.0 + overlapTail * 2.2 * gate + levSim * (firstMatch ? 1.0 : 0.1 + 0.7 * contain));

  if (ageMatch) s *= 2.2;
  else if (ageMismatch) s *= 0.18;

  s *= 1 + 0.9 * contain;

  return s;
}
|
||||||
|
|
||||||
/**
 * Similarity score computed from pre-tokenized, pre-normalized inputs.
 *
 * Uses the same ingredients as the full score except the Levenshtein term,
 * which is replaced by a small bonus when the first 10 characters of both
 * normalized names are equal and the first tokens match.
 *
 * @param {string[]} aTokens - Tokens of the first name (nullish treated as empty).
 * @param {string[]} bTokens - Tokens of the second name (nullish treated as empty).
 * @param {string} aNormName - Normalized first name (nullish treated as "").
 * @param {string} bNormName - Normalized second name (nullish treated as "").
 * @returns {number} 0 when either token list is empty after filtering; otherwise the combined score.
 */
export function fastSimilarityScore(aTokens, bTokens, aNormName, bNormName) {
  const aTokensRaw = aTokens || [];
  const bTokensRaw = bTokens || [];

  const aTokF = filterSimTokens(aTokensRaw);
  const bTokF = filterSimTokens(bTokensRaw);
  if (!aTokF.length || !bTokF.length) return 0;

  const a = String(aNormName || "");
  const b = String(bNormName || "");

  // Age only matters when both names carry one.
  const aAge = extractAgeFromText(a);
  const bAge = extractAgeFromText(b);
  const ageBoth = !!(aAge && bAge);
  const ageMatch = ageBoth && aAge === bAge;
  const ageMismatch = ageBoth && aAge !== bAge;

  const contain = tokenContainmentScore(aTokensRaw, bTokensRaw);

  const aFirst = aTokF[0] || "";
  const bFirst = bTokF[0] || "";
  const firstMatch = aFirst && bFirst && aFirst === bFirst ? 1 : 0;

  const aTail = aTokF.slice(1);
  const bTail = bTokF.slice(1);

  // Count tail tokens of `a` that also occur in the tail of `b`.
  let inter = 0;
  const bSet = new Set(bTail);
  for (const t of aTail) if (bSet.has(t)) inter++;

  const denom = Math.max(1, Math.max(aTail.length, bTail.length));
  const overlapTail = inter / denom;

  // Prefix bonus: requires a first-token match and a non-empty, equal 10-char prefix.
  const aPrefix = a.slice(0, 10);
  const bPrefix = b.slice(0, 10);
  const pref = firstMatch && aPrefix && bPrefix && aPrefix === bPrefix ? 0.2 : 0;

  let gate = firstMatch ? 1.0 : Math.min(0.8, 0.06 + 0.95 * contain);
  const smallN = Math.min(aTokF.length, bTokF.length);
  if (!firstMatch && smallN <= 3 && contain < 0.78) gate *= 0.18;

  const numGate = numberMismatchPenalty(aTokF, bTokF);

  let s = numGate * (firstMatch * 2.4 + overlapTail * 2.0 * gate + pref);

  if (ageMatch) s *= 2.0;
  else if (ageMismatch) s *= 0.2;

  s *= 1 + 0.9 * contain;

  return s;
}
|
||||||
|
|
|
||||||
|
|
@ -4,43 +4,43 @@ import { keySkuForRow } from "../sku.js";
|
||||||
// Two bottle sizes are considered the same when they differ by at most this many millilitres.
const SIZE_TOLERANCE_ML = 8;
|
||||||
|
|
||||||
/**
 * Extract bottle sizes, in millilitres, mentioned in free text.
 *
 * Recognizes a number followed by `ml`, `cl`, `l`, `litre(s)` or `liter(s)`
 * (case-insensitive, optional whitespace between number and unit). Values are
 * rounded to whole millilitres and kept only when within 50..5000 ml.
 *
 * @param {unknown} text - Text to scan; falsy values yield an empty result.
 * @returns {number[]} Distinct sizes in order of first appearance.
 */
export function parseSizesMlFromText(text) {
  const s = String(text || "").toLowerCase();
  if (!s) return [];

  const out = new Set();
  const re = /\b(\d+(?:\.\d+)?)\s*(ml|cl|l|litre|litres|liter|liters)\b/g;

  for (const m of s.matchAll(re)) {
    const val = parseFloat(m[1]);
    const unit = m[2];
    if (!Number.isFinite(val) || val <= 0) continue;

    let ml = 0;
    if (unit === "ml") ml = Math.round(val);
    else if (unit === "cl") ml = Math.round(val * 10);
    else ml = Math.round(val * 1000); // every remaining unit is a litre spelling

    // Discard implausible sizes (miniature volumes, stray numbers, huge values).
    if (ml >= 50 && ml <= 5000) out.add(ml);
  }

  return Array.from(out);
}
|
||||||
|
|
||||||
/**
 * Tell whether any size in one set is within `SIZE_TOLERANCE_ML` of any size in the other.
 *
 * @param {Set<number>} [aSet] - Sizes in ml for the first item.
 * @param {Set<number>} [bSet] - Sizes in ml for the second item.
 * @returns {boolean} false when either set is missing or empty, or when no pair is close enough.
 */
function sizeSetsMatch(aSet, bSet) {
  if (!aSet?.size || !bSet?.size) return false;
  for (const a of aSet) {
    for (const b of bSet) {
      if (Math.abs(a - b) <= SIZE_TOLERANCE_ML) return true;
    }
  }
  return false;
}
|
||||||
|
|
||||||
/**
 * Score multiplier based on whether two items' known sizes are compatible.
 *
 * @param {Set<number>} [aSet] - Sizes in ml for the first item.
 * @param {Set<number>} [bSet] - Sizes in ml for the second item.
 * @returns {number} 1.0 when either side has no known size or the sizes match;
 *   0.08 when both sides have sizes and none of them match.
 */
export function sizePenalty(aSet, bSet) {
  // Unknown size on either side: no evidence of a mismatch, so no penalty.
  if (!aSet?.size || !bSet?.size) return 1.0;
  if (sizeSetsMatch(aSet, bSet)) return 1.0;
  return 0.08;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -48,61 +48,61 @@ export function sizePenalty(aSet, bSet) {
|
||||||
* This keeps linker_page.js clean and makes cache rebuild explicit when rules change.
|
* This keeps linker_page.js clean and makes cache rebuild explicit when rules change.
|
||||||
*/
|
*/
|
||||||
export function buildSizePenaltyForPair({ allRows, allAgg, rules }) {
  // Step 1: collect sizes parsed from names, keyed by SKU.
  // Step 2: merge those per-SKU sets into per-canonical-SKU sets.
  // Step 3: return a closure that looks both SKUs up and applies `sizePenalty`.
  const SKU_SIZE_CACHE = new Map(); // skuKey -> Set<int ml>
  const CANON_SIZE_CACHE = new Map(); // canon -> Set<int ml>

  // Get-or-create the Set stored under `key` in `cache`.
  function ensureSet(cache, key) {
    let set = cache.get(key);
    if (!set) cache.set(key, (set = new Set()));
    return set;
  }

  // Sizes from raw rows (rows flagged as removed are ignored).
  for (const r of allRows) {
    if (!r || r.removed) continue;
    const skuKey = String(keySkuForRow(r) || "").trim();
    if (!skuKey) continue;

    const name = r.name || r.title || r.productName || "";
    const sizes = parseSizesMlFromText(name);
    if (!sizes.length) continue;

    const set = ensureSet(SKU_SIZE_CACHE, skuKey);
    for (const x of sizes) set.add(x);
  }

  // Sizes from aggregated items.
  for (const it of allAgg) {
    const skuKey = String(it?.sku || "").trim();
    if (!skuKey || !it?.name) continue;
    const sizes = parseSizesMlFromText(it.name);
    if (!sizes.length) continue;

    const set = ensureSet(SKU_SIZE_CACHE, skuKey);
    for (const x of sizes) set.add(x);
  }

  // Fold per-SKU sizes into their canonical SKU. A canonical entry is created
  // for every aggregated SKU, even when no size is known for it.
  for (const it of allAgg) {
    const skuKey = String(it?.sku || "").trim();
    if (!skuKey) continue;

    const canon = String(rules.canonicalSku(skuKey) || skuKey);
    const canonSet = ensureSet(CANON_SIZE_CACHE, canon);

    const skuSet = SKU_SIZE_CACHE.get(skuKey);
    if (skuSet) for (const x of skuSet) canonSet.add(x);
  }

  return function sizePenaltyForPair(aSku, bSku) {
    const aCanon = String(rules.canonicalSku(String(aSku || "")) || "");
    const bCanon = String(rules.canonicalSku(String(bSku || "")) || "");
    const A = aCanon ? CANON_SIZE_CACHE.get(aCanon) || new Set() : new Set();
    const B = bCanon ? CANON_SIZE_CACHE.get(bCanon) || new Set() : new Set();
    return sizePenalty(A, B);
  };
}
|
||||||
|
|
|
||||||
|
|
@ -1,43 +1,42 @@
|
||||||
// viz/app/linker/store_cache.js
|
// viz/app/linker/store_cache.js
|
||||||
|
|
||||||
/**
 * Resolve a SKU key to its canonical key.
 *
 * @param {{ canonicalSku: (sku: string) => string }} rules - Rules object providing `canonicalSku`.
 * @param {unknown} skuKey - SKU key; trimmed before lookup.
 * @returns {string} "" for an empty key; otherwise `rules.canonicalSku(key)`,
 *   falling back to the trimmed key when that returns a falsy value.
 */
function canonKeyForSku(rules, skuKey) {
  const s = String(skuKey || "").trim();
  if (!s) return "";
  return String(rules.canonicalSku(s) || s);
}
|
||||||
|
|
||||||
/**
 * Build a map from canonical SKU to the union of store labels of all its member SKUs.
 *
 * @param {Iterable<{ sku?: string, stores?: Set<string> }>} allAgg - Aggregated items;
 *   falsy entries and entries without a SKU are skipped.
 * @param {{ canonicalSku: (sku: string) => string }} rules - Rules object providing `canonicalSku`.
 * @returns {Map<string, Set<string>>} canonSku -> Set of store labels. An entry
 *   is created even when the item has no stores.
 */
export function buildCanonStoreCache(allAgg, rules) {
  const m = new Map(); // canonSku -> Set<storeLabel>

  for (const it of allAgg) {
    if (!it) continue;

    const skuKey = String(it.sku || "").trim();
    if (!skuKey) continue;

    const canon = String(rules.canonicalSku(skuKey) || skuKey);
    let set = m.get(canon);
    if (!set) m.set(canon, (set = new Set()));

    const stores = it.stores;
    if (stores && stores.size) for (const s of stores) set.add(s);
  }

  return m;
}
|
||||||
|
|
||||||
/**
 * Look up the store labels recorded for the canonical form of a SKU.
 *
 * @param {{ canonicalSku: (sku: string) => string }} rules - Rules object providing `canonicalSku`.
 * @param {Map<string, Set<string>>} canonStoreCache - canonSku -> Set of store labels.
 * @param {unknown} skuKey - SKU key to resolve.
 * @returns {Set<string>} The cached set, or a new empty set when the key is empty or unknown.
 */
function canonStoresForSku(rules, canonStoreCache, skuKey) {
  const canon = canonKeyForSku(rules, skuKey);
  return canon ? canonStoreCache.get(canon) || new Set() : new Set();
}
|
||||||
|
|
||||||
/**
 * Create a predicate that tells whether two SKUs share at least one store
 * after both are resolved to their canonical SKU.
 *
 * @param {{ canonicalSku: (sku: string) => string }} rules - Rules object providing `canonicalSku`.
 * @param {Map<string, Set<string>>} canonStoreCache - canonSku -> Set of store labels.
 * @returns {(aSku: unknown, bSku: unknown) => boolean} Predicate; returns false
 *   when either SKU has no known stores.
 */
export function makeSameStoreCanonFn(rules, canonStoreCache) {
  return function sameStoreCanon(aSku, bSku) {
    const A = canonStoresForSku(rules, canonStoreCache, String(aSku || ""));
    const B = canonStoresForSku(rules, canonStoreCache, String(bSku || ""));
    if (!A.size || !B.size) return false;
    for (const s of A) if (B.has(s)) return true;
    return false;
  };
}
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -2,46 +2,46 @@
|
||||||
import { keySkuForRow } from "../sku.js";
|
import { keySkuForRow } from "../sku.js";
|
||||||
|
|
||||||
/**
 * Heuristic quality score for a row's URL; higher is preferred.
 *
 * The score is the URL length, plus 50 when it contains a `product/<digits>/`
 * segment, plus 10 when it contains a run of 8 or more letters, digits or hyphens.
 *
 * @param {{ url?: string } | null | undefined} r - Row (or any object) carrying a `url`.
 * @returns {number} -1 when there is no URL, otherwise the score.
 */
function urlQuality(r) {
  const u = String(r?.url || "").trim();
  if (!u) return -1;
  let s = 0;
  s += u.length;
  if (/\bproduct\/\d+\//.test(u)) s += 50;
  if (/[a-z0-9-]{8,}/i.test(u)) s += 10;
  return s;
}
|
||||||
|
|
||||||
/**
 * Pick one URL per (SKU, store) pair from the raw rows.
 *
 * When several rows compete for the same pair, the URL with the higher
 * `urlQuality` wins; equal scores are resolved by taking the
 * lexicographically smaller URL, so the result does not depend on row order.
 *
 * @param {Iterable<object>} allRows - Raw rows; falsy rows, rows flagged
 *   `removed`, and rows lacking a SKU key, store label or URL are skipped.
 * @returns {Map<string, Map<string, string>>} skuKey -> Map(storeLabel -> url).
 */
export function buildUrlBySkuStore(allRows) {
  const URL_BY_SKU_STORE = new Map(); // skuKey -> Map(storeLabel -> url)

  for (const r of allRows) {
    if (!r || r.removed) continue;

    const skuKey = String(keySkuForRow(r) || "").trim();
    if (!skuKey) continue;

    const storeLabel = String(r.storeLabel || r.store || "").trim();
    const url = String(r.url || "").trim();
    if (!storeLabel || !url) continue;

    let m = URL_BY_SKU_STORE.get(skuKey);
    if (!m) URL_BY_SKU_STORE.set(skuKey, (m = new Map()));

    const prevUrl = m.get(storeLabel);
    if (!prevUrl) {
      m.set(storeLabel, url);
      continue;
    }

    const prevScore = urlQuality({ url: prevUrl });
    const nextScore = urlQuality(r);

    // Replace on a strictly better score, or on a tie with a smaller URL string.
    if (nextScore > prevScore || (nextScore === prevScore && url < prevUrl)) {
      m.set(storeLabel, url);
    }
  }

  return URL_BY_SKU_STORE;
}
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -15,23 +15,23 @@ import { renderStore } from "./store_page.js";
|
||||||
import { renderStats, destroyStatsChart } from "./stats_page.js";
|
import { renderStats, destroyStatsChart } from "./stats_page.js";
|
||||||
|
|
||||||
/**
 * Hash router: render the page selected by `location.hash` into `#app`.
 *
 * Routes: `#/` (search), `#/item/<id>`, `#/store/<label>`, `#/link`, `#/stats`.
 * Anything else falls back to the search page. Does nothing when `#app` is missing.
 */
function route() {
  const $app = document.getElementById("app");
  if (!$app) return;

  // always clean up chart when navigating
  destroyChart();
  destroyStatsChart();

  const h = location.hash || "#/";
  const parts = h.replace(/^#\/?/, "").split("/").filter(Boolean);

  // decodeURIComponent throws URIError on a malformed percent-escape; use the
  // raw segment in that case so a bad hash cannot leave the page unrendered.
  const decodeSegment = (raw) => {
    try {
      return decodeURIComponent(raw);
    } catch {
      return raw;
    }
  };

  if (parts.length === 0) return renderSearch($app);
  if (parts[0] === "item" && parts[1]) return renderItem($app, decodeSegment(parts[1]));
  if (parts[0] === "store" && parts[1]) return renderStore($app, decodeSegment(parts[1]));
  if (parts[0] === "link") return renderSkuLinker($app);
  if (parts[0] === "stats") return renderStats($app);

  return renderSearch($app);
}
|
||||||
|
|
||||||
// Re-run the router whenever the URL hash changes.
window.addEventListener("hashchange", route);
|
||||||
|
|
|
||||||
|
|
@ -5,225 +5,225 @@ import { applyPendingToMeta } from "./pending.js";
|
||||||
// Memoized result of loadSkuRules(); null means "not loaded yet".
let CACHED = null;
|
||||||
|
|
||||||
/**
 * Drop the memoized SKU rules by resetting `CACHED` to null.
 */
export function clearSkuRulesCache() {
  CACHED = null;
}
|
||||||
|
|
||||||
/**
 * Normalize a SKU key, rewriting the `id:<digits>` form to a zero-padded number.
 *
 * @param {unknown} k - SKU key; falsy values become "".
 * @returns {string} For `id:N` (case-insensitive, 1 to 6 digits) the digits
 *   left-padded with zeros to 6 characters; otherwise the trimmed key.
 */
function normalizeImplicitSkuKey(k) {
  const s = String(k || "").trim();
  const m = s.match(/^id:(\d{1,6})$/i);
  if (m) return String(m[1]).padStart(6, "0");
  return s;
}
|
||||||
|
|
||||||
/**
 * Build an order-independent key for a pair of SKUs.
 *
 * @param {unknown} a - First SKU key.
 * @param {unknown} b - Second SKU key.
 * @returns {string} "" when either normalized key is empty; otherwise the two
 *   normalized keys joined by "|" with the lexicographically smaller one first.
 */
function canonicalPairKey(a, b) {
  const x = normalizeImplicitSkuKey(a);
  const y = normalizeImplicitSkuKey(b);
  if (!x || !y) return "";
  return x < y ? `${x}|${y}` : `${y}|${x}`;
}
|
||||||
|
|
||||||
/**
 * Build a directed fromSku -> toSku map from link records.
 *
 * @param {Array<{ fromSku?: string, toSku?: string }>} links - Link records;
 *   a non-array value is treated as empty.
 * @returns {Map<string, string>} Normalized fromSku -> toSku. Self-links and
 *   records with an empty side are skipped; a later record for the same
 *   fromSku overwrites an earlier one.
 */
function buildForwardMap(links) {
  // Keep this for reference/debug; grouping no longer depends on direction.
  const m = new Map();
  for (const x of Array.isArray(links) ? links : []) {
    const fromSku = normalizeImplicitSkuKey(x?.fromSku);
    const toSku = normalizeImplicitSkuKey(x?.toSku);
    if (fromSku && toSku && fromSku !== toSku) m.set(fromSku, toSku);
  }
  return m;
}
|
||||||
|
|
||||||
/**
 * Build the set of ignored SKU pairs as order-independent pair keys.
 *
 * Each record may name its two sides as `skuA`/`skuB`, `a`/`b` or `left`/`right`.
 *
 * @param {Array<object>} ignores - Ignore records; a non-array value is treated as empty.
 * @returns {Set<string>} Pair keys produced by `canonicalPairKey`; records
 *   with an empty side are skipped.
 */
function buildIgnoreSet(ignores) {
  const s = new Set();
  for (const x of Array.isArray(ignores) ? ignores : []) {
    const a = String(x?.skuA || x?.a || x?.left || "").trim();
    const b = String(x?.skuB || x?.b || x?.right || "").trim();
    const k = canonicalPairKey(a, b);
    if (k) s.add(k);
  }
  return s;
}
|
||||||
|
|
||||||
/* ---------------- Union-Find grouping (hardened) ---------------- */
|
/* ---------------- Union-Find grouping (hardened) ---------------- */
|
||||||
|
|
||||||
/**
 * Disjoint-set (union-find) over string keys, with union by rank and path
 * compression. Keys are trimmed; empty keys are ignored.
 */
class DSU {
  constructor() {
    this.parent = new Map(); // key -> parent key (a root is its own parent)
    this.rank = new Map(); // key -> rank, used to keep trees shallow
  }

  /** Register `x` as its own singleton set if it is not known yet. */
  _add(x) {
    if (!this.parent.has(x)) {
      this.parent.set(x, x);
      this.rank.set(x, 0);
    }
  }

  /**
   * Find the root of the set containing `x`, registering `x` if needed.
   *
   * @param {unknown} x - Key; trimmed before use.
   * @returns {string} The root key, or "" for an empty key.
   */
  find(x) {
    const key = String(x || "").trim();
    if (!key) return "";
    this._add(key);

    // First pass: walk up to the root.
    let root = key;
    while (this.parent.get(root) !== root) root = this.parent.get(root);

    // Second pass: point every node on the path directly at the root.
    let cur = key;
    while (cur !== root) {
      const next = this.parent.get(cur);
      this.parent.set(cur, root);
      cur = next;
    }
    return root;
  }

  /**
   * Merge the sets containing `a` and `b`.
   * No-op when either key is empty or both keys are equal.
   */
  union(a, b) {
    const left = String(a || "").trim();
    const right = String(b || "").trim();
    if (!left || !right || left === right) return;

    const ra = this.find(left);
    const rb = this.find(right);
    if (!ra || !rb || ra === rb) return;

    const rka = this.rank.get(ra) || 0;
    const rkb = this.rank.get(rb) || 0;

    // Attach the lower-rank root under the higher-rank one; on a tie `ra` wins
    // and its rank grows by one.
    if (rka < rkb) {
      this.parent.set(ra, rb);
    } else if (rkb < rka) {
      this.parent.set(rb, ra);
    } else {
      this.parent.set(rb, ra);
      this.rank.set(ra, rka + 1);
    }
  }
}
|
||||||
|
|
||||||
/**
 * Tell whether a SKU key uses the `u:` prefix.
 *
 * @param {unknown} key - SKU key; falsy values count as not unknown.
 * @returns {boolean} true when the key, as given (not trimmed), starts with "u:".
 */
function isUnknownSkuKey(key) {
  return String(key || "").startsWith("u:");
}
|
||||||
|
|
||||||
/**
 * Tell whether a SKU key consists solely of decimal digits (after trimming).
 *
 * @param {unknown} key - SKU key; falsy values count as non-numeric.
 * @returns {boolean} true for one or more digits and nothing else.
 */
function isNumericSku(key) {
  return /^\d+$/.test(String(key || "").trim());
}
|
||||||
|
|
||||||
/**
 * Comparator used to choose a stable canonical representative for a group.
 *
 * Order: keys without the `u:` prefix sort before `u:` keys; two all-digit
 * keys compare by numeric value; everything else (including digit keys with
 * equal numeric value) compares lexicographically.
 *
 * @param {unknown} a - First SKU key (trimmed before comparison).
 * @param {unknown} b - Second SKU key (trimmed before comparison).
 * @returns {number} -1, 0 or 1.
 */
function compareSku(a, b) {
  // Stable ordering to choose a canonical representative.
  // Prefer real (non-u:) > unknown (u:). Among reals: numeric ascending if possible, else lex.
  const left = String(a || "").trim();
  const right = String(b || "").trim();
  if (left === right) return 0;

  const leftUnknown = isUnknownSkuKey(left);
  const rightUnknown = isUnknownSkuKey(right);
  if (leftUnknown !== rightUnknown) return leftUnknown ? 1 : -1; // real first

  if (isNumericSku(left) && isNumericSku(right)) {
    // compare as integers (safe: these are small SKU strings)
    const na = Number(left);
    const nb = Number(right);
    if (Number.isFinite(na) && Number.isFinite(nb) && na !== nb) return na < nb ? -1 : 1;
  }

  // fallback lex
  return left < right ? -1 : 1;
}
|
||||||
|
|
||||||
/**
 * Group linked SKUs into connected components and pick one canonical SKU per group.
 *
 * Links are treated as undirected edges. The representative of each group is
 * the first member under `compareSku` ordering.
 *
 * @param {Array<{ fromSku?: string, toSku?: string }>} links - Link records;
 *   a non-array value is treated as empty, and records with an empty side are skipped.
 * @returns {{ canonBySku: Map<string, string>, groupsByCanon: Map<string, Set<string>> }}
 *   `canonBySku` maps every linked SKU to its representative; `groupsByCanon`
 *   maps each representative to the set of all members (itself included).
 */
function buildGroupsAndCanonicalMap(links) {
  const dsu = new DSU();
  const all = new Set();

  for (const x of Array.isArray(links) ? links : []) {
    const a = normalizeImplicitSkuKey(x?.fromSku);
    const b = normalizeImplicitSkuKey(x?.toSku);
    if (!a || !b) continue;
    all.add(a);
    all.add(b);

    // IMPORTANT: union is undirected for grouping (hardened vs cycles)
    dsu.union(a, b);
  }

  // root -> Set(members)
  const groupsByRoot = new Map();
  for (const s of all) {
    const r = dsu.find(s);
    if (!r) continue;
    let set = groupsByRoot.get(r);
    if (!set) groupsByRoot.set(r, (set = new Set()));
    set.add(s);
  }

  // sku -> canonical rep
  const canonBySku = new Map();
  // canonical rep -> Set(members) (what the rest of the app uses)
  const groupsByCanon = new Map();

  for (const [root, members] of groupsByRoot.entries()) {
    // Choose a canonical representative per group
    const arr = Array.from(members);
    arr.sort(compareSku);
    const rep = arr[0] || root;

    let g = groupsByCanon.get(rep);
    if (!g) groupsByCanon.set(rep, (g = new Set([rep])));
    for (const s of members) {
      canonBySku.set(s, rep);
      g.add(s);
    }
  }

  return { canonBySku, groupsByCanon };
}
|
||||||
|
|
||||||
/**
 * Load the SKU link/ignore rules and build the lookup helpers derived from them.
 *
 * The result is memoised in the module-level `CACHED`; once it is set, every
 * later call returns the same object without reloading anything.
 *
 * @returns {Promise<object>} An object exposing the raw `links` / `ignores`
 *   arrays, the derived `forwardMap`, `toGroups` and `ignoreSet`, and the
 *   helpers `canonicalSku`, `groupForCanonical`, `isIgnoredPair` and
 *   `canonicalPairKey`.
 */
export async function loadSkuRules() {
  if (CACHED) return CACHED;

  let meta = await loadSkuMetaBestEffort();

  // On GitHub Pages (read-only), overlay local pending+submitted edits from localStorage
  if (!isLocalWriteMode()) {
    meta = applyPendingToMeta(meta);
  }

  // Tolerate a missing or malformed meta object: fall back to empty rule lists.
  const links = Array.isArray(meta?.links) ? meta.links : [];
  const ignores = Array.isArray(meta?.ignores) ? meta.ignores : [];

  // keep forwardMap for visibility/debug; grouping uses union-find
  const forwardMap = buildForwardMap(links);

  const { canonBySku, groupsByCanon } = buildGroupsAndCanonicalMap(links);
  const ignoreSet = buildIgnoreSet(ignores);

  /** Map a SKU to its group's canonical representative (or itself when unlinked). */
  function canonicalSku(sku) {
    const s = normalizeImplicitSkuKey(sku);
    if (!s) return s;
    return canonBySku.get(s) || s;
  }

  /** Return a fresh Set holding every member of the group that `toSku` belongs to. */
  function groupForCanonical(toSku) {
    const canon = canonicalSku(toSku);
    const g = groupsByCanon.get(canon);
    // Always hand back a copy so callers cannot mutate the cached group.
    return g ? new Set(g) : new Set([canon]);
  }

  /** True when the pair (a, b) has a key present in the ignore set. */
  function isIgnoredPair(a, b) {
    const k = canonicalPairKey(a, b);
    return k ? ignoreSet.has(k) : false;
  }

  CACHED = {
    links,
    ignores,
    forwardMap,

    // "toGroups" retained name for compatibility with existing code
    toGroups: groupsByCanon,
    ignoreSet,

    canonicalSku,
    groupForCanonical,
    isIgnoredPair,
    canonicalPairKey,
  };

  return CACHED;
}
|
||||||
|
|
|
||||||
|
|
@ -3,219 +3,217 @@ const LS_KEY = "stviz:v1:pendingSkuEdits";
|
||||||
const LS_SUBMITTED_KEY = "stviz:v1:submittedSkuEdits";
|
const LS_SUBMITTED_KEY = "stviz:v1:submittedSkuEdits";
|
||||||
|
|
||||||
/**
 * Parse a JSON string without ever throwing.
 *
 * @param {*} s - Value to parse; falsy values are treated as the empty string.
 * @returns {*} The parsed value, or `null` when the input is not valid JSON
 *   (which includes the empty string).
 */
function safeParseJson(s) {
  try {
    return JSON.parse(String(s || ""));
  } catch {
    return null;
  }
}
|
||||||
|
|
||||||
/**
 * Normalise a SKU-like value to a trimmed string.
 *
 * @param {*} s - Raw value; any falsy value becomes the empty string.
 * @returns {string} The string form of `s` with surrounding whitespace removed.
 */
function normSku(s) {
  return String(s || "").trim();
}
|
||||||
|
|
||||||
/**
 * Build the directed dedupe key for a link edit.
 *
 * @param {*} fromSku - Source SKU (normalised with `normSku`).
 * @param {*} toSku - Target SKU (normalised with `normSku`).
 * @returns {string} `"from→to"`, or `""` when either side is empty or both
 *   sides are the same SKU (a self-link is not a valid edit).
 */
function linkKey(fromSku, toSku) {
  const f = normSku(fromSku);
  const t = normSku(toSku);
  if (!f || !t || f === t) return "";
  return `${f}→${t}`;
}
|
||||||
|
|
||||||
/**
 * Build the order-independent dedupe key for an ignore pair.
 *
 * @param {*} a - First SKU (normalised with `normSku`).
 * @param {*} b - Second SKU (normalised with `normSku`).
 * @returns {string} `"x|y"` with the two SKUs in ascending string order, or
 *   `""` when either side is empty or both sides are the same SKU.
 */
function pairKey(a, b) {
  const x = normSku(a);
  const y = normSku(b);
  if (!x || !y || x === y) return "";
  // Sort the two sides so (a, b) and (b, a) produce the same key.
  return x < y ? `${x}|${y}` : `${y}|${x}`;
}
|
||||||
|
|
||||||
/**
 * Read and sanitise an edits payload stored under `key` in localStorage.
 *
 * Never throws: an unavailable storage, a missing entry or malformed JSON all
 * yield an empty payload.
 *
 * @param {string} key - localStorage key to read.
 * @returns {{createdAt: string, links: Array<{fromSku: string, toSku: string}>, ignores: Array<{skuA: string, skuB: string}>}}
 *   Normalised edits; entries that do not produce a valid link/pair key are dropped.
 */
function loadEditsFromKey(key) {
  let raw = "";
  try {
    raw = localStorage.getItem(key) || "";
  } catch {
    // Best effort: reading storage can throw (e.g. when access is denied);
    // treat that the same as "nothing stored".
    raw = "";
  }

  const j = safeParseJson(raw);
  const links = Array.isArray(j?.links) ? j.links : [];
  const ignores = Array.isArray(j?.ignores) ? j.ignores : [];

  return {
    createdAt: String(j?.createdAt || ""),
    links: links
      .map((x) => ({ fromSku: normSku(x?.fromSku), toSku: normSku(x?.toSku) }))
      .filter((x) => linkKey(x.fromSku, x.toSku)),
    // Ignore entries may use either the {skuA, skuB} or the {a, b} field names.
    ignores: ignores
      .map((x) => ({ skuA: normSku(x?.skuA || x?.a), skuB: normSku(x?.skuB || x?.b) }))
      .filter((x) => pairKey(x.skuA, x.skuB)),
  };
}
|
||||||
|
|
||||||
/**
 * Persist an edits payload under `key` in localStorage (best effort).
 *
 * @param {string} key - localStorage key to write.
 * @param {{createdAt?: string, links?: Array, ignores?: Array}} edits - Payload
 *   to store; non-array `links` / `ignores` are replaced by empty arrays and a
 *   missing `createdAt` is filled with the current time in ISO format.
 * @returns {{createdAt: string, links: Array, ignores: Array}} The object that
 *   was serialised. It is returned even when the write itself failed.
 */
function saveEditsToKey(key, edits) {
  const out = {
    createdAt: edits?.createdAt || new Date().toISOString(),
    links: Array.isArray(edits?.links) ? edits.links : [],
    ignores: Array.isArray(edits?.ignores) ? edits.ignores : [],
  };
  try {
    localStorage.setItem(key, JSON.stringify(out));
  } catch {
    // Deliberately best effort: a failed write (storage unavailable or full)
    // must not break the caller, which still receives the payload.
  }
  return out;
}
|
||||||
|
|
||||||
/**
 * Load the locally stored pending (not yet submitted) SKU edits.
 *
 * @returns {{createdAt: string, links: Array, ignores: Array}} Sanitised edits
 *   read from the `LS_KEY` storage entry.
 */
export function loadPendingEdits() {
  return loadEditsFromKey(LS_KEY);
}
|
||||||
|
|
||||||
/**
 * Store the pending SKU edits under the `LS_KEY` storage entry.
 *
 * @param {{createdAt?: string, links?: Array, ignores?: Array}} edits - Edits to persist.
 * @returns {{createdAt: string, links: Array, ignores: Array}} The payload that was serialised.
 */
export function savePendingEdits(edits) {
  return saveEditsToKey(LS_KEY, edits);
}
|
||||||
|
|
||||||
/**
 * Remove the pending SKU edits from localStorage (best effort, never throws).
 */
export function clearPendingEdits() {
  try {
    localStorage.removeItem(LS_KEY);
  } catch {
    // Storage may be unavailable; there is nothing useful to do in that case.
  }
}
|
||||||
|
|
||||||
/**
 * Load the locally stored SKU edits that were already submitted.
 *
 * @returns {{createdAt: string, links: Array, ignores: Array}} Sanitised edits
 *   read from the `LS_SUBMITTED_KEY` storage entry.
 */
export function loadSubmittedEdits() {
  return loadEditsFromKey(LS_SUBMITTED_KEY);
}
|
||||||
|
|
||||||
/**
 * Store the submitted SKU edits under the `LS_SUBMITTED_KEY` storage entry.
 *
 * @param {{createdAt?: string, links?: Array, ignores?: Array}} edits - Edits to persist.
 * @returns {{createdAt: string, links: Array, ignores: Array}} The payload that was serialised.
 */
export function saveSubmittedEdits(edits) {
  return saveEditsToKey(LS_SUBMITTED_KEY, edits);
}
|
||||||
|
|
||||||
/**
 * Remove the submitted SKU edits from localStorage (best effort, never throws).
 */
export function clearSubmittedEdits() {
  try {
    localStorage.removeItem(LS_SUBMITTED_KEY);
  } catch {
    // Storage may be unavailable; there is nothing useful to do in that case.
  }
}
|
||||||
|
|
||||||
/**
 * Count the pending edits currently stored locally.
 *
 * @returns {{links: number, ignores: number, total: number}} Number of pending
 *   links, pending ignores, and their sum.
 */
export function pendingCounts() {
  const { links, ignores } = loadPendingEdits();
  return {
    links: links.length,
    ignores: ignores.length,
    total: links.length + ignores.length,
  };
}
|
||||||
|
|
||||||
/**
 * Queue a `fromSku → toSku` link as a pending edit.
 *
 * @param {*} fromSku - Source SKU.
 * @param {*} toSku - Target SKU.
 * @returns {boolean} `true` when the link was added; `false` when the pair is
 *   invalid (empty or identical SKUs) or the same directed link is already
 *   present in the pending or submitted edits.
 */
export function addPendingLink(fromSku, toSku) {
  const f = normSku(fromSku);
  const t = normSku(toSku);
  const k = linkKey(f, t);
  if (!k) return false;

  const pending = loadPendingEdits();
  const submitted = loadSubmittedEdits();

  // Keys of every link already recorded locally, pending or submitted.
  const seen = new Set(
    [
      ...pending.links.map((x) => linkKey(x.fromSku, x.toSku)),
      ...submitted.links.map((x) => linkKey(x.fromSku, x.toSku)),
    ].filter(Boolean),
  );

  if (seen.has(k)) return false;

  pending.links.push({ fromSku: f, toSku: t });
  savePendingEdits(pending);
  return true;
}
|
||||||
|
|
||||||
/**
 * Queue an "ignore this pair" rule as a pending edit.
 *
 * @param {*} skuA - One SKU of the pair.
 * @param {*} skuB - The other SKU of the pair.
 * @returns {boolean} `true` when the pair was added; `false` when the pair is
 *   invalid (empty or identical SKUs) or the same unordered pair is already
 *   present in the pending or submitted edits.
 */
export function addPendingIgnore(skuA, skuB) {
  const a = normSku(skuA);
  const b = normSku(skuB);
  const k = pairKey(a, b);
  if (!k) return false;

  const pending = loadPendingEdits();
  const submitted = loadSubmittedEdits();

  // Keys of every ignore pair already recorded locally, pending or submitted.
  const seen = new Set(
    [
      ...pending.ignores.map((x) => pairKey(x.skuA, x.skuB)),
      ...submitted.ignores.map((x) => pairKey(x.skuA, x.skuB)),
    ].filter(Boolean),
  );

  if (seen.has(k)) return false;

  pending.ignores.push({ skuA: a, skuB: b });
  savePendingEdits(pending);
  return true;
}
|
||||||
|
|
||||||
/**
 * Merge PENDING + SUBMITTED local edits into a meta object {links, ignores}.
 *
 * The input is not mutated: its `links` / `ignores` arrays are copied before
 * the local edits are appended. Entries whose key is already present are skipped.
 *
 * @param {{generatedAt?: string, links?: Array, ignores?: Array}} meta - Base
 *   rules; missing or non-array fields are treated as empty.
 * @returns {{generatedAt: string, links: Array, ignores: Array}} The merged meta.
 */
export function applyPendingToMeta(meta) {
  const base = {
    generatedAt: String(meta?.generatedAt || ""),
    links: Array.isArray(meta?.links) ? meta.links.slice() : [],
    ignores: Array.isArray(meta?.ignores) ? meta.ignores.slice() : [],
  };

  const p0 = loadPendingEdits();
  const p1 = loadSubmittedEdits();
  const overlay = {
    links: [...(p0.links || []), ...(p1.links || [])],
    ignores: [...(p0.ignores || []), ...(p1.ignores || [])],
  };

  // merge links (dedupe by from→to); linkKey normalises both sides itself
  const seenL = new Set(base.links.map((x) => linkKey(x?.fromSku, x?.toSku)).filter(Boolean));
  for (const x of overlay.links) {
    const k = linkKey(x.fromSku, x.toSku);
    if (!k || seenL.has(k)) continue;
    seenL.add(k);
    base.links.push({ fromSku: x.fromSku, toSku: x.toSku });
  }

  // merge ignores (dedupe by canonical pair key); base entries may use either
  // the {skuA, skuB} or the {a, b} field names
  const seenI = new Set(base.ignores.map((x) => pairKey(x?.skuA || x?.a, x?.skuB || x?.b)).filter(Boolean));
  for (const x of overlay.ignores) {
    const k = pairKey(x.skuA, x.skuB);
    if (!k || seenI.has(k)) continue;
    seenI.add(k);
    base.ignores.push({ skuA: x.skuA, skuB: x.skuB });
  }

  return base;
}
|
||||||
|
|
||||||
/**
 * Move everything from pending -> submitted, then clear pending.
 *
 * Entries already present in the submitted set are not duplicated there.
 *
 * @returns {{createdAt: string, links: Array, ignores: Array}} The moved
 *   payload (what should be sent in PR/issue). When nothing is pending, the
 *   empty pending payload is returned and storage is left untouched.
 */
export function movePendingToSubmitted() {
  const pending = loadPendingEdits();
  if (!pending.links.length && !pending.ignores.length) return pending;

  const sub = loadSubmittedEdits();

  // Append pending links that are not already in the submitted set.
  const seenL = new Set(sub.links.map((x) => linkKey(x.fromSku, x.toSku)).filter(Boolean));
  for (const x of pending.links) {
    const k = linkKey(x.fromSku, x.toSku);
    if (!k || seenL.has(k)) continue;
    seenL.add(k);
    sub.links.push({ fromSku: x.fromSku, toSku: x.toSku });
  }

  // Append pending ignore pairs that are not already in the submitted set.
  const seenI = new Set(sub.ignores.map((x) => pairKey(x.skuA, x.skuB)).filter(Boolean));
  for (const x of pending.ignores) {
    const k = pairKey(x.skuA, x.skuB);
    if (!k || seenI.has(k)) continue;
    seenI.add(k);
    sub.ignores.push({ skuA: x.skuA, skuB: x.skuB });
  }

  saveSubmittedEdits(sub);
  clearPendingEdits();
  return pending;
}
|
||||||
|
|
|
||||||
|
|
@ -1,21 +1,12 @@
|
||||||
import { esc, renderThumbHtml, prettyTs } from "./dom.js";
|
import { esc, renderThumbHtml, prettyTs } from "./dom.js";
|
||||||
import {
|
import { tokenizeQuery, matchesAllTokens, displaySku, keySkuForRow, parsePriceToNumber } from "./sku.js";
|
||||||
tokenizeQuery,
|
|
||||||
matchesAllTokens,
|
|
||||||
displaySku,
|
|
||||||
keySkuForRow,
|
|
||||||
parsePriceToNumber,
|
|
||||||
} from "./sku.js";
|
|
||||||
import { loadIndex, loadRecent, loadSavedQuery, saveQuery } from "./state.js";
|
import { loadIndex, loadRecent, loadSavedQuery, saveQuery } from "./state.js";
|
||||||
import { aggregateBySku } from "./catalog.js";
|
import { aggregateBySku } from "./catalog.js";
|
||||||
import { loadSkuRules } from "./mapping.js";
|
import { loadSkuRules } from "./mapping.js";
|
||||||
import {
|
import { smwsDistilleryCodesForQueryPrefix, smwsDistilleryCodeFromName } from "./smws.js";
|
||||||
smwsDistilleryCodesForQueryPrefix,
|
|
||||||
smwsDistilleryCodeFromName,
|
|
||||||
} from "./smws.js";
|
|
||||||
|
|
||||||
export function renderSearch($app) {
|
export function renderSearch($app) {
|
||||||
$app.innerHTML = `
|
$app.innerHTML = `
|
||||||
<div class="container">
|
<div class="container">
|
||||||
<div class="header">
|
<div class="header">
|
||||||
<!-- Row 1 -->
|
<!-- Row 1 -->
|
||||||
|
|
@ -50,123 +41,117 @@ export function renderSearch($app) {
|
||||||
</div>
|
</div>
|
||||||
`;
|
`;
|
||||||
|
|
||||||
const $q = document.getElementById("q");
|
const $q = document.getElementById("q");
|
||||||
const $results = document.getElementById("results");
|
const $results = document.getElementById("results");
|
||||||
const $stores = document.getElementById("stores");
|
const $stores = document.getElementById("stores");
|
||||||
const $clearSearch = document.getElementById("clearSearch");
|
const $clearSearch = document.getElementById("clearSearch");
|
||||||
|
|
||||||
$q.value = loadSavedQuery();
|
$q.value = loadSavedQuery();
|
||||||
|
|
||||||
let aggBySku = new Map();
|
let aggBySku = new Map();
|
||||||
let allAgg = [];
|
let allAgg = [];
|
||||||
let indexReady = false;
|
let indexReady = false;
|
||||||
|
|
||||||
// canonicalSku -> storeLabel -> url
|
// canonicalSku -> storeLabel -> url
|
||||||
let URL_BY_SKU_STORE = new Map();
|
let URL_BY_SKU_STORE = new Map();
|
||||||
|
|
||||||
/**
 * Index listing URLs as canonicalSku -> storeLabel -> url.
 *
 * Rows that are falsy, flagged `removed`, or lack a SKU key, store label or
 * URL are skipped. The first URL seen for a (sku, store) pair wins.
 *
 * @param {Array} listings - Listing rows; a non-array value is treated as empty.
 * @param {(sku: string) => string} [canonicalSkuFn] - Optional mapper from a
 *   row's SKU key to its canonical SKU; the key is used unchanged when omitted.
 * @returns {Map<string, Map<string, string>>} The nested lookup map.
 */
function buildUrlMap(listings, canonicalSkuFn) {
  const out = new Map();
  for (const r of Array.isArray(listings) ? listings : []) {
    if (!r || r.removed) continue;

    const skuKey = String(keySkuForRow(r) || "").trim();
    if (!skuKey) continue;

    const sku = String(canonicalSkuFn ? canonicalSkuFn(skuKey) : skuKey);
    if (!sku) continue;

    const storeLabel = String(r.storeLabel || r.store || "").trim();
    const url = String(r.url || "").trim();
    if (!storeLabel || !url) continue;

    let m = out.get(sku);
    if (!m) out.set(sku, (m = new Map()));
    // Keep the first URL encountered for this store.
    if (!m.has(storeLabel)) m.set(storeLabel, url);
  }
  return out;
}
|
||||||
|
|
||||||
/**
 * Look up the listing URL for an aggregate item at a given store.
 *
 * @param {{sku?: string}} it - Aggregate item; its `sku` is the outer lookup key.
 * @param {string} storeLabel - Store label used as the inner lookup key.
 * @returns {string} The URL from `URL_BY_SKU_STORE`, or `""` when none is recorded.
 */
function urlForAgg(it, storeLabel) {
  const sku = String(it?.sku || "");
  const s = String(storeLabel || "");
  return URL_BY_SKU_STORE.get(sku)?.get(s) || "";
}
|
||||||
|
|
||||||
/**
 * Normalise a store label to a trimmed string.
 *
 * @param {*} s - Raw label; any falsy value becomes the empty string.
 * @returns {string} The label with surrounding whitespace removed.
 */
function normStoreLabel(s) {
  return String(s || "").trim();
}
|
||||||
|
|
||||||
/**
 * Render one link button per store into `$stores`, sorted by label.
 *
 * A `storeBreak` marker element is emitted after the button at which the
 * running total of label lengths first reaches half of the overall total.
 *
 * @param {Array} listings - Listing rows; a non-array value is treated as empty.
 */
function renderStoreButtons(listings) {
  // include all stores seen (live or removed) so the selector is stable
  const set = new Set();
  for (const r of Array.isArray(listings) ? listings : []) {
    const lab = normStoreLabel(r?.storeLabel || r?.store || "");
    if (lab) set.add(lab);
  }
  const stores = Array.from(set).sort((a, b) => a.localeCompare(b));

  if (!stores.length) {
    $stores.innerHTML = "";
    return;
  }

  const totalChars = stores.reduce((n, s) => n + s.length, 0);
  const target = totalChars / 2;

  // Find the 1-based position of the store at which the cumulative label
  // length first reaches the halfway target.
  let acc = 0;
  let breakAt = stores.length;

  for (let i = 0; i < stores.length; i++) {
    acc += stores[i].length;
    if (acc >= target) {
      breakAt = i + 1;
      break;
    }
  }

  $stores.innerHTML = stores
    .map((s, i) => {
      const btn = `<a class="storeBtn" href="#/store/${encodeURIComponent(s)}">${esc(s)}</a>`;
      // No break marker when there is only a single store.
      const brk =
        i === breakAt - 1 && stores.length > 1 ? `<span class="storeBreak" aria-hidden="true"></span>` : "";
      return btn + brk;
    })
    .join("");
}
|
|
||||||
|
|
||||||
function renderAggregates(items) {
|
function renderAggregates(items) {
|
||||||
if (!items.length) {
|
if (!items.length) {
|
||||||
$results.innerHTML = `<div class="small">No matches.</div>`;
|
$results.innerHTML = `<div class="small">No matches.</div>`;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const limited = items.slice(0, 80);
|
const limited = items.slice(0, 80);
|
||||||
$results.innerHTML = limited
|
$results.innerHTML = limited
|
||||||
.map((it) => {
|
.map((it) => {
|
||||||
const storeCount = it.stores.size || 0;
|
const storeCount = it.stores.size || 0;
|
||||||
const plus = storeCount > 1 ? ` +${storeCount - 1}` : "";
|
const plus = storeCount > 1 ? ` +${storeCount - 1}` : "";
|
||||||
const price = it.cheapestPriceStr ? it.cheapestPriceStr : "(no price)";
|
const price = it.cheapestPriceStr ? it.cheapestPriceStr : "(no price)";
|
||||||
const store = it.cheapestStoreLabel || [...it.stores][0] || "Store";
|
const store = it.cheapestStoreLabel || [...it.stores][0] || "Store";
|
||||||
|
|
||||||
// link must match the displayed store label
|
// link must match the displayed store label
|
||||||
const href = urlForAgg(it, store) || String(it.sampleUrl || "").trim();
|
const href = urlForAgg(it, store) || String(it.sampleUrl || "").trim();
|
||||||
const storeBadge = href
|
const storeBadge = href
|
||||||
? `<a class="badge" href="${esc(
|
? `<a class="badge" href="${esc(
|
||||||
href
|
href,
|
||||||
)}" target="_blank" rel="noopener noreferrer" onclick="event.stopPropagation()">${esc(
|
)}" target="_blank" rel="noopener noreferrer" onclick="event.stopPropagation()">${esc(
|
||||||
store
|
store,
|
||||||
)}${esc(plus)}</a>`
|
)}${esc(plus)}</a>`
|
||||||
: `<span class="badge">${esc(store)}${esc(plus)}</span>`;
|
: `<span class="badge">${esc(store)}${esc(plus)}</span>`;
|
||||||
|
|
||||||
const skuLink = `#/link/?left=${encodeURIComponent(
|
const skuLink = `#/link/?left=${encodeURIComponent(String(it.sku || ""))}`;
|
||||||
String(it.sku || "")
|
|
||||||
)}`;
|
|
||||||
|
|
||||||
return `
|
return `
|
||||||
<div class="item" data-sku="${esc(it.sku)}">
|
<div class="item" data-sku="${esc(it.sku)}">
|
||||||
<div class="itemRow">
|
<div class="itemRow">
|
||||||
<div class="thumbBox">
|
<div class="thumbBox">
|
||||||
|
|
@ -176,10 +161,10 @@ export function renderSearch($app) {
|
||||||
<div class="itemTop">
|
<div class="itemTop">
|
||||||
<div class="itemName">${esc(it.name || "(no name)")}</div>
|
<div class="itemName">${esc(it.name || "(no name)")}</div>
|
||||||
<a class="badge mono skuLink" href="${esc(
|
<a class="badge mono skuLink" href="${esc(
|
||||||
skuLink
|
skuLink,
|
||||||
)}" target="_blank" rel="noopener noreferrer" onclick="event.stopPropagation()">${esc(
|
)}" target="_blank" rel="noopener noreferrer" onclick="event.stopPropagation()">${esc(
|
||||||
displaySku(it.sku)
|
displaySku(it.sku),
|
||||||
)}</a>
|
)}</a>
|
||||||
</div>
|
</div>
|
||||||
<div class="metaRow">
|
<div class="metaRow">
|
||||||
<span class="mono price">${esc(price)}</span>
|
<span class="mono price">${esc(price)}</span>
|
||||||
|
|
@ -189,299 +174,280 @@ export function renderSearch($app) {
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
`;
|
`;
|
||||||
})
|
})
|
||||||
.join("");
|
.join("");
|
||||||
|
|
||||||
for (const el of Array.from($results.querySelectorAll(".item"))) {
|
for (const el of Array.from($results.querySelectorAll(".item"))) {
|
||||||
el.addEventListener("click", () => {
|
el.addEventListener("click", () => {
|
||||||
const sku = el.getAttribute("data-sku") || "";
|
const sku = el.getAttribute("data-sku") || "";
|
||||||
if (!sku) return;
|
if (!sku) return;
|
||||||
saveQuery($q.value);
|
saveQuery($q.value);
|
||||||
sessionStorage.setItem("viz:lastRoute", location.hash);
|
sessionStorage.setItem("viz:lastRoute", location.hash);
|
||||||
location.hash = `#/item/${encodeURIComponent(sku)}`;
|
location.hash = `#/item/${encodeURIComponent(sku)}`;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Compute the rounded percentage discount of a price drop.
 *
 * @param {*} oldRaw - Previous price, parsed with `parsePriceToNumber`.
 * @param {*} newRaw - New price, parsed with `parsePriceToNumber`.
 * @returns {number|null} A positive whole-number percentage, or `null` when
 *   either price is not finite, the old price is not positive, the new price
 *   is not lower, or the discount rounds to zero.
 */
function salePctOff(oldRaw, newRaw) {
  const oldN = parsePriceToNumber(oldRaw);
  const newN = parsePriceToNumber(newRaw);
  if (!Number.isFinite(oldN) || !Number.isFinite(newN)) return null;
  if (!(oldN > 0)) return null;
  if (!(newN < oldN)) return null;
  const pct = Math.round(((oldN - newN) / oldN) * 100);
  return Number.isFinite(pct) && pct > 0 ? pct : null;
}
|
||||||
|
|
||||||
/**
 * Compute the rounded, signed percentage change between two prices.
 *
 * @param {*} oldRaw - Previous price, parsed with `parsePriceToNumber`.
 * @param {*} newRaw - New price, parsed with `parsePriceToNumber`.
 * @returns {number|null} Whole-number percentage (negative for a decrease), or
 *   `null` when either price is not finite or the old price is not positive.
 */
function pctChange(oldRaw, newRaw) {
  const oldN = parsePriceToNumber(oldRaw);
  const newN = parsePriceToNumber(newRaw);
  if (!Number.isFinite(oldN) || !Number.isFinite(newN)) return null;
  if (!(oldN > 0)) return null;
  const pct = Math.round(((newN - oldN) / oldN) * 100);
  return Number.isFinite(pct) ? pct : null;
}
|
||||||
|
|
||||||
/**
 * Extract a sortable millisecond timestamp from a row.
 *
 * @param {{ts?: string, date?: string}} r - Row carrying `ts` and/or `date`.
 * @returns {number} `Date.parse` of `r.ts` when that is parseable, otherwise
 *   `Date.parse` of `r.date`, otherwise `0`.
 */
function tsValue(r) {
  const t = String(r?.ts || "");
  const ms = t ? Date.parse(t) : NaN;
  if (Number.isFinite(ms)) return ms;

  // Fall back to the date field when ts is missing or unparseable.
  const d = String(r?.date || "");
  const ms2 = d ? Date.parse(d) : NaN;
  return Number.isFinite(ms2) ? ms2 : 0;
}
|
||||||
|
|
||||||
// Custom priority (unchanged)
|
// Custom priority (unchanged)
|
||||||
function rankRecent(r, canonSkuFn) {
|
function rankRecent(r, canonSkuFn) {
|
||||||
const rawSku = String(r?.sku || "");
|
const rawSku = String(r?.sku || "");
|
||||||
const sku = String(canonSkuFn ? canonSkuFn(rawSku) : rawSku);
|
const sku = String(canonSkuFn ? canonSkuFn(rawSku) : rawSku);
|
||||||
|
|
||||||
const agg = aggBySku.get(sku) || null;
|
const agg = aggBySku.get(sku) || null;
|
||||||
|
|
||||||
const storeLabelRaw = String(r?.storeLabel || r?.store || "").trim();
|
const storeLabelRaw = String(r?.storeLabel || r?.store || "").trim();
|
||||||
const bestStoreRaw = String(agg?.cheapestStoreLabel || "").trim();
|
const bestStoreRaw = String(agg?.cheapestStoreLabel || "").trim();
|
||||||
|
|
||||||
const normStore = (s) => String(s || "").trim().toLowerCase();
|
const normStore = (s) =>
|
||||||
|
String(s || "")
|
||||||
|
.trim()
|
||||||
|
.toLowerCase();
|
||||||
|
|
||||||
// Normalize kind
|
// Normalize kind
|
||||||
let kind = String(r?.kind || "");
|
let kind = String(r?.kind || "");
|
||||||
if (kind === "price_change") {
|
if (kind === "price_change") {
|
||||||
const o = parsePriceToNumber(r?.oldPrice || "");
|
const o = parsePriceToNumber(r?.oldPrice || "");
|
||||||
const n = parsePriceToNumber(r?.newPrice || "");
|
const n = parsePriceToNumber(r?.newPrice || "");
|
||||||
if (Number.isFinite(o) && Number.isFinite(n)) {
|
if (Number.isFinite(o) && Number.isFinite(n)) {
|
||||||
if (n < o) kind = "price_down";
|
if (n < o) kind = "price_down";
|
||||||
else if (n > o) kind = "price_up";
|
else if (n > o) kind = "price_up";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const pctOff =
|
const pctOff = kind === "price_down" ? salePctOff(r?.oldPrice || "", r?.newPrice || "") : null;
|
||||||
kind === "price_down"
|
const pctUp = kind === "price_up" ? pctChange(r?.oldPrice || "", r?.newPrice || "") : null;
|
||||||
? salePctOff(r?.oldPrice || "", r?.newPrice || "")
|
|
||||||
: null;
|
|
||||||
const pctUp =
|
|
||||||
kind === "price_up"
|
|
||||||
? pctChange(r?.oldPrice || "", r?.newPrice || "")
|
|
||||||
: null;
|
|
||||||
|
|
||||||
const isNew = kind === "new";
|
const isNew = kind === "new";
|
||||||
const storeCount = agg?.stores?.size || 0;
|
const storeCount = agg?.stores?.size || 0;
|
||||||
const isNewUnique = isNew && storeCount <= 1;
|
const isNewUnique = isNew && storeCount <= 1;
|
||||||
|
|
||||||
// Cheapest checks (use aggregate index)
|
// Cheapest checks (use aggregate index)
|
||||||
const newPriceNum =
|
const newPriceNum = kind === "price_down" || kind === "price_up" ? parsePriceToNumber(r?.newPrice || "") : null;
|
||||||
kind === "price_down" || kind === "price_up"
|
const bestPriceNum = Number.isFinite(agg?.cheapestPriceNum) ? agg.cheapestPriceNum : null;
|
||||||
? parsePriceToNumber(r?.newPrice || "")
|
|
||||||
: null;
|
|
||||||
const bestPriceNum = Number.isFinite(agg?.cheapestPriceNum)
|
|
||||||
? agg.cheapestPriceNum
|
|
||||||
: null;
|
|
||||||
|
|
||||||
const EPS = 0.01;
|
const EPS = 0.01;
|
||||||
const priceMatchesBest =
|
const priceMatchesBest =
|
||||||
Number.isFinite(newPriceNum) && Number.isFinite(bestPriceNum)
|
Number.isFinite(newPriceNum) && Number.isFinite(bestPriceNum)
|
||||||
? Math.abs(newPriceNum - bestPriceNum) <= EPS
|
? Math.abs(newPriceNum - bestPriceNum) <= EPS
|
||||||
: false;
|
: false;
|
||||||
|
|
||||||
const storeIsBest =
|
const storeIsBest =
|
||||||
normStore(storeLabelRaw) &&
|
normStore(storeLabelRaw) && normStore(bestStoreRaw) && normStore(storeLabelRaw) === normStore(bestStoreRaw);
|
||||||
normStore(bestStoreRaw) &&
|
|
||||||
normStore(storeLabelRaw) === normStore(bestStoreRaw);
|
|
||||||
|
|
||||||
const saleIsCheapestHere =
|
const saleIsCheapestHere = kind === "price_down" && storeIsBest && priceMatchesBest;
|
||||||
kind === "price_down" && storeIsBest && priceMatchesBest;
|
const saleIsTiedCheapest = kind === "price_down" && !storeIsBest && priceMatchesBest;
|
||||||
const saleIsTiedCheapest =
|
const saleIsCheapest = saleIsCheapestHere || saleIsTiedCheapest;
|
||||||
kind === "price_down" && !storeIsBest && priceMatchesBest;
|
|
||||||
const saleIsCheapest = saleIsCheapestHere || saleIsTiedCheapest;
|
|
||||||
|
|
||||||
// Bucketed scoring (higher = earlier)
|
// Bucketed scoring (higher = earlier)
|
||||||
let score = 0;
|
let score = 0;
|
||||||
|
|
||||||
function saleBucketScore(isCheapest, pct) {
|
function saleBucketScore(isCheapest, pct) {
|
||||||
const p = Number.isFinite(pct) ? pct : 0;
|
const p = Number.isFinite(pct) ? pct : 0;
|
||||||
|
|
||||||
if (isCheapest) {
|
if (isCheapest) {
|
||||||
if (p >= 20) return 9000 + p;
|
if (p >= 20) return 9000 + p;
|
||||||
if (p >= 10) return 7000 + p;
|
if (p >= 10) return 7000 + p;
|
||||||
if (p > 0) return 6000 + p;
|
if (p > 0) return 6000 + p;
|
||||||
return 5900;
|
return 5900;
|
||||||
} else {
|
} else {
|
||||||
if (p >= 20) return 4500 + p;
|
if (p >= 20) return 4500 + p;
|
||||||
if (p >= 10) return 1500 + p;
|
if (p >= 10) return 1500 + p;
|
||||||
if (p > 0) return 1200 + p;
|
if (p > 0) return 1200 + p;
|
||||||
return 1000;
|
return 1000;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (kind === "price_down") {
|
if (kind === "price_down") {
|
||||||
score = saleBucketScore(saleIsCheapest, pctOff);
|
score = saleBucketScore(saleIsCheapest, pctOff);
|
||||||
} else if (isNewUnique) {
|
} else if (isNewUnique) {
|
||||||
score = 8000;
|
score = 8000;
|
||||||
} else if (kind === "removed") {
|
} else if (kind === "removed") {
|
||||||
score = 3000;
|
score = 3000;
|
||||||
} else if (kind === "price_up") {
|
} else if (kind === "price_up") {
|
||||||
score = 2000 + Math.min(99, Math.max(0, pctUp || 0));
|
score = 2000 + Math.min(99, Math.max(0, pctUp || 0));
|
||||||
} else if (kind === "new") {
|
} else if (kind === "new") {
|
||||||
score = 1100;
|
score = 1100;
|
||||||
} else if (kind === "restored") {
|
} else if (kind === "restored") {
|
||||||
score = 5000;
|
score = 5000;
|
||||||
} else {
|
} else {
|
||||||
score = 0;
|
score = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
let tie = 0;
|
let tie = 0;
|
||||||
if (kind === "price_down") tie = (pctOff || 0) * 100000 + tsValue(r);
|
if (kind === "price_down") tie = (pctOff || 0) * 100000 + tsValue(r);
|
||||||
else if (kind === "price_up") tie = (pctUp || 0) * 100000 + tsValue(r);
|
else if (kind === "price_up") tie = (pctUp || 0) * 100000 + tsValue(r);
|
||||||
else tie = tsValue(r);
|
else tie = tsValue(r);
|
||||||
|
|
||||||
return { sku, kind, pctOff, storeCount, isNewUnique, score, tie };
|
return { sku, kind, pctOff, storeCount, isNewUnique, score, tie };
|
||||||
}
|
}
|
||||||
|
|
||||||
function renderRecent(recent, canonicalSkuFn) {
|
function renderRecent(recent, canonicalSkuFn) {
|
||||||
const items = Array.isArray(recent?.items) ? recent.items : [];
|
const items = Array.isArray(recent?.items) ? recent.items : [];
|
||||||
if (!items.length) {
|
if (!items.length) {
|
||||||
$results.innerHTML = `<div class="small">Type to search…</div>`;
|
$results.innerHTML = `<div class="small">Type to search…</div>`;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const canon =
|
const canon = typeof canonicalSkuFn === "function" ? canonicalSkuFn : (x) => x;
|
||||||
typeof canonicalSkuFn === "function" ? canonicalSkuFn : (x) => x;
|
|
||||||
|
|
||||||
const nowMs = Date.now();
|
const nowMs = Date.now();
|
||||||
const cutoffMs = nowMs - 3 * 24 * 60 * 60 * 1000;
|
const cutoffMs = nowMs - 3 * 24 * 60 * 60 * 1000;
|
||||||
|
|
||||||
function eventMs(r) {
|
function eventMs(r) {
|
||||||
const t = String(r?.ts || "");
|
const t = String(r?.ts || "");
|
||||||
const ms = t ? Date.parse(t) : NaN;
|
const ms = t ? Date.parse(t) : NaN;
|
||||||
if (Number.isFinite(ms)) return ms;
|
if (Number.isFinite(ms)) return ms;
|
||||||
|
|
||||||
const d = String(r?.date || "");
|
const d = String(r?.date || "");
|
||||||
const ms2 = d ? Date.parse(d + "T00:00:00Z") : NaN;
|
const ms2 = d ? Date.parse(d + "T00:00:00Z") : NaN;
|
||||||
return Number.isFinite(ms2) ? ms2 : 0;
|
return Number.isFinite(ms2) ? ms2 : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
const inWindow = items.filter((r) => {
|
const inWindow = items.filter((r) => {
|
||||||
const ms = eventMs(r);
|
const ms = eventMs(r);
|
||||||
return ms >= cutoffMs && ms <= nowMs;
|
return ms >= cutoffMs && ms <= nowMs;
|
||||||
});
|
});
|
||||||
|
|
||||||
if (!inWindow.length) {
|
if (!inWindow.length) {
|
||||||
$results.innerHTML = `<div class="small">No changes in the last 3 days.</div>`;
|
$results.innerHTML = `<div class="small">No changes in the last 3 days.</div>`;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const bySkuStore = new Map();
|
const bySkuStore = new Map();
|
||||||
|
|
||||||
for (const r of inWindow) {
|
for (const r of inWindow) {
|
||||||
const rawSku = String(r?.sku || "").trim();
|
const rawSku = String(r?.sku || "").trim();
|
||||||
if (!rawSku) continue;
|
if (!rawSku) continue;
|
||||||
|
|
||||||
const sku = String(canon(rawSku) || "").trim();
|
const sku = String(canon(rawSku) || "").trim();
|
||||||
if (!sku) continue;
|
if (!sku) continue;
|
||||||
|
|
||||||
const storeLabel = String(r?.storeLabel || r?.store || "Store").trim() || "Store";
|
const storeLabel = String(r?.storeLabel || r?.store || "Store").trim() || "Store";
|
||||||
const ms = eventMs(r);
|
const ms = eventMs(r);
|
||||||
|
|
||||||
let storeMap = bySkuStore.get(sku);
|
let storeMap = bySkuStore.get(sku);
|
||||||
if (!storeMap) bySkuStore.set(sku, (storeMap = new Map()));
|
if (!storeMap) bySkuStore.set(sku, (storeMap = new Map()));
|
||||||
|
|
||||||
const prev = storeMap.get(storeLabel);
|
const prev = storeMap.get(storeLabel);
|
||||||
if (!prev || eventMs(prev) < ms) storeMap.set(storeLabel, r);
|
if (!prev || eventMs(prev) < ms) storeMap.set(storeLabel, r);
|
||||||
}
|
}
|
||||||
|
|
||||||
const picked = [];
|
const picked = [];
|
||||||
for (const [sku, storeMap] of bySkuStore.entries()) {
|
for (const [sku, storeMap] of bySkuStore.entries()) {
|
||||||
let best = null;
|
let best = null;
|
||||||
|
|
||||||
for (const r of storeMap.values()) {
|
for (const r of storeMap.values()) {
|
||||||
const meta = rankRecent(r, canon);
|
const meta = rankRecent(r, canon);
|
||||||
const ms = eventMs(r);
|
const ms = eventMs(r);
|
||||||
|
|
||||||
if (
|
if (
|
||||||
!best ||
|
!best ||
|
||||||
meta.score > best.meta.score ||
|
meta.score > best.meta.score ||
|
||||||
(meta.score === best.meta.score && meta.tie > best.meta.tie) ||
|
(meta.score === best.meta.score && meta.tie > best.meta.tie) ||
|
||||||
(meta.score === best.meta.score &&
|
(meta.score === best.meta.score && meta.tie === best.meta.tie && ms > best.ms)
|
||||||
meta.tie === best.meta.tie &&
|
) {
|
||||||
ms > best.ms)
|
best = { r, meta, ms };
|
||||||
) {
|
}
|
||||||
best = { r, meta, ms };
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (best) picked.push(best);
|
if (best) picked.push(best);
|
||||||
}
|
}
|
||||||
|
|
||||||
const ranked = picked.sort((a, b) => {
|
const ranked = picked.sort((a, b) => {
|
||||||
if (b.meta.score !== a.meta.score) return b.meta.score - a.meta.score;
|
if (b.meta.score !== a.meta.score) return b.meta.score - a.meta.score;
|
||||||
if (b.meta.tie !== a.meta.tie) return b.meta.tie - a.meta.tie;
|
if (b.meta.tie !== a.meta.tie) return b.meta.tie - a.meta.tie;
|
||||||
return String(a.meta.sku || "").localeCompare(String(b.meta.sku || ""));
|
return String(a.meta.sku || "").localeCompare(String(b.meta.sku || ""));
|
||||||
});
|
});
|
||||||
|
|
||||||
const limited = ranked.slice(0, 140);
|
const limited = ranked.slice(0, 140);
|
||||||
|
|
||||||
$results.innerHTML =
|
$results.innerHTML =
|
||||||
`<div class="small">Recently changed (last 3 days):</div>` +
|
`<div class="small">Recently changed (last 3 days):</div>` +
|
||||||
limited
|
limited
|
||||||
.map(({ r, meta }) => {
|
.map(({ r, meta }) => {
|
||||||
const kindLabel =
|
const kindLabel =
|
||||||
meta.kind === "new"
|
meta.kind === "new"
|
||||||
? "NEW"
|
? "NEW"
|
||||||
: meta.kind === "restored"
|
: meta.kind === "restored"
|
||||||
? "RESTORED"
|
? "RESTORED"
|
||||||
: meta.kind === "removed"
|
: meta.kind === "removed"
|
||||||
? "REMOVED"
|
? "REMOVED"
|
||||||
: meta.kind === "price_down"
|
: meta.kind === "price_down"
|
||||||
? "PRICE ↓"
|
? "PRICE ↓"
|
||||||
: meta.kind === "price_up"
|
: meta.kind === "price_up"
|
||||||
? "PRICE ↑"
|
? "PRICE ↑"
|
||||||
: meta.kind === "price_change"
|
: meta.kind === "price_change"
|
||||||
? "PRICE"
|
? "PRICE"
|
||||||
: "CHANGE";
|
: "CHANGE";
|
||||||
|
|
||||||
const priceLine =
|
const priceLine =
|
||||||
meta.kind === "new" || meta.kind === "restored" || meta.kind === "removed"
|
meta.kind === "new" || meta.kind === "restored" || meta.kind === "removed"
|
||||||
? `${esc(r.price || "")}`
|
? `${esc(r.price || "")}`
|
||||||
: `${esc(r.oldPrice || "")} → ${esc(r.newPrice || "")}`;
|
: `${esc(r.oldPrice || "")} → ${esc(r.newPrice || "")}`;
|
||||||
|
|
||||||
const when = r.ts ? prettyTs(r.ts) : r.date || "";
|
const when = r.ts ? prettyTs(r.ts) : r.date || "";
|
||||||
|
|
||||||
const sku = meta.sku; // canonical SKU
|
const sku = meta.sku; // canonical SKU
|
||||||
const agg = aggBySku.get(sku) || null;
|
const agg = aggBySku.get(sku) || null;
|
||||||
const img = agg?.img || "";
|
const img = agg?.img || "";
|
||||||
|
|
||||||
const storeCount = agg?.stores?.size || 0;
|
const storeCount = agg?.stores?.size || 0;
|
||||||
const plus = storeCount > 1 ? ` +${storeCount - 1}` : "";
|
const plus = storeCount > 1 ? ` +${storeCount - 1}` : "";
|
||||||
|
|
||||||
const href = String(r.url || "").trim();
|
const href = String(r.url || "").trim();
|
||||||
const storeBadge = href
|
const storeBadge = href
|
||||||
? `<a class="badge" href="${esc(
|
? `<a class="badge" href="${esc(
|
||||||
href
|
href,
|
||||||
)}" target="_blank" rel="noopener noreferrer" onclick="event.stopPropagation()">${esc(
|
)}" target="_blank" rel="noopener noreferrer" onclick="event.stopPropagation()">${esc(
|
||||||
(r.storeLabel || r.store || "") + plus
|
(r.storeLabel || r.store || "") + plus,
|
||||||
)}</a>`
|
)}</a>`
|
||||||
: `<span class="badge">${esc(
|
: `<span class="badge">${esc((r.storeLabel || r.store || "") + plus)}</span>`;
|
||||||
(r.storeLabel || r.store || "") + plus
|
|
||||||
)}</span>`;
|
|
||||||
|
|
||||||
const dateBadge = when
|
const dateBadge = when ? `<span class="badge mono">${esc(when)}</span>` : "";
|
||||||
? `<span class="badge mono">${esc(when)}</span>`
|
|
||||||
: "";
|
|
||||||
|
|
||||||
const offBadge =
|
const offBadge =
|
||||||
meta.kind === "price_down" && meta.pctOff !== null
|
meta.kind === "price_down" && meta.pctOff !== null
|
||||||
? `<span class="badge" style="margin-left:6px; color:rgba(20,110,40,0.95); background:rgba(20,110,40,0.10); border:1px solid rgba(20,110,40,0.20);">[${esc(
|
? `<span class="badge" style="margin-left:6px; color:rgba(20,110,40,0.95); background:rgba(20,110,40,0.10); border:1px solid rgba(20,110,40,0.20);">[${esc(
|
||||||
meta.pctOff
|
meta.pctOff,
|
||||||
)}% Off]</span>`
|
)}% Off]</span>`
|
||||||
: "";
|
: "";
|
||||||
|
|
||||||
const kindBadgeStyle =
|
const kindBadgeStyle =
|
||||||
meta.kind === "new" && meta.isNewUnique
|
meta.kind === "new" && meta.isNewUnique
|
||||||
? ` style="color:rgba(20,110,40,0.95); background:rgba(20,110,40,0.10); border:1px solid rgba(20,110,40,0.20);"`
|
? ` style="color:rgba(20,110,40,0.95); background:rgba(20,110,40,0.10); border:1px solid rgba(20,110,40,0.20);"`
|
||||||
: "";
|
: "";
|
||||||
|
|
||||||
const skuLink = `#/link/?left=${encodeURIComponent(String(sku || ""))}`;
|
const skuLink = `#/link/?left=${encodeURIComponent(String(sku || ""))}`;
|
||||||
|
|
||||||
return `
|
return `
|
||||||
<div class="item" data-sku="${esc(sku)}">
|
<div class="item" data-sku="${esc(sku)}">
|
||||||
<div class="itemRow">
|
<div class="itemRow">
|
||||||
<div class="thumbBox">
|
<div class="thumbBox">
|
||||||
|
|
@ -491,10 +457,10 @@ export function renderSearch($app) {
|
||||||
<div class="itemTop">
|
<div class="itemTop">
|
||||||
<div class="itemName">${esc(r.name || "(no name)")}</div>
|
<div class="itemName">${esc(r.name || "(no name)")}</div>
|
||||||
<a class="badge mono skuLink" href="${esc(
|
<a class="badge mono skuLink" href="${esc(
|
||||||
skuLink
|
skuLink,
|
||||||
)}" target="_blank" rel="noopener noreferrer" onclick="event.stopPropagation()">${esc(
|
)}" target="_blank" rel="noopener noreferrer" onclick="event.stopPropagation()">${esc(
|
||||||
displaySku(sku)
|
displaySku(sku),
|
||||||
)}</a>
|
)}</a>
|
||||||
</div>
|
</div>
|
||||||
<div class="metaRow">
|
<div class="metaRow">
|
||||||
<span class="badge"${kindBadgeStyle}>${esc(kindLabel)}</span>
|
<span class="badge"${kindBadgeStyle}>${esc(kindLabel)}</span>
|
||||||
|
|
@ -507,113 +473,103 @@ export function renderSearch($app) {
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
`;
|
`;
|
||||||
})
|
})
|
||||||
.join("");
|
.join("");
|
||||||
|
|
||||||
for (const el of Array.from($results.querySelectorAll(".item"))) {
|
for (const el of Array.from($results.querySelectorAll(".item"))) {
|
||||||
el.addEventListener("click", () => {
|
el.addEventListener("click", () => {
|
||||||
const sku = el.getAttribute("data-sku") || "";
|
const sku = el.getAttribute("data-sku") || "";
|
||||||
if (!sku) return;
|
if (!sku) return;
|
||||||
saveQuery($q.value);
|
saveQuery($q.value);
|
||||||
sessionStorage.setItem("viz:lastRoute", location.hash);
|
sessionStorage.setItem("viz:lastRoute", location.hash);
|
||||||
location.hash = `#/item/${encodeURIComponent(sku)}`;
|
location.hash = `#/item/${encodeURIComponent(sku)}`;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function applySearch() {
|
function applySearch() {
|
||||||
if (!indexReady) return;
|
if (!indexReady) return;
|
||||||
|
|
||||||
const tokens = tokenizeQuery($q.value);
|
const tokens = tokenizeQuery($q.value);
|
||||||
if (!tokens.length) return;
|
if (!tokens.length) return;
|
||||||
|
|
||||||
const matches = allAgg.filter((it) =>
|
const matches = allAgg.filter((it) => matchesAllTokens(it.searchText, tokens));
|
||||||
matchesAllTokens(it.searchText, tokens)
|
|
||||||
);
|
|
||||||
|
|
||||||
const wantCodes = new Set(smwsDistilleryCodesForQueryPrefix($q.value));
|
const wantCodes = new Set(smwsDistilleryCodesForQueryPrefix($q.value));
|
||||||
if (!wantCodes.size) {
|
if (!wantCodes.size) {
|
||||||
renderAggregates(matches);
|
renderAggregates(matches);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const seen = new Set(matches.map((it) => String(it?.sku || "")));
|
const seen = new Set(matches.map((it) => String(it?.sku || "")));
|
||||||
const extra = [];
|
const extra = [];
|
||||||
for (const it of allAgg) {
|
for (const it of allAgg) {
|
||||||
const sku = String(it?.sku || "");
|
const sku = String(it?.sku || "");
|
||||||
if (!sku || seen.has(sku)) continue;
|
if (!sku || seen.has(sku)) continue;
|
||||||
const dCode = smwsDistilleryCodeFromName(it?.name || "");
|
const dCode = smwsDistilleryCodeFromName(it?.name || "");
|
||||||
if (dCode && wantCodes.has(String(dCode))) {
|
if (dCode && wantCodes.has(String(dCode))) {
|
||||||
extra.push(it);
|
extra.push(it);
|
||||||
seen.add(sku);
|
seen.add(sku);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
renderAggregates([...extra, ...matches]);
|
renderAggregates([...extra, ...matches]);
|
||||||
}
|
}
|
||||||
|
|
||||||
$results.innerHTML = `<div class="small">Loading index…</div>`;
|
$results.innerHTML = `<div class="small">Loading index…</div>`;
|
||||||
|
|
||||||
Promise.all([loadIndex(), loadSkuRules()])
|
Promise.all([loadIndex(), loadSkuRules()])
|
||||||
.then(([idx, rules]) => {
|
.then(([idx, rules]) => {
|
||||||
const listings = Array.isArray(idx.items) ? idx.items : [];
|
const listings = Array.isArray(idx.items) ? idx.items : [];
|
||||||
|
|
||||||
renderStoreButtons(listings);
|
renderStoreButtons(listings);
|
||||||
|
|
||||||
allAgg = aggregateBySku(listings, rules.canonicalSku);
|
allAgg = aggregateBySku(listings, rules.canonicalSku);
|
||||||
aggBySku = new Map(allAgg.map((x) => [String(x.sku || ""), x]));
|
aggBySku = new Map(allAgg.map((x) => [String(x.sku || ""), x]));
|
||||||
URL_BY_SKU_STORE = buildUrlMap(listings, rules.canonicalSku);
|
URL_BY_SKU_STORE = buildUrlMap(listings, rules.canonicalSku);
|
||||||
|
|
||||||
indexReady = true;
|
indexReady = true;
|
||||||
$q.focus();
|
$q.focus();
|
||||||
|
|
||||||
const tokens = tokenizeQuery($q.value);
|
const tokens = tokenizeQuery($q.value);
|
||||||
if (tokens.length) {
|
if (tokens.length) {
|
||||||
applySearch();
|
applySearch();
|
||||||
} else {
|
} else {
|
||||||
return loadRecent().then((recent) =>
|
return loadRecent().then((recent) => renderRecent(recent, rules.canonicalSku));
|
||||||
renderRecent(recent, rules.canonicalSku)
|
}
|
||||||
);
|
})
|
||||||
}
|
.catch((e) => {
|
||||||
})
|
$results.innerHTML = `<div class="small">Failed to load: ${esc(e.message)}</div>`;
|
||||||
.catch((e) => {
|
});
|
||||||
$results.innerHTML = `<div class="small">Failed to load: ${esc(
|
|
||||||
e.message
|
|
||||||
)}</div>`;
|
|
||||||
});
|
|
||||||
|
|
||||||
$clearSearch.addEventListener("click", () => {
|
$clearSearch.addEventListener("click", () => {
|
||||||
if ($q.value) {
|
if ($q.value) {
|
||||||
$q.value = "";
|
$q.value = "";
|
||||||
saveQuery("");
|
saveQuery("");
|
||||||
}
|
}
|
||||||
loadSkuRules()
|
loadSkuRules()
|
||||||
.then((rules) =>
|
.then((rules) => loadRecent().then((recent) => renderRecent(recent, rules.canonicalSku)))
|
||||||
loadRecent().then((recent) => renderRecent(recent, rules.canonicalSku))
|
.catch(() => {
|
||||||
)
|
$results.innerHTML = `<div class="small">Type to search…</div>`;
|
||||||
.catch(() => {
|
});
|
||||||
$results.innerHTML = `<div class="small">Type to search…</div>`;
|
$q.focus();
|
||||||
});
|
});
|
||||||
$q.focus();
|
|
||||||
});
|
|
||||||
|
|
||||||
let t = null;
|
let t = null;
|
||||||
$q.addEventListener("input", () => {
|
$q.addEventListener("input", () => {
|
||||||
saveQuery($q.value);
|
saveQuery($q.value);
|
||||||
if (t) clearTimeout(t);
|
if (t) clearTimeout(t);
|
||||||
t = setTimeout(() => {
|
t = setTimeout(() => {
|
||||||
const tokens = tokenizeQuery($q.value);
|
const tokens = tokenizeQuery($q.value);
|
||||||
if (!tokens.length) {
|
if (!tokens.length) {
|
||||||
loadSkuRules()
|
loadSkuRules()
|
||||||
.then((rules) =>
|
.then((rules) => loadRecent().then((recent) => renderRecent(recent, rules.canonicalSku)))
|
||||||
loadRecent().then((recent) => renderRecent(recent, rules.canonicalSku))
|
.catch(() => {
|
||||||
)
|
$results.innerHTML = `<div class="small">Type to search…</div>`;
|
||||||
.catch(() => {
|
});
|
||||||
$results.innerHTML = `<div class="small">Type to search…</div>`;
|
return;
|
||||||
});
|
}
|
||||||
return;
|
applySearch();
|
||||||
}
|
}, 50);
|
||||||
applySearch();
|
});
|
||||||
}, 50);
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
101
viz/app/sku.js
101
viz/app/sku.js
|
|
@ -1,60 +1,59 @@
|
||||||
export function parsePriceToNumber(v) {
|
export function parsePriceToNumber(v) {
|
||||||
const s = String(v ?? "").replace(/[^0-9.]/g, "");
|
const s = String(v ?? "").replace(/[^0-9.]/g, "");
|
||||||
const n = Number(s);
|
const n = Number(s);
|
||||||
return Number.isFinite(n) ? n : null;
|
return Number.isFinite(n) ? n : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
export function fnv1a32(str) {
|
export function fnv1a32(str) {
|
||||||
let h = 0x811c9dc5;
|
let h = 0x811c9dc5;
|
||||||
for (let i = 0; i < str.length; i++) {
|
for (let i = 0; i < str.length; i++) {
|
||||||
h ^= str.charCodeAt(i);
|
h ^= str.charCodeAt(i);
|
||||||
h = Math.imul(h, 0x01000193);
|
h = Math.imul(h, 0x01000193);
|
||||||
}
|
}
|
||||||
return (h >>> 0).toString(16).padStart(8, "0");
|
return (h >>> 0).toString(16).padStart(8, "0");
|
||||||
}
|
}
|
||||||
|
|
||||||
export function makeSyntheticSku(r) {
|
export function makeSyntheticSku(r) {
|
||||||
const store = String(r?.storeLabel || r?.store || "store");
|
const store = String(r?.storeLabel || r?.store || "store");
|
||||||
const url = String(r?.url || "");
|
const url = String(r?.url || "");
|
||||||
const key = `${store}|${url}`;
|
const key = `${store}|${url}`;
|
||||||
return `u:${fnv1a32(key)}`;
|
return `u:${fnv1a32(key)}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
export function keySkuForRow(r) {
|
export function keySkuForRow(r) {
|
||||||
const real0 = String(r?.sku || "").trim();
|
const real0 = String(r?.sku || "").trim();
|
||||||
if (real0) {
|
if (real0) {
|
||||||
const m = real0.match(/^id:(\d{1,6})$/i);
|
const m = real0.match(/^id:(\d{1,6})$/i);
|
||||||
return m ? String(m[1]).padStart(6, "0") : real0;
|
return m ? String(m[1]).padStart(6, "0") : real0;
|
||||||
}
|
}
|
||||||
return makeSyntheticSku(r);
|
return makeSyntheticSku(r);
|
||||||
}
|
}
|
||||||
|
|
||||||
export function displaySku(key) {
|
export function displaySku(key) {
|
||||||
const s = String(key || "");
|
const s = String(key || "");
|
||||||
return s.startsWith("u:") ? "unknown" : s;
|
return s.startsWith("u:") ? "unknown" : s;
|
||||||
}
|
}
|
||||||
|
|
||||||
export function isUnknownSkuKey(key) {
|
export function isUnknownSkuKey(key) {
|
||||||
return String(key || "").startsWith("u:");
|
return String(key || "").startsWith("u:");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Normalize for search: lowercase, punctuation -> space, collapse spaces
|
// Normalize for search: lowercase, punctuation -> space, collapse spaces
|
||||||
export function normSearchText(s) {
|
export function normSearchText(s) {
|
||||||
return String(s ?? "")
|
return String(s ?? "")
|
||||||
.toLowerCase()
|
.toLowerCase()
|
||||||
.replace(/[^a-z0-9]+/g, " ")
|
.replace(/[^a-z0-9]+/g, " ")
|
||||||
.replace(/\s+/g, " ")
|
.replace(/\s+/g, " ")
|
||||||
.trim();
|
.trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
export function tokenizeQuery(q) {
|
export function tokenizeQuery(q) {
|
||||||
const n = normSearchText(q);
|
const n = normSearchText(q);
|
||||||
return n ? n.split(" ").filter(Boolean) : [];
|
return n ? n.split(" ").filter(Boolean) : [];
|
||||||
}
|
}
|
||||||
|
|
||||||
export function matchesAllTokens(hayNorm, tokens) {
|
|
||||||
if (!tokens.length) return true;
|
|
||||||
for (const t of tokens) if (!hayNorm.includes(t)) return false;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
export function matchesAllTokens(hayNorm, tokens) {
|
||||||
|
if (!tokens.length) return true;
|
||||||
|
for (const t of tokens) if (!hayNorm.includes(t)) return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
|
||||||
356
viz/app/smws.js
356
viz/app/smws.js
|
|
@ -2,196 +2,196 @@
|
||||||
import { normSearchText } from "./sku.js";
|
import { normSearchText } from "./sku.js";
|
||||||
|
|
||||||
const DISTILLERIES = [
|
const DISTILLERIES = [
|
||||||
{ code: "1", name: "Glenfarclas" },
|
{ code: "1", name: "Glenfarclas" },
|
||||||
{ code: "2", name: "Glenlivet" },
|
{ code: "2", name: "Glenlivet" },
|
||||||
{ code: "3", name: "Bowmore" },
|
{ code: "3", name: "Bowmore" },
|
||||||
{ code: "4", name: "Highland Park" },
|
{ code: "4", name: "Highland Park" },
|
||||||
{ code: "5", name: "Auchentoshan" },
|
{ code: "5", name: "Auchentoshan" },
|
||||||
{ code: "6", name: "Macduff" },
|
{ code: "6", name: "Macduff" },
|
||||||
{ code: "7", name: "Longmorn" },
|
{ code: "7", name: "Longmorn" },
|
||||||
{ code: "8", name: "Tamdhu" },
|
{ code: "8", name: "Tamdhu" },
|
||||||
{ code: "9", name: "Glen Grant" },
|
{ code: "9", name: "Glen Grant" },
|
||||||
{ code: "10", name: "Bunnahabhain" },
|
{ code: "10", name: "Bunnahabhain" },
|
||||||
{ code: "11", name: "Tomatin" },
|
{ code: "11", name: "Tomatin" },
|
||||||
{ code: "12", name: "BenRiach" },
|
{ code: "12", name: "BenRiach" },
|
||||||
{ code: "13", name: "Dalmore" },
|
{ code: "13", name: "Dalmore" },
|
||||||
{ code: "14", name: "Talisker" },
|
{ code: "14", name: "Talisker" },
|
||||||
{ code: "15", name: "Glenfiddich" },
|
{ code: "15", name: "Glenfiddich" },
|
||||||
{ code: "16", name: "Glenturret" },
|
{ code: "16", name: "Glenturret" },
|
||||||
{ code: "17", name: "Scapa" },
|
{ code: "17", name: "Scapa" },
|
||||||
{ code: "18", name: "Inchgower" },
|
{ code: "18", name: "Inchgower" },
|
||||||
{ code: "19", name: "Glen Garioch" },
|
{ code: "19", name: "Glen Garioch" },
|
||||||
{ code: "20", name: "Inverleven" },
|
{ code: "20", name: "Inverleven" },
|
||||||
{ code: "21", name: "Glenglassaugh" },
|
{ code: "21", name: "Glenglassaugh" },
|
||||||
{ code: "22", name: "Glenkinchie" },
|
{ code: "22", name: "Glenkinchie" },
|
||||||
{ code: "23", name: "Bruichladdich" },
|
{ code: "23", name: "Bruichladdich" },
|
||||||
{ code: "24", name: "Macallan" },
|
{ code: "24", name: "Macallan" },
|
||||||
{ code: "25", name: "Rosebank" },
|
{ code: "25", name: "Rosebank" },
|
||||||
{ code: "26", name: "Clynelish" },
|
{ code: "26", name: "Clynelish" },
|
||||||
{ code: "27", name: "Springbank" },
|
{ code: "27", name: "Springbank" },
|
||||||
{ code: "28", name: "Tullibardine" },
|
{ code: "28", name: "Tullibardine" },
|
||||||
{ code: "29", name: "Laphroaig" },
|
{ code: "29", name: "Laphroaig" },
|
||||||
{ code: "30", name: "Glenrothes" },
|
{ code: "30", name: "Glenrothes" },
|
||||||
{ code: "31", name: "Isle of Jura" },
|
{ code: "31", name: "Isle of Jura" },
|
||||||
{ code: "32", name: "Edradour" },
|
{ code: "32", name: "Edradour" },
|
||||||
{ code: "33", name: "Ardbeg" },
|
{ code: "33", name: "Ardbeg" },
|
||||||
{ code: "34", name: "Tamnavulin" },
|
{ code: "34", name: "Tamnavulin" },
|
||||||
{ code: "35", name: "Glen Moray" },
|
{ code: "35", name: "Glen Moray" },
|
||||||
{ code: "36", name: "Benrinnes" },
|
{ code: "36", name: "Benrinnes" },
|
||||||
{ code: "37", name: "Cragganmore" },
|
{ code: "37", name: "Cragganmore" },
|
||||||
{ code: "38", name: "Caperdonich" },
|
{ code: "38", name: "Caperdonich" },
|
||||||
{ code: "39", name: "Linkwood" },
|
{ code: "39", name: "Linkwood" },
|
||||||
{ code: "40", name: "Balvenie" },
|
{ code: "40", name: "Balvenie" },
|
||||||
{ code: "41", name: "Dailuaine" },
|
{ code: "41", name: "Dailuaine" },
|
||||||
{ code: "42", name: "Tobermory" },
|
{ code: "42", name: "Tobermory" },
|
||||||
{ code: "43", name: "Port Ellen" },
|
{ code: "43", name: "Port Ellen" },
|
||||||
{ code: "44", name: "Craigellachie" },
|
{ code: "44", name: "Craigellachie" },
|
||||||
{ code: "45", name: "Dallas Dhu" },
|
{ code: "45", name: "Dallas Dhu" },
|
||||||
{ code: "46", name: "Glenlossie" },
|
{ code: "46", name: "Glenlossie" },
|
||||||
{ code: "47", name: "Benromach" },
|
{ code: "47", name: "Benromach" },
|
||||||
{ code: "48", name: "Balmenach" },
|
{ code: "48", name: "Balmenach" },
|
||||||
{ code: "49", name: "St. Magdalene" },
|
{ code: "49", name: "St. Magdalene" },
|
||||||
{ code: "50", name: "Bladnoch" },
|
{ code: "50", name: "Bladnoch" },
|
||||||
{ code: "51", name: "Bushmills" },
|
{ code: "51", name: "Bushmills" },
|
||||||
{ code: "52", name: "Old Pulteney" },
|
{ code: "52", name: "Old Pulteney" },
|
||||||
{ code: "53", name: "Caol Ila" },
|
{ code: "53", name: "Caol Ila" },
|
||||||
{ code: "54", name: "Aberlour" },
|
{ code: "54", name: "Aberlour" },
|
||||||
{ code: "55", name: "Royal Brackla" },
|
{ code: "55", name: "Royal Brackla" },
|
||||||
{ code: "56", name: "Coleburn" },
|
{ code: "56", name: "Coleburn" },
|
||||||
{ code: "57", name: "Glen Mhor" },
|
{ code: "57", name: "Glen Mhor" },
|
||||||
{ code: "58", name: "Strathisla" },
|
{ code: "58", name: "Strathisla" },
|
||||||
{ code: "59", name: "Teaninich" },
|
{ code: "59", name: "Teaninich" },
|
||||||
{ code: "60", name: "Aberfeldy" },
|
{ code: "60", name: "Aberfeldy" },
|
||||||
{ code: "61", name: "Brora" },
|
{ code: "61", name: "Brora" },
|
||||||
{ code: "62", name: "Glenlochy" },
|
{ code: "62", name: "Glenlochy" },
|
||||||
{ code: "63", name: "Glentauchers" },
|
{ code: "63", name: "Glentauchers" },
|
||||||
{ code: "64", name: "Mannochmore" },
|
{ code: "64", name: "Mannochmore" },
|
||||||
{ code: "65", name: "Imperial" },
|
{ code: "65", name: "Imperial" },
|
||||||
{ code: "66", name: "Ardmore" },
|
{ code: "66", name: "Ardmore" },
|
||||||
{ code: "67", name: "Banff" },
|
{ code: "67", name: "Banff" },
|
||||||
{ code: "68", name: "Blair Athol" },
|
{ code: "68", name: "Blair Athol" },
|
||||||
{ code: "69", name: "Glen Albyn" },
|
{ code: "69", name: "Glen Albyn" },
|
||||||
{ code: "70", name: "Balblair" },
|
{ code: "70", name: "Balblair" },
|
||||||
{ code: "71", name: "Glenburgie" },
|
{ code: "71", name: "Glenburgie" },
|
||||||
{ code: "72", name: "Miltonduff" },
|
{ code: "72", name: "Miltonduff" },
|
||||||
{ code: "73", name: "Aultmore" },
|
{ code: "73", name: "Aultmore" },
|
||||||
{ code: "74", name: "North Port" },
|
{ code: "74", name: "North Port" },
|
||||||
{ code: "75", name: "Glenury / Glenury Royal" },
|
{ code: "75", name: "Glenury / Glenury Royal" },
|
||||||
{ code: "76", name: "Mortlach" },
|
{ code: "76", name: "Mortlach" },
|
||||||
{ code: "77", name: "Glen Ord" },
|
{ code: "77", name: "Glen Ord" },
|
||||||
{ code: "78", name: "Ben Nevis" },
|
{ code: "78", name: "Ben Nevis" },
|
||||||
{ code: "79", name: "Deanston" },
|
{ code: "79", name: "Deanston" },
|
||||||
{ code: "80", name: "Glen Spey" },
|
{ code: "80", name: "Glen Spey" },
|
||||||
{ code: "81", name: "Glen Keith" },
|
{ code: "81", name: "Glen Keith" },
|
||||||
{ code: "82", name: "Glencadam" },
|
{ code: "82", name: "Glencadam" },
|
||||||
{ code: "83", name: "Convalmore" },
|
{ code: "83", name: "Convalmore" },
|
||||||
{ code: "84", name: "Glendullan" },
|
{ code: "84", name: "Glendullan" },
|
||||||
{ code: "85", name: "Glen Elgin" },
|
{ code: "85", name: "Glen Elgin" },
|
||||||
{ code: "86", name: "Glenesk" },
|
{ code: "86", name: "Glenesk" },
|
||||||
{ code: "87", name: "Millburn" },
|
{ code: "87", name: "Millburn" },
|
||||||
{ code: "88", name: "Speyburn" },
|
{ code: "88", name: "Speyburn" },
|
||||||
{ code: "89", name: "Tomintoul" },
|
{ code: "89", name: "Tomintoul" },
|
||||||
{ code: "90", name: "Pittyvaich" },
|
{ code: "90", name: "Pittyvaich" },
|
||||||
{ code: "91", name: "Dufftown" },
|
{ code: "91", name: "Dufftown" },
|
||||||
{ code: "92", name: "Lochside" },
|
{ code: "92", name: "Lochside" },
|
||||||
{ code: "93", name: "Glen Scotia" },
|
{ code: "93", name: "Glen Scotia" },
|
||||||
{ code: "94", name: "Fettercairn" },
|
{ code: "94", name: "Fettercairn" },
|
||||||
{ code: "95", name: "Auchroisk" },
|
{ code: "95", name: "Auchroisk" },
|
||||||
{ code: "96", name: "GlenDronach" },
|
{ code: "96", name: "GlenDronach" },
|
||||||
{ code: "97", name: "Littlemill" },
|
{ code: "97", name: "Littlemill" },
|
||||||
{ code: "98", name: "Inverleven" },
|
{ code: "98", name: "Inverleven" },
|
||||||
{ code: "99", name: "Glenugie" },
|
{ code: "99", name: "Glenugie" },
|
||||||
{ code: "100", name: "Strathmill" },
|
{ code: "100", name: "Strathmill" },
|
||||||
{ code: "101", name: "Knockando" },
|
{ code: "101", name: "Knockando" },
|
||||||
{ code: "102", name: "Dalwhinnie" },
|
{ code: "102", name: "Dalwhinnie" },
|
||||||
{ code: "103", name: "Royal Lochnagar" },
|
{ code: "103", name: "Royal Lochnagar" },
|
||||||
{ code: "104", name: "Glenburgie (Glencraig)" },
|
{ code: "104", name: "Glenburgie (Glencraig)" },
|
||||||
{ code: "105", name: "Tormore" },
|
{ code: "105", name: "Tormore" },
|
||||||
{ code: "106", name: "Cardhu" },
|
{ code: "106", name: "Cardhu" },
|
||||||
{ code: "107", name: "Glenallachie" },
|
{ code: "107", name: "Glenallachie" },
|
||||||
{ code: "108", name: "Allt-a-Bhainne" },
|
{ code: "108", name: "Allt-a-Bhainne" },
|
||||||
{ code: "109", name: "Miltonduff (Mosstowie)" },
|
{ code: "109", name: "Miltonduff (Mosstowie)" },
|
||||||
{ code: "110", name: "Oban" },
|
{ code: "110", name: "Oban" },
|
||||||
{ code: "111", name: "Lagavulin" },
|
{ code: "111", name: "Lagavulin" },
|
||||||
{ code: "112", name: "Loch Lomond (Inchmurrin / Inchmoan)" },
|
{ code: "112", name: "Loch Lomond (Inchmurrin / Inchmoan)" },
|
||||||
{ code: "113", name: "Braeval (Braes of Glenlivet)" },
|
{ code: "113", name: "Braeval (Braes of Glenlivet)" },
|
||||||
{ code: "114", name: "Springbank (Longrow)" },
|
{ code: "114", name: "Springbank (Longrow)" },
|
||||||
{ code: "115", name: "Knockdhu (AnCnoc)" },
|
{ code: "115", name: "Knockdhu (AnCnoc)" },
|
||||||
{ code: "116", name: "Yoichi" },
|
{ code: "116", name: "Yoichi" },
|
||||||
{ code: "117", name: "Cooley (Unpeated)" },
|
{ code: "117", name: "Cooley (Unpeated)" },
|
||||||
{ code: "118", name: "Cooley / Connemara (Peated)" },
|
{ code: "118", name: "Cooley / Connemara (Peated)" },
|
||||||
{ code: "119", name: "Yamazaki" },
|
{ code: "119", name: "Yamazaki" },
|
||||||
{ code: "120", name: "Hakushu" },
|
{ code: "120", name: "Hakushu" },
|
||||||
{ code: "121", name: "Isle of Arran" },
|
{ code: "121", name: "Isle of Arran" },
|
||||||
{ code: "122", name: "Loch Lomond (Croftengea)" },
|
{ code: "122", name: "Loch Lomond (Croftengea)" },
|
||||||
{ code: "123", name: "Glengoyne" },
|
{ code: "123", name: "Glengoyne" },
|
||||||
{ code: "124", name: "Miyagikyo" },
|
{ code: "124", name: "Miyagikyo" },
|
||||||
{ code: "125", name: "Glenmorangie" },
|
{ code: "125", name: "Glenmorangie" },
|
||||||
{ code: "126", name: "Springbank (Hazelburn)" },
|
{ code: "126", name: "Springbank (Hazelburn)" },
|
||||||
{ code: "127", name: "Bruichladdich (Port Charlotte)" },
|
{ code: "127", name: "Bruichladdich (Port Charlotte)" },
|
||||||
{ code: "128", name: "Penderyn" },
|
{ code: "128", name: "Penderyn" },
|
||||||
{ code: "129", name: "Kilchoman" },
|
{ code: "129", name: "Kilchoman" },
|
||||||
{ code: "130", name: "Chichibu" },
|
{ code: "130", name: "Chichibu" },
|
||||||
{ code: "131", name: "Hanyu" },
|
{ code: "131", name: "Hanyu" },
|
||||||
{ code: "132", name: "Karuizawa" },
|
{ code: "132", name: "Karuizawa" },
|
||||||
{ code: "133", name: "Westland" },
|
{ code: "133", name: "Westland" },
|
||||||
{ code: "134", name: "Paul John" },
|
{ code: "134", name: "Paul John" },
|
||||||
{ code: "135", name: "Loch Lomond" },
|
{ code: "135", name: "Loch Lomond" },
|
||||||
{ code: "136", name: "Eden Mill" },
|
{ code: "136", name: "Eden Mill" },
|
||||||
{ code: "137", name: "St. George’s (The English Whisky Co.)" },
|
{ code: "137", name: "St. George’s (The English Whisky Co.)" },
|
||||||
{ code: "138", name: "Nantou" },
|
{ code: "138", name: "Nantou" },
|
||||||
{ code: "139", name: "Kavalan" },
|
{ code: "139", name: "Kavalan" },
|
||||||
{ code: "140", name: "Balcones" },
|
{ code: "140", name: "Balcones" },
|
||||||
{ code: "141", name: "Fary Lochan" },
|
{ code: "141", name: "Fary Lochan" },
|
||||||
{ code: "142", name: "Breuckelen Distilling" },
|
{ code: "142", name: "Breuckelen Distilling" },
|
||||||
{ code: "143", name: "Copperworks Distilling Co." },
|
{ code: "143", name: "Copperworks Distilling Co." },
|
||||||
{ code: "144", name: "High Coast Distillery" },
|
{ code: "144", name: "High Coast Distillery" },
|
||||||
{ code: "145", name: "Smögen Whisky" },
|
{ code: "145", name: "Smögen Whisky" },
|
||||||
{ code: "146", name: "Cotswolds" },
|
{ code: "146", name: "Cotswolds" },
|
||||||
{ code: "147", name: "Archie Rose" },
|
{ code: "147", name: "Archie Rose" },
|
||||||
{ code: "148", name: "Starward" },
|
{ code: "148", name: "Starward" },
|
||||||
{ code: "149", name: "Ardnamurchan" },
|
{ code: "149", name: "Ardnamurchan" },
|
||||||
{ code: "150", name: "West Cork Distillers" },
|
{ code: "150", name: "West Cork Distillers" },
|
||||||
{ code: "151", name: "Mackmyra" },
|
{ code: "151", name: "Mackmyra" },
|
||||||
{ code: "152", name: "Shelter Point" },
|
{ code: "152", name: "Shelter Point" },
|
||||||
{ code: "153", name: "Thy Whisky" },
|
{ code: "153", name: "Thy Whisky" },
|
||||||
{ code: "154", name: "Mosgaard Whisky" },
|
{ code: "154", name: "Mosgaard Whisky" },
|
||||||
{ code: "155", name: "Milk & Honey Distillery" },
|
{ code: "155", name: "Milk & Honey Distillery" },
|
||||||
{ code: "156", name: "Glasgow Distillery" },
|
{ code: "156", name: "Glasgow Distillery" },
|
||||||
{ code: "157", name: "Distillerie de Warenghem" },
|
{ code: "157", name: "Distillerie de Warenghem" },
|
||||||
{ code: "158", name: "Yuza Distillery" },
|
{ code: "158", name: "Yuza Distillery" },
|
||||||
{ code: "159", name: "Mars Shinshu" },
|
{ code: "159", name: "Mars Shinshu" },
|
||||||
{ code: "160", name: "Mars Tsunuki" },
|
{ code: "160", name: "Mars Tsunuki" },
|
||||||
{ code: "161", name: "Nc'nean Distillery" },
|
{ code: "161", name: "Nc'nean Distillery" },
|
||||||
{ code: "162", name: "Isle of Raasay" },
|
{ code: "162", name: "Isle of Raasay" },
|
||||||
{ code: "163", name: "Isle of Harris Distillery" },
|
{ code: "163", name: "Isle of Harris Distillery" },
|
||||||
{ code: "164", name: "Penderyn" },
|
{ code: "164", name: "Penderyn" },
|
||||||
{ code: "165", name: "Wolfburn" },
|
{ code: "165", name: "Wolfburn" },
|
||||||
];
|
];
|
||||||
|
|
||||||
// Search table derived from DISTILLERIES: each entry keeps the code as a
// string and the distillery name run through normSearchText.
const DIST = DISTILLERIES.map(({ code, name }) => ({
  code: String(code),
  nameNorm: normSearchText(name),
}));

// The standalone word "SMWS", in any letter case.
const SMWS_WORD_RE = /\bsmws\b/i;

// A dotted numeric code such as "35.123": group 1 is the 1-3 digits before
// the dot, group 2 the 1-4 digits after it.
const SMWS_CODE_RE = /\b(\d{1,3})\.(\d{1,4})\b/;
|
||||||
|
|
||||||
/**
 * Lists the distillery codes whose normalized name starts with the query.
 *
 * @param {*} qRaw - Raw query text; it is normalized with normSearchText.
 * @returns {string[]} Unique codes in DIST order, or an empty array when the
 *   normalized query is empty or shorter than two characters.
 */
export function smwsDistilleryCodesForQueryPrefix(qRaw) {
  const query = normSearchText(qRaw);
  if (!query || query.length < 2) return [];

  const matchingCodes = DIST.filter((entry) => entry.nameNorm.startsWith(query)).map(
    (entry) => entry.code,
  );
  // A Set drops repeated codes while keeping first-seen order.
  return [...new Set(matchingCodes)];
}
|
||||||
|
|
||||||
/**
 * Extracts the distillery part of an SMWS code from a listing name.
 *
 * The name must contain the word "SMWS" and a dotted code like 35.123; the
 * digits before the dot ("35") are returned.
 *
 * @param {*} name - Listing name; falsy values are treated as "".
 * @returns {string} The distillery code, or "" when the name does not match.
 */
export function smwsDistilleryCodeFromName(name) {
  const text = String(name || "");
  if (!SMWS_WORD_RE.test(text)) return "";

  const match = SMWS_CODE_RE.exec(text);
  if (!match) return "";
  return String(match[1] || "");
}
|
||||||
|
|
|
||||||
|
|
@ -4,34 +4,34 @@ let INDEX = null;
|
||||||
// Cached result of loadRecent(); null until the first call has stored a value.
let RECENT = null;
|
||||||
|
|
||||||
/**
 * Loads ./data/index.json through fetchJson and caches it in INDEX.
 *
 * @returns {Promise<*>} The cached value when INDEX is already truthy,
 *   otherwise the freshly fetched one.
 */
export async function loadIndex() {
  if (!INDEX) {
    INDEX = await fetchJson("./data/index.json");
  }
  return INDEX;
}
|
||||||
|
|
||||||
/**
 * Loads ./data/recent.json through fetchJson and caches it in RECENT.
 *
 * Any failure while fetching is replaced by an empty payload
 * ({ count: 0, items: [] }), which is cached like a real result.
 *
 * @returns {Promise<*>} The cached or freshly loaded payload.
 */
export async function loadRecent() {
  if (!RECENT) {
    let payload;
    try {
      payload = await fetchJson("./data/recent.json");
    } catch {
      payload = { count: 0, items: [] };
    }
    RECENT = payload;
  }
  return RECENT;
}
|
||||||
|
|
||||||
// localStorage key under which the search box value is kept, so it survives
// navigation between pages.
const Q_LS_KEY = "stviz:v1:search:q";
|
||||||
|
|
||||||
/**
 * Reads the saved search query from localStorage.
 *
 * @returns {string} The stored value, or "" when nothing is stored or the
 *   storage access throws.
 */
export function loadSavedQuery() {
  let saved = "";
  try {
    saved = localStorage.getItem(Q_LS_KEY) || "";
  } catch {
    // Storage access failed: fall through with the empty default.
  }
  return saved;
}
|
||||||
|
|
||||||
/**
 * Stores the search query in localStorage; failures are ignored.
 *
 * @param {*} v - Value to store; null/undefined are stored as "".
 */
export function saveQuery(v) {
  try {
    const value = v ?? "";
    localStorage.setItem(Q_LS_KEY, String(value));
  } catch {
    // Best effort only: a storage failure must not break the caller.
  }
}
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,184 +1,213 @@
|
||||||
/**
 * Reduces a store key or label to a lowercase id containing only a-z and 0-9.
 *
 * @param {*} s - Input; falsy values are treated as "".
 * @returns {string} The normalized id (possibly empty).
 */
function normalizeId(s) {
  const lowered = String(s || "").toLowerCase();
  return lowered.replace(/[^a-z0-9]+/g, "");
}
|
||||||
|
|
||||||
// Map normalized store *labels* to canonical ids used by OVERRIDES.
// Built on a null prototype: the table is indexed with arbitrary normalized
// labels, and on a plain object a label such as "constructor" would resolve
// to an inherited Object.prototype member instead of undefined.
const ALIASES = Object.assign(Object.create(null), {
  strathliquor: "strath",
  vesselliquor: "vessel",
  tudorhouse: "tudor",
  coopworldofwhisky: "coop",

  kensingtonwinemarket: "kensingtonwinemarket",
  gullliquor: "gullliquor",
  legacyliquor: "legacyliquor",
  vintagespirits: "vintagespirits",
  kegncork: "kegncork",

  // short forms
  gull: "gullliquor",
  legacy: "legacyliquor",
  vintage: "vintagespirits",
  kwm: "kensingtonwinemarket",
});

// Pinned colors, keyed by store id. Also a null-prototype dictionary for the
// same reason as ALIASES; Object.keys() still lists exactly these entries.
const OVERRIDES = Object.assign(Object.create(null), {
  strath: "#76B7FF",
  bsw: "#E9DF7A",
  kensingtonwinemarket: "#F2C200",
  vessel: "#FFFFFF",
  gullliquor: "#6B0F1A",
  kegncork: "#111111",
  legacyliquor: "#7B4A12",
  vintagespirits: "#E34A2C",

  craftcellars: "#E31B23",
  maltsandgrains: "#A67C52",

  // aliases
  gull: "#6B0F1A",
  legacy: "#7B4A12",
  vintage: "#E34A2C",
  kwm: "#F2C200",
});

// High-contrast qualitative palette
const PALETTE = [
  "#1F77B4",
  "#FF7F0E",
  "#2CA02C",
  "#D62728",
  "#9467BD",
  "#8C564B",
  "#E377C2",
  "#7F7F7F",
  "#17BECF",
  "#BCBD22",
  "#AEC7E8",
  "#FFBB78",
  "#98DF8A",
  "#FF9896",
  "#C5B0D5",
  "#C49C94",
  "#F7B6D2",
  "#C7C7C7",
  "#9EDAE5",
  "#DBDB8D",
  "#393B79",
  "#637939",
  "#8C6D31",
  "#843C39",
  "#7B4173",
  "#3182BD",
  "#31A354",
  "#756BB1",
  "#636363",
  "#E6550D",
];
|
||||||
|
|
||||||
/**
 * Removes duplicate entries while keeping first-occurrence order.
 *
 * @param {Iterable<*>} arr - Values to de-duplicate.
 * @returns {Array<*>} A new array holding each distinct value once.
 */
function uniq(arr) {
  const distinct = new Set(arr);
  return Array.from(distinct);
}
|
||||||
|
|
||||||
/**
 * Normalizes a store key or label and maps known aliases to their canonical id.
 *
 * Only ALIASES' own entries are consulted, so a label that happens to
 * normalize to an inherited Object property name (for example "constructor")
 * is returned as a plain string id rather than as a prototype member.
 *
 * @param {*} s - Store key or label.
 * @returns {string} The canonical id, or "" when the input normalizes to nothing.
 */
function canonicalId(s) {
  const id = normalizeId(s);
  const alias = Object.prototype.hasOwnProperty.call(ALIASES, id) ? ALIASES[id] : "";
  return alias || id;
}
|
||||||
|
|
||||||
/**
 * Merges two lists of store keys/labels into one list of unique canonical ids.
 *
 * @param {*} base - First list; ignored unless it is an array.
 * @param {*} extra - Second list; ignored unless it is an array.
 * @returns {string[]} Canonical ids, in first-seen order, with entries that
 *   canonicalize to a falsy value dropped.
 */
function buildUniverse(base, extra) {
  const ids = [];
  for (const list of [base, extra]) {
    if (!Array.isArray(list)) continue;
    for (const entry of list) {
      const id = canonicalId(entry);
      if (id) ids.push(id);
    }
  }
  return uniq(ids);
}
|
||||||
|
|
||||||
// Store ids that are always part of the colour universe, in addition to the
// OVERRIDES keys.
const KNOWN_STORE_IDS = [
  "bcl",
  "bsw",
  "coop",
  "craftcellars",
  "gullliquor",
  "gull",
  "kegncork",
  "kwm",
  "kensingtonwinemarket",
  "legacy",
  "legacyliquor",
  "maltsandgrains",
  "sierrasprings",
  "strath",
  "tudor",
  "vessel",
  "vintage",
  "vintagespirits",
  "willowpark",
  "arc",
];

// Keep mapping stable even if page sees a subset
const DEFAULT_UNIVERSE = buildUniverse(Object.keys(OVERRIDES), KNOWN_STORE_IDS);
|
||||||
|
|
||||||
/**
 * Tells whether a colour string is exactly "#FFFFFF", ignoring letter case
 * and surrounding whitespace.
 *
 * @param {*} c - Colour value; falsy values are treated as "".
 * @returns {boolean} True only for the six-digit white hex.
 */
function isWhiteHex(c) {
  const normalized = String(c || "").trim().toUpperCase();
  return normalized === "#FFFFFF";
}
|
||||||
|
|
||||||
/**
 * Builds the store-id -> colour map for the default universe plus any extra
 * store keys/labels supplied by the caller.
 *
 * Ids with an entry in OVERRIDES get that pinned colour. Every other id, in
 * sorted order, takes the next colour from PALETTE after removing colours
 * already pinned and the reserved "#FFFFFF" / "#111111"; the palette is reused
 * cyclically when there are more ids than colours.
 *
 * @param {Array<*>} [extraUniverse=[]] - Additional store keys or labels.
 * @returns {Map<string, string>} Colour for every id in the universe.
 */
export function buildStoreColorMap(extraUniverse = []) {
  const universe = buildUniverse(DEFAULT_UNIVERSE, extraUniverse).sort();

  const used = new Set();
  const map = new Map();

  // Pin overrides first. Own-property check: ids come from arbitrary labels
  // and must not pick up inherited Object members such as "constructor".
  for (const id of universe) {
    if (!Object.prototype.hasOwnProperty.call(OVERRIDES, id)) continue;
    const pinned = OVERRIDES[id];
    if (pinned) {
      map.set(id, pinned);
      used.add(String(pinned).toUpperCase());
    }
  }

  // Filter palette to avoid collisions and keep white/black reserved
  const normalized = PALETTE.map((c) => String(c).toUpperCase());
  const free = normalized.filter((c) => !used.has(c) && c !== "#FFFFFF" && c !== "#111111");
  // If every palette colour is already taken, reuse the whole palette rather
  // than assigning undefined colours.
  const palette = free.length > 0 ? free : normalized;

  let next = 0;
  for (const id of universe) {
    if (map.has(id)) continue;
    map.set(id, palette[next % palette.length]);
    next += 1;
  }

  return map;
}
|
||||||
|
|
||||||
/**
 * Resolves the display colour for a store key or label.
 *
 * Lookup order: pinned colour from OVERRIDES, then the supplied colour map,
 * then a PALETTE entry derived from the id's length and first character.
 *
 * @param {*} storeKeyOrLabel - Store key or human-readable label.
 * @param {{get: function(string): (string|undefined)}} [colorMap] - Optional
 *   map such as the one returned by buildStoreColorMap.
 * @returns {string} A hex colour; "#7F7F7F" when no usable id can be derived.
 */
export function storeColor(storeKeyOrLabel, colorMap) {
  const canonical = canonicalId(storeKeyOrLabel);
  // Guard against a non-string id so the charCodeAt fallback below cannot throw.
  const id = typeof canonical === "string" ? canonical : "";
  if (!id) return "#7F7F7F";

  // Own-property check: labels like "constructor" must not resolve to
  // members inherited from Object.prototype.
  if (Object.prototype.hasOwnProperty.call(OVERRIDES, id) && OVERRIDES[id]) {
    return OVERRIDES[id];
  }

  if (colorMap && typeof colorMap.get === "function") {
    const mapped = colorMap.get(id);
    if (mapped) return mapped;
  }

  return PALETTE[(id.length + id.charCodeAt(0)) % PALETTE.length];
}
|
||||||
|
|
||||||
/**
 * Stroke width for a dataset drawn in the given colour.
 *
 * @param {*} color - Hex colour string.
 * @returns {number} 2.5 when the colour is white (#FFFFFF), otherwise 1.5.
 */
export function datasetStrokeWidth(color) {
  if (isWhiteHex(color)) return 2.5;
  return 1.5;
}
|
||||||
|
|
||||||
/**
 * Point radius for a dataset drawn in the given colour.
 *
 * @param {*} color - Hex colour string.
 * @returns {number} 2.8 when the colour is white (#FFFFFF), otherwise 2.2.
 */
export function datasetPointRadius(color) {
  if (isWhiteHex(color)) return 2.8;
  return 2.2;
}
|
||||||
|
|
||||||
/**
 * Restricts a number to the closed range [lo, hi].
 *
 * @param {number} v - Value to clamp.
 * @param {number} lo - Lower bound.
 * @param {number} hi - Upper bound.
 * @returns {number} v limited to the range (NaN stays NaN).
 */
function clamp(v, lo, hi) {
  const capped = Math.min(hi, v);
  return Math.max(lo, capped);
}
|
||||||
|
|
||||||
/**
 * Parses a hex colour into its red/green/blue channels.
 *
 * Accepts six hex digits or the three-digit shorthand, with or without a
 * leading "#", in any letter case; surrounding whitespace is ignored.
 * Anything else yields null, including strings with non-hex characters,
 * which a bare parseInt would partially parse or turn into NaN.
 *
 * @param {*} hex - Colour string such as "#76B7FF", "76b7ff" or "#fff".
 * @returns {{r: number, g: number, b: number} | null} Channels in 0-255, or
 *   null when the input is not a valid hex colour.
 */
function hexToRgb(hex) {
  const match = /^#?([0-9a-f]{3}|[0-9a-f]{6})$/i.exec(String(hex).trim());
  if (!match) return null;

  // Expand shorthand "abc" to "aabbcc" so both forms share one parse path.
  const digits =
    match[1].length === 3 ? [...match[1]].map((ch) => ch + ch).join("") : match[1];

  const n = parseInt(digits, 16);
  return {
    r: (n >> 16) & 255,
    g: (n >> 8) & 255,
    b: n & 255,
  };
}
|
||||||
|
|
||||||
/**
 * Formats red/green/blue channels as a lowercase "#rrggbb" string.
 *
 * Each channel is rounded and clamped to 0-255 before being written as two
 * hex digits.
 *
 * @param {{r: number, g: number, b: number}} rgb - Channel values.
 * @returns {string} The hex colour.
 */
function rgbToHex({ r, g, b }) {
  const pairs = [r, g, b].map((channel) =>
    clamp(Math.round(channel), 0, 255).toString(16).padStart(2, "0"),
  );
  return `#${pairs.join("")}`;
}
|
||||||
|
|
||||||
/**
 * Lightens a hex colour by mixing it with white.
 *
 * @param {string} hex - Colour to lighten.
 * @param {number} [amount=0.25] - Share of white in the mix (0-1).
 * @returns {string} The lightened colour from rgbToHex, or the input
 *   unchanged when hexToRgb cannot parse it.
 */
export function lighten(hex, amount = 0.25) {
  const rgb = hexToRgb(hex);
  if (!rgb) return hex;

  // Move a channel towards 255 by the requested fraction.
  const towardsWhite = (channel) => channel + (255 - channel) * amount;

  return rgbToHex({
    r: towardsWhite(rgb.r),
    g: towardsWhite(rgb.g),
    b: towardsWhite(rgb.b),
  });
}
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
212
viz/serve.js
212
viz/serve.js
|
|
@ -9,151 +9,151 @@ const root = path.resolve(__dirname); // viz/
|
||||||
// Repo root: the parent of the directory that contains this script.
const projectRoot = path.resolve(__dirname, "..");
|
||||||
|
|
||||||
// Content-Type header value by file extension (with the leading dot).
const MIME = {
  // text formats, always served as UTF-8
  ".html": "text/html; charset=utf-8",
  ".js": "application/javascript; charset=utf-8",
  ".css": "text/css; charset=utf-8",
  ".json": "application/json; charset=utf-8",

  // images
  ".png": "image/png",
  ".jpg": "image/jpeg",
  ".jpeg": "image/jpeg",
  ".svg": "image/svg+xml",
};
|
||||||
|
|
||||||
/**
 * Maps a request URL path onto a file path inside the served directory.
 *
 * @param {string} urlPath - Request path; anything after the first "?" is
 *   ignored and percent-escapes are decoded.
 * @param {string} [base=root] - Directory the result must stay inside.
 * @returns {string|null} The normalized path, or null when the escapes are
 *   malformed, the path contains a NUL byte, or it resolves outside base.
 */
function safePath(urlPath, base = root) {
  let decoded;
  try {
    decoded = decodeURIComponent(String(urlPath).split("?")[0]);
  } catch {
    // Malformed percent-encoding (e.g. "%E0%A4%A") makes decodeURIComponent throw.
    return null;
  }

  // fs calls reject paths containing NUL; refuse them here instead.
  if (decoded.includes("\0")) return null;

  const norm = path.normalize(path.join(base, decoded.replace(/\\/g, "/")));

  // Compare by relative path rather than string prefix: a prefix test would
  // also accept sibling directories such as "<base>-other".
  const rel = path.relative(base, norm);
  if (rel === ".." || rel.startsWith(`..${path.sep}`) || path.isAbsolute(rel)) return null;

  return norm;
}
|
||||||
|
|
||||||
// Location of data/sku_links.json under the project root. It is a
// project-level file, shared by viz + report tooling.
const LINKS_FILE = path.join(projectRoot, "data", "sku_links.json");
|
||||||
|
|
||||||
/**
 * Reads LINKS_FILE and returns its links and ignores lists.
 *
 * A missing file is the normal "nothing saved yet" case and yields an empty
 * result silently. Any other failure (unreadable file, invalid JSON) also
 * yields an empty result but is reported on stderr, because a later write
 * would replace the damaged file with that empty state.
 *
 * @returns {{generatedAt: string, links: Array, ignores: Array}} File
 *   contents; `links` / `ignores` default to [] when absent or not arrays,
 *   and `generatedAt` defaults to the current time.
 */
function readMeta() {
  try {
    const obj = JSON.parse(fs.readFileSync(LINKS_FILE, "utf8"));

    const links = Array.isArray(obj?.links) ? obj.links : [];
    const ignores = Array.isArray(obj?.ignores) ? obj.ignores : [];

    return { generatedAt: obj?.generatedAt || new Date().toISOString(), links, ignores };
  } catch (err) {
    if (!err || err.code !== "ENOENT") {
      const reason = err && err.message ? err.message : String(err);
      process.stderr.write(`Warning: could not read ${LINKS_FILE}: ${reason}\n`);
    }
  }
  return { generatedAt: new Date().toISOString(), links: [], ignores: [] };
}
|
||||||
|
|
||||||
/**
 * Stamps `obj.generatedAt` with the current time and saves obj to LINKS_FILE
 * as pretty-printed JSON, creating the parent directory if needed.
 *
 * The JSON is written to a temporary file next to the target and then renamed
 * over it, so an interrupted write cannot leave a truncated LINKS_FILE behind.
 *
 * @param {Object} obj - Metadata to persist; its generatedAt is overwritten.
 * @throws {Error} If the directory, temporary file or rename fails.
 */
function writeMeta(obj) {
  obj.generatedAt = new Date().toISOString();
  fs.mkdirSync(path.dirname(LINKS_FILE), { recursive: true });

  const payload = `${JSON.stringify(obj, null, 2)}\n`;
  const tmpFile = `${LINKS_FILE}.${process.pid}.tmp`;
  try {
    fs.writeFileSync(tmpFile, payload, "utf8");
    fs.renameSync(tmpFile, LINKS_FILE);
  } catch (err) {
    try {
      fs.unlinkSync(tmpFile);
    } catch {
      // The temporary file may never have been created; nothing to clean up.
    }
    throw err;
  }
}
|
||||||
|
|
||||||
/**
 * Sends a response with the given status and body.
 *
 * @param {Object} res - Response object providing writeHead() and end().
 * @param {number} code - HTTP status code.
 * @param {*} body - Payload passed to res.end().
 * @param {Object} [headers] - Extra headers; they override the default
 *   "text/plain; charset=utf-8" Content-Type when they set the same name.
 */
function send(res, code, body, headers) {
  const merged = Object.assign({ "Content-Type": "text/plain; charset=utf-8" }, headers || {});
  res.writeHead(code, merged);
  res.end(body);
}
|
||||||
|
|
||||||
/**
 * Sends obj as a JSON response with a UTF-8 application/json Content-Type.
 *
 * @param {Object} res - Response object providing writeHead() and end().
 * @param {number} code - HTTP status code.
 * @param {*} obj - Value serialized with JSON.stringify.
 */
function sendJson(res, code, obj) {
  const headers = { "Content-Type": "application/json; charset=utf-8" };
  res.writeHead(code, headers);
  const payload = JSON.stringify(obj);
  res.end(payload);
}
|
||||||
|
|
||||||
// Largest request body, in bytes, accepted by the local write API.
const MAX_BODY_BYTES = 1024 * 1024;

// Buffers a request body (up to MAX_BODY_BYTES), parses it as JSON and calls
// done(err, value) exactly once. An empty body parses as {}.
function readJsonBody(req, done) {
  const chunks = [];
  let size = 0;
  let finished = false;
  const finish = (err, value) => {
    if (finished) return;
    finished = true;
    done(err, value);
  };

  req.on("data", (chunk) => {
    if (finished) return;
    size += chunk.length;
    if (size > MAX_BODY_BYTES) {
      finish(Object.assign(new Error("Request body too large"), { statusCode: 413 }));
      return;
    }
    chunks.push(chunk);
  });
  req.on("error", (err) => finish(err));
  req.on("end", () => {
    try {
      // Decode once over the joined buffers so a multi-byte character split
      // across two chunks is not corrupted.
      const text = Buffer.concat(chunks).toString("utf8");
      finish(null, JSON.parse(text || "{}"));
    } catch (err) {
      finish(err);
    }
  });
}

// Runs handler(parsedBody) for a JSON POST. Body errors and exceptions thrown
// by the handler are answered with { ok: false, error } (400, or 413 when the
// body is too large).
function handleJsonPost(req, res, handler) {
  readJsonBody(req, (err, inp) => {
    if (err) {
      return sendJson(res, err.statusCode || 400, {
        ok: false,
        error: String(err.message ? err.message : err),
      });
    }
    try {
      return handler(inp);
    } catch (e) {
      return sendJson(res, 400, { ok: false, error: String(e && e.message ? e.message : e) });
    }
  });
}

// HTTP server: a small JSON API under /__stviz/ for the SKU links file, and
// static file serving for every other path.
const server = http.createServer((req, res) => {
  const u = req.url || "/";

  let url;
  try {
    url = new URL(u, "http://127.0.0.1");
  } catch {
    // An unparsable request target must not take the whole server down.
    return send(res, 400, "Bad request");
  }

  // Local API: read/write sku links + ignore pairs on disk (only exists when using this local server)

  if (url.pathname === "/__stviz/sku-links") {
    if (req.method === "GET") {
      const obj = readMeta();
      return sendJson(res, 200, { ok: true, count: obj.links.length, links: obj.links, ignores: obj.ignores });
    }

    if (req.method === "POST") {
      return handleJsonPost(req, res, (inp) => {
        const fromSku = String(inp.fromSku || "").trim();
        const toSku = String(inp.toSku || "").trim();
        if (!fromSku || !toSku) return sendJson(res, 400, { ok: false, error: "fromSku/toSku required" });

        const obj = readMeta();
        obj.links.push({ fromSku, toSku, createdAt: new Date().toISOString() });
        writeMeta(obj);

        return sendJson(res, 200, { ok: true, count: obj.links.length, file: "data/sku_links.json" });
      });
    }

    return send(res, 405, "Method Not Allowed");
  }

  if (url.pathname === "/__stviz/sku-ignores") {
    if (req.method === "GET") {
      const obj = readMeta();
      return sendJson(res, 200, { ok: true, count: obj.ignores.length, ignores: obj.ignores });
    }

    if (req.method === "POST") {
      return handleJsonPost(req, res, (inp) => {
        const skuA = String(inp.skuA || "").trim();
        const skuB = String(inp.skuB || "").trim();
        if (!skuA || !skuB) return sendJson(res, 400, { ok: false, error: "skuA/skuB required" });
        if (skuA === skuB) return sendJson(res, 400, { ok: false, error: "skuA and skuB must differ" });

        const obj = readMeta();
        obj.ignores.push({ skuA, skuB, createdAt: new Date().toISOString() });
        writeMeta(obj);

        return sendJson(res, 200, { ok: true, count: obj.ignores.length, file: "data/sku_links.json" });
      });
    }

    return send(res, 405, "Method Not Allowed");
  }

  // Static
  let file;
  try {
    file = safePath(u === "/" ? "/index.html" : u);
  } catch {
    // e.g. malformed percent-encoding in the request path
    file = null;
  }
  if (!file || file.includes("\0")) {
    res.writeHead(400);
    res.end("Bad path");
    return;
  }

  try {
    if (fs.statSync(file).isDirectory()) {
      file = path.join(file, "index.html");
    }
  } catch {
    // Missing or unreadable path: readFile below answers with 404.
  }

  fs.readFile(file, (err, buf) => {
    if (err) {
      res.writeHead(404);
      res.end("Not found");
      return;
    }
    const ext = path.extname(file);
    res.writeHead(200, { "Content-Type": MIME[ext] || "application/octet-stream" });
    res.end(buf);
  });
});
|
||||||
|
|
||||||
// Port comes from the PORT environment variable, defaulting to 8080.
const port = Number(process.env.PORT || 8080);

// Listen on 127.0.0.1 and, once listening, print the served directory, the
// URL and the SKU links file location.
server.listen(port, "127.0.0.1", () => {
  const banner = [
    `Serving ${root} on http://127.0.0.1:${port}\n`,
    `SKU links file: ${LINKS_FILE}\n`,
  ];
  for (const line of banner) {
    process.stdout.write(line);
  }
});
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue