From 7a33d51c909fe11e1594dbb11a98ae0bb3cef716 Mon Sep 17 00:00:00 2001 From: "Brennan Wilkes (Text Groove)" Date: Tue, 10 Feb 2026 16:45:22 -0800 Subject: [PATCH] UX Improvements --- bin/tracker.js | 6 +- src/core/http.js | 474 ++++---- src/core/logger.js | 80 +- src/main.js | 268 ++--- src/stores/arc.js | 632 +++++----- src/stores/bcl.js | 530 +++++---- src/stores/bsw.js | 614 +++++----- src/stores/coop.js | 561 ++++----- src/stores/craftcellars.js | 583 +++++----- src/stores/gull.js | 354 +++--- src/stores/index.js | 36 +- src/stores/kegncork.js | 107 +- src/stores/kwm.js | 266 ++--- src/stores/legacyliquor.js | 369 +++--- src/stores/maltsandgrains.js | 149 +-- src/stores/sierrasprings.js | 546 +++++---- src/stores/strath.js | 704 ++++++------ src/stores/tudor.js | 819 ++++++------- src/stores/vessel.js | 233 ++-- src/stores/vintagespirits.js | 398 ++++--- src/stores/willowpark.js | 365 +++--- src/tracker/category_scan.js | 538 ++++----- src/tracker/db.js | 187 +-- src/tracker/merge.js | 333 +++--- src/tracker/report.js | 418 +++---- src/tracker/run_all.js | 135 ++- src/utils/ansi.js | 20 +- src/utils/args.js | 128 +-- src/utils/async.js | 32 +- src/utils/bytes.js | 12 +- src/utils/html.js | 192 ++-- src/utils/price.js | 20 +- src/utils/sku.js | 117 +- src/utils/sku_map.js | 279 +++-- src/utils/string.js | 22 +- src/utils/text.js | 10 +- src/utils/time.js | 17 +- src/utils/url.js | 70 +- src/utils/woocommerce.js | 70 +- tools/build_common_listings.js | 460 ++++---- tools/build_email_alert.js | 620 +++++----- tools/build_viz_commits.js | 188 +-- tools/build_viz_index.js | 327 +++--- tools/build_viz_recent.js | 729 ++++++------ tools/dedupe_skulinks.js | 82 +- tools/diff_report.js | 495 ++++---- tools/discover_bad_skus.js | 42 +- tools/rank_discrepency.js | 1067 +++++++++-------- tools/stviz_apply_issue_edits.js | 484 ++++---- tracker.js | 6 +- viz/app/api.js | 144 +-- viz/app/catalog.js | 168 +-- viz/app/dom.js | 118 +- viz/app/item_page.js | 1854 +++++++++++++++--------------- viz/app/linker/canonical_pref.js | 119 +- viz/app/linker/price.js | 151 ++- viz/app/linker/similarity.js | 373 +++--- viz/app/linker/size.js | 142 +-- viz/app/linker/store_cache.js | 79 +- viz/app/linker/suggestions.js | 1135 +++++++++--------- viz/app/linker/url_map.js | 62 +- viz/app/linker_page.js | 1335 +++++++++++---------- viz/app/main.js | 26 +- viz/app/mapping.js | 330 +++--- viz/app/pending.js | 290 +++-- viz/app/search_page.js | 860 +++++++------- viz/app/sku.js | 117 +- viz/app/smws.js | 356 +++--- viz/app/state.js | 36 +- viz/app/stats_page.js | 1371 +++++++++++----------- viz/app/storeColors.js | 263 +++-- viz/app/store_page.js | 1453 +++++++++++------------ viz/serve.js | 212 ++-- 73 files changed, 13094 insertions(+), 13094 deletions(-) diff --git a/bin/tracker.js b/bin/tracker.js index f7df7d2..0a9388d 100755 --- a/bin/tracker.js +++ b/bin/tracker.js @@ -4,7 +4,7 @@ const { main } = require("../src/main"); main().catch((e) => { - const msg = e && e.stack ? e.stack : String(e); - console.error(msg); - process.exitCode = 1; + const msg = e && e.stack ? e.stack : String(e); + console.error(msg); + process.exitCode = 1; }); diff --git a/src/core/http.js b/src/core/http.js index a60552c..65450c8 100644 --- a/src/core/http.js +++ b/src/core/http.js @@ -7,327 +7,327 @@ const { setTimeout: setTimeoutCb, clearTimeout } = require("timers"); /* ---------------- Errors ---------------- */ class RetryableError extends Error { - constructor(msg) { - super(msg); - this.name = "RetryableError"; - } + constructor(msg) { + super(msg); + this.name = "RetryableError"; + } } function isRetryable(e) { - if (!e) return false; - if (e.name === "AbortError") return true; - if (e instanceof RetryableError) return true; - const msg = String(e.message || e); - return /ECONNRESET|ENOTFOUND|EAI_AGAIN|ETIMEDOUT|socket hang up|fetch failed/i.test(msg); + if (!e) return false; + if (e.name === "AbortError") return true; + if (e instanceof RetryableError) return true; + const msg = String(e.message || e); + return /ECONNRESET|ENOTFOUND|EAI_AGAIN|ETIMEDOUT|socket hang up|fetch failed/i.test(msg); } /* ---------------- Backoff ---------------- */ function backoffMs(attempt) { - const base = Math.min(12000, 500 * Math.pow(2, attempt)); - const jitter = Math.floor(Math.random() * 400); - return base + jitter; + const base = Math.min(12000, 500 * Math.pow(2, attempt)); + const jitter = Math.floor(Math.random() * 400); + return base + jitter; } function retryAfterMs(res) { - const ra = res?.headers?.get ? res.headers.get("retry-after") : null; - if (!ra) return 0; + const ra = res?.headers?.get ? res.headers.get("retry-after") : null; + if (!ra) return 0; - const secs = Number(String(ra).trim()); - if (Number.isFinite(secs)) return Math.max(0, secs * 1000); + const secs = Number(String(ra).trim()); + if (Number.isFinite(secs)) return Math.max(0, secs * 1000); - const dt = Date.parse(String(ra)); - if (Number.isFinite(dt)) return Math.max(0, dt - Date.now()); + const dt = Date.parse(String(ra)); + if (Number.isFinite(dt)) return Math.max(0, dt - Date.now()); - return 0; + return 0; } /* ---------------- Utils ---------------- */ async function safeText(res) { - try { - return await res.text(); - } catch { - return ""; - } + try { + return await res.text(); + } catch { + return ""; + } } function hostFromUrl(u) { - try { - return new URL(u).host || ""; - } catch { - return ""; - } + try { + return new URL(u).host || ""; + } catch { + return ""; + } } /* ---------------- Cookies (simple jar) ---------------- */ // host -> Map(cookieName -> "name=value") function createCookieJar() { - const jar = new Map(); + const jar = new Map(); - function parseSetCookieLine(line) { - const s = String(line || "").trim(); - if (!s) return null; - const first = s.split(";")[0] || ""; - const eq = first.indexOf("="); - if (eq <= 0) return null; - const name = first.slice(0, eq).trim(); - const value = first.slice(eq + 1).trim(); - if (!name) return null; - return { name, pair: `${name}=${value}` }; - } + function parseSetCookieLine(line) { + const s = String(line || "").trim(); + if (!s) return null; + const first = s.split(";")[0] || ""; + const eq = first.indexOf("="); + if (eq <= 0) return null; + const name = first.slice(0, eq).trim(); + const value = first.slice(eq + 1).trim(); + if (!name) return null; + return { name, pair: `${name}=${value}` }; + } - function getSetCookieArray(headers) { - if (headers && typeof headers.getSetCookie === "function") { - try { - const arr = headers.getSetCookie(); - return Array.isArray(arr) ? arr : []; - } catch {} - } + function getSetCookieArray(headers) { + if (headers && typeof headers.getSetCookie === "function") { + try { + const arr = headers.getSetCookie(); + return Array.isArray(arr) ? arr : []; + } catch {} + } - const one = headers?.get ? headers.get("set-cookie") : null; - if (!one) return []; + const one = headers?.get ? headers.get("set-cookie") : null; + if (!one) return []; - return String(one) - .split(/,(?=[^;,]*=)/g) - .map((x) => x.trim()) - .filter(Boolean); - } + return String(one) + .split(/,(?=[^;,]*=)/g) + .map((x) => x.trim()) + .filter(Boolean); + } - function storeFromResponse(url, res) { - const host = hostFromUrl(res?.url || url); - if (!host) return; + function storeFromResponse(url, res) { + const host = hostFromUrl(res?.url || url); + if (!host) return; - const lines = getSetCookieArray(res?.headers); - if (!lines.length) return; + const lines = getSetCookieArray(res?.headers); + if (!lines.length) return; - let m = jar.get(host); - if (!m) { - m = new Map(); - jar.set(host, m); - } + let m = jar.get(host); + if (!m) { + m = new Map(); + jar.set(host, m); + } - for (const line of lines) { - const c = parseSetCookieLine(line); - if (c) m.set(c.name, c.pair); - } - } + for (const line of lines) { + const c = parseSetCookieLine(line); + if (c) m.set(c.name, c.pair); + } + } - function cookieHeaderFor(url) { - const host = hostFromUrl(url); - if (!host) return ""; - const m = jar.get(host); - if (!m || m.size === 0) return ""; - return [...m.values()].join("; "); - } + function cookieHeaderFor(url) { + const host = hostFromUrl(url); + if (!host) return ""; + const m = jar.get(host); + if (!m || m.size === 0) return ""; + return [...m.values()].join("; "); + } - return { storeFromResponse, cookieHeaderFor }; + return { storeFromResponse, cookieHeaderFor }; } /* ---------------- HTTP client ---------------- */ function createHttpClient({ maxRetries, timeoutMs, defaultUa, logger }) { - let inflight = 0; - let reqSeq = 0; + let inflight = 0; + let reqSeq = 0; - const cookieJar = createCookieJar(); + const cookieJar = createCookieJar(); - // host -> epoch ms when next request is allowed - const hostNextOkAt = new Map(); + // host -> epoch ms when next request is allowed + const hostNextOkAt = new Map(); - // Conservative pacing defaults (slow > blocked) - const minHostIntervalMs = 2500; + // Conservative pacing defaults (slow > blocked) + const minHostIntervalMs = 2500; - // Per-host inflight clamp (prevents bursts when global concurrency is high) - const hostInflight = new Map(); - const maxHostInflight = 1; + // Per-host inflight clamp (prevents bursts when global concurrency is high) + const hostInflight = new Map(); + const maxHostInflight = 1; - function inflightStr() { - return `inflight=${inflight}`; - } + function inflightStr() { + return `inflight=${inflight}`; + } - async function acquireHost(url) { - const host = hostFromUrl(url); - if (!host) return () => {}; + async function acquireHost(url) { + const host = hostFromUrl(url); + if (!host) return () => {}; - while (true) { - const cur = hostInflight.get(host) || 0; - if (cur < maxHostInflight) { - hostInflight.set(host, cur + 1); - return () => { - const n = (hostInflight.get(host) || 1) - 1; - if (n <= 0) hostInflight.delete(host); - else hostInflight.set(host, n); - }; - } - await sleep(50); - } - } + while (true) { + const cur = hostInflight.get(host) || 0; + if (cur < maxHostInflight) { + hostInflight.set(host, cur + 1); + return () => { + const n = (hostInflight.get(host) || 1) - 1; + if (n <= 0) hostInflight.delete(host); + else hostInflight.set(host, n); + }; + } + await sleep(50); + } + } - // ✅ Pre-pacing reservation: reserve the next slot BEFORE the fetch is sent - async function throttleHost(url) { - const host = hostFromUrl(url); - if (!host) return; + // ✅ Pre-pacing reservation: reserve the next slot BEFORE the fetch is sent + async function throttleHost(url) { + const host = hostFromUrl(url); + if (!host) return; - while (true) { - const now = Date.now(); - const next = hostNextOkAt.get(host) || 0; - const wait = next - now; + while (true) { + const now = Date.now(); + const next = hostNextOkAt.get(host) || 0; + const wait = next - now; - if (wait > 0) { - logger?.dbg?.(`THROTTLE host=${host} wait=${wait}ms`); - await sleep(wait); - continue; - } + if (wait > 0) { + logger?.dbg?.(`THROTTLE host=${host} wait=${wait}ms`); + await sleep(wait); + continue; + } - // Reserve immediately to prevent concurrent pass-through - hostNextOkAt.set(host, now + minHostIntervalMs); - return; - } - } + // Reserve immediately to prevent concurrent pass-through + hostNextOkAt.set(host, now + minHostIntervalMs); + return; + } + } - function noteHost(url, extraDelayMs = 0) { - const host = hostFromUrl(url); - if (!host) return; + function noteHost(url, extraDelayMs = 0) { + const host = hostFromUrl(url); + if (!host) return; - const now = Date.now(); - const current = hostNextOkAt.get(host) || 0; + const now = Date.now(); + const current = hostNextOkAt.get(host) || 0; - // Extend (never shorten) any existing cooldown - const target = now + minHostIntervalMs + Math.max(0, extraDelayMs); - hostNextOkAt.set(host, Math.max(current, target)); + // Extend (never shorten) any existing cooldown + const target = now + minHostIntervalMs + Math.max(0, extraDelayMs); + hostNextOkAt.set(host, Math.max(current, target)); - logger?.dbg?.(`HOST-PACE host=${host} nextOkIn=${Math.max(0, (hostNextOkAt.get(host) || 0) - Date.now())}ms`); - } + logger?.dbg?.(`HOST-PACE host=${host} nextOkIn=${Math.max(0, (hostNextOkAt.get(host) || 0) - Date.now())}ms`); + } - async function fetchWithRetry( - url, - tag, - ua, - { mode = "text", method = "GET", headers = {}, body = null, cookies = true } = {} - ) { - for (let attempt = 0; attempt <= maxRetries; attempt++) { - const reqId = ++reqSeq; - const start = Date.now(); + async function fetchWithRetry( + url, + tag, + ua, + { mode = "text", method = "GET", headers = {}, body = null, cookies = true } = {}, + ) { + for (let attempt = 0; attempt <= maxRetries; attempt++) { + const reqId = ++reqSeq; + const start = Date.now(); - inflight++; - logger?.dbg?.(`REQ#${reqId} START ${tag} attempt=${attempt + 1}/${maxRetries + 1} ${url} (${inflightStr()})`); + inflight++; + logger?.dbg?.( + `REQ#${reqId} START ${tag} attempt=${attempt + 1}/${maxRetries + 1} ${url} (${inflightStr()})`, + ); - const releaseHost = await acquireHost(url); + const releaseHost = await acquireHost(url); - try { - await throttleHost(url); + try { + await throttleHost(url); - const ctrl = new AbortController(); - const t = setTimeoutCb(() => ctrl.abort(), timeoutMs); + const ctrl = new AbortController(); + const t = setTimeoutCb(() => ctrl.abort(), timeoutMs); - const cookieHdr = - cookies && !("Cookie" in headers) && !("cookie" in headers) ? cookieJar.cookieHeaderFor(url) : ""; + const cookieHdr = + cookies && !("Cookie" in headers) && !("cookie" in headers) ? cookieJar.cookieHeaderFor(url) : ""; - const res = await fetch(url, { - method, - redirect: "follow", - headers: { - "user-agent": ua || defaultUa, - "accept-language": "en-US,en;q=0.9", - ...(mode === "text" - ? { accept: "text/html,application/xhtml+xml", "cache-control": "no-cache" } - : { accept: "application/json, text/plain, */*" }), - ...(cookieHdr ? { cookie: cookieHdr } : {}), - ...headers, - }, - body, - signal: ctrl.signal, - }).finally(() => clearTimeout(t)); + const res = await fetch(url, { + method, + redirect: "follow", + headers: { + "user-agent": ua || defaultUa, + "accept-language": "en-US,en;q=0.9", + ...(mode === "text" + ? { accept: "text/html,application/xhtml+xml", "cache-control": "no-cache" } + : { accept: "application/json, text/plain, */*" }), + ...(cookieHdr ? { cookie: cookieHdr } : {}), + ...headers, + }, + body, + signal: ctrl.signal, + }).finally(() => clearTimeout(t)); - const status = res.status; - const finalUrl = res.url || url; - const elapsed = Date.now() - start; + const status = res.status; + const finalUrl = res.url || url; + const elapsed = Date.now() - start; - // Always pace the host a bit after any response - noteHost(finalUrl); - if (cookies) cookieJar.storeFromResponse(url, res); + // Always pace the host a bit after any response + noteHost(finalUrl); + if (cookies) cookieJar.storeFromResponse(url, res); - logger?.dbg?.(`REQ#${reqId} HTTP ${status} ${tag} ms=${elapsed} finalUrl=${finalUrl}`); + logger?.dbg?.(`REQ#${reqId} HTTP ${status} ${tag} ms=${elapsed} finalUrl=${finalUrl}`); - if (status === 429) { - let raMs = retryAfterMs(res); + if (status === 429) { + let raMs = retryAfterMs(res); - // ✅ If no Retry-After header, enforce a real cooldown (Shopify often omits it) - if (raMs <= 0) raMs = 15000 + Math.floor(Math.random() * 5000); + // ✅ If no Retry-After header, enforce a real cooldown (Shopify often omits it) + if (raMs <= 0) raMs = 15000 + Math.floor(Math.random() * 5000); - noteHost(finalUrl, raMs); - logger?.dbg?.(`REQ#${reqId} 429 retryAfterMs=${raMs} host=${hostFromUrl(finalUrl)}`); - throw new RetryableError("HTTP 429"); - } + noteHost(finalUrl, raMs); + logger?.dbg?.(`REQ#${reqId} 429 retryAfterMs=${raMs} host=${hostFromUrl(finalUrl)}`); + throw new RetryableError("HTTP 429"); + } - if (status === 408 || (status >= 500 && status <= 599)) { - throw new RetryableError(`HTTP ${status}`); - } + if (status === 408 || (status >= 500 && status <= 599)) { + throw new RetryableError(`HTTP ${status}`); + } - if (status >= 400) { - const bodyTxt = await safeText(res); - throw new Error( - `HTTP ${status} bodyHead=${String(bodyTxt).slice(0, 160).replace(/\s+/g, " ")}` - ); - } + if (status >= 400) { + const bodyTxt = await safeText(res); + throw new Error(`HTTP ${status} bodyHead=${String(bodyTxt).slice(0, 160).replace(/\s+/g, " ")}`); + } - if (mode === "json") { - const txt = await res.text(); - let json; - try { - json = JSON.parse(txt); - } catch (e) { - throw new RetryableError(`Bad JSON: ${e?.message || e}`); - } - return { json, ms: elapsed, bytes: txt.length, status, finalUrl }; - } + if (mode === "json") { + const txt = await res.text(); + let json; + try { + json = JSON.parse(txt); + } catch (e) { + throw new RetryableError(`Bad JSON: ${e?.message || e}`); + } + return { json, ms: elapsed, bytes: txt.length, status, finalUrl }; + } - const text = await res.text(); - if (!text || text.length < 200) { - throw new RetryableError(`Short HTML bytes=${text.length}`); - } + const text = await res.text(); + if (!text || text.length < 200) { + throw new RetryableError(`Short HTML bytes=${text.length}`); + } - return { text, ms: elapsed, bytes: text.length, status, finalUrl }; - } catch (e) { - const retryable = isRetryable(e); - const host = hostFromUrl(url); - const nextOk = hostNextOkAt.get(host) || 0; + return { text, ms: elapsed, bytes: text.length, status, finalUrl }; + } catch (e) { + const retryable = isRetryable(e); + const host = hostFromUrl(url); + const nextOk = hostNextOkAt.get(host) || 0; - logger?.dbg?.( - `REQ#${reqId} FAIL ${tag} retryable=${retryable} err=${e?.message || e} host=${host} nextOkIn=${Math.max( - 0, - nextOk - Date.now() - )}ms` - ); + logger?.dbg?.( + `REQ#${reqId} FAIL ${tag} retryable=${retryable} err=${e?.message || e} host=${host} nextOkIn=${Math.max( + 0, + nextOk - Date.now(), + )}ms`, + ); - if (!retryable || attempt === maxRetries) throw e; + if (!retryable || attempt === maxRetries) throw e; - let delay = backoffMs(attempt); - if (nextOk > Date.now()) delay = Math.max(delay, nextOk - Date.now()); + let delay = backoffMs(attempt); + if (nextOk > Date.now()) delay = Math.max(delay, nextOk - Date.now()); - logger?.warn?.(`Request failed, retrying in ${delay}ms (${attempt + 1}/${maxRetries})`); - await sleep(delay); - } finally { - releaseHost(); - inflight--; - logger?.dbg?.(`REQ#${reqId} END ${tag} (${inflightStr()})`); - } - } + logger?.warn?.(`Request failed, retrying in ${delay}ms (${attempt + 1}/${maxRetries})`); + await sleep(delay); + } finally { + releaseHost(); + inflight--; + logger?.dbg?.(`REQ#${reqId} END ${tag} (${inflightStr()})`); + } + } - throw new Error("unreachable"); - } + throw new Error("unreachable"); + } - function fetchTextWithRetry(url, tag, ua, opts) { - return fetchWithRetry(url, tag, ua, { mode: "text", ...(opts || {}) }); - } + function fetchTextWithRetry(url, tag, ua, opts) { + return fetchWithRetry(url, tag, ua, { mode: "text", ...(opts || {}) }); + } - function fetchJsonWithRetry(url, tag, ua, opts) { - return fetchWithRetry(url, tag, ua, { mode: "json", ...(opts || {}) }); - } + function fetchJsonWithRetry(url, tag, ua, opts) { + return fetchWithRetry(url, tag, ua, { mode: "json", ...(opts || {}) }); + } - return { fetchTextWithRetry, fetchJsonWithRetry, inflightStr }; + return { fetchTextWithRetry, fetchJsonWithRetry, inflightStr }; } module.exports = { createHttpClient, RetryableError }; diff --git a/src/core/logger.js b/src/core/logger.js index a2cd0b9..afab36f 100644 --- a/src/core/logger.js +++ b/src/core/logger.js @@ -4,55 +4,55 @@ const { C, color } = require("../utils/ansi"); const { ts } = require("../utils/time"); function createLogger({ debug = false, colorize: wantColor = true } = {}) { - const isTTY = Boolean(process.stdout && process.stdout.isTTY); - const enabled = Boolean(wantColor && isTTY); + const isTTY = Boolean(process.stdout && process.stdout.isTTY); + const enabled = Boolean(wantColor && isTTY); - function ok(msg) { - console.log(color(`[OK ${ts()}] `, C.green, enabled) + String(msg)); - } + function ok(msg) { + console.log(color(`[OK ${ts()}] `, C.green, enabled) + String(msg)); + } - function warn(msg) { - console.log(color(`[WARN ${ts()}] `, C.yellow, enabled) + String(msg)); - } + function warn(msg) { + console.log(color(`[WARN ${ts()}] `, C.yellow, enabled) + String(msg)); + } - function err(msg) { - console.error(color(`[ERR ${ts()}] `, C.red, enabled) + String(msg)); - } + function err(msg) { + console.error(color(`[ERR ${ts()}] `, C.red, enabled) + String(msg)); + } - function info(msg) { - if (debug) console.log(color(`[INFO ${ts()}] `, C.cyan, enabled) + String(msg)); - } + function info(msg) { + if (debug) console.log(color(`[INFO ${ts()}] `, C.cyan, enabled) + String(msg)); + } - function dbg(msg) { - if (debug) console.log(color(`[DEBUG ${ts()}] `, C.gray, enabled) + String(msg)); - } + function dbg(msg) { + if (debug) console.log(color(`[DEBUG ${ts()}] `, C.gray, enabled) + String(msg)); + } - function dim(s) { - return color(s, C.dim, enabled); - } + function dim(s) { + return color(s, C.dim, enabled); + } - function bold(s) { - return color(s, C.bold, enabled); - } + function bold(s) { + return color(s, C.bold, enabled); + } - function paint(s, code) { - return color(s, code, enabled); - } + function paint(s, code) { + return color(s, code, enabled); + } - return { - debug, - isTTY, - colorize: enabled, - C, - ok, - warn, - err, - info, - dbg, - dim, - bold, - color: paint, - }; + return { + debug, + isTTY, + colorize: enabled, + C, + ok, + warn, + err, + info, + dbg, + dim, + bold, + color: paint, + }; } module.exports = { createLogger }; diff --git a/src/main.js b/src/main.js index fa5f793..835ceb8 100644 --- a/src/main.js +++ b/src/main.js @@ -16,189 +16,169 @@ const { runAllStores } = require("./tracker/run_all"); const { renderFinalReport } = require("./tracker/report"); const { ensureDir } = require("./tracker/db"); -const DEFAULT_UA = - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0 Safari/537.36"; +const DEFAULT_UA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0 Safari/537.36"; function resolveDir(p, fallback) { - const v = String(p || "").trim(); - if (!v) return fallback; - return path.isAbsolute(v) ? v : path.join(process.cwd(), v); + const v = String(p || "").trim(); + if (!v) return fallback; + return path.isAbsolute(v) ? v : path.join(process.cwd(), v); } function getFlagValue(argv, flag) { - // Supports: - // --stores=a,b - // --stores a,b - const idx = argv.indexOf(flag); - if (idx >= 0) return argv[idx + 1] || ""; - const pref = `${flag}=`; - for (const a of argv) { - if (a.startsWith(pref)) return a.slice(pref.length); - } - return ""; + // Supports: + // --stores=a,b + // --stores a,b + const idx = argv.indexOf(flag); + if (idx >= 0) return argv[idx + 1] || ""; + const pref = `${flag}=`; + for (const a of argv) { + if (a.startsWith(pref)) return a.slice(pref.length); + } + return ""; } function normToken(s) { - return String(s || "") - .toLowerCase() - .trim() - .replace(/[^a-z0-9]+/g, ""); + return String(s || "") + .toLowerCase() + .trim() + .replace(/[^a-z0-9]+/g, ""); } function parseStoresFilter(raw) { - const v = String(raw || "").trim(); - if (!v) return []; - return v - .split(",") - .map((x) => x.trim()) - .filter(Boolean); + const v = String(raw || "").trim(); + if (!v) return []; + return v + .split(",") + .map((x) => x.trim()) + .filter(Boolean); } function filterStoresOrThrow(stores, wantedListRaw) { - const wanted = parseStoresFilter(wantedListRaw); - if (!wanted.length) return stores; + const wanted = parseStoresFilter(wantedListRaw); + if (!wanted.length) return stores; - const wantedNorm = wanted.map(normToken).filter(Boolean); + const wantedNorm = wanted.map(normToken).filter(Boolean); - const matched = []; - const missing = []; + const matched = []; + const missing = []; - for (let i = 0; i < wanted.length; i++) { - const w = wanted[i]; - const wn = wantedNorm[i]; - if (!wn) continue; + for (let i = 0; i < wanted.length; i++) { + const w = wanted[i]; + const wn = wantedNorm[i]; + if (!wn) continue; - // match against key/name/host (normalized) - const hit = stores.find((s) => { - const candidates = [s.key, s.name, s.host].map(normToken).filter(Boolean); - return candidates.includes(wn); - }); + // match against key/name/host (normalized) + const hit = stores.find((s) => { + const candidates = [s.key, s.name, s.host].map(normToken).filter(Boolean); + return candidates.includes(wn); + }); - if (hit) matched.push(hit); - else missing.push(w); - } + if (hit) matched.push(hit); + else missing.push(w); + } - if (missing.length) { - const avail = stores - .map((s) => `${s.key}${s.name ? ` (${s.name})` : ""}`) - .join(", "); - throw new Error( - `Unknown store(s) in --stores: ${missing.join(", ")}\nAvailable: ${avail}` - ); - } + if (missing.length) { + const avail = stores.map((s) => `${s.key}${s.name ? ` (${s.name})` : ""}`).join(", "); + throw new Error(`Unknown store(s) in --stores: ${missing.join(", ")}\nAvailable: ${avail}`); + } - // de-dupe by key (in case name+key both matched) - const uniq = []; - const seen = new Set(); - for (const s of matched) { - if (seen.has(s.key)) continue; - seen.add(s.key); - uniq.push(s); - } - return uniq; + // de-dupe by key (in case name+key both matched) + const uniq = []; + const seen = new Set(); + for (const s of matched) { + if (seen.has(s.key)) continue; + seen.add(s.key); + uniq.push(s); + } + return uniq; } async function main() { - if (typeof fetch !== "function") { - throw new Error( - "Global fetch() not found. Please use Node.js 18+ (or newer). " - ); - } + if (typeof fetch !== "function") { + throw new Error("Global fetch() not found. Please use Node.js 18+ (or newer). "); + } - const argv = process.argv.slice(2); - const args = parseArgs(argv); + const argv = process.argv.slice(2); + const args = parseArgs(argv); - const logger = createLogger({ debug: args.debug, colorize: true }); + const logger = createLogger({ debug: args.debug, colorize: true }); - const config = { - debug: args.debug, - maxPages: args.maxPages, - concurrency: args.concurrency ?? clampInt(process.env.CONCURRENCY, 6, 1, 64), - staggerMs: - args.staggerMs ?? clampInt(process.env.STAGGER_MS, 150, 0, 5000), - maxRetries: clampInt(process.env.MAX_RETRIES, 6, 0, 20), - timeoutMs: clampInt(process.env.TIMEOUT_MS, 25000, 1000, 120000), - discoveryGuess: - args.guess ?? clampInt(process.env.DISCOVERY_GUESS, 20, 1, 5000), - discoveryStep: - args.step ?? clampInt(process.env.DISCOVERY_STEP, 5, 1, 500), - categoryConcurrency: clampInt(process.env.CATEGORY_CONCURRENCY, 5, 1, 64), - defaultUa: DEFAULT_UA, - defaultParseProducts: parseProductsSierra, - dbDir: resolveDir( - args.dataDir ?? process.env.DATA_DIR, - path.join(process.cwd(), "data", "db") - ), - reportDir: resolveDir( - args.reportDir ?? process.env.REPORT_DIR, - path.join(process.cwd(), "reports") - ), - }; + const config = { + debug: args.debug, + maxPages: args.maxPages, + concurrency: args.concurrency ?? clampInt(process.env.CONCURRENCY, 6, 1, 64), + staggerMs: args.staggerMs ?? clampInt(process.env.STAGGER_MS, 150, 0, 5000), + maxRetries: clampInt(process.env.MAX_RETRIES, 6, 0, 20), + timeoutMs: clampInt(process.env.TIMEOUT_MS, 25000, 1000, 120000), + discoveryGuess: args.guess ?? clampInt(process.env.DISCOVERY_GUESS, 20, 1, 5000), + discoveryStep: args.step ?? clampInt(process.env.DISCOVERY_STEP, 5, 1, 500), + categoryConcurrency: clampInt(process.env.CATEGORY_CONCURRENCY, 5, 1, 64), + defaultUa: DEFAULT_UA, + defaultParseProducts: parseProductsSierra, + dbDir: resolveDir(args.dataDir ?? process.env.DATA_DIR, path.join(process.cwd(), "data", "db")), + reportDir: resolveDir(args.reportDir ?? process.env.REPORT_DIR, path.join(process.cwd(), "reports")), + }; - ensureDir(config.dbDir); - ensureDir(config.reportDir); + ensureDir(config.dbDir); + ensureDir(config.reportDir); - const http = createHttpClient({ - maxRetries: config.maxRetries, - timeoutMs: config.timeoutMs, - defaultUa: config.defaultUa, - logger, - }); - const stores = createStores({ defaultUa: config.defaultUa }); + const http = createHttpClient({ + maxRetries: config.maxRetries, + timeoutMs: config.timeoutMs, + defaultUa: config.defaultUa, + logger, + }); + const stores = createStores({ defaultUa: config.defaultUa }); - const storesFilterRaw = - getFlagValue(argv, "--stores") || String(process.env.STORES || "").trim(); + const storesFilterRaw = getFlagValue(argv, "--stores") || String(process.env.STORES || "").trim(); - const storesToRun = filterStoresOrThrow(stores, storesFilterRaw); - if (storesFilterRaw) { - logger.info(`Stores filter: ${storesToRun.map((s) => s.key).join(", ")}`); - } + const storesToRun = filterStoresOrThrow(stores, storesFilterRaw); + if (storesFilterRaw) { + logger.info(`Stores filter: ${storesToRun.map((s) => s.key).join(", ")}`); + } - const report = await runAllStores(storesToRun, { config, logger, http }); + const report = await runAllStores(storesToRun, { config, logger, http }); - const meaningful = - (report?.totals?.newCount || 0) + - (report?.totals?.updatedCount || 0) + - (report?.totals?.removedCount || 0) + - (report?.totals?.restoredCount || 0) + - (report?.totals?.metaChangedCount || 0) > - 0; + const meaningful = + (report?.totals?.newCount || 0) + + (report?.totals?.updatedCount || 0) + + (report?.totals?.removedCount || 0) + + (report?.totals?.restoredCount || 0) + + (report?.totals?.metaChangedCount || 0) > + 0; - const reportTextColor = renderFinalReport(report, { - dbDir: config.dbDir, - colorize: logger.colorize, - }); - process.stdout.write(reportTextColor); + const reportTextColor = renderFinalReport(report, { + dbDir: config.dbDir, + colorize: logger.colorize, + }); + process.stdout.write(reportTextColor); - if (!meaningful) { - logger.ok("No meaningful changes; skipping report write."); - process.exitCode = 3; // special "no-op" code - return; - } + if (!meaningful) { + logger.ok("No meaningful changes; skipping report write."); + process.exitCode = 3; // special "no-op" code + return; + } - const reportTextPlain = renderFinalReport(report, { - dbDir: config.dbDir, - colorize: false, - }); - const file = path.join( - config.reportDir, - `${isoTimestampFileSafe(new Date())}.txt` - ); - try { - fs.writeFileSync(file, reportTextPlain, "utf8"); - logger.ok(`Report saved: ${logger.dim(file)}`); - } catch (e) { - logger.warn(`Report save failed: ${e?.message || e}`); - } + const reportTextPlain = renderFinalReport(report, { + dbDir: config.dbDir, + colorize: false, + }); + const file = path.join(config.reportDir, `${isoTimestampFileSafe(new Date())}.txt`); + try { + fs.writeFileSync(file, reportTextPlain, "utf8"); + logger.ok(`Report saved: ${logger.dim(file)}`); + } catch (e) { + logger.warn(`Report save failed: ${e?.message || e}`); + } } module.exports = { main }; if (require.main === module) { - main().catch((e) => { - const msg = e && e.stack ? e.stack : String(e); - // no logger here; keep simple - console.error(msg); - process.exitCode = 1; - }); + main().catch((e) => { + const msg = e && e.stack ? e.stack : String(e); + // no logger here; keep simple + console.error(msg); + process.exitCode = 1; + }); } diff --git a/src/stores/arc.js b/src/stores/arc.js index 5e4f35b..a45d36b 100644 --- a/src/stores/arc.js +++ b/src/stores/arc.js @@ -11,368 +11,374 @@ const { buildDbObject, writeJsonAtomic } = require("../tracker/db"); const { addCategoryResultToReport } = require("../tracker/report"); function kbStr(bytes) { - return humanBytes(bytes || 0).padStart(8, " "); + return humanBytes(bytes || 0).padStart(8, " "); } function secStr(ms) { - const s = Number.isFinite(ms) ? ms / 1000 : 0; - const tenths = Math.round(s * 10) / 10; - const out = tenths < 10 ? `${tenths.toFixed(1)}s` : `${Math.round(s)}s`; - return out.padStart(7, " "); + const s = Number.isFinite(ms) ? ms / 1000 : 0; + const tenths = Math.round(s * 10) / 10; + const out = tenths < 10 ? `${tenths.toFixed(1)}s` : `${Math.round(s)}s`; + return out.padStart(7, " "); } function pageStr(i, total) { - const leftW = String(total).length; - return `${padLeft(i, leftW)}/${total}`; + const leftW = String(total).length; + return `${padLeft(i, leftW)}/${total}`; } function pctStr(done, total) { - const pct = total ? Math.floor((done / total) * 100) : 0; - return `${padLeft(pct, 3)}%`; + const pct = total ? Math.floor((done / total) * 100) : 0; + return `${padLeft(pct, 3)}%`; } function toNum(v) { - const s = String(v ?? "").trim(); - if (!s) return NaN; - const n = Number(s.replace(/[^0-9.]/g, "")); - return Number.isFinite(n) ? n : NaN; + const s = String(v ?? "").trim(); + if (!s) return NaN; + const n = Number(s.replace(/[^0-9.]/g, "")); + return Number.isFinite(n) ? n : NaN; } function money(v) { - const n = toNum(v); - if (!Number.isFinite(n) || n <= 0) return ""; - return `$${n.toFixed(2)}`; + const n = toNum(v); + if (!Number.isFinite(n) || n <= 0) return ""; + return `$${n.toFixed(2)}`; } function pickBestPrice(p) { - const reg = toNum(p?.regular_price); - const sale = toNum(p?.sale_price); - const net = toNum(p?.net_price); + const reg = toNum(p?.regular_price); + const sale = toNum(p?.sale_price); + const net = toNum(p?.net_price); - // Prefer sale when it looks real (is_sale OR sale < regular), otherwise net, otherwise regular. - if (Number.isFinite(sale) && sale > 0) { - if (p?.is_sale === true) return money(sale); - if (Number.isFinite(reg) && reg > 0 && sale < reg) return money(sale); - // Some feeds put the current price in sale_price even without flags: - if (!Number.isFinite(net) || net <= 0 || sale <= net) return money(sale); - } + // Prefer sale when it looks real (is_sale OR sale < regular), otherwise net, otherwise regular. + if (Number.isFinite(sale) && sale > 0) { + if (p?.is_sale === true) return money(sale); + if (Number.isFinite(reg) && reg > 0 && sale < reg) return money(sale); + // Some feeds put the current price in sale_price even without flags: + if (!Number.isFinite(net) || net <= 0 || sale <= net) return money(sale); + } - if (Number.isFinite(net) && net > 0) return money(net); - if (Number.isFinite(reg) && reg > 0) return money(reg); + if (Number.isFinite(net) && net > 0) return money(net); + if (Number.isFinite(reg) && reg > 0) return money(reg); - return ""; + return ""; } function normAbsUrl(raw, base) { - const s = String(raw || "").trim(); - if (!s) return ""; - if (s.startsWith("//")) return `https:${s}`; - if (/^https?:\/\//i.test(s)) return s; - try { - return new URL(s.replace(/^\/+/, ""), base).toString(); - } catch { - return s; - } + const s = String(raw || "").trim(); + if (!s) return ""; + if (s.startsWith("//")) return `https:${s}`; + if (/^https?:\/\//i.test(s)) return s; + try { + return new URL(s.replace(/^\/+/, ""), base).toString(); + } catch { + return s; + } } function isInStock(p) { - // Keep this strict: user asked "only show in stock items". - // available_for_sale is the strongest signal; on_hand is a good secondary signal. - if (p && p.available_for_sale === false) return false; + // Keep this strict: user asked "only show in stock items". + // available_for_sale is the strongest signal; on_hand is a good secondary signal. + if (p && p.available_for_sale === false) return false; - const onHand = Number(p?.on_hand); - if (Number.isFinite(onHand)) return onHand > 0; + const onHand = Number(p?.on_hand); + if (Number.isFinite(onHand)) return onHand > 0; - // If on_hand is missing, fall back to available_for_sale truthiness. - return Boolean(p?.available_for_sale); + // If on_hand is missing, fall back to available_for_sale truthiness. + return Boolean(p?.available_for_sale); } function arcNormalizeImg(raw) { - const s = String(raw || "").trim(); - if (!s) return ""; - - // already public - if (/^https?:\/\/s\.barnetnetwork\.com\/img\/m\//i.test(s)) return s; - - // site-relative -> public CDN - const noProto = s.replace(/^https?:\/\/[^/]+/i, ""); - const rel = noProto.replace(/^\/+/, ""); - - // common case: "custom/all/BC398280.png" OR "bc_lrs/000046/0000466854.jpg" - if (/^(custom\/|bc_lrs\/)/i.test(rel)) { - return `https://s.barnetnetwork.com/img/m/${rel}`; - } - - // fallback: if it's any path, still try the CDN - if (rel && !/^data:/i.test(rel)) return `https://s.barnetnetwork.com/img/m/${rel}`; - - return ""; - } - - function arcItemToTracked(p, ctx) { - if (!p) return null; - if (!isInStock(p)) return null; - - const url = normAbsUrl(p.url, `https://${ctx.store.host}/`); - if (!url) return null; - - const name = cleanText(p.description || p.name || ""); - if (!name) return null; - - const price = pickBestPrice(p); - - const rawCspcId = String(p?.cspcid ?? "").trim(); -const hasCspcId = /^\d{1,11}$/.test(rawCspcId); + const s = String(raw || "").trim(); + if (!s) return ""; -const id = Number(p?.id); -const rawSku = - hasCspcId ? `id:${rawCspcId}` : - Number.isFinite(id) ? `id:${id}` : - ""; + // already public + if (/^https?:\/\/s\.barnetnetwork\.com\/img\/m\//i.test(s)) return s; -const sku = - normalizeSkuKey(rawSku, { storeLabel: ctx?.store?.name, url }) || rawSku || ""; + // site-relative -> public CDN + const noProto = s.replace(/^https?:\/\/[^/]+/i, ""); + const rel = noProto.replace(/^\/+/, ""); - - const img = arcNormalizeImg(p.image || p.image_url || p.img || ""); - - return { name, price, url, sku, img }; - } - + // common case: "custom/all/BC398280.png" OR "bc_lrs/000046/0000466854.jpg" + if (/^(custom\/|bc_lrs\/)/i.test(rel)) { + return `https://s.barnetnetwork.com/img/m/${rel}`; + } + + // fallback: if it's any path, still try the CDN + if (rel && !/^data:/i.test(rel)) return `https://s.barnetnetwork.com/img/m/${rel}`; + + return ""; +} + +function arcItemToTracked(p, ctx) { + if (!p) return null; + if (!isInStock(p)) return null; + + const url = normAbsUrl(p.url, `https://${ctx.store.host}/`); + if (!url) return null; + + const name = cleanText(p.description || p.name || ""); + if (!name) return null; + + const price = pickBestPrice(p); + + const rawCspcId = String(p?.cspcid ?? "").trim(); + const hasCspcId = /^\d{1,11}$/.test(rawCspcId); + + const id = Number(p?.id); + const rawSku = hasCspcId ? `id:${rawCspcId}` : Number.isFinite(id) ? `id:${id}` : ""; + + const sku = normalizeSkuKey(rawSku, { storeLabel: ctx?.store?.name, url }) || rawSku || ""; + + const img = arcNormalizeImg(p.image || p.image_url || p.img || ""); + + return { name, price, url, sku, img }; +} function parseCategoryParamsFromStartUrl(startUrl) { - try { - const u = new URL(startUrl); - const category = u.searchParams.get("category") || ""; - const sub = u.searchParams.get("sub_category") || ""; - return { category, sub }; - } catch { - return { category: "", sub: "" }; - } + try { + const u = new URL(startUrl); + const category = u.searchParams.get("category") || ""; + const sub = u.searchParams.get("sub_category") || ""; + return { category, sub }; + } catch { + return { category: "", sub: "" }; + } } function avoidMassRemoval(prevDb, discovered, ctx, reason) { - const prevSize = prevDb?.byUrl?.size || 0; - const discSize = discovered?.size || 0; + const prevSize = prevDb?.byUrl?.size || 0; + const discSize = discovered?.size || 0; - if (prevSize <= 0 || discSize <= 0) return false; + if (prevSize <= 0 || discSize <= 0) return false; - const ratio = discSize / Math.max(1, prevSize); - if (ratio >= 0.6) return false; + const ratio = discSize / Math.max(1, prevSize); + if (ratio >= 0.6) return false; - ctx.logger.warn?.( - `${ctx.catPrefixOut} | ARC partial scan (${discSize}/${prevSize}); preserving DB to avoid removals (${reason}).` - ); + ctx.logger.warn?.( + `${ctx.catPrefixOut} | ARC partial scan (${discSize}/${prevSize}); preserving DB to avoid removals (${reason}).`, + ); - // Preserve prior active items not seen this run. - for (const [u, it] of prevDb.byUrl.entries()) { - if (!it || it.removed) continue; - if (!discovered.has(u)) discovered.set(u, it); - } - return true; + // Preserve prior active items not seen this run. + for (const [u, it] of prevDb.byUrl.entries()) { + if (!it || it.removed) continue; + if (!discovered.has(u)) discovered.set(u, it); + } + return true; } async function scanCategoryArcApi(ctx, prevDb, report) { - const t0 = Date.now(); - - // Warm cookies / session (Barnet-based shops sometimes need this) - try { - await ctx.http.fetchTextWithRetry(ctx.cat.startUrl, `arc:warm:${ctx.cat.key}`, ctx.store.ua); - } catch (_) {} - - const { category: urlCat, sub: urlSub } = parseCategoryParamsFromStartUrl(ctx.cat.startUrl); - const category = String(ctx.cat.arcCategory || urlCat || "Spirits").trim(); - const subCategory = String(ctx.cat.arcSubCategory || urlSub || "").trim(); - - if (!subCategory) { - ctx.logger.warn(`${ctx.catPrefixOut} | ARC missing sub_category; skipping scan.`); - return; - } - - const apiBase = new URL(`https://${ctx.store.host}/api/shop/${ctx.store.shopId}/products`); - const discovered = new Map(); - - const maxPagesCap = ctx.config.maxPages === null ? 5000 : ctx.config.maxPages; - const hardCap = Math.min(5000, Math.max(1, maxPagesCap)); - - let donePages = 0; - let aborted = false; - - // Pagination safety - let pageSize = 0; // inferred from first non-empty page - const seenPageFingerprints = new Set(); - let stagnantPages = 0; - - for (let page = 1; page <= hardCap; page++) { - const u = new URL(apiBase.toString()); - u.searchParams.set("p", String(page)); - u.searchParams.set("show_on_web", "true"); - u.searchParams.set("sort_by", String(ctx.cat.sortBy || "price_desc")); - u.searchParams.set("category", category); - u.searchParams.set("sub_category", subCategory); - u.searchParams.set("varital_name", ""); - u.searchParams.set("no_item_found", "No item found."); - u.searchParams.set("avail_for_sale", "false"); - u.searchParams.set("_dc", String(Date.now())); - - let r; - try { - r = await ctx.http.fetchJsonWithRetry(u.toString(), `arc:api:${ctx.cat.key}:p${page}`, ctx.store.ua, { - method: "GET", - headers: { - Accept: "application/json, */*", - "X-Requested-With": "XMLHttpRequest", - Referer: ctx.cat.startUrl, - }, - }); - } catch (e) { - ctx.logger.warn(`${ctx.catPrefixOut} | ARC API page ${page} failed: ${e?.message || e}`); - aborted = true; - break; - } - - const arr = Array.isArray(r?.json?.items) ? r.json.items : []; - donePages++; - - const rawCount = arr.length; - - // Log early (even for empty) - ctx.logger.ok( - `${ctx.catPrefixOut} | API Page ${pageStr(donePages, donePages)} | ${(r?.status || "").toString().padEnd( - 3 - )} | raw=${padLeft(rawCount, 3)} kept=${padLeft(0, 3)} | bytes=${kbStr(r.bytes)} | ${padRight( - ctx.http.inflightStr(), - 11 - )} | ${secStr(r.ms)}` - ); - - if (!rawCount) break; - - // Infer page size from first non-empty page - if (!pageSize) pageSize = rawCount; - - // Detect wrap/repeat: fingerprint by ids+urls (stable enough) - const fp = arr - .map((p) => `${p?.id || ""}:${p?.url || ""}`) - .sort() - .join("|"); - if (fp && seenPageFingerprints.has(fp)) { - ctx.logger.warn(`${ctx.catPrefixOut} | ARC pagination repeated at p=${page}; stopping.`); - break; - } - if (fp) seenPageFingerprints.add(fp); - - const before = discovered.size; - - let kept = 0; - for (const p of arr) { - const it = arcItemToTracked(p, ctx); - if (!it) continue; - discovered.set(it.url, it); - kept++; - } - - // Re-log with kept filled in (overwrite-style isn’t possible; just emit a second line) - ctx.logger.ok( - `${ctx.catPrefixOut} | API Page ${pageStr(donePages, donePages)} | ${(r?.status || "").toString().padEnd( - 3 - )} | raw=${padLeft(rawCount, 3)} kept=${padLeft(kept, 3)} | bytes=${kbStr(r.bytes)} | ${padRight( - ctx.http.inflightStr(), - 11 - )} | ${secStr(r.ms)}` - ); - - // Stop condition #1: last page (short page) - if (pageSize && rawCount < pageSize) break; - - // Stop condition #2: no new uniques for 2 pages (safety) - if (discovered.size === before) stagnantPages++; - else stagnantPages = 0; - - if (stagnantPages >= 2) { - ctx.logger.warn(`${ctx.catPrefixOut} | ARC pagination stalled (no new items); stopping.`); - break; - } - } - - if (aborted) { - avoidMassRemoval(prevDb, discovered, ctx, `api pages=${donePages} sub=${subCategory}`); - } - - ctx.logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}`); - - const { merged, newItems, updatedItems, removedItems, restoredItems, metaChangedItems } = - mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel: ctx.store.name }); - - const dbObj = buildDbObject(ctx, merged); - writeJsonAtomic(ctx.dbFile, dbObj); - - ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`); - - const elapsedMs = Date.now() - t0; - ctx.logger.ok( - `${ctx.catPrefixOut} | Done in ${secStr(elapsedMs)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Meta=${metaChangedItems.length} Total(DB)=${merged.size}` - ); - - report.categories.push({ - store: ctx.store.name, - label: ctx.cat.label, - key: ctx.cat.key, - dbFile: ctx.dbFile, - scannedPages: Math.max(1, donePages), - discoveredUnique: discovered.size, - newCount: newItems.length, - updatedCount: updatedItems.length, - removedCount: removedItems.length, - restoredCount: restoredItems.length, - metaChangedCount: metaChangedItems.length, - elapsedMs, - }); - report.totals.newCount += newItems.length; - report.totals.updatedCount += updatedItems.length; - report.totals.removedCount += removedItems.length; - report.totals.restoredCount += restoredItems.length; - report.totals.metaChangedCount += metaChangedItems.length; - - addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems); - } - + const t0 = Date.now(); + + // Warm cookies / session (Barnet-based shops sometimes need this) + try { + await ctx.http.fetchTextWithRetry(ctx.cat.startUrl, `arc:warm:${ctx.cat.key}`, ctx.store.ua); + } catch (_) {} + + const { category: urlCat, sub: urlSub } = parseCategoryParamsFromStartUrl(ctx.cat.startUrl); + const category = String(ctx.cat.arcCategory || urlCat || "Spirits").trim(); + const subCategory = String(ctx.cat.arcSubCategory || urlSub || "").trim(); + + if (!subCategory) { + ctx.logger.warn(`${ctx.catPrefixOut} | ARC missing sub_category; skipping scan.`); + return; + } + + const apiBase = new URL(`https://${ctx.store.host}/api/shop/${ctx.store.shopId}/products`); + const discovered = new Map(); + + const maxPagesCap = ctx.config.maxPages === null ? 5000 : ctx.config.maxPages; + const hardCap = Math.min(5000, Math.max(1, maxPagesCap)); + + let donePages = 0; + let aborted = false; + + // Pagination safety + let pageSize = 0; // inferred from first non-empty page + const seenPageFingerprints = new Set(); + let stagnantPages = 0; + + for (let page = 1; page <= hardCap; page++) { + const u = new URL(apiBase.toString()); + u.searchParams.set("p", String(page)); + u.searchParams.set("show_on_web", "true"); + u.searchParams.set("sort_by", String(ctx.cat.sortBy || "price_desc")); + u.searchParams.set("category", category); + u.searchParams.set("sub_category", subCategory); + u.searchParams.set("varital_name", ""); + u.searchParams.set("no_item_found", "No item found."); + u.searchParams.set("avail_for_sale", "false"); + u.searchParams.set("_dc", String(Date.now())); + + let r; + try { + r = await ctx.http.fetchJsonWithRetry(u.toString(), `arc:api:${ctx.cat.key}:p${page}`, ctx.store.ua, { + method: "GET", + headers: { + Accept: "application/json, */*", + "X-Requested-With": "XMLHttpRequest", + Referer: ctx.cat.startUrl, + }, + }); + } catch (e) { + ctx.logger.warn(`${ctx.catPrefixOut} | ARC API page ${page} failed: ${e?.message || e}`); + aborted = true; + break; + } + + const arr = Array.isArray(r?.json?.items) ? r.json.items : []; + donePages++; + + const rawCount = arr.length; + + // Log early (even for empty) + ctx.logger.ok( + `${ctx.catPrefixOut} | API Page ${pageStr(donePages, donePages)} | ${(r?.status || "") + .toString() + .padEnd(3)} | raw=${padLeft(rawCount, 3)} kept=${padLeft(0, 3)} | bytes=${kbStr(r.bytes)} | ${padRight( + ctx.http.inflightStr(), + 11, + )} | ${secStr(r.ms)}`, + ); + + if (!rawCount) break; + + // Infer page size from first non-empty page + if (!pageSize) pageSize = rawCount; + + // Detect wrap/repeat: fingerprint by ids+urls (stable enough) + const fp = arr + .map((p) => `${p?.id || ""}:${p?.url || ""}`) + .sort() + .join("|"); + if (fp && seenPageFingerprints.has(fp)) { + ctx.logger.warn(`${ctx.catPrefixOut} | ARC pagination repeated at p=${page}; stopping.`); + break; + } + if (fp) seenPageFingerprints.add(fp); + + const before = discovered.size; + + let kept = 0; + for (const p of arr) { + const it = arcItemToTracked(p, ctx); + if (!it) continue; + discovered.set(it.url, it); + kept++; + } + + // Re-log with kept filled in (overwrite-style isn’t possible; just emit a second line) + ctx.logger.ok( + `${ctx.catPrefixOut} | API Page ${pageStr(donePages, donePages)} | ${(r?.status || "") + .toString() + .padEnd( + 3, + )} | raw=${padLeft(rawCount, 3)} kept=${padLeft(kept, 3)} | bytes=${kbStr(r.bytes)} | ${padRight( + ctx.http.inflightStr(), + 11, + )} | ${secStr(r.ms)}`, + ); + + // Stop condition #1: last page (short page) + if (pageSize && rawCount < pageSize) break; + + // Stop condition #2: no new uniques for 2 pages (safety) + if (discovered.size === before) stagnantPages++; + else stagnantPages = 0; + + if (stagnantPages >= 2) { + ctx.logger.warn(`${ctx.catPrefixOut} | ARC pagination stalled (no new items); stopping.`); + break; + } + } + + if (aborted) { + avoidMassRemoval(prevDb, discovered, ctx, `api pages=${donePages} sub=${subCategory}`); + } + + ctx.logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}`); + + const { merged, newItems, updatedItems, removedItems, restoredItems, metaChangedItems } = mergeDiscoveredIntoDb( + prevDb, + discovered, + { storeLabel: ctx.store.name }, + ); + + const dbObj = buildDbObject(ctx, merged); + writeJsonAtomic(ctx.dbFile, dbObj); + + ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`); + + const elapsedMs = Date.now() - t0; + ctx.logger.ok( + `${ctx.catPrefixOut} | Done in ${secStr(elapsedMs)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Meta=${metaChangedItems.length} Total(DB)=${merged.size}`, + ); + + report.categories.push({ + store: ctx.store.name, + label: ctx.cat.label, + key: ctx.cat.key, + dbFile: ctx.dbFile, + scannedPages: Math.max(1, donePages), + discoveredUnique: discovered.size, + newCount: newItems.length, + updatedCount: updatedItems.length, + removedCount: removedItems.length, + restoredCount: restoredItems.length, + metaChangedCount: metaChangedItems.length, + elapsedMs, + }); + report.totals.newCount += newItems.length; + report.totals.updatedCount += updatedItems.length; + report.totals.removedCount += removedItems.length; + report.totals.restoredCount += restoredItems.length; + report.totals.metaChangedCount += metaChangedItems.length; + + addCategoryResultToReport( + report, + ctx.store.name, + ctx.cat.label, + newItems, + updatedItems, + removedItems, + restoredItems, + ); +} function createStore(defaultUa) { - return { - key: "arc", - name: "ARC Liquor", - host: "kelownaharveyave.armstrong.coop", - shopId: "644-290", - ua: defaultUa, - scanCategory: scanCategoryArcApi, - categories: [ - { - key: "spirits-rum", - label: "Spirits - Rum", - startUrl: "https://kelownaharveyave.armstrong.coop/products?category=Spirits&sub_category=Rum", - arcCategory: "Spirits", - arcSubCategory: "Rum", - sortBy: "price_desc", - }, - { - key: "spirits-scotch", - label: "Spirits - Scotch", - startUrl: "https://kelownaharveyave.armstrong.coop/products?category=Spirits&sub_category=Scotch", - arcCategory: "Spirits", - arcSubCategory: "Scotch", - sortBy: "price_desc", - }, - { - key: "spirits-whiskey", - label: "Spirits - Whiskey", - startUrl: "https://kelownaharveyave.armstrong.coop/products?category=Spirits&sub_category=Whiskey", - arcCategory: "Spirits", - arcSubCategory: "Whiskey", - sortBy: "price_desc", - }, - ], - }; + return { + key: "arc", + name: "ARC Liquor", + host: "kelownaharveyave.armstrong.coop", + shopId: "644-290", + ua: defaultUa, + scanCategory: scanCategoryArcApi, + categories: [ + { + key: "spirits-rum", + label: "Spirits - Rum", + startUrl: "https://kelownaharveyave.armstrong.coop/products?category=Spirits&sub_category=Rum", + arcCategory: "Spirits", + arcSubCategory: "Rum", + sortBy: "price_desc", + }, + { + key: "spirits-scotch", + label: "Spirits - Scotch", + startUrl: "https://kelownaharveyave.armstrong.coop/products?category=Spirits&sub_category=Scotch", + arcCategory: "Spirits", + arcSubCategory: "Scotch", + sortBy: "price_desc", + }, + { + key: "spirits-whiskey", + label: "Spirits - Whiskey", + startUrl: "https://kelownaharveyave.armstrong.coop/products?category=Spirits&sub_category=Whiskey", + arcCategory: "Spirits", + arcSubCategory: "Whiskey", + sortBy: "price_desc", + }, + ], + }; } module.exports = { createStore }; diff --git a/src/stores/bcl.js b/src/stores/bcl.js index 0f0d0a7..afee38e 100644 --- a/src/stores/bcl.js +++ b/src/stores/bcl.js @@ -9,332 +9,360 @@ const { buildDbObject, writeJsonAtomic } = require("../tracker/db"); const { addCategoryResultToReport } = require("../tracker/report"); function kbStr(bytes) { - return humanBytes(bytes).padStart(8, " "); + return humanBytes(bytes).padStart(8, " "); } function secStr(ms) { - const s = Number.isFinite(ms) ? ms / 1000 : 0; - const tenths = Math.round(s * 10) / 10; - let out; - if (tenths < 10) out = `${tenths.toFixed(1)}s`; - else out = `${Math.round(s)}s`; - return out.padStart(7, " "); + const s = Number.isFinite(ms) ? ms / 1000 : 0; + const tenths = Math.round(s * 10) / 10; + let out; + if (tenths < 10) out = `${tenths.toFixed(1)}s`; + else out = `${Math.round(s)}s`; + return out.padStart(7, " "); } function pageStr(i, total) { - const leftW = String(total).length; - return `${padLeft(i, leftW)}/${total}`; + const leftW = String(total).length; + return `${padLeft(i, leftW)}/${total}`; } function pctStr(done, total) { - const pct = total ? Math.floor((done / total) * 100) : 0; - return `${padLeft(pct, 3)}%`; + const pct = total ? Math.floor((done / total) * 100) : 0; + return `${padLeft(pct, 3)}%`; } function cad(n) { - const x = Number(n); - if (!Number.isFinite(x)) return ""; - return `$${x.toFixed(2)}`; + const x = Number(n); + if (!Number.isFinite(x)) return ""; + return `$${x.toFixed(2)}`; } function asNumber(n) { - if (n == null) return NaN; - if (typeof n === "number") return n; - const t = String(n).trim(); - if (!t) return NaN; - const x = Number(t.replace(/[^0-9.]/g, "")); - return x; + if (n == null) return NaN; + if (typeof n === "number") return n; + const t = String(n).trim(); + if (!t) return NaN; + const x = Number(t.replace(/[^0-9.]/g, "")); + return x; } function bclTotalHits(json) { - const t = json?.hits?.total; - if (typeof t === "number") return t; - if (t && typeof t.value === "number") return t.value; // ES-style - return 0; + const t = json?.hits?.total; + if (typeof t === "number") return t; + if (t && typeof t.value === "number") return t.value; // ES-style + return 0; } function bclIsInStock(src) { - const candidates = [ - src?.availability_override, // <-- add this - src?.availability, - src?.availabilityText, - src?.availabilityStatus, - src?.availability_status, - src?.stockStatus, - src?.stock_status, - src?.status, - src?.statusText, - ] - .map((v) => (v == null ? "" : String(v))) - .filter(Boolean); + const candidates = [ + src?.availability_override, // <-- add this + src?.availability, + src?.availabilityText, + src?.availabilityStatus, + src?.availability_status, + src?.stockStatus, + src?.stock_status, + src?.status, + src?.statusText, + ] + .map((v) => (v == null ? "" : String(v))) + .filter(Boolean); - for (const s of candidates) { - if (/out of stock/i.test(s)) return false; - if (/\bin stock\b/i.test(s)) return true; - if (/\bavailable\b/i.test(s)) return true; // "Available Feb 07" - } + for (const s of candidates) { + if (/out of stock/i.test(s)) return false; + if (/\bin stock\b/i.test(s)) return true; + if (/\bavailable\b/i.test(s)) return true; // "Available Feb 07" + } - const units = Number(src?.availableUnits); - if (Number.isFinite(units)) return units > 0; + const units = Number(src?.availableUnits); + if (Number.isFinite(units)) return units > 0; - return true; + return true; } function bclNormalizeAbsUrl(raw) { - const s = String(raw || "").trim(); - if (!s) return ""; - if (s.startsWith("//")) return `https:${s}`; - if (/^https?:\/\//i.test(s)) return s; - try { - return new URL(s, "https://www.bcliquorstores.com/").toString(); - } catch { - return s; - } + const s = String(raw || "").trim(); + if (!s) return ""; + if (s.startsWith("//")) return `https:${s}`; + if (/^https?:\/\//i.test(s)) return s; + try { + return new URL(s, "https://www.bcliquorstores.com/").toString(); + } catch { + return s; + } } function bclPickImage(src) { - const cands = [ - src?.imageUrl, - src?.imageURL, - src?.image, - src?.thumbnail, - src?.thumbnailUrl, - src?.thumbnailURL, - src?.primaryImage, - src?.primaryImageUrl, - ]; + const cands = [ + src?.imageUrl, + src?.imageURL, + src?.image, + src?.thumbnail, + src?.thumbnailUrl, + src?.thumbnailURL, + src?.primaryImage, + src?.primaryImageUrl, + ]; - for (const c of cands) { - if (typeof c === "string" && c.trim()) return bclNormalizeAbsUrl(c); - } + for (const c of cands) { + if (typeof c === "string" && c.trim()) return bclNormalizeAbsUrl(c); + } - const arrs = [src?.images, src?.imageUrls, src?.image_urls]; - for (const a of arrs) { - if (!Array.isArray(a) || !a.length) continue; - const v = a[0]; - if (typeof v === "string" && v.trim()) return bclNormalizeAbsUrl(v); - if (v && typeof v === "object") { - const s = String(v.src || v.url || "").trim(); - if (s) return bclNormalizeAbsUrl(s); - } - } + const arrs = [src?.images, src?.imageUrls, src?.image_urls]; + for (const a of arrs) { + if (!Array.isArray(a) || !a.length) continue; + const v = a[0]; + if (typeof v === "string" && v.trim()) return bclNormalizeAbsUrl(v); + if (v && typeof v === "object") { + const s = String(v.src || v.url || "").trim(); + if (s) return bclNormalizeAbsUrl(s); + } + } - return ""; + return ""; } function bclHitToItem(hit) { - const src = hit?._source || null; - if (!src) return null; + const src = hit?._source || null; + if (!src) return null; - const skuRaw = src.sku != null ? String(src.sku).trim() : ""; - if (!skuRaw) return null; + const skuRaw = src.sku != null ? String(src.sku).trim() : ""; + if (!skuRaw) return null; - // SKU in URL (requested) - const url = `https://www.bcliquorstores.com/product/${encodeURIComponent(skuRaw)}`; + // SKU in URL (requested) + const url = `https://www.bcliquorstores.com/product/${encodeURIComponent(skuRaw)}`; - const name = String(src.name || "").trim(); - if (!name) return null; + const name = String(src.name || "").trim(); + if (!name) return null; - // Sale support: pick currentPrice when present; otherwise regularPrice. - const current = asNumber(src.currentPrice); - const regular = asNumber(src.regularPrice); - const price = cad(Number.isFinite(current) ? current : regular); + // Sale support: pick currentPrice when present; otherwise regularPrice. + const current = asNumber(src.currentPrice); + const regular = asNumber(src.regularPrice); + const price = cad(Number.isFinite(current) ? current : regular); - // SKU key: - // - Keep CSPC 6-digit when present (rare for BCL, but safe) - // - Otherwise upgrade to an explicit soft key: id: - // - // ✅ PATCH: handle tiny SKUs too (3/4/5-digit) by forcing id: - // only fall back to raw (NOT u:) if it’s genuinely non-numeric. - let sku = normalizeCspc(skuRaw); - if (!sku) { - const m = skuRaw.match(/^\d{1,6}$/); // BCL product IDs like 141, 596, 984, 117, etc. - sku = m ? `id:${m[0]}` : `id:${skuRaw}`; - } + // SKU key: + // - Keep CSPC 6-digit when present (rare for BCL, but safe) + // - Otherwise upgrade to an explicit soft key: id: + // + // ✅ PATCH: handle tiny SKUs too (3/4/5-digit) by forcing id: + // only fall back to raw (NOT u:) if it’s genuinely non-numeric. + let sku = normalizeCspc(skuRaw); + if (!sku) { + const m = skuRaw.match(/^\d{1,6}$/); // BCL product IDs like 141, 596, 984, 117, etc. + sku = m ? `id:${m[0]}` : `id:${skuRaw}`; + } - const inStock = bclIsInStock(src); - if (!inStock) return null; + const inStock = bclIsInStock(src); + if (!inStock) return null; - // ✅ Fix: BCL appears to serve .jpg (not .jpeg) for these imagecache URLs. - // Also use https. - const img = `https://www.bcliquorstores.com/sites/default/files/imagecache/height400px/${encodeURIComponent( - skuRaw - )}.jpg`; + // ✅ Fix: BCL appears to serve .jpg (not .jpeg) for these imagecache URLs. + // Also use https. + const img = `https://www.bcliquorstores.com/sites/default/files/imagecache/height400px/${encodeURIComponent( + skuRaw, + )}.jpg`; - return { name, price, url, sku, img }; + return { name, price, url, sku, img }; } async function bclFetchBrowsePage(ctx, page1, size) { - const type = ctx.cat.bclType; // e.g. "rum" or "whisky / whiskey" - const category = "spirits"; - const sort = "featuredProducts:desc"; + const type = ctx.cat.bclType; // e.g. "rum" or "whisky / whiskey" + const category = "spirits"; + const sort = "featuredProducts:desc"; - const u = new URL("https://www.bcliquorstores.com/ajax/browse"); - u.searchParams.set("category", category); - u.searchParams.set("type", type); - u.searchParams.set("sort", sort); - u.searchParams.set("size", String(size)); - u.searchParams.set("page", String(page1)); + const u = new URL("https://www.bcliquorstores.com/ajax/browse"); + u.searchParams.set("category", category); + u.searchParams.set("type", type); + u.searchParams.set("sort", sort); + u.searchParams.set("size", String(size)); + u.searchParams.set("page", String(page1)); - const referer = - `https://www.bcliquorstores.com/product-catalogue?` + - `category=${encodeURIComponent(category)}` + - `&type=${encodeURIComponent(type)}` + - `&sort=${encodeURIComponent(sort)}` + - `&page=${encodeURIComponent(String(page1))}`; + const referer = + `https://www.bcliquorstores.com/product-catalogue?` + + `category=${encodeURIComponent(category)}` + + `&type=${encodeURIComponent(type)}` + + `&sort=${encodeURIComponent(sort)}` + + `&page=${encodeURIComponent(String(page1))}`; - return await ctx.http.fetchJsonWithRetry(u.toString(), `bcl:${ctx.cat.key}:p${page1}`, ctx.store.ua, { - method: "GET", - headers: { - Accept: "application/json, text/plain, */*", - Referer: referer, - Origin: "https://www.bcliquorstores.com", - }, - }); + return await ctx.http.fetchJsonWithRetry(u.toString(), `bcl:${ctx.cat.key}:p${page1}`, ctx.store.ua, { + method: "GET", + headers: { + Accept: "application/json, text/plain, */*", + Referer: referer, + Origin: "https://www.bcliquorstores.com", + }, + }); } async function scanCategoryBCLAjax(ctx, prevDb, report) { - const t0 = Date.now(); - const size = 24; + const t0 = Date.now(); + const size = 24; - let first; - try { - first = await bclFetchBrowsePage(ctx, 1, size); - } catch (e) { - ctx.logger.warn(`${ctx.catPrefixOut} | BCL browse fetch failed: ${e?.message || e}`); + let first; + try { + first = await bclFetchBrowsePage(ctx, 1, size); + } catch (e) { + ctx.logger.warn(`${ctx.catPrefixOut} | BCL browse fetch failed: ${e?.message || e}`); - const discovered = new Map(); - const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel: ctx.store.name }); - const dbObj = buildDbObject(ctx, merged); - writeJsonAtomic(ctx.dbFile, dbObj); + const discovered = new Map(); + const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb( + prevDb, + discovered, + { storeLabel: ctx.store.name }, + ); + const dbObj = buildDbObject(ctx, merged); + writeJsonAtomic(ctx.dbFile, dbObj); - const elapsed = Date.now() - t0; - report.categories.push({ - store: ctx.store.name, - label: ctx.cat.label, - key: ctx.cat.key, - dbFile: ctx.dbFile, - scannedPages: 1, - discoveredUnique: 0, - newCount: newItems.length, - updatedCount: updatedItems.length, - removedCount: removedItems.length, - restoredCount: restoredItems.length, - elapsedMs: elapsed, - }); - report.totals.newCount += newItems.length; - report.totals.updatedCount += updatedItems.length; - report.totals.removedCount += removedItems.length; - report.totals.restoredCount += restoredItems.length; - addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems); - return; - } + const elapsed = Date.now() - t0; + report.categories.push({ + store: ctx.store.name, + label: ctx.cat.label, + key: ctx.cat.key, + dbFile: ctx.dbFile, + scannedPages: 1, + discoveredUnique: 0, + newCount: newItems.length, + updatedCount: updatedItems.length, + removedCount: removedItems.length, + restoredCount: restoredItems.length, + elapsedMs: elapsed, + }); + report.totals.newCount += newItems.length; + report.totals.updatedCount += updatedItems.length; + report.totals.removedCount += removedItems.length; + report.totals.restoredCount += restoredItems.length; + addCategoryResultToReport( + report, + ctx.store.name, + ctx.cat.label, + newItems, + updatedItems, + removedItems, + restoredItems, + ); + return; + } - const total = bclTotalHits(first?.json); - const totalPages = Math.max(1, Math.ceil(total / size)); - const scanPages = ctx.config.maxPages === null ? totalPages : Math.min(ctx.config.maxPages, totalPages); + const total = bclTotalHits(first?.json); + const totalPages = Math.max(1, Math.ceil(total / size)); + const scanPages = ctx.config.maxPages === null ? totalPages : Math.min(ctx.config.maxPages, totalPages); - ctx.logger.ok(`${ctx.catPrefixOut} | Total=${total} Size=${size} Pages: ${scanPages}${scanPages !== totalPages ? ` (cap from ${totalPages})` : ""}`); + ctx.logger.ok( + `${ctx.catPrefixOut} | Total=${total} Size=${size} Pages: ${scanPages}${scanPages !== totalPages ? ` (cap from ${totalPages})` : ""}`, + ); - const pageNums = []; - for (let p = 1; p <= scanPages; p++) pageNums.push(p); + const pageNums = []; + for (let p = 1; p <= scanPages; p++) pageNums.push(p); - let donePages = 0; + let donePages = 0; - const perPageItems = await require("../utils/async").parallelMapStaggered( - pageNums, - ctx.config.concurrency, - ctx.config.staggerMs, - async (page1, idx) => { - const r = page1 === 1 ? first : await bclFetchBrowsePage(ctx, page1, size); - const hits = Array.isArray(r?.json?.hits?.hits) ? r.json.hits.hits : []; + const perPageItems = await require("../utils/async").parallelMapStaggered( + pageNums, + ctx.config.concurrency, + ctx.config.staggerMs, + async (page1, idx) => { + const r = page1 === 1 ? first : await bclFetchBrowsePage(ctx, page1, size); + const hits = Array.isArray(r?.json?.hits?.hits) ? r.json.hits.hits : []; - const items = []; - for (const h of hits) { - const it = bclHitToItem(h); - if (it) items.push(it); - } + const items = []; + for (const h of hits) { + const it = bclHitToItem(h); + if (it) items.push(it); + } - donePages++; - ctx.logger.ok( - `${ctx.catPrefixOut} | Page ${pageStr(idx + 1, pageNums.length)} | ${String(r.status || "").padEnd(3)} | ${pctStr(donePages, pageNums.length)} | items=${padLeft( - items.length, - 3 - )} | bytes=${kbStr(r.bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}` - ); + donePages++; + ctx.logger.ok( + `${ctx.catPrefixOut} | Page ${pageStr(idx + 1, pageNums.length)} | ${String(r.status || "").padEnd(3)} | ${pctStr(donePages, pageNums.length)} | items=${padLeft( + items.length, + 3, + )} | bytes=${kbStr(r.bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}`, + ); - return items; - } - ); + return items; + }, + ); - const discovered = new Map(); - let dups = 0; - for (const arr of perPageItems) { - for (const it of arr) { - if (discovered.has(it.url)) dups++; - discovered.set(it.url, it); - } - } + const discovered = new Map(); + let dups = 0; + for (const arr of perPageItems) { + for (const it of arr) { + if (discovered.has(it.url)) dups++; + discovered.set(it.url, it); + } + } - ctx.logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}${dups ? ` (${dups} dups)` : ""}`); + ctx.logger.ok( + `${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}${dups ? ` (${dups} dups)` : ""}`, + ); - const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel: ctx.store.name }); + const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, { + storeLabel: ctx.store.name, + }); - const dbObj = buildDbObject(ctx, merged); - writeJsonAtomic(ctx.dbFile, dbObj); + const dbObj = buildDbObject(ctx, merged); + writeJsonAtomic(ctx.dbFile, dbObj); - ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`); + ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`); - const elapsed = Date.now() - t0; - ctx.logger.ok( - `${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}` - ); + const elapsed = Date.now() - t0; + ctx.logger.ok( + `${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`, + ); - report.categories.push({ - store: ctx.store.name, - label: ctx.cat.label, - key: ctx.cat.key, - dbFile: ctx.dbFile, - scannedPages: scanPages, - discoveredUnique: discovered.size, - newCount: newItems.length, - updatedCount: updatedItems.length, - removedCount: removedItems.length, - restoredCount: restoredItems.length, - elapsedMs: elapsed, - }); - report.totals.newCount += newItems.length; - report.totals.updatedCount += updatedItems.length; - report.totals.updatedCount += updatedItems.length; - report.totals.removedCount += removedItems.length; - report.totals.restoredCount += restoredItems.length; + report.categories.push({ + store: ctx.store.name, + label: ctx.cat.label, + key: ctx.cat.key, + dbFile: ctx.dbFile, + scannedPages: scanPages, + discoveredUnique: discovered.size, + newCount: newItems.length, + updatedCount: updatedItems.length, + removedCount: removedItems.length, + restoredCount: restoredItems.length, + elapsedMs: elapsed, + }); + report.totals.newCount += newItems.length; + report.totals.updatedCount += updatedItems.length; + report.totals.updatedCount += updatedItems.length; + report.totals.removedCount += removedItems.length; + report.totals.restoredCount += restoredItems.length; - addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems); + addCategoryResultToReport( + report, + ctx.store.name, + ctx.cat.label, + newItems, + updatedItems, + removedItems, + restoredItems, + ); } function createStore(defaultUa) { - return { - key: "bcl", - name: "BCL", - host: "www.bcliquorstores.com", - ua: defaultUa, - scanCategory: scanCategoryBCLAjax, // JSON-driven (async browse) - categories: [ - { - key: "whisky", - label: "Whisky / Whiskey", - // informational only; scan uses ajax/browse - startUrl: "https://www.bcliquorstores.com/product-catalogue?category=spirits&type=whisky%20/%20whiskey&sort=featuredProducts:desc&page=1", - bclType: "whisky / whiskey", - }, - { - key: "rum", - label: "Rum", - startUrl: "https://www.bcliquorstores.com/product-catalogue?category=spirits&type=rum&sort=featuredProducts:desc&page=1", - bclType: "rum", - }, - ], - }; + return { + key: "bcl", + name: "BCL", + host: "www.bcliquorstores.com", + ua: defaultUa, + scanCategory: scanCategoryBCLAjax, // JSON-driven (async browse) + categories: [ + { + key: "whisky", + label: "Whisky / Whiskey", + // informational only; scan uses ajax/browse + startUrl: + "https://www.bcliquorstores.com/product-catalogue?category=spirits&type=whisky%20/%20whiskey&sort=featuredProducts:desc&page=1", + bclType: "whisky / whiskey", + }, + { + key: "rum", + label: "Rum", + startUrl: + "https://www.bcliquorstores.com/product-catalogue?category=spirits&type=rum&sort=featuredProducts:desc&page=1", + bclType: "rum", + }, + ], + }; } module.exports = { createStore }; diff --git a/src/stores/bsw.js b/src/stores/bsw.js index 2da2523..445fd3d 100644 --- a/src/stores/bsw.js +++ b/src/stores/bsw.js @@ -14,380 +14,416 @@ const BSW_ALGOLIA_API_KEY = "1aa0c19fe6a0931340570bd358c2c9d2"; const BSW_ALGOLIA_URL = `https://${BSW_ALGOLIA_APP_ID.toLowerCase()}-dsn.algolia.net/1/indexes/*/queries`; function usd(n) { - if (!Number.isFinite(n)) return ""; - return `$${n.toLocaleString("en-US", { minimumFractionDigits: 2, maximumFractionDigits: 2 })}`; + if (!Number.isFinite(n)) return ""; + return `$${n.toLocaleString("en-US", { minimumFractionDigits: 2, maximumFractionDigits: 2 })}`; } function bswExtractCollectionIdFromHtml(html) { - const s = String(html || ""); - const patterns = [ - /collection_ids%3A(\d{6,})/i, - /collection_ids\s*:\s*(\d{6,})/i, - /"collection_ids"\s*:\s*(\d{6,})/i, - /"collection_id"\s*:\s*(\d{6,})/i, - /collection_id\s*=\s*(\d{6,})/i, - /collectionId["']?\s*[:=]\s*["']?(\d{6,})/i, - /data-collection-id=["'](\d{6,})["']/i, - ]; - for (const re of patterns) { - const m = s.match(re); - if (m && m[1]) return Number.parseInt(m[1], 10); - } - return null; + const s = String(html || ""); + const patterns = [ + /collection_ids%3A(\d{6,})/i, + /collection_ids\s*:\s*(\d{6,})/i, + /"collection_ids"\s*:\s*(\d{6,})/i, + /"collection_id"\s*:\s*(\d{6,})/i, + /collection_id\s*=\s*(\d{6,})/i, + /collectionId["']?\s*[:=]\s*["']?(\d{6,})/i, + /data-collection-id=["'](\d{6,})["']/i, + ]; + for (const re of patterns) { + const m = s.match(re); + if (m && m[1]) return Number.parseInt(m[1], 10); + } + return null; } function bswFormatPrice(value, hintCents) { - if (value === null || value === undefined) return ""; + if (value === null || value === undefined) return ""; - if (typeof value === "string") { - const t = value.trim(); - if (!t) return ""; - if (t.includes("$")) return t.replace(/\s+/g, ""); - const n = Number(t.replace(/[^0-9.]/g, "")); - if (!Number.isFinite(n)) return t; - return usd(n); - } + if (typeof value === "string") { + const t = value.trim(); + if (!t) return ""; + if (t.includes("$")) return t.replace(/\s+/g, ""); + const n = Number(t.replace(/[^0-9.]/g, "")); + if (!Number.isFinite(n)) return t; + return usd(n); + } - if (typeof value === "number") { - let n = value; + if (typeof value === "number") { + let n = value; - if (hintCents) n = n / 100; - else if (Number.isInteger(n) && n >= 100000) n = n / 100; + if (hintCents) n = n / 100; + else if (Number.isInteger(n) && n >= 100000) n = n / 100; - return usd(n); - } + return usd(n); + } - return ""; + return ""; } function bswPickPrice(hit) { - const pick = (val, cents) => ({ val, cents }); + const pick = (val, cents) => ({ val, cents }); - if (hit && hit.price_cents != null) return pick(hit.price_cents, true); - if (hit && hit.compare_at_price_cents != null) return pick(hit.compare_at_price_cents, true); + if (hit && hit.price_cents != null) return pick(hit.price_cents, true); + if (hit && hit.compare_at_price_cents != null) return pick(hit.compare_at_price_cents, true); - if (hit && hit.price != null) return pick(hit.price, false); - if (hit && hit.price_min != null) return pick(hit.price_min, false); - if (hit && hit.priceMin != null) return pick(hit.priceMin, false); - if (hit && hit.min_price != null) return pick(hit.min_price, false); - if (hit && hit.variants_min_price != null) return pick(hit.variants_min_price, false); + if (hit && hit.price != null) return pick(hit.price, false); + if (hit && hit.price_min != null) return pick(hit.price_min, false); + if (hit && hit.priceMin != null) return pick(hit.priceMin, false); + if (hit && hit.min_price != null) return pick(hit.min_price, false); + if (hit && hit.variants_min_price != null) return pick(hit.variants_min_price, false); - if (hit && hit.variants && Array.isArray(hit.variants) && hit.variants[0]) { - const v = hit.variants[0]; - if (v.price_cents != null) return pick(v.price_cents, true); - if (v.compare_at_price_cents != null) return pick(v.compare_at_price_cents, true); - if (v.price != null) return pick(v.price, false); - } + if (hit && hit.variants && Array.isArray(hit.variants) && hit.variants[0]) { + const v = hit.variants[0]; + if (v.price_cents != null) return pick(v.price_cents, true); + if (v.compare_at_price_cents != null) return pick(v.compare_at_price_cents, true); + if (v.price != null) return pick(v.price, false); + } - return pick(null, false); + return pick(null, false); } - function bswHitToItem(hit) { - const name = cleanText(hit && (hit.title || hit.name || hit.product_title || hit.product_name || "")); - const handle = hit && (hit.handle || hit.product_handle || hit.slug || ""); - const url = - (hit && (hit.url || hit.product_url)) || - (handle ? `https://www.bswliquor.com/products/${String(handle).replace(/^\/+/, "")}` : ""); + const name = cleanText(hit && (hit.title || hit.name || hit.product_title || hit.product_name || "")); + const handle = hit && (hit.handle || hit.product_handle || hit.slug || ""); + const url = + (hit && (hit.url || hit.product_url)) || + (handle ? `https://www.bswliquor.com/products/${String(handle).replace(/^\/+/, "")}` : ""); - const { val: priceVal, cents: hintCents } = bswPickPrice(hit); - const price = bswFormatPrice(priceVal, hintCents); + const { val: priceVal, cents: hintCents } = bswPickPrice(hit); + const price = bswFormatPrice(priceVal, hintCents); - const sku = normalizeCspc(hit?.sku || hit?.SKU || hit?.cspc || hit?.CSPC || ""); + const sku = normalizeCspc(hit?.sku || hit?.SKU || hit?.cspc || hit?.CSPC || ""); - const img = bswPickImage(hit); + const img = bswPickImage(hit); - if (!name || !url) return null; - return { name, price, url, sku, img }; + if (!name || !url) return null; + return { name, price, url, sku, img }; } async function bswFetchAlgoliaPage(ctx, collectionId, ruleContext, page0, hitsPerPage) { - const filtersExpr = `collection_ids:${collectionId} AND (inventory_available:"true")`; + const filtersExpr = `collection_ids:${collectionId} AND (inventory_available:"true")`; - const params = - `facets=%5B%22price%22%2C%22*%22%5D` + - `&filters=${encodeURIComponent(filtersExpr)}` + - `&hitsPerPage=${encodeURIComponent(String(hitsPerPage))}` + - `&page=${encodeURIComponent(String(page0))}` + - `&query=` + - `&clickAnalytics=true` + - `&maxValuesPerFacet=100` + - (ruleContext ? `&ruleContexts=${encodeURIComponent(String(ruleContext))}` : ""); + const params = + `facets=%5B%22price%22%2C%22*%22%5D` + + `&filters=${encodeURIComponent(filtersExpr)}` + + `&hitsPerPage=${encodeURIComponent(String(hitsPerPage))}` + + `&page=${encodeURIComponent(String(page0))}` + + `&query=` + + `&clickAnalytics=true` + + `&maxValuesPerFacet=100` + + (ruleContext ? `&ruleContexts=${encodeURIComponent(String(ruleContext))}` : ""); - const bodyObj = { requests: [{ indexName: "shopify_products", params }] }; + const bodyObj = { requests: [{ indexName: "shopify_products", params }] }; - return await ctx.http.fetchJsonWithRetry(BSW_ALGOLIA_URL, `algolia:${ctx.cat.key}:p${page0}`, ctx.store.ua, { - method: "POST", - headers: { - Accept: "*/*", - "content-type": "application/x-www-form-urlencoded", - Origin: "https://www.bswliquor.com", - Referer: "https://www.bswliquor.com/", - "x-algolia-api-key": BSW_ALGOLIA_API_KEY, - "x-algolia-application-id": BSW_ALGOLIA_APP_ID, - }, - body: JSON.stringify(bodyObj), - }); + return await ctx.http.fetchJsonWithRetry(BSW_ALGOLIA_URL, `algolia:${ctx.cat.key}:p${page0}`, ctx.store.ua, { + method: "POST", + headers: { + Accept: "*/*", + "content-type": "application/x-www-form-urlencoded", + Origin: "https://www.bswliquor.com", + Referer: "https://www.bswliquor.com/", + "x-algolia-api-key": BSW_ALGOLIA_API_KEY, + "x-algolia-application-id": BSW_ALGOLIA_APP_ID, + }, + body: JSON.stringify(bodyObj), + }); } function kbStr(bytes) { - return humanBytes(bytes).padStart(8, " "); + return humanBytes(bytes).padStart(8, " "); } function secStr(ms) { - const s = Number.isFinite(ms) ? ms / 1000 : 0; - const tenths = Math.round(s * 10) / 10; - let out; - if (tenths < 10) out = `${tenths.toFixed(1)}s`; - else out = `${Math.round(s)}s`; - return out.padStart(7, " "); + const s = Number.isFinite(ms) ? ms / 1000 : 0; + const tenths = Math.round(s * 10) / 10; + let out; + if (tenths < 10) out = `${tenths.toFixed(1)}s`; + else out = `${Math.round(s)}s`; + return out.padStart(7, " "); } function pageStr(i, total) { - const leftW = String(total).length; - return `${padLeft(i, leftW)}/${total}`; + const leftW = String(total).length; + return `${padLeft(i, leftW)}/${total}`; } function pctStr(done, total) { - const pct = total ? Math.floor((done / total) * 100) : 0; - return `${padLeft(pct, 3)}%`; + const pct = total ? Math.floor((done / total) * 100) : 0; + return `${padLeft(pct, 3)}%`; } function bswNormalizeAbsUrl(raw) { - const s = String(raw || "").trim(); - if (!s) return ""; - if (s.startsWith("//")) return `https:${s}`; - if (/^https?:\/\//i.test(s)) return s; - try { - return new URL(s, "https://www.bswliquor.com/").toString(); - } catch { - return s; - } + const s = String(raw || "").trim(); + if (!s) return ""; + if (s.startsWith("//")) return `https:${s}`; + if (/^https?:\/\//i.test(s)) return s; + try { + return new URL(s, "https://www.bswliquor.com/").toString(); + } catch { + return s; + } } function bswNormalizeImg(v) { - if (!v) return ""; - if (typeof v === "string") return bswNormalizeAbsUrl(v); - if (typeof v === "object") { - const cands = [ - v.src, - v.url, - v.originalSrc, - v.original_src, - v.original, - v.secure_url, - v.large, - v.medium, - v.small, - ]; - for (const c of cands) { - if (typeof c === "string" && c.trim()) return bswNormalizeAbsUrl(c); - } - } - return ""; + if (!v) return ""; + if (typeof v === "string") return bswNormalizeAbsUrl(v); + if (typeof v === "object") { + const cands = [ + v.src, + v.url, + v.originalSrc, + v.original_src, + v.original, + v.secure_url, + v.large, + v.medium, + v.small, + ]; + for (const c of cands) { + if (typeof c === "string" && c.trim()) return bswNormalizeAbsUrl(c); + } + } + return ""; } function bswPickImage(hit) { - const cands = [ - hit?.image, - hit?.image_url, - hit?.imageUrl, - hit?.imageURL, - hit?.featured_image, - hit?.featured_image_url, - hit?.featuredImage, - hit?.featuredImageUrl, - hit?.product_image, - hit?.product_image_url, - hit?.productImage, - hit?.productImageUrl, - hit?.thumbnail, - hit?.thumbnail_url, - hit?.thumbnailUrl, - ]; + const cands = [ + hit?.image, + hit?.image_url, + hit?.imageUrl, + hit?.imageURL, + hit?.featured_image, + hit?.featured_image_url, + hit?.featuredImage, + hit?.featuredImageUrl, + hit?.product_image, + hit?.product_image_url, + hit?.productImage, + hit?.productImageUrl, + hit?.thumbnail, + hit?.thumbnail_url, + hit?.thumbnailUrl, + ]; - for (const c of cands) { - const s = bswNormalizeImg(c); - if (s) return s; - } + for (const c of cands) { + const s = bswNormalizeImg(c); + if (s) return s; + } - if (Array.isArray(hit?.images)) { - for (const im of hit.images) { - const s = bswNormalizeImg(im); - if (s) return s; - } - } + if (Array.isArray(hit?.images)) { + for (const im of hit.images) { + const s = bswNormalizeImg(im); + if (s) return s; + } + } - if (Array.isArray(hit?.media)) { - for (const im of hit.media) { - const s = bswNormalizeImg(im); - if (s) return s; - } - } + if (Array.isArray(hit?.media)) { + for (const im of hit.media) { + const s = bswNormalizeImg(im); + if (s) return s; + } + } - return ""; + return ""; } - async function scanCategoryBSWAlgolia(ctx, prevDb, report) { - const t0 = Date.now(); + const t0 = Date.now(); - let collectionId = Number.isFinite(ctx.cat.bswCollectionId) ? ctx.cat.bswCollectionId : null; - if (!collectionId) { - try { - const { text: html } = await ctx.http.fetchTextWithRetry(ctx.cat.startUrl, `bsw:html:${ctx.cat.key}`, ctx.store.ua); - collectionId = bswExtractCollectionIdFromHtml(html); - if (collectionId) ctx.logger.ok(`${ctx.catPrefixOut} | BSW discovered collectionId=${collectionId}`); - else ctx.logger.warn(`${ctx.catPrefixOut} | BSW could not discover collectionId from HTML.`); - } catch (e) { - ctx.logger.warn(`${ctx.catPrefixOut} | BSW HTML fetch failed for collectionId discovery: ${e?.message || e}`); - } - } + let collectionId = Number.isFinite(ctx.cat.bswCollectionId) ? ctx.cat.bswCollectionId : null; + if (!collectionId) { + try { + const { text: html } = await ctx.http.fetchTextWithRetry( + ctx.cat.startUrl, + `bsw:html:${ctx.cat.key}`, + ctx.store.ua, + ); + collectionId = bswExtractCollectionIdFromHtml(html); + if (collectionId) ctx.logger.ok(`${ctx.catPrefixOut} | BSW discovered collectionId=${collectionId}`); + else ctx.logger.warn(`${ctx.catPrefixOut} | BSW could not discover collectionId from HTML.`); + } catch (e) { + ctx.logger.warn( + `${ctx.catPrefixOut} | BSW HTML fetch failed for collectionId discovery: ${e?.message || e}`, + ); + } + } - if (!collectionId) { - ctx.logger.warn(`${ctx.catPrefixOut} | BSW missing collectionId; defaulting to 1 page with 0 items.`); + if (!collectionId) { + ctx.logger.warn(`${ctx.catPrefixOut} | BSW missing collectionId; defaulting to 1 page with 0 items.`); - const discovered = new Map(); - const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel: ctx.store.name }); - const dbObj = buildDbObject(ctx, merged); - writeJsonAtomic(ctx.dbFile, dbObj); + const discovered = new Map(); + const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb( + prevDb, + discovered, + { storeLabel: ctx.store.name }, + ); + const dbObj = buildDbObject(ctx, merged); + writeJsonAtomic(ctx.dbFile, dbObj); - ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`); + ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`); - const elapsed = Date.now() - t0; - report.categories.push({ - store: ctx.store.name, - label: ctx.cat.label, - key: ctx.cat.key, - dbFile: ctx.dbFile, - scannedPages: 1, - discoveredUnique: 0, - newCount: newItems.length, - updatedCount: updatedItems.length, - removedCount: removedItems.length, - restoredCount: restoredItems.length, - elapsedMs: elapsed, - }); - report.totals.newCount += newItems.length; - report.totals.updatedCount += updatedItems.length; - report.totals.removedCount += removedItems.length; - report.totals.restoredCount += restoredItems.length; - addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems); - return; - } + const elapsed = Date.now() - t0; + report.categories.push({ + store: ctx.store.name, + label: ctx.cat.label, + key: ctx.cat.key, + dbFile: ctx.dbFile, + scannedPages: 1, + discoveredUnique: 0, + newCount: newItems.length, + updatedCount: updatedItems.length, + removedCount: removedItems.length, + restoredCount: restoredItems.length, + elapsedMs: elapsed, + }); + report.totals.newCount += newItems.length; + report.totals.updatedCount += updatedItems.length; + report.totals.removedCount += removedItems.length; + report.totals.restoredCount += restoredItems.length; + addCategoryResultToReport( + report, + ctx.store.name, + ctx.cat.label, + newItems, + updatedItems, + removedItems, + restoredItems, + ); + return; + } - const ruleContext = ctx.cat.bswRuleContext || ""; - const hitsPerPage = 50; + const ruleContext = ctx.cat.bswRuleContext || ""; + const hitsPerPage = 50; - const first = await bswFetchAlgoliaPage(ctx, collectionId, ruleContext, 0, hitsPerPage); - const result0 = first?.json?.results?.[0] || null; - const nbPages = result0 && Number.isFinite(result0.nbPages) ? result0.nbPages : 1; + const first = await bswFetchAlgoliaPage(ctx, collectionId, ruleContext, 0, hitsPerPage); + const result0 = first?.json?.results?.[0] || null; + const nbPages = result0 && Number.isFinite(result0.nbPages) ? result0.nbPages : 1; - const totalPages = Math.max(1, nbPages); - const scanPages = ctx.config.maxPages === null ? totalPages : Math.min(ctx.config.maxPages, totalPages); - ctx.logger.ok(`${ctx.catPrefixOut} | Pages: ${scanPages}${scanPages !== totalPages ? ` (cap from ${totalPages})` : ""}`); + const totalPages = Math.max(1, nbPages); + const scanPages = ctx.config.maxPages === null ? totalPages : Math.min(ctx.config.maxPages, totalPages); + ctx.logger.ok( + `${ctx.catPrefixOut} | Pages: ${scanPages}${scanPages !== totalPages ? ` (cap from ${totalPages})` : ""}`, + ); - const pageIdxs = []; - for (let p = 0; p < scanPages; p++) pageIdxs.push(p); + const pageIdxs = []; + for (let p = 0; p < scanPages; p++) pageIdxs.push(p); - let donePages = 0; + let donePages = 0; - const perPageItems = await require("../utils/async").parallelMapStaggered(pageIdxs, ctx.config.concurrency, ctx.config.staggerMs, async (page0, idx) => { - const pnum = idx + 1; - const r = page0 === 0 ? first : await bswFetchAlgoliaPage(ctx, collectionId, ruleContext, page0, hitsPerPage); + const perPageItems = await require("../utils/async").parallelMapStaggered( + pageIdxs, + ctx.config.concurrency, + ctx.config.staggerMs, + async (page0, idx) => { + const pnum = idx + 1; + const r = + page0 === 0 ? first : await bswFetchAlgoliaPage(ctx, collectionId, ruleContext, page0, hitsPerPage); - const res0 = r?.json?.results?.[0] || null; - const hits = res0 && Array.isArray(res0.hits) ? res0.hits : []; + const res0 = r?.json?.results?.[0] || null; + const hits = res0 && Array.isArray(res0.hits) ? res0.hits : []; - const items = []; - for (const h of hits) { - const it = bswHitToItem(h); - if (it) items.push(it); - } + const items = []; + for (const h of hits) { + const it = bswHitToItem(h); + if (it) items.push(it); + } - donePages++; - ctx.logger.ok( - `${ctx.catPrefixOut} | Page ${pageStr(pnum, pageIdxs.length)} | ${String(r.status || "").padEnd(3)} | ${pctStr(donePages, pageIdxs.length)} | items=${padLeft( - items.length, - 3 - )} | bytes=${kbStr(r.bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}` - ); + donePages++; + ctx.logger.ok( + `${ctx.catPrefixOut} | Page ${pageStr(pnum, pageIdxs.length)} | ${String(r.status || "").padEnd(3)} | ${pctStr(donePages, pageIdxs.length)} | items=${padLeft( + items.length, + 3, + )} | bytes=${kbStr(r.bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}`, + ); - return items; - }); + return items; + }, + ); - const discovered = new Map(); - let dups = 0; - for (const arr of perPageItems) { - for (const it of arr) { - if (discovered.has(it.url)) dups++; - discovered.set(it.url, it); - } - } + const discovered = new Map(); + let dups = 0; + for (const arr of perPageItems) { + for (const it of arr) { + if (discovered.has(it.url)) dups++; + discovered.set(it.url, it); + } + } - ctx.logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}${dups ? ` (${dups} dups)` : ""}`); + ctx.logger.ok( + `${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}${dups ? ` (${dups} dups)` : ""}`, + ); - const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel: ctx.store.name }); + const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, { + storeLabel: ctx.store.name, + }); - const dbObj = buildDbObject(ctx, merged); - writeJsonAtomic(ctx.dbFile, dbObj); + const dbObj = buildDbObject(ctx, merged); + writeJsonAtomic(ctx.dbFile, dbObj); - ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`); + ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`); - const elapsed = Date.now() - t0; - ctx.logger.ok( - `${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}` - ); + const elapsed = Date.now() - t0; + ctx.logger.ok( + `${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`, + ); - report.categories.push({ - store: ctx.store.name, - label: ctx.cat.label, - key: ctx.cat.key, - dbFile: ctx.dbFile, - scannedPages: scanPages, - discoveredUnique: discovered.size, - newCount: newItems.length, - updatedCount: updatedItems.length, - removedCount: removedItems.length, - restoredCount: restoredItems.length, - elapsedMs: elapsed, - }); - report.totals.newCount += newItems.length; - report.totals.updatedCount += updatedItems.length; - report.totals.removedCount += removedItems.length; - report.totals.restoredCount += restoredItems.length; + report.categories.push({ + store: ctx.store.name, + label: ctx.cat.label, + key: ctx.cat.key, + dbFile: ctx.dbFile, + scannedPages: scanPages, + discoveredUnique: discovered.size, + newCount: newItems.length, + updatedCount: updatedItems.length, + removedCount: removedItems.length, + restoredCount: restoredItems.length, + elapsedMs: elapsed, + }); + report.totals.newCount += newItems.length; + report.totals.updatedCount += updatedItems.length; + report.totals.removedCount += removedItems.length; + report.totals.restoredCount += restoredItems.length; - addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems); + addCategoryResultToReport( + report, + ctx.store.name, + ctx.cat.label, + newItems, + updatedItems, + removedItems, + restoredItems, + ); } function createStore(defaultUa) { - return { - key: "bsw", - name: "BSW", - host: "www.bswliquor.com", - ua: defaultUa, - scanCategory: scanCategoryBSWAlgolia, - categories: [ - { - key: "scotch-whisky", - label: "Scotch Whisky", - startUrl: "https://www.bswliquor.com/collections/scotch-whisky?page=1", - bswRuleContext: "scotch-whisky", - }, - { - key: "rum", - label: "Rum", - startUrl: "https://www.bswliquor.com/collections/rum?page=1", - bswRuleContext: "rum", - }, - { - key: "whisky", - label: "Whisky", - startUrl: "https://www.bswliquor.com/collections/whisky?page=1", - bswRuleContext: "whisky", - }, - ], - }; + return { + key: "bsw", + name: "BSW", + host: "www.bswliquor.com", + ua: defaultUa, + scanCategory: scanCategoryBSWAlgolia, + categories: [ + { + key: "scotch-whisky", + label: "Scotch Whisky", + startUrl: "https://www.bswliquor.com/collections/scotch-whisky?page=1", + bswRuleContext: "scotch-whisky", + }, + { + key: "rum", + label: "Rum", + startUrl: "https://www.bswliquor.com/collections/rum?page=1", + bswRuleContext: "rum", + }, + { + key: "whisky", + label: "Whisky", + startUrl: "https://www.bswliquor.com/collections/whisky?page=1", + bswRuleContext: "whisky", + }, + ], + }; } module.exports = { createStore }; diff --git a/src/stores/coop.js b/src/stores/coop.js index fb6f6e0..58e0e7c 100644 --- a/src/stores/coop.js +++ b/src/stores/coop.js @@ -11,20 +11,20 @@ const { addCategoryResultToReport } = require("../tracker/report"); /* ---------------- formatting ---------------- */ function kbStr(bytes) { - return humanBytes(bytes).padStart(8, " "); + return humanBytes(bytes).padStart(8, " "); } function secStr(ms) { - const s = Number.isFinite(ms) ? ms / 1000 : 0; - const t = Math.round(s * 10) / 10; - return (t < 10 ? `${t.toFixed(1)}s` : `${Math.round(s)}s`).padStart(7, " "); + const s = Number.isFinite(ms) ? ms / 1000 : 0; + const t = Math.round(s * 10) / 10; + return (t < 10 ? `${t.toFixed(1)}s` : `${Math.round(s)}s`).padStart(7, " "); } function pageStr(i, total) { - const w = String(total).length; - return `${padLeft(i, w)}/${total}`; + const w = String(total).length; + return `${padLeft(i, w)}/${total}`; } function pctStr(done, total) { - const pct = total ? Math.floor((done / total) * 100) : 0; - return `${padLeft(pct, 3)}%`; + const pct = total ? Math.floor((done / total) * 100) : 0; + return `${padLeft(pct, 3)}%`; } /* ---------------- co-op specifics ---------------- */ @@ -33,327 +33,352 @@ const BASE = "https://shoponlinewhisky-wine.coopwinespiritsbeer.com"; const REFERER = `${BASE}/worldofwhisky`; function coopHeaders(ctx, sourcepage) { - const coop = ctx.store.coop; - return { - Accept: "application/json, text/javascript, */*; q=0.01", - "Content-Type": "application/json", - Origin: BASE, - Referer: REFERER, + const coop = ctx.store.coop; + return { + Accept: "application/json, text/javascript, */*; q=0.01", + "Content-Type": "application/json", + Origin: BASE, + Referer: REFERER, - // these 4 are required on their API calls (matches browser) - SessionKey: coop.sessionKey, - chainID: coop.chainId, - storeID: coop.storeId, - appVersion: coop.appVersion, + // these 4 are required on their API calls (matches browser) + SessionKey: coop.sessionKey, + chainID: coop.chainId, + storeID: coop.storeId, + appVersion: coop.appVersion, - AUTH_TOKEN: "null", - CONNECTION_ID: "null", - SESSION_ID: coop.sessionId || "null", - TIMESTAMP: String(Date.now()), - sourcepage, - }; + AUTH_TOKEN: "null", + CONNECTION_ID: "null", + SESSION_ID: coop.sessionId || "null", + TIMESTAMP: String(Date.now()), + sourcepage, + }; } async function coopFetchText(ctx, url, label, { headers } = {}) { - return await ctx.http.fetchTextWithRetry(url, label, ctx.store.ua, { - method: "GET", - headers: headers || {}, - }); + return await ctx.http.fetchTextWithRetry(url, label, ctx.store.ua, { + method: "GET", + headers: headers || {}, + }); +} + +function extractVar(html, re) { + const m = String(html || "").match(re); + return m ? String(m[1] || "").trim() : ""; +} + +async function ensureCoopBootstrap(ctx) { + const coop = ctx.store.coop; + if (coop.sessionKey && coop.chainId && coop.storeId && coop.appVersion) return; + + const r = await coopFetchText(ctx, REFERER, "coop:bootstrap", { + headers: { + Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + Referer: REFERER, + }, + }); + + const html = r?.text || ""; + if (r?.status !== 200 || !html) { + throw new Error(`coop bootstrap failed: GET ${REFERER} => ${r.status}`); + } + + // Values are in