commit 470eb8ca56254f324cc49d55faf0bddd750558ec Author: Brennan Wilkes (Text Groove) Date: Mon Jan 19 20:00:40 2026 -0800 chore: initial code diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d63cbb4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ +node_modules/ +*.log + +# Data & reports live on the data branch +/data/ +/reports/ + +.worktrees/ + +# Generated viz artifacts live on the data branch +viz/data/ + +# Keep cron log out of git even on data branch +reports/cron.log diff --git a/bin/tracker.js b/bin/tracker.js new file mode 100755 index 0000000..f7df7d2 --- /dev/null +++ b/bin/tracker.js @@ -0,0 +1,10 @@ +#!/usr/bin/env node +"use strict"; + +const { main } = require("../src/main"); + +main().catch((e) => { + const msg = e && e.stack ? e.stack : String(e); + console.error(msg); + process.exitCode = 1; +}); diff --git a/scripts/bootstrap_clone.sh b/scripts/bootstrap_clone.sh new file mode 100755 index 0000000..53e4f21 --- /dev/null +++ b/scripts/bootstrap_clone.sh @@ -0,0 +1,87 @@ +#!/usr/bin/env bash +set -euo pipefail + +MAIN_BRANCH="${MAIN_BRANCH:-main}" +DATA_BRANCH="${DATA_BRANCH:-data}" +WORKTREE_DIR="${DATA_WORKTREE_DIR:-.worktrees/data}" +RUN_DAILY="${RUN_DAILY:-0}" # set RUN_DAILY=1 to run at the end + +# must be in a git repo root-ish +git rev-parse --is-inside-work-tree >/dev/null + +# ensure we have origin +if ! git remote get-url origin >/dev/null 2>&1; then + echo "ERROR: remote 'origin' not configured" >&2 + exit 1 +fi + +echo "[bootstrap] fetching..." 
+git fetch --prune origin + +# ensure local main exists and tracks origin/main (best effort) +if git show-ref --verify --quiet "refs/remotes/origin/$MAIN_BRANCH"; then + if git show-ref --verify --quiet "refs/heads/$MAIN_BRANCH"; then + git checkout -q "$MAIN_BRANCH" + git merge -q --ff-only "origin/$MAIN_BRANCH" || true + else + git checkout -q -b "$MAIN_BRANCH" "origin/$MAIN_BRANCH" + fi + git branch --set-upstream-to="origin/$MAIN_BRANCH" "$MAIN_BRANCH" >/dev/null 2>&1 || true +fi + +# ensure local data branch exists (from origin/data) +if git show-ref --verify --quiet "refs/remotes/origin/$DATA_BRANCH"; then + if git show-ref --verify --quiet "refs/heads/$DATA_BRANCH"; then + # fast-forward local data to origin/data when possible; otherwise leave it alone + git checkout -q "$DATA_BRANCH" + git merge -q --ff-only "origin/$DATA_BRANCH" || true + else + git checkout -q -b "$DATA_BRANCH" "origin/$DATA_BRANCH" + fi + git branch --set-upstream-to="origin/$DATA_BRANCH" "$DATA_BRANCH" >/dev/null 2>&1 || true +else + echo "ERROR: origin/$DATA_BRANCH not found. Did you push the data branch?" >&2 + exit 1 +fi + +# go back to main (so run_daily can merge main->data in the worktree cleanly) +git checkout -q "$MAIN_BRANCH" || true + +echo "[bootstrap] preparing worktree..." +git worktree prune >/dev/null 2>&1 || true + +# if dir exists but isn't a valid worktree checkout, remove it +if [[ -e "$WORKTREE_DIR" && ! -e "$WORKTREE_DIR/.git" ]]; then + rm -rf "$WORKTREE_DIR" +fi + +# ensure worktree exists for data branch +if [[ ! 
-e "$WORKTREE_DIR/.git" ]]; then + mkdir -p "$(dirname "$WORKTREE_DIR")" + git worktree add -f -q "$WORKTREE_DIR" "$DATA_BRANCH" +fi + +# keep worktree data branch in a reasonable state +( + cd "$WORKTREE_DIR" + git fetch -q --prune origin || true + git merge -q --ff-only "origin/$DATA_BRANCH" || true + # merge main into data if main exists (best effort, matches your run_daily behavior) + if git show-ref --verify --quiet "refs/heads/$MAIN_BRANCH"; then + git merge -q --no-edit "$MAIN_BRANCH" || true + fi +) + +echo "[bootstrap] done." +echo " main repo: $(pwd)" +echo " data worktree: $(cd "$WORKTREE_DIR" && pwd)" + +if [[ "$RUN_DAILY" == "1" ]]; then + echo "[bootstrap] running daily..." + NODE_BIN="${NODE_BIN:-$(command -v node || true)}" + if [[ -z "$NODE_BIN" ]]; then + echo "ERROR: node not found in PATH" >&2 + exit 1 + fi + NODE_BIN="$NODE_BIN" bash scripts/run_daily.sh || true +fi diff --git a/scripts/cron_setup.sh b/scripts/cron_setup.sh new file mode 100755 index 0000000..72a029c --- /dev/null +++ b/scripts/cron_setup.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +set -euo pipefail + +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +MAIN_BRANCH="${MAIN_BRANCH:-main}" +DATA_BRANCH="${DATA_BRANCH:-data}" + +NODE_BIN="${NODE_BIN:-}" +if [[ -z "$NODE_BIN" ]]; then + NODE_BIN="$(command -v node || true)" +fi +if [[ -z "$NODE_BIN" ]]; then + echo "ERROR: node not found in PATH" >&2 + exit 1 +fi + +mkdir -p "$REPO_ROOT/reports" + +# Default: run 4 times/day (every 6 hours). Override via: +# CRON_SCHEDULE="15 */4 * * *" (example) +CRON_SCHEDULE="${CRON_SCHEDULE:-0 */6 * * *}" + +# Use a stable marker so we can replace old lines (including the previous "daily" one). +MARKER="# spirit-tracker" +CRON_LINE="$CRON_SCHEDULE NODE_BIN=$NODE_BIN MAIN_BRANCH=$MAIN_BRANCH DATA_BRANCH=$DATA_BRANCH bash \"$REPO_ROOT/scripts/run_daily.sh\" >> \"$REPO_ROOT/reports/cron.log\" 2>&1 $MARKER" + +# Install (idempotent): remove any previous line with the marker, then append. 
+{ crontab -l 2>/dev/null | grep -vF "$MARKER" || true; echo "$CRON_LINE"; } | crontab - + +echo "Installed cron job:" +echo "$CRON_LINE" diff --git a/scripts/repo_reset.sh b/scripts/repo_reset.sh new file mode 100755 index 0000000..9c659f8 --- /dev/null +++ b/scripts/repo_reset.sh @@ -0,0 +1,7 @@ +rm -rf .git .worktrees +rm -rf data/db reports +bash scripts/repo_setup.sh --force + +git remote add origin git@github.com:brennanwilkes/spirit-tracker.git +git push -u origin main --force +git push -u origin data --force diff --git a/scripts/repo_setup.sh b/scripts/repo_setup.sh new file mode 100755 index 0000000..877e1da --- /dev/null +++ b/scripts/repo_setup.sh @@ -0,0 +1,95 @@ +#!/usr/bin/env bash +set -euo pipefail + +MAIN_BRANCH="${MAIN_BRANCH:-main}" +DATA_BRANCH="${DATA_BRANCH:-data}" + +FORCE=0 +if [[ "${1:-}" == "--force" ]]; then + FORCE=1 +fi + +if [[ -d .git ]]; then + if [[ $FORCE -eq 1 ]]; then + rm -rf .git + else + echo "ERROR: .git already exists. Remove it first or run: $0 --force" >&2 + exit 1 + fi +fi + +git init -q +git branch -M "$MAIN_BRANCH" + +mkdir -p data/db reports .worktrees viz/data + +# Move existing DB snapshots (e.g. kwm__scotch__2b16b533.json) into data/db so +# they don't end up committed on the main branch. +shopt -s nullglob +for f in *__*__*.json; do + mv -f "$f" data/db/ +done +shopt -u nullglob + +# Ensure expected runtime dirs exist (they are ignored on main). +mkdir -p data/db reports viz/data + +# Move old root-level DB JSONs into data/db if present. 
+shopt -s nullglob +for f in *.json; do + if [[ "$f" =~ __[0-9a-f]{8}\.json$ ]]; then + mv -f "$f" "data/db/$f" + fi +done +shopt -u nullglob + +cat > .gitignore <<'GITIGNORE' +node_modules/ +*.log + +# Data & reports live on the data branch +/data/ +/reports/ + +.worktrees/ + +# Generated viz artifacts live on the data branch +viz/data/ + +# Keep cron log out of git even on data branch +reports/cron.log +GITIGNORE + +# Make sure scripts/tools are executable (best effort) +chmod +x bin/tracker.js 2>/dev/null || true +chmod +x scripts/*.sh 2>/dev/null || true +chmod +x tools/*.js 2>/dev/null || true + +git add -A +if git diff --cached --quiet; then + echo "Nothing to commit on $MAIN_BRANCH (did you already commit?)" >&2 +else + git commit -m "chore: initial code" -q +fi + +# Create data branch, un-ignore data and reports (and viz/data). +if git show-ref --verify --quiet "refs/heads/$DATA_BRANCH"; then + echo "Data branch already exists: $DATA_BRANCH" >&2 +else + git checkout -b "$DATA_BRANCH" -q + + cat > .gitignore <<'GITIGNORE' +node_modules/ +*.log + +# Keep cron log out of git +reports/cron.log +GITIGNORE + + git add .gitignore + git commit -m "chore: enable tracking of data + reports + viz on data branch" -q + + git checkout "$MAIN_BRANCH" -q +fi + +echo "Repo setup complete. Main=$MAIN_BRANCH Data=$DATA_BRANCH" diff --git a/scripts/run_daily.sh b/scripts/run_daily.sh new file mode 100755 index 0000000..5503738 --- /dev/null +++ b/scripts/run_daily.sh @@ -0,0 +1,90 @@ +#!/usr/bin/env bash +set -euo pipefail + +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +MAIN_BRANCH="${MAIN_BRANCH:-main}" +DATA_BRANCH="${DATA_BRANCH:-data}" +WORKTREE_DIR="${DATA_WORKTREE_DIR:-$REPO_ROOT/.worktrees/data}" + +NODE_BIN="${NODE_BIN:-}" +if [[ -z "$NODE_BIN" ]]; then + NODE_BIN="$(command -v node || true)" +fi +if [[ -z "$NODE_BIN" ]]; then + echo "ERROR: node not found in PATH" >&2 + exit 1 +fi + +cd "$REPO_ROOT" + +git rev-parse --is-inside-work-tree >/dev/null + +# Ensure data branch exists. +if ! git show-ref --verify --quiet "refs/heads/$DATA_BRANCH"; then + echo "ERROR: data branch not found: $DATA_BRANCH" >&2 + exit 1 +fi + +# Create/repair worktree for data branch. +git worktree prune >/dev/null 2>&1 || true + +# If the dir exists but isn't a valid worktree checkout, remove it properly. +if [[ -e "$WORKTREE_DIR" && ! -e "$WORKTREE_DIR/.git" ]]; then + rm -rf "$WORKTREE_DIR" +fi + +# If the worktree directory is missing, add it (force is safe after prune). +if [[ ! -e "$WORKTREE_DIR/.git" ]]; then + mkdir -p "$(dirname "$WORKTREE_DIR")" + git worktree add -f -q "$WORKTREE_DIR" "$DATA_BRANCH" +fi + +cd "$WORKTREE_DIR" + +# Keep data branch up-to-date with main (merge only when main moved). +if git show-ref --verify --quiet "refs/heads/$MAIN_BRANCH"; then + if ! git merge -q --no-edit "$MAIN_BRANCH"; then + echo "ERROR: failed to merge $MAIN_BRANCH into $DATA_BRANCH" >&2 + exit 1 + fi +fi + +# Run tracker (writes data/db + a plain report file in reports/) +"$NODE_BIN" bin/tracker.js + +# Build viz artifacts on the data branch +"$NODE_BIN" tools/build_viz_index.js +"$NODE_BIN" tools/build_viz_commits.js +"$NODE_BIN" tools/build_viz_recent.js + +# Stage only data/report/viz outputs +git add -A data/db reports viz/data + +if git diff --cached --quiet; then + echo "No data/report/viz changes to commit." >&2 + exit 0 +fi + +# Commit message: include the latest report as the commit body. 
+ts="$(date -u +'%Y-%m-%dT%H:%M:%SZ')" + +REPORT_FILE="" +if compgen -G "reports/*.txt" > /dev/null; then + REPORT_FILE="$(ls -1t reports/*.txt | head -n 1 || true)" +fi + +MSG_FILE="$(mktemp)" +{ + echo "run: ${ts}" + echo + if [[ -n "$REPORT_FILE" && -f "$REPORT_FILE" ]]; then + cat "$REPORT_FILE" + else + echo "(no report file found in reports/*.txt)" + fi +} > "$MSG_FILE" + +git commit -F "$MSG_FILE" -q +rm -f "$MSG_FILE" + +git push -q diff --git a/scripts/serve_viz.sh b/scripts/serve_viz.sh new file mode 100755 index 0000000..dd045dc --- /dev/null +++ b/scripts/serve_viz.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +set -euo pipefail + +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +DATA_BRANCH="${DATA_BRANCH:-data}" +WORKTREE_DIR="${DATA_WORKTREE_DIR:-$REPO_ROOT/.worktrees/data}" + +NODE_BIN="${NODE_BIN:-}" +if [[ -z "$NODE_BIN" ]]; then + NODE_BIN="$(command -v node || true)" +fi +if [[ -z "$NODE_BIN" ]]; then + echo "ERROR: node not found in PATH" >&2 + exit 1 +fi + +cd "$REPO_ROOT" +git rev-parse --is-inside-work-tree >/dev/null + +# Ensure data branch exists. +if ! git show-ref --verify --quiet "refs/heads/$DATA_BRANCH"; then + echo "ERROR: data branch not found: $DATA_BRANCH" >&2 + exit 1 +fi + +# Create/repair worktree for data branch. +git worktree prune >/dev/null 2>&1 || true + +# If dir exists but isn't a valid worktree checkout, remove it. +if [[ -e "$WORKTREE_DIR" && ! -e "$WORKTREE_DIR/.git" ]]; then + rm -rf "$WORKTREE_DIR" +fi + +# If missing, add it. +if [[ ! -e "$WORKTREE_DIR/.git" ]]; then + mkdir -p "$(dirname "$WORKTREE_DIR")" + git worktree add -f -q "$WORKTREE_DIR" "$DATA_BRANCH" +fi + +cd "$WORKTREE_DIR" + +# Ensure viz artifacts exist (helpful if you haven't run daily yet) +if [[ ! -f "viz/data/index.json" ]]; then + echo "viz/data/index.json missing; building..." >&2 + "$NODE_BIN" tools/build_viz_index.js +fi +if [[ ! -f "viz/data/db_commits.json" ]]; then + echo "viz/data/db_commits.json missing; building..." 
>&2 + "$NODE_BIN" tools/build_viz_commits.js +fi +if [[ ! -f "viz/data/recent.json" ]]; then + echo "viz/data/recent.json missing; building..." >&2 + "$NODE_BIN" tools/build_viz_recent.js +fi + +exec "$NODE_BIN" viz/serve.js diff --git a/src/core/http.js b/src/core/http.js new file mode 100644 index 0000000..b55dc83 --- /dev/null +++ b/src/core/http.js @@ -0,0 +1,232 @@ +"use strict"; + +const { setTimeout: sleep } = require("timers/promises"); + +class RetryableError extends Error { + constructor(msg) { + super(msg); + this.name = "RetryableError"; + } +} + +function isRetryable(e) { + if (!e) return false; + if (e.name === "AbortError") return true; + if (e instanceof RetryableError) return true; + const msg = String(e.message || e); + return /ECONNRESET|ENOTFOUND|EAI_AGAIN|ETIMEDOUT|socket hang up|fetch failed/i.test(msg); +} + +function backoffMs(attempt) { + const base = Math.min(12000, 500 * Math.pow(2, attempt)); + const jitter = Math.floor(Math.random() * 400); + return base + jitter; +} + +async function safeText(res) { + try { + return await res.text(); + } catch { + return ""; + } +} + +/* ---------------- Cookies (simple jar) ---------------- */ + +// host -> Map(cookieName -> "name=value") +function createCookieJar() { + const jar = new Map(); + + function getHost(u) { + try { + return new URL(u).hostname || ""; + } catch { + return ""; + } + } + + function parseSetCookieLine(line) { + // "name=value; Path=/; Secure; HttpOnly; ..." 
+ const s = String(line || "").trim(); + if (!s) return null; + const first = s.split(";")[0] || ""; + const eq = first.indexOf("="); + if (eq <= 0) return null; + const name = first.slice(0, eq).trim(); + const value = first.slice(eq + 1).trim(); + if (!name) return null; + return { name, pair: `${name}=${value}` }; + } + + function getSetCookieArray(headers) { + // Node/undici may support headers.getSetCookie() + if (headers && typeof headers.getSetCookie === "function") { + try { + const arr = headers.getSetCookie(); + return Array.isArray(arr) ? arr : []; + } catch { + // fall through + } + } + + // Fallback: single combined header (may lose multiples, but better than nothing) + const one = headers?.get ? headers.get("set-cookie") : null; + if (!one) return []; + + // Best-effort split. This is imperfect with Expires=... commas, but OK for most WP cookies. + // If this causes issues later, we can replace with a more robust splitter. + return String(one) + .split(/,(?=[^;,]*=)/g) + .map((x) => x.trim()) + .filter(Boolean); + } + + function storeFromResponse(url, res) { + const host = getHost(res?.url || url); + if (!host) return; + + const lines = getSetCookieArray(res?.headers); + if (!lines.length) return; + + let m = jar.get(host); + if (!m) { + m = new Map(); + jar.set(host, m); + } + + for (const line of lines) { + const c = parseSetCookieLine(line); + if (!c) continue; + m.set(c.name, c.pair); + } + } + + function cookieHeaderFor(url) { + const host = getHost(url); + if (!host) return ""; + const m = jar.get(host); + if (!m || m.size === 0) return ""; + return [...m.values()].join("; "); + } + + return { storeFromResponse, cookieHeaderFor }; +} + +/* ---------------- HTTP client ---------------- */ + +function createHttpClient({ maxRetries, timeoutMs, defaultUa, logger }) { + let inflight = 0; + let reqSeq = 0; + + const cookieJar = createCookieJar(); + + function inflightStr() { + return `inflight=${inflight}`; + } + + async function fetchWithRetry( + 
url, + tag, + ua, + { mode = "text", method = "GET", headers = {}, body = null, cookies = true } = {} + ) { + for (let attempt = 0; attempt <= maxRetries; attempt++) { + const reqId = ++reqSeq; + const start = Date.now(); + + inflight++; + logger?.dbg?.( + `REQ#${reqId} START ${tag} attempt=${attempt + 1}/${maxRetries + 1} ${url} (${inflightStr()})` + ); + + try { + const ctrl = new AbortController(); + const t = setTimeout(() => ctrl.abort(), timeoutMs); + + const cookieHdr = + cookies && !Object.prototype.hasOwnProperty.call(headers, "Cookie") && !Object.prototype.hasOwnProperty.call(headers, "cookie") + ? cookieJar.cookieHeaderFor(url) + : ""; + + const res = await fetch(url, { + method, + redirect: "follow", + headers: { + "user-agent": ua || defaultUa, + "accept-language": "en-US,en;q=0.9", + ...(mode === "text" + ? { accept: "text/html,application/xhtml+xml", "cache-control": "no-cache" } + : { accept: "application/json, text/plain, */*" }), + ...(cookieHdr ? { cookie: cookieHdr } : {}), + ...headers, + }, + body, + signal: ctrl.signal, + }).finally(() => clearTimeout(t)); + + const status = res.status; + const finalUrl = res.url || url; + + // capture cookies for subsequent requests to same host + if (cookies) cookieJar.storeFromResponse(url, res); + + logger?.dbg?.(`REQ#${reqId} HTTP ${status} ${tag} finalUrl=${finalUrl}`); + + if (status === 429 || status === 408 || (status >= 500 && status <= 599)) { + throw new RetryableError(`HTTP ${status}`); + } + if (status >= 400) { + const bodyTxt = await safeText(res); + throw new Error( + `HTTP ${status} bodyHead=${String(bodyTxt).slice(0, 160).replace(/\s+/g, " ")}` + ); + } + + if (mode === "json") { + const txt = await res.text(); + const ms = Date.now() - start; + let json; + try { + json = JSON.parse(txt); + } catch (e) { + throw new RetryableError(`Bad JSON: ${e?.message || e}`); + } + return { json, ms, bytes: txt.length, status, finalUrl }; + } + + const text = await res.text(); + if (!text || text.length 
< 200) throw new RetryableError(`Short HTML bytes=${text.length}`); + + const ms = Date.now() - start; + return { text, ms, bytes: text.length, status, finalUrl }; + } catch (e) { + const retryable = isRetryable(e); + logger?.dbg?.( + `REQ#${reqId} ERROR ${tag} retryable=${retryable} err=${e?.message || e} (${inflightStr()})` + ); + + if (!retryable || attempt === maxRetries) throw e; + + const delay = backoffMs(attempt); + logger?.warn?.(`Request failed, retrying in ${delay}ms (${attempt + 1}/${maxRetries})`); + await sleep(delay); + } finally { + inflight--; + logger?.dbg?.(`REQ#${reqId} END ${tag} (${inflightStr()})`); + } + } + throw new Error("unreachable"); + } + + function fetchTextWithRetry(url, tag, ua, opts) { + return fetchWithRetry(url, tag, ua, { mode: "text", ...(opts || {}) }); + } + + function fetchJsonWithRetry(url, tag, ua, opts) { + return fetchWithRetry(url, tag, ua, { mode: "json", ...(opts || {}) }); + } + + return { fetchTextWithRetry, fetchJsonWithRetry, inflightStr }; +} + +module.exports = { createHttpClient, RetryableError }; diff --git a/src/core/logger.js b/src/core/logger.js new file mode 100644 index 0000000..a2cd0b9 --- /dev/null +++ b/src/core/logger.js @@ -0,0 +1,58 @@ +"use strict"; + +const { C, color } = require("../utils/ansi"); +const { ts } = require("../utils/time"); + +function createLogger({ debug = false, colorize: wantColor = true } = {}) { + const isTTY = Boolean(process.stdout && process.stdout.isTTY); + const enabled = Boolean(wantColor && isTTY); + + function ok(msg) { + console.log(color(`[OK ${ts()}] `, C.green, enabled) + String(msg)); + } + + function warn(msg) { + console.log(color(`[WARN ${ts()}] `, C.yellow, enabled) + String(msg)); + } + + function err(msg) { + console.error(color(`[ERR ${ts()}] `, C.red, enabled) + String(msg)); + } + + function info(msg) { + if (debug) console.log(color(`[INFO ${ts()}] `, C.cyan, enabled) + String(msg)); + } + + function dbg(msg) { + if (debug) console.log(color(`[DEBUG 
${ts()}] `, C.gray, enabled) + String(msg)); + } + + function dim(s) { + return color(s, C.dim, enabled); + } + + function bold(s) { + return color(s, C.bold, enabled); + } + + function paint(s, code) { + return color(s, code, enabled); + } + + return { + debug, + isTTY, + colorize: enabled, + C, + ok, + warn, + err, + info, + dbg, + dim, + bold, + color: paint, + }; +} + +module.exports = { createLogger }; diff --git a/src/main.js b/src/main.js new file mode 100644 index 0000000..029a3fc --- /dev/null +++ b/src/main.js @@ -0,0 +1,82 @@ +#!/usr/bin/env node +"use strict"; + +const fs = require("fs"); +const path = require("path"); + +const { parseArgs, clampInt } = require("./utils/args"); +const { isoTimestampFileSafe } = require("./utils/time"); + +const { createLogger } = require("./core/logger"); +const { createHttpClient } = require("./core/http"); + +const { createStores, parseProductsSierra } = require("./stores"); +const { runAllStores } = require("./tracker/run_all"); +const { renderFinalReport } = require("./tracker/report"); +const { ensureDir } = require("./tracker/db"); + +const DEFAULT_UA = + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0 Safari/537.36"; + +function resolveDir(p, fallback) { + const v = String(p || "").trim(); + if (!v) return fallback; + return path.isAbsolute(v) ? v : path.join(process.cwd(), v); +} + +async function main() { + if (typeof fetch !== "function") { + throw new Error("Global fetch() not found. Please use Node.js 18+ (or newer). "); + } + + const args = parseArgs(process.argv.slice(2)); + + const logger = createLogger({ debug: args.debug, colorize: true }); + + const config = { + debug: args.debug, + maxPages: args.maxPages, + concurrency: args.concurrency ?? clampInt(process.env.CONCURRENCY, 6, 1, 64), + staggerMs: args.staggerMs ?? 
clampInt(process.env.STAGGER_MS, 150, 0, 5000), + maxRetries: clampInt(process.env.MAX_RETRIES, 6, 0, 20), + timeoutMs: clampInt(process.env.TIMEOUT_MS, 25000, 1000, 120000), + discoveryGuess: args.guess ?? clampInt(process.env.DISCOVERY_GUESS, 20, 1, 5000), + discoveryStep: args.step ?? clampInt(process.env.DISCOVERY_STEP, 5, 1, 500), + categoryConcurrency: clampInt(process.env.CATEGORY_CONCURRENCY, 5, 1, 64), + defaultUa: DEFAULT_UA, + defaultParseProducts: parseProductsSierra, + dbDir: resolveDir(args.dataDir ?? process.env.DATA_DIR, path.join(process.cwd(), "data", "db")), + reportDir: resolveDir(args.reportDir ?? process.env.REPORT_DIR, path.join(process.cwd(), "reports")), + }; + + ensureDir(config.dbDir); + ensureDir(config.reportDir); + + const http = createHttpClient({ maxRetries: config.maxRetries, timeoutMs: config.timeoutMs, defaultUa: config.defaultUa, logger }); + const stores = createStores({ defaultUa: config.defaultUa }); + + const report = await runAllStores(stores, { config, logger, http }); + + const reportTextColor = renderFinalReport(report, { dbDir: config.dbDir, colorize: logger.colorize }); + process.stdout.write(reportTextColor); + + const reportTextPlain = renderFinalReport(report, { dbDir: config.dbDir, colorize: false }); + const file = path.join(config.reportDir, `${isoTimestampFileSafe(new Date())}.txt`); + try { + fs.writeFileSync(file, reportTextPlain, "utf8"); + logger.ok(`Report saved: ${logger.dim(file)}`); + } catch (e) { + logger.warn(`Report save failed: ${e?.message || e}`); + } +} + +module.exports = { main }; + +if (require.main === module) { + main().catch((e) => { + const msg = e && e.stack ? 
e.stack : String(e); + // no logger here; keep simple + console.error(msg); + process.exitCode = 1; + }); +} diff --git a/src/stores/bcl.js b/src/stores/bcl.js new file mode 100644 index 0000000..03b33cf --- /dev/null +++ b/src/stores/bcl.js @@ -0,0 +1,332 @@ +"use strict"; + +const { normalizeCspc } = require("../utils/sku"); +const { humanBytes } = require("../utils/bytes"); +const { padLeft, padRight } = require("../utils/string"); + +const { mergeDiscoveredIntoDb } = require("../tracker/merge"); +const { buildDbObject, writeJsonAtomic } = require("../tracker/db"); +const { addCategoryResultToReport } = require("../tracker/report"); + +function kbStr(bytes) { + return humanBytes(bytes).padStart(8, " "); +} + +function secStr(ms) { + const s = Number.isFinite(ms) ? ms / 1000 : 0; + const tenths = Math.round(s * 10) / 10; + let out; + if (tenths < 10) out = `${tenths.toFixed(1)}s`; + else out = `${Math.round(s)}s`; + return out.padStart(7, " "); +} + +function pageStr(i, total) { + const leftW = String(total).length; + return `${padLeft(i, leftW)}/${total}`; +} + +function pctStr(done, total) { + const pct = total ? 
Math.floor((done / total) * 100) : 0; + return `${padLeft(pct, 3)}%`; +} + +function cad(n) { + const x = Number(n); + if (!Number.isFinite(x)) return ""; + return `$${x.toFixed(2)}`; +} + +function asNumber(n) { + if (n == null) return NaN; + if (typeof n === "number") return n; + const t = String(n).trim(); + if (!t) return NaN; + const x = Number(t.replace(/[^0-9.]/g, "")); + return x; +} + +function bclTotalHits(json) { + const t = json?.hits?.total; + if (typeof t === "number") return t; + if (t && typeof t.value === "number") return t.value; // ES-style + return 0; +} + +function bclIsInStock(src) { + // Prefer explicit text if present (matches site UI) + const candidates = [ + src?.availability, + src?.availabilityText, + src?.availabilityStatus, + src?.availability_status, + src?.stockStatus, + src?.stock_status, + src?.status, + src?.statusText, + ] + .map((v) => (v == null ? "" : String(v))) + .filter(Boolean); + + for (const s of candidates) { + if (/out of stock/i.test(s)) return false; + if (/\bin stock\b/i.test(s)) return true; + } + + // Fallback only: units + const units = Number(src?.availableUnits); + if (Number.isFinite(units)) return units > 0; + + // If we can't tell, keep it (better than dropping 90% of a category) + return true; +} + +function bclNormalizeAbsUrl(raw) { + const s = String(raw || "").trim(); + if (!s) return ""; + if (s.startsWith("//")) return `https:${s}`; + if (/^https?:\/\//i.test(s)) return s; + try { + return new URL(s, "https://www.bcliquorstores.com/").toString(); + } catch { + return s; + } +} + +function bclPickImage(src) { + const cands = [ + src?.imageUrl, + src?.imageURL, + src?.image, + src?.thumbnail, + src?.thumbnailUrl, + src?.thumbnailURL, + src?.primaryImage, + src?.primaryImageUrl, + ]; + + for (const c of cands) { + if (typeof c === "string" && c.trim()) return bclNormalizeAbsUrl(c); + } + + const arrs = [src?.images, src?.imageUrls, src?.image_urls]; + for (const a of arrs) { + if (!Array.isArray(a) || 
!a.length) continue; + const v = a[0]; + if (typeof v === "string" && v.trim()) return bclNormalizeAbsUrl(v); + if (v && typeof v === "object") { + const s = String(v.src || v.url || "").trim(); + if (s) return bclNormalizeAbsUrl(s); + } + } + + return ""; +} + +function bclHitToItem(hit) { + const src = hit?._source || null; + if (!src) return null; + + const skuRaw = src.sku != null ? String(src.sku).trim() : ""; + if (!skuRaw) return null; + + // SKU in URL (requested) + const url = `https://www.bcliquorstores.com/product/${encodeURIComponent(skuRaw)}`; + + const name = String(src.name || "").trim(); + if (!name) return null; + + // Sale support: pick currentPrice when present; otherwise regularPrice. + const current = asNumber(src.currentPrice); + const regular = asNumber(src.regularPrice); + const price = cad(Number.isFinite(current) ? current : regular); + + const sku = normalizeCspc(url); + + const inStock = bclIsInStock(src); + if (!inStock) return null; + + // ✅ Fix: BCL appears to serve .jpg (not .jpeg) for these imagecache URLs. + // Also use https. + const img = `https://www.bcliquorstores.com/sites/default/files/imagecache/height400px/${encodeURIComponent( + skuRaw + )}.jpg`; + + return { name, price, url, sku, img }; +} + + + +async function bclFetchBrowsePage(ctx, page1, size) { + const type = ctx.cat.bclType; // e.g. 
"rum" or "whisky / whiskey" + const category = "spirits"; + const sort = "featuredProducts:desc"; + + const u = new URL("https://www.bcliquorstores.com/ajax/browse"); + u.searchParams.set("category", category); + u.searchParams.set("type", type); + u.searchParams.set("sort", sort); + u.searchParams.set("size", String(size)); + u.searchParams.set("page", String(page1)); + + const referer = + `https://www.bcliquorstores.com/product-catalogue?` + + `category=${encodeURIComponent(category)}` + + `&type=${encodeURIComponent(type)}` + + `&sort=${encodeURIComponent(sort)}` + + `&page=${encodeURIComponent(String(page1))}`; + + return await ctx.http.fetchJsonWithRetry(u.toString(), `bcl:${ctx.cat.key}:p${page1}`, ctx.store.ua, { + method: "GET", + headers: { + Accept: "application/json, text/plain, */*", + Referer: referer, + Origin: "https://www.bcliquorstores.com", + }, + }); +} + +async function scanCategoryBCLAjax(ctx, prevDb, report) { + const t0 = Date.now(); + const size = 24; + + let first; + try { + first = await bclFetchBrowsePage(ctx, 1, size); + } catch (e) { + ctx.logger.warn(`${ctx.catPrefixOut} | BCL browse fetch failed: ${e?.message || e}`); + + const discovered = new Map(); + const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered); + const dbObj = buildDbObject(ctx, merged); + writeJsonAtomic(ctx.dbFile, dbObj); + + const elapsed = Date.now() - t0; + report.categories.push({ + store: ctx.store.name, + label: ctx.cat.label, + key: ctx.cat.key, + dbFile: ctx.dbFile, + scannedPages: 1, + discoveredUnique: 0, + newCount: newItems.length, + updatedCount: updatedItems.length, + removedCount: removedItems.length, + restoredCount: restoredItems.length, + elapsedMs: elapsed, + }); + report.totals.newCount += newItems.length; + report.totals.updatedCount += updatedItems.length; + report.totals.removedCount += removedItems.length; + report.totals.restoredCount += restoredItems.length; + 
addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems); + return; + } + + const total = bclTotalHits(first?.json); + const totalPages = Math.max(1, Math.ceil(total / size)); + const scanPages = ctx.config.maxPages === null ? totalPages : Math.min(ctx.config.maxPages, totalPages); + + ctx.logger.ok(`${ctx.catPrefixOut} | Total=${total} Size=${size} Pages: ${scanPages}${scanPages !== totalPages ? ` (cap from ${totalPages})` : ""}`); + + const pageNums = []; + for (let p = 1; p <= scanPages; p++) pageNums.push(p); + + let donePages = 0; + + const perPageItems = await require("../utils/async").parallelMapStaggered( + pageNums, + ctx.config.concurrency, + ctx.config.staggerMs, + async (page1, idx) => { + const r = page1 === 1 ? first : await bclFetchBrowsePage(ctx, page1, size); + const hits = Array.isArray(r?.json?.hits?.hits) ? r.json.hits.hits : []; + + const items = []; + for (const h of hits) { + const it = bclHitToItem(h); + if (it) items.push(it); + } + + donePages++; + ctx.logger.ok( + `${ctx.catPrefixOut} | Page ${pageStr(idx + 1, pageNums.length)} | ${String(r.status || "").padEnd(3)} | ${pctStr(donePages, pageNums.length)} | items=${padLeft( + items.length, + 3 + )} | bytes=${kbStr(r.bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}` + ); + + return items; + } + ); + + const discovered = new Map(); + let dups = 0; + for (const arr of perPageItems) { + for (const it of arr) { + if (discovered.has(it.url)) dups++; + discovered.set(it.url, it); + } + } + + ctx.logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}${dups ? 
` (${dups} dups)` : ""}`); + + const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered); + + const dbObj = buildDbObject(ctx, merged); + writeJsonAtomic(ctx.dbFile, dbObj); + + ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`); + + const elapsed = Date.now() - t0; + ctx.logger.ok( + `${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}` + ); + + report.categories.push({ + store: ctx.store.name, + label: ctx.cat.label, + key: ctx.cat.key, + dbFile: ctx.dbFile, + scannedPages: scanPages, + discoveredUnique: discovered.size, + newCount: newItems.length, + updatedCount: updatedItems.length, + removedCount: removedItems.length, + restoredCount: removedItems.length, + elapsedMs: elapsed, + }); + report.totals.newCount += newItems.length; + report.totals.updatedCount += updatedItems.length; + report.totals.removedCount += removedItems.length; + report.totals.restoredCount += restoredItems.length; + + addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems); +} + +function createStore(defaultUa) { + return { + key: "bcl", + name: "BCL", + host: "www.bcliquorstores.com", + ua: defaultUa, + scanCategory: scanCategoryBCLAjax, // JSON-driven (async browse) + categories: [ + { + key: "whisky", + label: "Whisky / Whiskey", + // informational only; scan uses ajax/browse + startUrl: "https://www.bcliquorstores.com/product-catalogue?category=spirits&type=whisky%20/%20whiskey&sort=featuredProducts:desc&page=1", + bclType: "whisky / whiskey", + }, + { + key: "rum", + label: "Rum", + startUrl: "https://www.bcliquorstores.com/product-catalogue?category=spirits&type=rum&sort=featuredProducts:desc&page=1", + bclType: "rum", + }, + ], + }; +} + +module.exports = { createStore }; diff 
--git a/src/stores/bsw.js b/src/stores/bsw.js new file mode 100644 index 0000000..deedaa2 --- /dev/null +++ b/src/stores/bsw.js @@ -0,0 +1,387 @@ +"use strict"; + +const { cleanText } = require("../utils/html"); +const { normalizeCspc } = require("../utils/sku"); +const { padLeft, padRight } = require("../utils/string"); +const { humanBytes } = require("../utils/bytes"); + +const { mergeDiscoveredIntoDb } = require("../tracker/merge"); +const { buildDbObject, writeJsonAtomic } = require("../tracker/db"); +const { addCategoryResultToReport } = require("../tracker/report"); + +const BSW_ALGOLIA_APP_ID = "25TO6MPUL0"; +const BSW_ALGOLIA_API_KEY = "1aa0c19fe6a0931340570bd358c2c9d2"; +const BSW_ALGOLIA_URL = `https://${BSW_ALGOLIA_APP_ID.toLowerCase()}-dsn.algolia.net/1/indexes/*/queries`; + +function usd(n) { + if (!Number.isFinite(n)) return ""; + return `$${n.toLocaleString("en-US", { minimumFractionDigits: 2, maximumFractionDigits: 2 })}`; +} + +function bswExtractCollectionIdFromHtml(html) { + const s = String(html || ""); + const patterns = [ + /collection_ids%3A(\d{6,})/i, + /collection_ids\s*:\s*(\d{6,})/i, + /"collection_ids"\s*:\s*(\d{6,})/i, + /"collection_id"\s*:\s*(\d{6,})/i, + /collection_id\s*=\s*(\d{6,})/i, + /collectionId["']?\s*[:=]\s*["']?(\d{6,})/i, + /data-collection-id=["'](\d{6,})["']/i, + ]; + for (const re of patterns) { + const m = s.match(re); + if (m && m[1]) return Number.parseInt(m[1], 10); + } + return null; +} + +function bswFormatPrice(value, hintCents) { + if (value === null || value === undefined) return ""; + + if (typeof value === "string") { + const t = value.trim(); + if (!t) return ""; + if (t.includes("$")) return t.replace(/\s+/g, ""); + const n = Number(t.replace(/[^0-9.]/g, "")); + if (!Number.isFinite(n)) return t; + return usd(n); + } + + if (typeof value === "number") { + let n = value; + + if (hintCents) n = n / 100; + else if (Number.isInteger(n) && n >= 100000) n = n / 100; + + return usd(n); + } + + return ""; +} + 
/**
 * Choose which raw price field of a search hit to display.
 *
 * Real selling-price fields are preferred: the hit's `price_cents`, then its
 * dollar-valued price fields, then the first variant's price. Compare-at fields
 * are used only when no selling price is present, so a discounted product is
 * never reported at its compare-at value.
 * NOTE(review): assumes `compare_at_price*` carries Shopify's pre-sale price — confirm against the index.
 *
 * @param {object|null|undefined} hit - Search hit.
 * @returns {{val: *, cents: boolean}} Raw value plus whether it is in cents;
 *   `{ val: null, cents: false }` when nothing usable is found.
 */
function bswPickPrice(hit) {
  const pick = (val, cents) => ({ val, cents });
  if (!hit) return pick(null, false);

  if (hit.price_cents != null) return pick(hit.price_cents, true);

  const dollarFields = ["price", "price_min", "priceMin", "min_price", "variants_min_price"];
  for (const field of dollarFields) {
    if (hit[field] != null) return pick(hit[field], false);
  }

  const v = Array.isArray(hit.variants) ? hit.variants[0] : null;
  if (v) {
    if (v.price_cents != null) return pick(v.price_cents, true);
    if (v.price != null) return pick(v.price, false);
  }

  // Last resort only: compare-at values, hit level first, then first variant.
  if (hit.compare_at_price_cents != null) return pick(hit.compare_at_price_cents, true);
  if (v && v.compare_at_price_cents != null) return pick(v.compare_at_price_cents, true);

  return pick(null, false);
}

/**
 * Convert a search hit into a tracker item.
 *
 * The URL comes from the hit when present, otherwise it is built from the
 * product handle. Hits without a name or URL are dropped.
 *
 * @param {object} hit - Search hit.
 * @returns {{name: string, price: string, url: string, sku: string, img: string}|null}
 */
function bswHitToItem(hit) {
  const name = cleanText(hit && (hit.title || hit.name || hit.product_title || hit.product_name || ""));
  const handle = hit && (hit.handle || hit.product_handle || hit.slug || "");
  const url =
    (hit && (hit.url || hit.product_url)) ||
    (handle ? `https://www.bswliquor.com/products/${String(handle).replace(/^\/+/, "")}` : "");

  const { val: priceVal, cents: hintCents } = bswPickPrice(hit);
  const price = bswFormatPrice(priceVal, hintCents);

  const sku = normalizeCspc(hit?.sku || hit?.SKU || hit?.cspc || hit?.CSPC || "");

  const img = bswPickImage(hit);

  if (!name || !url) return null;
  return { name, price, url, sku, img };
}

/**
 * Fetch one page of in-stock products for a collection from the Algolia index.
 *
 * @param {object} ctx - Scan context; uses `ctx.http`, `ctx.cat.key` and `ctx.store.ua`.
 * @param {number} collectionId - Collection id used in the `collection_ids` filter.
 * @param {string} ruleContext - Optional rule context; omitted from the query when empty.
 * @param {number} page0 - Zero-based page index.
 * @param {number} hitsPerPage - Page size.
 * @returns {Promise<object>} Whatever `ctx.http.fetchJsonWithRetry` resolves to
 *   (callers read `.json`, `.status`, `.bytes` and `.ms` from it).
 */
async function bswFetchAlgoliaPage(ctx, collectionId, ruleContext, page0, hitsPerPage) {
  const filtersExpr = `collection_ids:${collectionId} AND (inventory_available:"true")`;

  const params =
    `facets=%5B%22price%22%2C%22*%22%5D` +
    `&filters=${encodeURIComponent(filtersExpr)}` +
    `&hitsPerPage=${encodeURIComponent(String(hitsPerPage))}` +
    `&page=${encodeURIComponent(String(page0))}` +
    `&query=` +
    `&clickAnalytics=true` +
    `&maxValuesPerFacet=100` +
    (ruleContext ? `&ruleContexts=${encodeURIComponent(String(ruleContext))}` : "");

  const bodyObj = { requests: [{ indexName: "shopify_products", params }] };

  return await ctx.http.fetchJsonWithRetry(BSW_ALGOLIA_URL, `algolia:${ctx.cat.key}:p${page0}`, ctx.store.ua, {
    method: "POST",
    headers: {
      Accept: "*/*",
      "content-type": "application/x-www-form-urlencoded",
      Origin: "https://www.bswliquor.com",
      Referer: "https://www.bswliquor.com/",
      "x-algolia-api-key": BSW_ALGOLIA_API_KEY,
      "x-algolia-application-id": BSW_ALGOLIA_APP_ID,
    },
    body: JSON.stringify(bodyObj),
  });
}

/** Byte count as text (via `humanBytes`), left-padded to 8 columns for the progress log. */
function kbStr(bytes) {
  return humanBytes(bytes).padStart(8, " ");
}

/**
 * Duration in seconds, left-padded to 7 columns: one decimal below 10s
 * ("1.2s"), whole seconds otherwise ("12s"). Non-finite input counts as 0.
 */
function secStr(ms) {
  const s = Number.isFinite(ms) ? ms / 1000 : 0;
  const tenths = Math.round(s * 10) / 10;
  let out;
  if (tenths < 10) out = `${tenths.toFixed(1)}s`;
  else out = `${Math.round(s)}s`;
  return out.padStart(7, " ");
}

/** "i/total" with `i` padded to the width of `total`. */
function pageStr(i, total) {
  const leftW = String(total).length;
  return `${padLeft(i, leftW)}/${total}`;
}

/** Whole-number completion percentage padded to 3 columns; 0% when `total` is falsy. */
function pctStr(done, total) {
  const pct = total ? Math.floor((done / total) * 100) : 0;
  return `${padLeft(pct, 3)}%`;
}

/**
 * Make a URL absolute: protocol-relative URLs get "https:", absolute http(s)
 * URLs pass through, anything else is resolved against the BSW site root.
 *
 * @param {string} raw - URL as found in a hit.
 * @returns {string} Absolute URL, "" for empty input, or the trimmed input if it cannot be resolved.
 */
function bswNormalizeAbsUrl(raw) {
  const s = String(raw || "").trim();
  if (!s) return "";
  if (s.startsWith("//")) return `https:${s}`;
  if (/^https?:\/\//i.test(s)) return s;
  try {
    return new URL(s, "https://www.bswliquor.com/").toString();
  } catch {
    return s;
  }
}

/**
 * Extract an absolute image URL from a string or an image-like object.
 *
 * @param {string|object|null|undefined} v - URL string, or object with one of the known URL fields.
 * @returns {string} Absolute URL, or "" when none is found.
 */
function bswNormalizeImg(v) {
  if (!v) return "";
  if (typeof v === "string") return bswNormalizeAbsUrl(v);
  if (typeof v === "object") {
    const cands = [
      v.src,
      v.url,
      v.originalSrc,
      v.original_src,
      v.original,
      v.secure_url,
      v.large,
      v.medium,
      v.small,
    ];
    for (const c of cands) {
      if (typeof c === "string" && c.trim()) return bswNormalizeAbsUrl(c);
    }
  }
  return "";
}

/**
 * Find the first usable image URL on a hit: single-image fields first, then
 * the `images` array, then the `media` array.
 *
 * @param {object|null|undefined} hit - Search hit.
 * @returns {string} Absolute image URL, or "".
 */
function bswPickImage(hit) {
  const cands = [
    hit?.image,
    hit?.image_url,
    hit?.imageUrl,
    hit?.imageURL,
    hit?.featured_image,
    hit?.featured_image_url,
    hit?.featuredImage,
    hit?.featuredImageUrl,
    hit?.product_image,
    hit?.product_image_url,
    hit?.productImage,
    hit?.productImageUrl,
    hit?.thumbnail,
    hit?.thumbnail_url,
    hit?.thumbnailUrl,
  ];

  for (const c of cands) {
    const s = bswNormalizeImg(c);
    if (s) return s;
  }

  if (Array.isArray(hit?.images)) {
    for (const im of hit.images) {
      const s = bswNormalizeImg(im);
      if (s) return s;
    }
  }

  if (Array.isArray(hit?.media)) {
    for (const im of hit.media) {
      const s = bswNormalizeImg(im);
      if (s) return s;
    }
  }

  return "";
}

/**
 * Merge this run's discoveries into the previous DB, write the DB file and
 * record the category outcome (per-category entry, totals, item lists) on the report.
 *
 * @param {object} ctx - Scan context.
 * @param {*} prevDb - Previous DB state, passed through to `mergeDiscoveredIntoDb`.
 * @param {object} report - Run report; mutated in place.
 * @param {Map<string, object>} discovered - Items found this run, keyed by URL.
 * @param {number} scannedPages - Page count to record for the category.
 * @param {number} t0 - Scan start time (ms since epoch) used for the elapsed time.
 * @param {boolean} logSummary - Whether to log the "Done in ..." summary line.
 */
function bswPersistAndReport(ctx, prevDb, report, discovered, scannedPages, t0, logSummary) {
  const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered);

  const dbObj = buildDbObject(ctx, merged);
  writeJsonAtomic(ctx.dbFile, dbObj);

  ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`);

  const elapsed = Date.now() - t0;
  if (logSummary) {
    ctx.logger.ok(
      `${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`
    );
  }

  report.categories.push({
    store: ctx.store.name,
    label: ctx.cat.label,
    key: ctx.cat.key,
    dbFile: ctx.dbFile,
    scannedPages,
    discoveredUnique: discovered.size,
    newCount: newItems.length,
    updatedCount: updatedItems.length,
    removedCount: removedItems.length,
    restoredCount: restoredItems.length,
    elapsedMs: elapsed,
  });
  report.totals.newCount += newItems.length;
  report.totals.updatedCount += updatedItems.length;
  report.totals.removedCount += removedItems.length;
  report.totals.restoredCount += restoredItems.length;

  addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems);
}

/**
 * Scan one BSW category through the Algolia index and update its DB and the report.
 *
 * Steps: resolve the collection id (from the category config, else from the
 * start page's HTML), fetch page 0 to learn the page count, fetch the remaining
 * pages with staggered concurrency, de-duplicate by URL, then merge and persist.
 *
 * @param {object} ctx - Scan context (`cat`, `store`, `config`, `http`, `logger`, `dbFile`, `catPrefixOut`).
 * @param {*} prevDb - Previous DB state for this category.
 * @param {object} report - Run report; mutated in place.
 * @returns {Promise<void>}
 */
async function scanCategoryBSWAlgolia(ctx, prevDb, report) {
  const t0 = Date.now();

  let collectionId = Number.isFinite(ctx.cat.bswCollectionId) ? ctx.cat.bswCollectionId : null;
  if (!collectionId) {
    // Discovery is best effort: a failed fetch is logged and handled below.
    try {
      const { text: html } = await ctx.http.fetchTextWithRetry(ctx.cat.startUrl, `bsw:html:${ctx.cat.key}`, ctx.store.ua);
      collectionId = bswExtractCollectionIdFromHtml(html);
      if (collectionId) ctx.logger.ok(`${ctx.catPrefixOut} | BSW discovered collectionId=${collectionId}`);
      else ctx.logger.warn(`${ctx.catPrefixOut} | BSW could not discover collectionId from HTML.`);
    } catch (e) {
      ctx.logger.warn(`${ctx.catPrefixOut} | BSW HTML fetch failed for collectionId discovery: ${e?.message || e}`);
    }
  }

  if (!collectionId) {
    ctx.logger.warn(`${ctx.catPrefixOut} | BSW missing collectionId; defaulting to 1 page with 0 items.`);
    // NOTE(review): an empty discovery is merged into the DB here; presumably that flags
    // every known item as removed — confirm this is intended when discovery merely failed.
    bswPersistAndReport(ctx, prevDb, report, new Map(), 1, t0, false);
    return;
  }

  const ruleContext = ctx.cat.bswRuleContext || "";
  const hitsPerPage = 50;

  const first = await bswFetchAlgoliaPage(ctx, collectionId, ruleContext, 0, hitsPerPage);
  const result0 = first?.json?.results?.[0] || null;
  const nbPages = result0 && Number.isFinite(result0.nbPages) ? result0.nbPages : 1;

  const totalPages = Math.max(1, nbPages);
  // `== null` also covers an unset (undefined) cap, which would otherwise turn into
  // Math.min(undefined, n) === NaN and scan zero pages.
  const maxPages = ctx.config.maxPages;
  const scanPages = maxPages == null ? totalPages : Math.min(maxPages, totalPages);
  ctx.logger.ok(`${ctx.catPrefixOut} | Pages: ${scanPages}${scanPages !== totalPages ? ` (cap from ${totalPages})` : ""}`);

  const pageIdxs = [];
  for (let p = 0; p < scanPages; p++) pageIdxs.push(p);

  let donePages = 0;

  const perPageItems = await require("../utils/async").parallelMapStaggered(pageIdxs, ctx.config.concurrency, ctx.config.staggerMs, async (page0, idx) => {
    const pnum = idx + 1;
    // Page 0 was already fetched to learn the page count; reuse it.
    const r = page0 === 0 ? first : await bswFetchAlgoliaPage(ctx, collectionId, ruleContext, page0, hitsPerPage);

    const res0 = r?.json?.results?.[0] || null;
    const hits = res0 && Array.isArray(res0.hits) ? res0.hits : [];

    const items = [];
    for (const h of hits) {
      const it = bswHitToItem(h);
      if (it) items.push(it);
    }

    donePages++;
    ctx.logger.ok(
      `${ctx.catPrefixOut} | Page ${pageStr(pnum, pageIdxs.length)} | ${String(r.status || "").padEnd(3)} | ${pctStr(donePages, pageIdxs.length)} | items=${padLeft(
        items.length,
        3
      )} | bytes=${kbStr(r.bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}`
    );

    return items;
  });

  // De-duplicate by URL; a later occurrence replaces an earlier one.
  const discovered = new Map();
  let dups = 0;
  for (const arr of perPageItems) {
    for (const it of arr) {
      if (discovered.has(it.url)) dups++;
      discovered.set(it.url, it);
    }
  }

  ctx.logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}${dups ? ` (${dups} dups)` : ""}`);

  bswPersistAndReport(ctx, prevDb, report, discovered, scanPages, t0, true);
}

/**
 * Build the BSW store definition.
 *
 * @param {string} defaultUa - User-Agent string stored on the definition as `ua`.
 * @returns {object} Store definition with a custom `scanCategory` and two categories.
 */
function createStore(defaultUa) {
  return {
    key: "bsw",
    name: "BSW",
    host: "www.bswliquor.com",
    ua: defaultUa,
    scanCategory: scanCategoryBSWAlgolia,
    categories: [
      {
        key: "scotch-whisky",
        label: "Scotch Whisky",
        startUrl: "https://www.bswliquor.com/collections/scotch-whisky?page=1",
        bswRuleContext: "scotch-whisky",
      },
      {
        key: "rum",
        label: "Rum",
        startUrl: "https://www.bswliquor.com/collections/rum?page=1",
        bswRuleContext: "rum",
      },
    ],
  };
}

module.exports = { createStore };

// diff --git a/src/stores/craftcellars.js b/src/stores/craftcellars.js new file mode 100644 index 0000000..70c4cac --- /dev/null +++ b/src/stores/craftcellars.js @@ -0,0 +1,307 @@
"use strict";
+const { decodeHtml, stripTags, extractFirstImgUrl } = require("../utils/html"); +const { sanitizeName } = require("../utils/text"); +const { normalizeCspc } = require("../utils/sku"); +const { makePageUrlShopifyQueryPage } = require("../utils/url"); + +const { mergeDiscoveredIntoDb } = require("../tracker/merge"); +const { buildDbObject, writeJsonAtomic } = require("../tracker/db"); +const { addCategoryResultToReport } = require("../tracker/report"); + +function craftCellarsIsEmptyListingPage(html) { + const s = String(html || ""); + if (/collection--empty\b/i.test(s)) return true; + if (/No products found/i.test(s)) return true; + return false; +} + +function canonicalizeCraftProductUrl(raw) { + try { + const u = new URL(String(raw)); + u.search = ""; + u.hash = ""; + return u.toString(); + } catch { + return String(raw || ""); + } +} + +function extractShopifyCardPrice(block) { + const b = String(block || ""); + const dollars = (txt) => [...String(txt).matchAll(/\$\s*[\d,]+(?:\.\d{2})?/g)].map((m) => m[0].replace(/\s+/g, "")); + + const saleRegion = b.split(/sale price/i)[1] || ""; + const saleD = dollars(saleRegion); + if (saleD.length) return saleD[0]; + + const regRegion = b.split(/regular price/i)[1] || ""; + const regD = dollars(regRegion); + if (regD.length) return regD[0]; + + const any = dollars(b); + return any[0] || ""; +} + +function parseProductsCraftCellars(html, ctx) { + const s = String(html || ""); + + const g1 = s.match(/]*id=["']ProductGridContainer["'][^>]*>[\s\S]*?<\/div>/i)?.[0] || ""; + const g2 = s.match(/]*id=["']product-grid["'][^>]*>[\s\S]*?<\/div>/i)?.[0] || ""; + + const gridCandidate = g1.length > g2.length ? g1 : g2; + const grid = /\/products\//i.test(gridCandidate) ? 
gridCandidate : s; + + return parseProductsCraftCellarsInner(grid, ctx); +} + +function parseProductsCraftCellarsInner(html, ctx) { + const s = String(html || ""); + const items = []; + + let blocks = [...s.matchAll(/]*>[\s\S]*?<\/li>/gi)].map((m) => m[0]); + if (blocks.length < 5) { + blocks = [...s.matchAll(/]*class=["'][^"']*\bcard\b[^"']*["'][^>]*>[\s\S]*?<\/div>/gi)].map( + (m) => m[0] + ); + } + + const base = `https://${(ctx && ctx.store && ctx.store.host) || "craftcellars.ca"}/`; + + for (const block of blocks) { + const href = + block.match(/]*href=["']([^"']*\/products\/[^"']+)["']/i)?.[1] || + block.match(/href=["']([^"']*\/products\/[^"']+)["']/i)?.[1]; + if (!href) continue; + + let url = ""; + try { + url = new URL(decodeHtml(href), base).toString(); + } catch { + continue; + } + url = canonicalizeCraftProductUrl(url); + + const nameHtml = + block.match( + /]*href=["'][^"']*\/products\/[^"']+["'][^>]*>\s*<[^>]*>\s*([^<]{2,200}?)\s*]*>[\s\S]*?]*\/products\/[^"']+[^>]*>([\s\S]*?)<\/a>[\s\S]*?<\/h[23]>/i + )?.[1] || + block.match(/]*href=["'][^"']*\/products\/[^"']+["'][^>]*>([\s\S]*?)<\/a>/i)?.[1]; + + const name = sanitizeName(stripTags(decodeHtml(nameHtml || ""))); + if (!name) continue; + + const price = extractShopifyCardPrice(block); + const img = extractFirstImgUrl(block, base); + + items.push({ name, price, url, img }); + } + + const uniq = new Map(); + for (const it of items) uniq.set(it.url, it); + return [...uniq.values()]; +} + + +function usdFromShopifyPriceStr(s) { + const n = Number(String(s || "").replace(/[^0-9.]/g, "")); + if (!Number.isFinite(n)) return ""; + return `$${n.toLocaleString("en-US", { minimumFractionDigits: 2, maximumFractionDigits: 2 })}`; +} + +/** + * Craft Cellars: + * - HTML listing with ?filter.v.availability=1 is the allowlist (prevents OOS leaking in) + * - Shopify products.json is used only to enrich SKU (and optionally price) for those allowed URLs + */ +async function scanCategoryCraftCellars(ctx, prevDb, 
report) { + const t0 = Date.now(); + + // 1) HTML scan: allowlist of in-stock listing URLs + const htmlMap = new Map(); // url -> {name, price, url, img} + + const maxPages = ctx.config.maxPages === null ? 200 : Math.min(ctx.config.maxPages, 200); + let htmlPagesFetched = 0; + let emptyStreak = 0; + + for (let p = 1; p <= maxPages; p++) { + const pageUrl = makePageUrlShopifyQueryPage(ctx.cat.startUrl, p); + const { text: html } = await ctx.http.fetchTextWithRetry(pageUrl, `craft:html:${ctx.cat.key}:p${p}`, ctx.store.ua); + htmlPagesFetched++; + + if (craftCellarsIsEmptyListingPage(html)) break; + + const items = parseProductsCraftCellars(html, ctx); + if (!items.length) { + emptyStreak++; + if (emptyStreak >= 2) break; + continue; + } + emptyStreak = 0; + + for (const it of items) { + const url = canonicalizeCraftProductUrl(it.url); + if (!url) continue; + htmlMap.set(url, { name: it.name || "", price: it.price || "", url, img: it.img || "" }); + } + } + + // If HTML returns nothing, don't let JSON invent a category + if (!htmlMap.size) { + ctx.logger.warn( + `${ctx.catPrefixOut} | HTML listing returned 0 items; refusing to use products.json as source of truth.` + ); + } + + // 2) JSON scan: build SKU index (but do NOT add new URLs from JSON) + const jsonMap = new Map(); // url -> { sku, price, img } + + if (htmlMap.size) { + const start = new URL(ctx.cat.startUrl); + const m = start.pathname.match(/^\/collections\/([^/]+)/i); + if (!m) throw new Error(`CraftCellars: couldn't extract collection handle from ${ctx.cat.startUrl}`); + const collectionHandle = m[1]; + + const limit = 250; + let jsonPage = 1; + let jsonPagesFetched = 0; + + while (true) { + const url = `https://${ctx.store.host}/collections/${collectionHandle}/products.json?limit=${limit}&page=${jsonPage}`; + const r = await ctx.http.fetchJsonWithRetry(url, `craft:coljson:${ctx.cat.key}:p${jsonPage}`, ctx.store.ua); + + const products = Array.isArray(r?.json?.products) ? 
r.json.products : []; + jsonPagesFetched++; + + if (!products.length) break; + + for (const p of products) { + const handle = String(p?.handle || ""); + if (!handle) continue; + + const prodUrl = canonicalizeCraftProductUrl(`https://${ctx.store.host}/products/${handle}`); + + // Only enrich if it's on the HTML allowlist + if (!htmlMap.has(prodUrl)) continue; + + const variants = Array.isArray(p?.variants) ? p.variants : []; + const v = variants.find((x) => x && x.available === true) || variants[0] || null; + + const sku = normalizeCspc(v?.sku || ""); + const price = v?.price ? usdFromShopifyPriceStr(v.price) : ""; + + // Product image (best effort) + let img = ""; + const images = Array.isArray(p?.images) ? p.images : []; + if (images[0]) { + if (typeof images[0] === "string") img = images[0]; + else img = String(images[0]?.src || images[0]?.url || ""); + } + if (!img && p?.image) img = String(p.image?.src || p.image?.url || p.image || ""); + img = String(img || "").trim(); + if (img.startsWith("//")) img = `https:${img}`; + if (img && !/^https?:\/\//i.test(img)) { + try { + img = new URL(img, `https://${ctx.store.host}/`).toString(); + } catch { + // keep as-is + } + } + + jsonMap.set(prodUrl, { sku, price, img }); + } + + if (products.length < limit) break; + jsonPage++; + if (jsonPage > 200) break; // safety + } + + ctx.logger.ok(`${ctx.catPrefixOut} | HTML pages=${htmlPagesFetched} JSON pages=${jsonPagesFetched}`); + } else { + ctx.logger.ok(`${ctx.catPrefixOut} | HTML pages=${htmlPagesFetched} JSON pages=0`); + } + + // 3) Final discovered: HTML allowlist, enriched by JSON + const discovered = new Map(); + for (const [url, it] of htmlMap.entries()) { + const j = jsonMap.get(url); + discovered.set(url, { + name: it.name || "", + // Prefer JSON price (normalized) when present, else keep HTML price (already formatted) + price: j?.price || it.price || "", + url, + sku: j?.sku || "", + img: j?.img || it.img || "", + }); + } + + ctx.logger.ok(`${ctx.catPrefixOut} | 
Unique products (this run): ${discovered.size}`); + + const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered); + + const dbObj = buildDbObject(ctx, merged); + writeJsonAtomic(ctx.dbFile, dbObj); + + ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`); + + const elapsed = Date.now() - t0; + + report.categories.push({ + store: ctx.store.name, + label: ctx.cat.label, + key: ctx.cat.key, + dbFile: ctx.dbFile, + scannedPages: htmlPagesFetched, + discoveredUnique: discovered.size, + newCount: newItems.length, + updatedCount: updatedItems.length, + removedCount: removedItems.length, + restoredCount: restoredItems.length, + elapsedMs: elapsed, + }); + + report.totals.newCount += newItems.length; + report.totals.updatedCount += updatedItems.length; + report.totals.removedCount += removedItems.length; + report.totals.restoredCount += restoredItems.length; + + addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems); +} + + +function createStore(defaultUa) { + return { + key: "craftcellars", + name: "Craft Cellars", + host: "craftcellars.ca", + ua: defaultUa, + + // ✅ Custom scan (HTML allowlist + JSON enrichment) + scanCategory: scanCategoryCraftCellars, + + // Keep HTML parser for debugging + parseProducts: parseProductsCraftCellars, + makePageUrl: makePageUrlShopifyQueryPage, + isEmptyListingPage: craftCellarsIsEmptyListingPage, + + categories: [ + { + key: "whisky", + label: "Whisky", + startUrl: "https://craftcellars.ca/collections/whisky?filter.v.availability=1", + discoveryStartPage: 10, + }, + { + key: "rum", + label: "Rum", + startUrl: "https://craftcellars.ca/collections/rum?filter.v.availability=1", + discoveryStartPage: 5, + }, + ], + }; +} + +module.exports = { createStore }; diff --git a/src/stores/index.js b/src/stores/index.js new file mode 100644 index 0000000..a1999d1 --- /dev/null +++ 
b/src/stores/index.js @@ -0,0 +1,25 @@ +"use strict"; + +const { createStore: createSierra, parseProductsSierra } = require("./sierrasprings"); +const { createStore: createBSW } = require("./bsw"); +const { createStore: createKWM } = require("./kwm"); +const { createStore: createKegNCork } = require("./kegncork"); +const { createStore: createMaltsAndGrains } = require("./maltsandgrains"); +const { createStore: createCraftCellars } = require("./craftcellars"); +const { createStore: createBCL } = require("./bcl"); +const { createStore: createStrath } = require("./strath"); + +function createStores({ defaultUa } = {}) { + return [ + createSierra(defaultUa), + createBSW(defaultUa), + createKWM(defaultUa), + createKegNCork(defaultUa), + createMaltsAndGrains(defaultUa), + createCraftCellars(defaultUa), + createBCL(defaultUa), + createStrath(defaultUa), + ]; +} + +module.exports = { createStores, parseProductsSierra }; diff --git a/src/stores/kegncork.js b/src/stores/kegncork.js new file mode 100644 index 0000000..99e210b --- /dev/null +++ b/src/stores/kegncork.js @@ -0,0 +1,78 @@ +"use strict"; + +const { decodeHtml, cleanText, stripTags, extractFirstImgUrl } = require("../utils/html"); +const { makePageUrlQueryParam } = require("../utils/url"); + +function makePageUrlKegNCork(baseUrl, pageNum) { + return makePageUrlQueryParam(baseUrl, "page", pageNum); +} + +function parseProductsKegNCork(html, ctx) { + const s = String(html || ""); + const items = []; + + const base = `https://${(ctx && ctx.store && ctx.store.host) || "kegncork.com"}/`; + + const blocks = s.split(/]*class=["'][^"']*\bproduct\b[^"']*["'][^>]*>/i); + ctx.logger?.dbg?.(`parseProductsKegNCork: li.product blocks=${Math.max(0, blocks.length - 1)} bytes=${s.length}`); + + for (let i = 1; i < blocks.length; i++) { + const block = "]*class=["'][^"']*\bcard-title\b[^"']*["'][^>]*>[\s\S]*?]*href=["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>/i + ); + if (!mTitle) continue; + + const url = decodeHtml(mTitle[1]).trim(); + 
const name = cleanText(decodeHtml(mTitle[2])); + if (!url || !/^https?:\/\//i.test(url) || !name) continue; + + let price = ""; + const mPrice = block.match(/data-product-price-without-tax[^>]*>\s*([^<]+)\s*\s*<\/div>/i)?.[0] || ""; + const mDollar = cleanText(decodeHtml(stripTags(priceSection))).match(/\$\s*\d+(?:\.\d{2})?/); + if (mDollar) price = mDollar[0].replace(/\s+/g, ""); + } + + const img = extractFirstImgUrl(block, base); + + items.push({ name, price, url, img }); + } + + const uniq = new Map(); + for (const it of items) uniq.set(it.url, it); + return [...uniq.values()]; +} + + +function createStore(defaultUa) { + return { + key: "kegncork", + name: "Keg N Cork", + host: "kegncork.com", + ua: defaultUa, + parseProducts: parseProductsKegNCork, + makePageUrl: makePageUrlKegNCork, + categories: [ + { + key: "whisky", + label: "Whisky", + startUrl: "https://kegncork.com/whisky/?page=1", + discoveryStartPage: 5, + }, + { + key: "rum", + label: "Rum", + startUrl: "https://kegncork.com/rum/?page=1", + discoveryStartPage: 1, + }, + ], + }; +} + +module.exports = { createStore }; diff --git a/src/stores/kwm.js b/src/stores/kwm.js new file mode 100644 index 0000000..008fdaa --- /dev/null +++ b/src/stores/kwm.js @@ -0,0 +1,189 @@ +"use strict"; + +const { decodeHtml, stripTags, cleanText, extractHtmlAttr, escapeRe, extractFirstImgUrl } = require("../utils/html"); +const { sanitizeName } = require("../utils/text"); +const { normalizeCspc } = require("../utils/sku"); +const { normalizeBaseUrl } = require("../utils/url"); + +function makePageUrlKWM(baseUrl, pageNum) { + const u = new URL(normalizeBaseUrl(baseUrl)); + u.hash = ""; + if (pageNum <= 1) { + u.searchParams.delete("page"); + u.search = u.searchParams.toString() ? 
`?${u.searchParams.toString()}` : ""; + return u.toString(); + } + u.searchParams.set("page", String(pageNum)); + u.search = `?${u.searchParams.toString()}`; + return u.toString(); +} + +function extractDivBlocksByExactClass(html, className, maxBlocks) { + const out = []; + const s = String(html || ""); + + const re = new RegExp(`]*class=["'][^"']*\\b${escapeRe(className)}\\b[^"']*["'][^>]*>`, "gi"); + + let m; + while ((m = re.exec(s))) { + if (out.length >= maxBlocks) break; + + const startTagEnd = m.index + m[0].length; + let i = startTagEnd; + let depth = 1; + + while (i < s.length) { + const nextOpen = s.indexOf("", i); + if (nextClose === -1) break; + + if (nextOpen !== -1 && nextOpen < nextClose) { + depth++; + i = nextOpen + 4; + continue; + } + depth--; + if (depth === 0) { + out.push(s.slice(m.index, nextClose + 6)); + re.lastIndex = nextClose + 6; + break; + } + i = nextClose + 6; + } + } + return out; +} + +function kwmExtractProductLinkHref(block) { + let m = + block.match(/]*class=["'][^"']*\bproduct-link\b[^"']*["'][^>]*href=["']([^"']+)["'][^>]*>\s*<\/a>/i) || + block.match(/]*href=["']([^"']+)["'][^>]*class=["'][^"']*\bproduct-link\b[^"']*["'][^>]*>\s*<\/a>/i); + + if (m && m[1]) return m[1].trim(); + + m = + block.match(/]*class=["'][^"']*\bproduct-link\b[^"']*["'][^>]*href=["']([^"']+)["']/i) || + block.match(/]*href=["']([^"']+)["'][^>]*class=["'][^"']*\bproduct-link\b[^"']*["']/i); + + return m && m[1] ? 
m[1].trim() : ""; +} + +function kwmExtractName(block) { + const dataItem = extractHtmlAttr(block, "data-item"); + if (dataItem) return sanitizeName(dataItem); + + const m = block.match(/]*>\s*([\s\S]*?)\s*<\/h6>/i); + if (m && m[1]) return sanitizeName(stripTags(m[1])); + + return ""; +} + +function kwmExtractFirstDivByClass(html, className) { + const re = new RegExp(`]*class=["'][^"']*\\b${escapeRe(className)}\\b[^"']*["'][^>]*>`, "i"); + const m = re.exec(html); + if (!m) return ""; + const start = m.index + m[0].length; + + let i = start; + let depth = 1; + while (i < html.length) { + const nextOpen = html.indexOf("", i); + if (nextClose === -1) break; + + if (nextOpen !== -1 && nextOpen < nextClose) { + depth++; + i = nextOpen + 4; + continue; + } + depth--; + if (depth === 0) return html.slice(start, nextClose); + i = nextClose + 6; + } + return ""; +} + +function kwmExtractPrice(block) { + let m = block.match(/\bdata-price=["']([^"']+)["']/i); + if (m && m[1]) { + const raw = String(m[1]).trim(); + const n = raw.replace(/[^0-9.]/g, ""); + if (n) return `$${Number(n).toFixed(2)}`; + } + + const priceDiv = kwmExtractFirstDivByClass(block, "product-price"); + if (!priceDiv) return ""; + + const cleaned = String(priceDiv).replace(/]*class=["'][^"']*\bstrike\b[^"']*["'][^>]*>[\s\S]*?<\/span>/gi, " "); + + const txt = cleanText(decodeHtml(stripTags(cleaned))); + const dollars = [...txt.matchAll(/\$\s*\d+(?:\.\d{2})?/g)]; + if (dollars.length) return dollars[0][0].replace(/\s+/g, ""); + + return ""; +} + +function parseProductsKWM(html, ctx) { + const s = String(html || ""); + const base = `https://${(ctx && ctx.store && ctx.store.host) || "kensingtonwinemarket.com"}/`; + + const blocks = extractDivBlocksByExactClass(s, "product-wrap", 5000); + ctx.logger?.dbg?.(`parseProductsKWM: productWrapBlocks=${blocks.length} bytes=${s.length}`); + + const items = []; + for (const block of blocks) { + if (/OUT OF STOCK/i.test(block)) continue; + + const href = 
kwmExtractProductLinkHref(block); + if (!href) continue; + + let url; + try { + url = new URL(decodeHtml(href), base).toString(); + } catch { + continue; + } + + const name = kwmExtractName(block); + if (!name) continue; + + const price = kwmExtractPrice(block); + const sku = normalizeCspc(url); + + const img = extractFirstImgUrl(block, base); + + items.push({ name, price, url, sku, img }); + } + + const uniq = new Map(); + for (const it of items) uniq.set(it.url, it); + return [...uniq.values()]; +} + + +function createStore(defaultUa) { + return { + key: "kwm", + name: "Kensington Wine Market", + host: "kensingtonwinemarket.com", + ua: defaultUa, + parseProducts: parseProductsKWM, + makePageUrl: makePageUrlKWM, + categories: [ + { + key: "scotch", + label: "Scotch", + startUrl: "https://kensingtonwinemarket.com/products/scotch/", + discoveryStartPage: 200, + }, + { + key: "rum", + label: "Rum", + startUrl: "https://kensingtonwinemarket.com/products/liqu/rum/", + discoveryStartPage: 20, + }, + ], + }; +} + +module.exports = { createStore }; diff --git a/src/stores/maltsandgrains.js b/src/stores/maltsandgrains.js new file mode 100644 index 0000000..6eaf26e --- /dev/null +++ b/src/stores/maltsandgrains.js @@ -0,0 +1,107 @@ +"use strict"; + +const { decodeHtml, stripTags, cleanText, extractHtmlAttr, extractFirstImgUrl } = require("../utils/html"); +const { normalizeCspc } = require("../utils/sku"); +const { extractPriceFromTmbBlock } = require("../utils/woocommerce"); + +function allowMaltsExcludeGinTequilaMezcal(item) { + if (item && item.inStock === false) return false; + + const cats = Array.isArray(item?.cats) ? 
item.cats : []; + const has = (re) => cats.some((c) => re.test(String(c || ""))); + + if (has(/\bgin\b/i)) return false; + if (has(/\btequila\b/i) || has(/\bmezcal\b/i)) return false; + + return true; +} + +function parseProductsMaltsAndGrains(html, ctx) { + const s = String(html || ""); + const items = []; + + const re = /]*class=["'][^"']*\bproduct\b[^"']*["'][^>]*>[\s\S]*?<\/li>/gi; + const blocks = [...s.matchAll(re)].map((m) => m[0] || ""); + ctx.logger?.dbg?.(`parseProductsMaltsAndGrains: li.product blocks=${blocks.length} bytes=${s.length}`); + + const base = `https://${(ctx && ctx.store && ctx.store.host) || "maltsandgrains.store"}/`; + + for (const block of blocks) { + const classAttr = extractHtmlAttr(block, "class"); + + const isOut = + /\boutofstock\b/i.test(classAttr) || + /ast-shop-product-out-of-stock/i.test(block) || + />\s*out of stock\s*]*href=["']([^"']+)["'][^>]*class=["'][^"']*\b(woocommerce-LoopProduct-link|woocommerce-loop-product__link|ast-loop-product__link)\b/i + )?.[1] || + block.match( + /]*class=["'][^"']*\b(woocommerce-LoopProduct-link|woocommerce-loop-product__link|ast-loop-product__link)\b[^"']*["'][^>]*href=["']([^"']+)["']/i + )?.[2] || + block.match(/]*href=["']([^"']*\/product\/[^"']+)["']/i)?.[1]; + + if (!href) continue; + + let url = ""; + try { + url = new URL(decodeHtml(href), base).toString(); + } catch { + continue; + } + if (!/^https?:\/\//i.test(url)) continue; + + const mTitle = block.match( + /]*class=["'][^"']*\bwoocommerce-loop-product__title\b[^"']*["'][^>]*>([\s\S]*?)<\/h2>/i + ); + const name = mTitle && mTitle[1] ? 
cleanText(decodeHtml(stripTags(mTitle[1]))) : ""; + if (!name) continue; + + const price = extractPriceFromTmbBlock(block); + + const sku = normalizeCspc( + block.match(/\bdata-product_sku=["']([^"']+)["']/i)?.[1] || + block.match(/\bSKU[:\s]*([0-9]{6})\b/i)?.[1] || + "" + ); + + const img = extractFirstImgUrl(block, base); + + items.push({ name, price, url, sku, img, cats, inStock: true }); + } + + const uniq = new Map(); + for (const it of items) uniq.set(it.url, it); + return [...uniq.values()]; +} + + +function createStore(defaultUa) { + return { + key: "maltsandgrains", + name: "Malts & Grains", + host: "maltsandgrains.store", + ua: defaultUa, + parseProducts: parseProductsMaltsAndGrains, + categories: [ + { + key: "all-minus-gin-tequila-mezcal", + label: "All Spirits", + startUrl: "https://maltsandgrains.store/shop/page/1/", + discoveryStartPage: 15, + allowUrl: allowMaltsExcludeGinTequilaMezcal, + }, + ], + }; +} + +module.exports = { createStore }; diff --git a/src/stores/sierrasprings.js b/src/stores/sierrasprings.js new file mode 100644 index 0000000..75eeaa9 --- /dev/null +++ b/src/stores/sierrasprings.js @@ -0,0 +1,91 @@ +"use strict"; + +const { decodeHtml, cleanText, extractFirstImgUrl } = require("../utils/html"); +const { normalizeCspc } = require("../utils/sku"); +const { extractPriceFromTmbBlock } = require("../utils/woocommerce"); + +function allowSierraSpiritsLiquorUrlRumWhisky(item) { + const u = item && item.url ? item.url : ""; + const s = String(u || "").toLowerCase(); + if (!/^https?:\/\/sierraspringsliquor\.ca\/shop\/spirits-liquor\/.+\/$/.test(s)) return false; + return /\/shop\/spirits-liquor\/.*(rum|whisk(?:e)?y).*/.test(s); +} + +function parseProductsSierra(html, ctx) { + const items = []; + const blocks = String(html || "").split(/
]*class=["'][^"']*t-entry-title[^"']*["'][^>]*>\s*]*href=["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>\s*<\/h3>/i + ); + if (!titleMatch) continue; + + const url = new URL(decodeHtml(titleMatch[1]), base).toString(); + const name = cleanText(decodeHtml(titleMatch[2])); + if (!name) continue; + + const price = extractPriceFromTmbBlock(block); + + const sku = normalizeCspc( + block.match(/\bdata-product_sku=["']([^"']+)["']/i)?.[1] || + block.match(/\bSKU[:\s]*([0-9]{6})\b/i)?.[1] || + "" + ); + + const img = extractFirstImgUrl(block, base); + + items.push({ name, price, url, sku, img }); + } + + const uniq = new Map(); + for (const it of items) uniq.set(it.url, it); + return [...uniq.values()]; +} + + +function createStore(defaultUa) { + return { + key: "sierrasprings", + name: "Sierra Springs", + host: "sierraspringsliquor.ca", + ua: defaultUa, + parseProducts: parseProductsSierra, + categories: [ + { + key: "whisky", + label: "Whisky", + startUrl: "https://sierraspringsliquor.ca/product-category/whisky-2/", + discoveryStartPage: 20, + }, + { + key: "fine-rare", + label: "Fine & Rare", + startUrl: "https://sierraspringsliquor.ca/product-category/fine-rare/", + discoveryStartPage: 1, + }, + { + key: "spirits-liquor", + label: "Spirits / Liquor", + startUrl: "https://sierraspringsliquor.ca/product-category/spirits-liquor/page/2/", + discoveryStartPage: 15, + allowUrl: allowSierraSpiritsLiquorUrlRumWhisky, + }, + { + key: "spirits", + label: "Spirits", + startUrl: "https://sierraspringsliquor.ca/product-category/spirits/", + discoveryStartPage: 1, + }, + ], + }; +} + +module.exports = { createStore, parseProductsSierra }; diff --git a/src/stores/strath.js b/src/stores/strath.js new file mode 100644 index 0000000..8530d11 --- /dev/null +++ b/src/stores/strath.js @@ -0,0 +1,495 @@ +"use strict"; + +const { decodeHtml, stripTags, cleanText, extractFirstImgUrl } = require("../utils/html"); +const { normalizeCspc } = require("../utils/sku"); +const { humanBytes } = 
require("../utils/bytes"); +const { padLeft, padRight } = require("../utils/string"); + +const { mergeDiscoveredIntoDb } = require("../tracker/merge"); +const { buildDbObject, writeJsonAtomic } = require("../tracker/db"); +const { addCategoryResultToReport } = require("../tracker/report"); + +function kbStr(bytes) { + return humanBytes(bytes).padStart(8, " "); +} + +function secStr(ms) { + const s = Number.isFinite(ms) ? ms / 1000 : 0; + const tenths = Math.round(s * 10) / 10; + let out; + if (tenths < 10) out = `${tenths.toFixed(1)}s`; + else out = `${Math.round(s)}s`; + return out.padStart(7, " "); +} + +function pageStr(i, total) { + const leftW = String(total).length; + return `${padLeft(i, leftW)}/${total}`; +} + +function pctStr(done, total) { + const pct = total ? Math.floor((done / total) * 100) : 0; + return `${padLeft(pct, 3)}%`; +} + +function extractArticles(html) { + const s = String(html || ""); + const parts = s.split(/]*class=["'][^"']*\bwhiskyfolk-price\b[^"']*["'][^>]*>[\s\S]*?<\/div>/gi, + " " + ); + + const ins = noMember.match(/]*>[\s\S]*?(\$[\s\S]{0,32}?)<\/ins>/i); + if (ins && ins[1]) return normalizePrice(ins[1]); + + const reg = noMember.match(/class=["'][^"']*\bregular-price-card\b[^"']*["'][^>]*>\s*([^<]+)/i); + if (reg && reg[1]) return normalizePrice(reg[1]); + + const priceDiv = noMember.match( + /]*class=["'][^"']*\bproduct-price\b[^"']*["'][^>]*>([\s\S]*?)<\/div>/i + ); + const scope = priceDiv && priceDiv[1] ? 
priceDiv[1] : noMember; + + return normalizePrice(scope); +} + +function extractProductIdFromArticle(articleHtml) { + const a = String(articleHtml || ""); + + let m = a.match(/]*\bid=["'](\d{1,10})["']/i); + if (m && m[1]) return Number(m[1]); + + m = a.match(/\bpost-(\d{1,10})\b/i); + if (m && m[1]) return Number(m[1]); + + m = a.match(/\bdata-product_id=["'](\d{1,10})["']/i); + if (m && m[1]) return Number(m[1]); + + return 0; +} + +function extractSkuFromArticle(articleHtml) { + const a = String(articleHtml || ""); + + let m = a.match(/\bdata-product_sku=["'](\d{6})["']/i); + if (m && m[1]) return m[1]; + + m = a.match(/\bSKU\b[^0-9]{0,20}(\d{6})\b/i); + if (m && m[1]) return m[1]; + + return ""; +} + +function looksInStock(articleHtml) { + const a = String(articleHtml || ""); + + if (/\boutofstock\b/i.test(a)) return false; + if (/Currently\s+Unavailable/i.test(a)) return false; + + if (/\binstock\b/i.test(a)) return true; + if (/\bBottles\s+(?:Remaining|Available)\b/i.test(a)) return true; + if (/Only\s+\d+\s+Bottle\s+Left/i.test(a)) return true; + if (/10\+\s*Bottles\s+Available/i.test(a)) return true; + + return /\binstock\b/i.test(a); +} + +function parseProductFromArticle(articleHtml) { + const a = String(articleHtml || ""); + + if (!looksInStock(a)) return null; + + const hrefM = a.match(/]*href=["']([^"']+)["']/i); + if (!hrefM || !hrefM[1]) return null; + + let url; + try { + url = new URL(decodeHtml(hrefM[1]), "https://www.strathliquor.com/").toString(); + } catch { + return null; + } + + const t2 = a.match(/]*class=["'][^"']*\bproduct-title\b[^"']*["'][^>]*>([\s\S]*?)<\/h2>/i); + const t3 = a.match(/]*class=["'][^"']*\bproduct-subtitle\b[^"']*["'][^>]*>([\s\S]*?)<\/h3>/i); + const title = cleanText(decodeHtml(stripTags((t2 && t2[1]) || ""))); + const sub = cleanText(decodeHtml(stripTags((t3 && t3[1]) || ""))); + const name = cleanText([title, sub].filter(Boolean).join(" - ")); + if (!name) return null; + + const price = pickPriceFromArticle(a); + 
const productId = extractProductIdFromArticle(a); + + const skuFromHtml = extractSkuFromArticle(a); + const fallbackSku = normalizeCspc(url) || ""; + + const img = extractFirstImgUrl(a, "https://www.strathliquor.com/"); + + return { + name, + price, + url, + sku: skuFromHtml || fallbackSku, + productId, + img, + }; +} + + +/* ---------------- Store API paging ---------------- */ + +function buildStoreApiBaseUrlFromCategoryUrl(startUrl) { + const u = new URL(startUrl); + const api = new URL(`https://${u.hostname}/wp-json/wc/store/v1/products`); + + api.searchParams.set("order", "desc"); + api.searchParams.set("orderby", "date"); + + const stock = u.searchParams.get("_sfm__stock_status"); + if (stock && /instock/i.test(stock)) api.searchParams.set("stock_status", "instock"); + + const pr = u.searchParams.get("_sfm__regular_price"); + if (pr) { + const m = String(pr).match(/^\s*([0-9]+)\s*\+\s*([0-9]+)\s*$/); + if (m) { + api.searchParams.set("min_price", m[1]); + api.searchParams.set("max_price", m[2]); + } + } + + return api; +} + +function hasCategorySlug(p, wanted) { + const w = String(wanted || "").trim().toLowerCase(); + if (!w) return true; + + const cats = Array.isArray(p?.categories) ? p.categories : []; + for (const c of cats) { + const slug = String(c?.slug || "").trim().toLowerCase(); + if (slug === w) return true; + } + return false; +} + +function normalizeProductUrl(p) { + const u = String(p?.permalink || p?.link || "").trim(); + return u && u.startsWith("http") ? u : ""; +} + +function normalizeProductName(p) { + // Store API "name" can contain HTML entities like – and sometimes markup like + const raw = String(p?.name || ""); + return cleanText(decodeHtml(stripTags(raw))); +} + +function normalizeProductImage(p) { + const imgs = Array.isArray(p?.images) ? p.images : []; + for (const im of imgs) { + if (!im) continue; + const raw = + (typeof im === "string" ? im : "") || + (typeof im?.src === "string" ? 
im.src : "") || + (typeof im?.thumbnail === "string" ? im.thumbnail : "") || + (typeof im?.url === "string" ? im.url : ""); + const s = String(raw || "").trim(); + if (!s) continue; + if (s.startsWith("//")) return `https:${s}`; + return s; + } + + const direct = String(p?.image || p?.image_url || p?.imageUrl || "").trim(); + if (!direct) return ""; + return direct.startsWith("//") ? `https:${direct}` : direct; +} + + + +function toMoneyStringFromMinorUnits(valueStr, minorUnit) { + const mu = Number(minorUnit); + if (!Number.isFinite(mu) || mu < 0 || mu > 6) return ""; + const v = String(valueStr || "").trim(); + if (!/^\d+$/.test(v)) return ""; + + // Use integer math to avoid float rounding issues + const pad = "0".repeat(mu); + const s = v.length <= mu ? pad.slice(0, mu - v.length) + v : v; + const whole = s.length === mu ? "0" : s.slice(0, s.length - mu); + const frac = mu === 0 ? "" : s.slice(s.length - mu); + return mu === 0 ? whole : `${whole}.${frac}`; +} + +function normalizeProductPrice(p) { + const prices = p?.prices; + + // Woo store API commonly returns minor units (e.g., "11035" with minor_unit=2 => 110.35) + if (prices && typeof prices === "object") { + const minor = prices.currency_minor_unit; + const sale = String(prices.sale_price || "").trim(); + const regular = String(prices.regular_price || "").trim(); + const chosen = sale || regular; + + if (chosen) { + let numeric = chosen; + + if (/^\d+$/.test(chosen) && minor !== undefined && minor !== null) { + const converted = toMoneyStringFromMinorUnits(chosen, minor); + if (converted) numeric = converted; + } + + const num = Number(numeric); + if (Number.isFinite(num) && num >= 0) return `$${num.toFixed(2)}`; + } + } + + const raw = String(p?.price || p?.price_html || "").trim(); + const norm = normalizePrice(raw); + return norm; +} + +function normalizeProductSku(p) { + const sku = String(p?.sku || "").trim(); + if (/^\d{6}$/.test(sku)) return sku; + return ""; +} + +function normalizeProductId(p) { 
+ const id = Number(p?.id); + return Number.isFinite(id) ? id : 0; +} + +async function fetchStoreApiPage(ctx, apiBaseUrl, page, perPage) { + const u = new URL(apiBaseUrl.toString()); + u.searchParams.set("page", String(page)); + u.searchParams.set("per_page", String(perPage)); + + return await ctx.http.fetchJsonWithRetry(u.toString(), `strath:storeapi:${ctx.cat.key}:p${page}`, ctx.store.ua, { + method: "GET", + headers: { + Accept: "application/json", + Referer: ctx.cat.startUrl, + }, + }); +} + +function avoidMassRemoval(prevDb, discovered, ctx, reason) { + const prevSize = prevDb && typeof prevDb.size === "number" ? prevDb.size : 0; + const discSize = discovered && typeof discovered.size === "number" ? discovered.size : 0; + + if (prevSize <= 0 || discSize <= 0) return false; + + const ratio = discSize / Math.max(1, prevSize); + if (ratio >= 0.6) return false; + + ctx.logger.warn?.( + `${ctx.catPrefixOut} | Strath partial scan (${discSize}/${prevSize}); preserving DB to avoid removals (${reason}).` + ); + + if (prevDb && typeof prevDb.entries === "function") { + for (const [k, v] of prevDb.entries()) { + if (!discovered.has(k)) discovered.set(k, v); + } + return true; + } + + return false; +} + +async function scanCategoryStrath(ctx, prevDb, report) { + const t0 = Date.now(); + + // Listing HTML (seed + sanity) + let html = ""; + let listingFinalUrl = ctx.cat.startUrl; + let listingStatus = 0; + let listingBytes = 0; + let listingMs = 0; + + try { + const r = await ctx.http.fetchTextWithRetry(ctx.cat.startUrl, `strath:html:${ctx.cat.key}`, ctx.store.ua); + html = r.text || ""; + listingFinalUrl = r.finalUrl || ctx.cat.startUrl; + listingStatus = r.status || 0; + listingBytes = r.bytes || 0; + listingMs = r.ms || 0; + } catch (e) { + ctx.logger.warn(`${ctx.catPrefixOut} | Strath listing HTML fetch failed: ${e?.message || e}`); + } + + const discovered = new Map(); + + const listingArticles = extractArticles(html); + let listingItems = 0; + for (const art of 
listingArticles) { + const it = parseProductFromArticle(art); + if (it) { + discovered.set(it.url, it); + listingItems++; + } + } + + ctx.logger.ok( + `${ctx.catPrefixOut} | Page ${pageStr(1, 1)} | ${String(listingStatus || "").padEnd(3)} | ${pctStr(1, 1)} | items=${padLeft( + listingItems, + 3 + )} | bytes=${kbStr(listingBytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(listingMs)}` + ); + + const apiBase = buildStoreApiBaseUrlFromCategoryUrl(listingFinalUrl || ctx.cat.startUrl); + + const perPage = 100; + const maxPagesCap = ctx.config.maxPages === null ? 5000 : ctx.config.maxPages; + + const wantedSlug = String(ctx.cat.apiCategorySlug || "").trim().toLowerCase(); + + let donePages = 0; + let emptyMatchPages = 0; + + for (let page = 1; page <= maxPagesCap; page++) { + let r; + try { + r = await fetchStoreApiPage(ctx, apiBase, page, perPage); + } catch (e) { + ctx.logger.warn?.(`${ctx.catPrefixOut} | Strath Store API page ${page} failed: ${e?.message || e}`); + break; + } + + const arr = Array.isArray(r?.json) ? 
r.json : []; + donePages++; + + if (!arr.length) break; + + let kept = 0; + + for (const p of arr) { + const stock = String(p?.stock_status || "").toLowerCase(); + if (stock && stock !== "instock") continue; + + if (wantedSlug && !hasCategorySlug(p, wantedSlug)) continue; + + const url = normalizeProductUrl(p); + if (!url) continue; + + const name = normalizeProductName(p); + if (!name) continue; + + const price = normalizeProductPrice(p); + const sku = normalizeProductSku(p); + const productId = normalizeProductId(p); + + const fallbackSku = sku || normalizeCspc(url) || ""; + + const prev = discovered.get(url) || null; + const img = normalizeProductImage(p) || (prev && prev.img) || ""; + + discovered.set(url, { + name, + price, + url, + sku: sku || fallbackSku, + productId, + img, + }); + kept++; + } + + ctx.logger.ok( + `${ctx.catPrefixOut} | API Page ${pageStr(donePages, donePages)} | ${(r?.status || "").toString().padEnd(3)} | kept=${padLeft( + kept, + 3 + )} | bytes=${kbStr(r.bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}` + ); + + if (wantedSlug) { + if (kept === 0) emptyMatchPages++; + else emptyMatchPages = 0; + + // If filter is tight (rum), stop after 2 empty pages in a row. + if (emptyMatchPages >= 2) break; + } + + if (arr.length < perPage) break; + } + + if (prevDb && typeof prevDb.size === "number") { + avoidMassRemoval(prevDb, discovered, ctx, `storeapi pages=${donePages} slug=${wantedSlug || "none"}`); + } + + ctx.logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}`); + + const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered); + + const dbObj = buildDbObject(ctx, merged); + writeJsonAtomic(ctx.dbFile, dbObj); + + ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`); + + const elapsed = Date.now() - t0; + ctx.logger.ok( + `${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. 
New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}` + ); + + report.categories.push({ + store: ctx.store.name, + label: ctx.cat.label, + key: ctx.cat.key, + dbFile: ctx.dbFile, + scannedPages: 1 + Math.max(0, donePages), + discoveredUnique: discovered.size, + newCount: newItems.length, + updatedCount: updatedItems.length, + removedCount: removedItems.length, + restoredCount: restoredItems.length, + elapsedMs: elapsed, + }); + report.totals.newCount += newItems.length; + report.totals.updatedCount += updatedItems.length; + report.totals.removedCount += removedItems.length; + report.totals.restoredCount += restoredItems.length; + + addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems); +} + +function createStore(defaultUa) { + return { + key: "strath", + name: "Strath Liquor", + host: "www.strathliquor.com", + ua: defaultUa, + scanCategory: scanCategoryStrath, + categories: [ + { + key: "whisky", + label: "Whisky", + apiCategorySlug: "whisky", + startUrl: + "https://www.strathliquor.com/whisky/?_sfm__stock_status=instock&_sfm__regular_price=0+6000&_sfm_product_abv=20+75&orderby=date", + }, + { + key: "spirits-rum", + label: "Spirits - Rum", + apiCategorySlug: "rum", + startUrl: + "https://www.strathliquor.com/spirits/?_sfm__stock_status=instock&_sfm__regular_price=0+600&_sfm_product_type=Rum&_sfm_product_abv=10+75&orderby=date", + }, + ], + }; +} + +module.exports = { createStore }; diff --git a/src/tracker/category_scan.js b/src/tracker/category_scan.js new file mode 100644 index 0000000..1b0d09a --- /dev/null +++ b/src/tracker/category_scan.js @@ -0,0 +1,292 @@ +"use strict"; + +const { humanBytes } = require("../utils/bytes"); +const { padLeft, padRight, padLeftV, padRightV } = require("../utils/string"); +const { normalizeBaseUrl, makePageUrlForCtx } = require("../utils/url"); +const { parallelMapStaggered } = 
require("../utils/async"); + +const { ensureDir, dbPathFor, readDb, writeJsonAtomic, buildDbObject } = require("./db"); +const { mergeDiscoveredIntoDb } = require("./merge"); +const { addCategoryResultToReport } = require("./report"); + +const ACTION_W = 24; +const STATUS_W = 4; +const PROG_W = 4; + +function kbStr(bytes) { + return humanBytes(bytes).padStart(8, " "); +} + +function secStr(ms) { + const s = Number.isFinite(ms) ? ms / 1000 : 0; + const tenths = Math.round(s * 10) / 10; + let out; + if (tenths < 10) out = `${tenths.toFixed(1)}s`; + else out = `${Math.round(s)}s`; + return out.padStart(7, " "); +} + +function pctStr(done, total) { + const pct = total ? Math.floor((done / total) * 100) : 0; + return `${padLeft(pct, 3)}%`; +} + +function pageStr(i, total) { + const leftW = String(total).length; + return `${padLeft(i, leftW)}/${total}`; +} + +function actionCell(s) { + return padRightV(String(s), ACTION_W); +} + +function statusCell(logger, statusRaw, okBool) { + const cell = padRightV(String(statusRaw || ""), STATUS_W); + if (!statusRaw) return cell; + return okBool ? logger.color(cell, logger.C.green) : logger.color(cell, logger.C.yellow); +} + +function progCell(v) { + const raw = String(v ?? 
"----"); + return padLeftV(raw, PROG_W); +} + +function logProgressLine(logger, ctx, action, statusRaw, statusOk, progVal, rest) { + logger.ok(`${ctx.catPrefixOut} | ${actionCell(action)} | ${statusCell(logger, statusRaw, statusOk)} | ${progCell(progVal)} | ${rest}`); +} + +function makeCatPrefixers(stores, logger) { + const storeW = Math.max(...stores.map((s) => String(s.name || "").length), 1); + const catW = Math.max(...stores.flatMap((s) => (s.categories || []).map((c) => String(c.label || "").length)), 1); + + function catPrefixRaw(store, cat) { + return `${padRight(String(store.name || ""), storeW)} | ${padRight(String(cat.label || ""), catW)}`; + } + + function catPrefixOut(store, cat) { + return logger.bold(catPrefixRaw(store, cat)); + } + + return { catPrefixRaw, catPrefixOut, width: storeW, catW }; +} + +function buildCategoryContext(store, cat, catPrefixOutFn, config) { + const baseUrl = normalizeBaseUrl(cat.startUrl); + const dbFile = dbPathFor(`${store.key}__${cat.key}`, baseUrl, config.dbDir); + return { + store, + cat, + baseUrl, + dbFile, + catPrefixOut: catPrefixOutFn(store, cat), + }; +} + +function loadCategoryDb(logger, ctx) { + const prevDb = readDb(ctx.dbFile); + logger.ok(`${ctx.catPrefixOut} | DB loaded: ${padLeft(prevDb.byUrl.size, 5)} | ${logger.dim(ctx.dbFile)}`); + return prevDb; +} + +function shouldTrackItem(ctx, finalUrl, item) { + const allow = ctx?.cat?.allowUrl; + if (typeof allow !== "function") return true; + return allow(item, ctx, finalUrl); +} + +async function pageHasProducts(ctx, url) { + const { http, config, logger } = ctx; + try { + const { text } = await http.fetchTextWithRetry(url, "discover", ctx.store.ua); + + if (typeof ctx.store.isEmptyListingPage === "function") { + if (ctx.store.isEmptyListingPage(text, ctx, url)) return { ok: false, items: 0 }; + } + + const parser = ctx.store.parseProducts || config.defaultParseProducts; + const items = parser(text, ctx).length; + return { ok: items > 0, items }; + } catch { + 
return { ok: false, items: 0 }; + } +} + +async function probePage(ctx, baseUrl, pageNum, state) { + const url = makePageUrlForCtx(ctx, baseUrl, pageNum); + const t0 = Date.now(); + const r = await pageHasProducts(ctx, url); + const ms = Date.now() - t0; + + const prog = discoverProg(state); + + logProgressLine( + ctx.logger, + ctx, + `Discover probe page=${padLeftV(pageNum, 4)}`, + r.ok ? "OK" : "MISS", + Boolean(r.ok), + prog, + `items=${padLeftV(r.items, 3)} | bytes=${padLeftV("", 8)} | ${padRightV(ctx.http.inflightStr(), 11)} | ${secStr(ms)}` + ); + + return r; +} + +function discoverProg(state) { + if (!state || state.phase !== "binary") return " 0%"; + const span = Math.max(1, state.hiMiss - state.loOk); + const initial = Math.max(1, state.binInitialSpan); + if (initial <= 1) return "100%"; + + const remaining = Math.max(0, span - 1); + const total = Math.max(1, initial - 1); + const pct = Math.max(0, Math.min(100, Math.floor(((total - remaining) / total) * 100))); + return `${padLeft(pct, 3)}%`; +} + +async function binaryFindLastOk(ctx, baseUrl, loOk, hiMiss, state) { + state.phase = "binary"; + state.loOk = loOk; + state.hiMiss = hiMiss; + state.binInitialSpan = Math.max(1, hiMiss - loOk); + + while (hiMiss - loOk > 1) { + const mid = loOk + Math.floor((hiMiss - loOk) / 2); + state.loOk = loOk; + state.hiMiss = hiMiss; + + const pm = await probePage(ctx, baseUrl, mid, state); + if (pm.ok) loOk = mid; + else hiMiss = mid; + } + + state.loOk = loOk; + state.hiMiss = hiMiss; + return loOk; +} + +async function discoverTotalPagesFast(ctx, baseUrl, guess, step) { + const state = { phase: "pre", loOk: 1, hiMiss: 2, binInitialSpan: 0 }; + + const p1 = await probePage(ctx, baseUrl, 1, state); + if (!p1.ok) { + ctx.logger.warn(`${ctx.store.name} | ${ctx.cat.label} | Page 1 did not look like a listing. 
Defaulting to 1.`); + return 1; + } + + const g = Math.max(2, guess); + const pg = await probePage(ctx, baseUrl, g, state); + if (!pg.ok) return await binaryFindLastOk(ctx, baseUrl, 1, g, state); + + let lastOk = g; + while (true) { + const probe = lastOk + step; + const pr = await probePage(ctx, baseUrl, probe, state); + if (!pr.ok) return await binaryFindLastOk(ctx, baseUrl, lastOk, probe, state); + lastOk = probe; + if (lastOk > 5000) { + ctx.logger.warn(`${ctx.store.name} | ${ctx.cat.label} | Discovery hit safety cap at ${lastOk}. Using that as total pages.`); + return lastOk; + } + } +} + +async function discoverAndScanCategory(ctx, prevDb, report) { + const { logger, config } = ctx; + + if (typeof ctx.store.scanCategory === "function") { + await ctx.store.scanCategory(ctx, prevDb, report); + return; + } + + const t0 = Date.now(); + + const guess = Number.isFinite(ctx.cat.discoveryStartPage) ? ctx.cat.discoveryStartPage : config.discoveryGuess; + const step = config.discoveryStep; + + const totalPages = await discoverTotalPagesFast(ctx, ctx.baseUrl, guess, step); + const scanPages = config.maxPages === null ? totalPages : Math.min(config.maxPages, totalPages); + + logger.ok(`${ctx.catPrefixOut} | Pages: ${scanPages}${scanPages !== totalPages ? 
` (cap from ${totalPages})` : ""}`); + + const pages = []; + for (let p = 1; p <= scanPages; p++) pages.push(makePageUrlForCtx(ctx, ctx.baseUrl, p)); + + let donePages = 0; + + const perPageItems = await parallelMapStaggered(pages, config.concurrency, config.staggerMs, async (pageUrl, idx) => { + const pnum = idx + 1; + + const { text: html, ms, bytes, status, finalUrl } = await ctx.http.fetchTextWithRetry( + pageUrl, + `page:${ctx.store.key}:${ctx.cat.key}:${pnum}`, + ctx.store.ua + ); + + const parser = ctx.store.parseProducts || config.defaultParseProducts; + const itemsRaw = parser(html, ctx, finalUrl); + + const items = []; + for (const it of itemsRaw) { + if (shouldTrackItem(ctx, finalUrl, it)) items.push(it); + } + + donePages++; + logProgressLine( + logger, + ctx, + `Page ${pageStr(pnum, pages.length)}`, + status ? String(status) : "", + status >= 200 && status < 400, + pctStr(donePages, pages.length), + `items=${padLeft(items.length, 3)} | bytes=${kbStr(bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(ms)}` + ); + + return items; + }); + + const discovered = new Map(); + let dups = 0; + for (const arr of perPageItems) { + for (const it of arr) { + if (discovered.has(it.url)) dups++; + discovered.set(it.url, it); + } + } + + logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}${dups ? ` (${dups} dups)` : ""}`); + + const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered); + + const dbObj = buildDbObject(ctx, merged); + writeJsonAtomic(ctx.dbFile, dbObj); + + logger.ok(`${ctx.catPrefixOut} | DB saved: ${logger.dim(ctx.dbFile)} (${dbObj.count} items)`); + + const elapsed = Date.now() - t0; + logger.ok( + `${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. 
New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}` + ); + + report.categories.push({ + store: ctx.store.name, + label: ctx.cat.label, + key: ctx.cat.key, + dbFile: ctx.dbFile, + scannedPages: scanPages, + discoveredUnique: discovered.size, + newCount: newItems.length, + updatedCount: updatedItems.length, + removedCount: removedItems.length, + restoredCount: restoredItems.length, + elapsedMs: elapsed, + }); + report.totals.newCount += newItems.length; + report.totals.updatedCount += updatedItems.length; + report.totals.removedCount += removedItems.length; + report.totals.restoredCount += restoredItems.length; + + addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems); +} + +module.exports = { makeCatPrefixers, buildCategoryContext, loadCategoryDb, discoverAndScanCategory }; diff --git a/src/tracker/db.js b/src/tracker/db.js new file mode 100644 index 0000000..0f14ca6 --- /dev/null +++ b/src/tracker/db.js @@ -0,0 +1,128 @@ +"use strict"; + +const fs = require("fs"); +const path = require("path"); +const crypto = require("crypto"); + +const { normalizeCspc } = require("../utils/sku"); +const { priceToNumber } = require("../utils/price"); + +function ensureDir(dir) { + fs.mkdirSync(dir, { recursive: true }); +} + +function dbPathFor(key, baseUrl, dbDir) { + ensureDir(dbDir); + const hash = crypto.createHash("sha1").update(String(baseUrl)).digest("hex").slice(0, 8); + const safeKey = String(key).replace(/[^a-zA-Z0-9_-]+/g, "-"); + return path.join(dbDir, `${safeKey}__${hash}.json`); +} + +function readDb(file) { + const byUrl = new Map(); + try { + const txt = fs.readFileSync(file, "utf8"); + const obj = JSON.parse(txt); + if (obj && Array.isArray(obj.items)) { + for (const it of obj.items) { + if (it && typeof it.url === "string" && it.url.startsWith("http")) { + byUrl.set(it.url, { + name: String(it.name || ""), + 
price: String(it.price || ""), + sku: String(it.sku || ""), + url: it.url, + img: String(it.img || it.image || it.thumb || "").trim(), + removed: Boolean(it.removed), + }); + } + } + } + } catch { + // ignore missing or parse errors + } + return { byUrl }; +} + +function writeJsonAtomic(file, obj) { + ensureDir(path.dirname(file)); + const tmp = `${file}.tmp`; + fs.writeFileSync(tmp, JSON.stringify(obj, null, 2) + "\n", "utf8"); + fs.renameSync(tmp, file); +} + +function buildDbObject(ctx, merged) { + return { + version: 6, + store: ctx.store.host, + storeLabel: ctx.store.name, + category: ctx.cat.key, + categoryLabel: ctx.cat.label, + source: ctx.baseUrl, + updatedAt: new Date().toISOString(), + count: merged.size, + items: [...merged.values()] + .sort((a, b) => (a.name || "").localeCompare(b.name || "")) + .map((it) => ({ + name: it.name, + price: it.price || "", + sku: normalizeCspc(it.sku) || "", + url: it.url, + img: String(it.img || "").trim(), + removed: Boolean(it.removed), + })), + }; +} + +function listDbFiles(dbDir) { + const out = []; + try { + for (const ent of fs.readdirSync(dbDir, { withFileTypes: true })) { + if (!ent.isFile()) continue; + const name = ent.name || ""; + if (!name.endsWith(".json")) continue; + out.push(path.join(dbDir, name)); + } + } catch { + // ignore + } + return out; +} + +function buildCheapestSkuIndexFromAllDbs(dbDir) { + const cheapest = new Map(); // sku -> { storeLabel, priceNum } + + for (const file of listDbFiles(dbDir)) { + try { + const obj = JSON.parse(fs.readFileSync(file, "utf8")); + const storeLabel = String(obj?.storeLabel || obj?.store || ""); + const items = Array.isArray(obj?.items) ? 
obj.items : []; + + for (const it of items) { + if (it?.removed) continue; + + const sku = normalizeCspc(it?.sku || ""); + if (!sku) continue; + + const p = priceToNumber(it?.price || ""); + if (!Number.isFinite(p) || p <= 0) continue; + + const prev = cheapest.get(sku); + if (!prev || p < prev.priceNum) cheapest.set(sku, { storeLabel, priceNum: p }); + } + } catch { + // ignore parse errors + } + } + + return cheapest; +} + +module.exports = { + ensureDir, + dbPathFor, + readDb, + writeJsonAtomic, + buildDbObject, + listDbFiles, + buildCheapestSkuIndexFromAllDbs, +}; diff --git a/src/tracker/merge.js b/src/tracker/merge.js new file mode 100644 index 0000000..d8779d7 --- /dev/null +++ b/src/tracker/merge.js @@ -0,0 +1,100 @@ +"use strict"; + +const { normalizeCspc } = require("../utils/sku"); +const { normPrice } = require("../utils/price"); + +function normImg(v) { + const s = String(v || "").trim(); + if (!s) return ""; + if (/^data:/i.test(s)) return ""; + return s; +} + +function mergeDiscoveredIntoDb(prevDb, discovered) { + const merged = new Map(prevDb.byUrl); + + const newItems = []; + const updatedItems = []; + const removedItems = []; + const restoredItems = []; + + for (const [url, nowRaw] of discovered.entries()) { + const prev = prevDb.byUrl.get(url); + + if (!prev) { + const now = { + ...nowRaw, + sku: normalizeCspc(nowRaw.sku), + img: normImg(nowRaw.img), + removed: false, + }; + newItems.push(now); + merged.set(url, now); + continue; + } + + if (prev.removed) { + const now = { + ...nowRaw, + sku: normalizeCspc(nowRaw.sku) || normalizeCspc(prev.sku), + img: normImg(nowRaw.img) || normImg(prev.img), + removed: false, + }; + restoredItems.push({ + url, + name: now.name || prev.name || "", + price: now.price || prev.price || "", + sku: now.sku || "", + }); + merged.set(url, now); + continue; + } + + const prevPrice = normPrice(prev.price); + const nowPrice = normPrice(nowRaw.price); + + const prevSku = normalizeCspc(prev.sku); + const nowSku = 
normalizeCspc(nowRaw.sku) || prevSku; + + const prevImg = normImg(prev.img); + let nowImg = normImg(nowRaw.img); + if (!nowImg) nowImg = prevImg; + + const nameChanged = String(prev.name || "") !== String(nowRaw.name || ""); + const priceChanged = prevPrice !== nowPrice; + const skuChanged = prevSku !== nowSku; + const imgChanged = prevImg !== nowImg; + + if (nameChanged || priceChanged || skuChanged || imgChanged) { + merged.set(url, { ...nowRaw, sku: nowSku, img: nowImg, removed: false }); + } + + if (priceChanged) { + updatedItems.push({ + url, + name: nowRaw.name || prev.name || "", + sku: nowSku || "", + oldPrice: prev.price || "", + newPrice: nowRaw.price || "", + }); + } + } + + for (const [url, prev] of prevDb.byUrl.entries()) { + if (discovered.has(url)) continue; + if (!prev.removed) { + const removed = { ...prev, removed: true }; + merged.set(url, removed); + removedItems.push({ + url, + name: prev.name || "", + price: prev.price || "", + sku: normalizeCspc(prev.sku) || "", + }); + } + } + + return { merged, newItems, updatedItems, removedItems, restoredItems }; +} + +module.exports = { mergeDiscoveredIntoDb }; diff --git a/src/tracker/report.js b/src/tracker/report.js new file mode 100644 index 0000000..2e94e36 --- /dev/null +++ b/src/tracker/report.js @@ -0,0 +1,240 @@ +"use strict"; + +const { C, color } = require("../utils/ansi"); +const { padLeft, padRight } = require("../utils/string"); +const { normalizeCspc } = require("../utils/sku"); +const { priceToNumber, salePctOff } = require("../utils/price"); +const { buildCheapestSkuIndexFromAllDbs } = require("./db"); + +function secStr(ms) { + const s = Number.isFinite(ms) ? 
/**
 * Append one category's scan results to the run-wide report lists.
 *
 * Each appended entry is tagged with a combined "<store> | <category>" label.
 * Missing `price` / `sku` values are stored as empty strings; `name` and `url`
 * are copied through unchanged.
 *
 * @param {object} report - report object whose newItems/restoredItems/updatedItems/removedItems arrays are appended to
 * @param {string} storeName
 * @param {string} catLabel
 * @param {Iterable<object>} newItems
 * @param {Iterable<object>} updatedItems - entries carrying oldPrice/newPrice
 * @param {Iterable<object>} removedItems
 * @param {Iterable<object>} restoredItems
 */
function addCategoryResultToReport(report, storeName, catLabel, newItems, updatedItems, removedItems, restoredItems) {
  const label = `${storeName} | ${catLabel}`;

  // Shared shape for the new / restored / removed listings.
  const toListing = (entry) => ({
    catLabel: label,
    name: entry.name,
    price: entry.price || "",
    sku: entry.sku || "",
    url: entry.url,
  });

  for (const entry of newItems) {
    report.newItems.push(toListing(entry));
  }

  for (const entry of restoredItems) {
    report.restoredItems.push(toListing(entry));
  }

  for (const change of updatedItems) {
    const { name, sku, oldPrice, newPrice, url } = change;
    report.updatedItems.push({ catLabel: label, name, sku: sku || "", oldPrice, newPrice, url });
  }

  for (const entry of removedItems) {
    report.removedItems.push(toListing(entry));
  }
}
c.discoveredUnique : 0), 0); + + let out = ""; + const ln = (s = "") => { + out += String(s) + "\n"; + }; + + ln(""); + ln(paint("========== REPORT ==========", C.bold)); + ln( + paint("[OK] ", C.green) + + `Totals | Stores=${storesSet.size} | Categories=${report.categories.length} | Unique=${totalUnique} | New=${report.totals.newCount} | Restored=${report.totals.restoredCount} | Removed=${report.totals.removedCount} | PriceChanges=${report.totals.updatedCount} | Runtime=${secStr( + durMs + )}` + ); + ln(""); + + ln(paint("Per-category summary:", C.bold)); + const rows = report.categories.map((c) => ({ + cat: `${c.store} | ${c.label}`, + pages: c.scannedPages, + uniq: c.discoveredUnique, + newC: c.newCount, + resC: c.restoredCount, + remC: c.removedCount, + updC: c.updatedCount, + ms: c.elapsedMs, + })); + + const catW = Math.min(48, Math.max(...rows.map((r) => r.cat.length), 8)); + ln(`${padRight("Store | Category", catW)} ${padLeft("Pages", 5)} ${padLeft("Unique", 6)} ${padLeft("New", 4)} ${padLeft("Res", 4)} ${padLeft("Rem", 4)} ${padLeft("Upd", 4)} ${padLeft("Sec", 7)}`); + ln(`${"-".repeat(catW)} ----- ------ ---- ---- ---- ---- -------`); + for (const r of rows) { + ln( + `${padRight(r.cat, catW)} ${padLeft(r.pages, 5)} ${padLeft(r.uniq, 6)} ${padLeft(r.newC, 4)} ${padLeft(r.resC, 4)} ${padLeft(r.remC, 4)} ${padLeft(r.updC, 4)} ${secStr(r.ms)}` + ); + } + ln(""); + + const reportLabelW = Math.max( + 16, + ...report.newItems.map((x) => x.catLabel.length), + ...report.restoredItems.map((x) => x.catLabel.length), + ...report.updatedItems.map((x) => x.catLabel.length), + ...report.removedItems.map((x) => x.catLabel.length) + ); + + function storeFromCatLabel(catLabel) { + return String(catLabel || "").split(" | ")[0] || ""; + } + + function skuInline(sku) { + const s = normalizeCspc(sku); + return s ? 
paint(` ${s}`, C.gray) : ""; + } + + function cheaperAtInline(catLabel, sku, currentPriceStr) { + const s = normalizeCspc(sku); + if (!s) return ""; + const best = cheapestSku.get(s); + if (!best || !best.storeLabel) return ""; + const curStore = storeFromCatLabel(catLabel); + if (!curStore || best.storeLabel === curStore) return ""; + const curP = priceToNumber(currentPriceStr); + if (!Number.isFinite(curP)) return ""; + if (best.priceNum >= curP) return ""; + return paint(` (Cheaper at ${best.storeLabel})`, C.gray); + } + + function availableAtInline(catLabel, sku) { + const s = normalizeCspc(sku); + if (!s) return ""; + const best = cheapestSku.get(s); + if (!best || !best.storeLabel) return ""; + const curStore = storeFromCatLabel(catLabel); + if (curStore && best.storeLabel === curStore) return ""; + return paint(` (Available at ${best.storeLabel})`, C.gray); + } + + if (report.newItems.length) { + ln(paint(`NEW LISTINGS (${report.newItems.length})`, C.bold + C.green)); + for (const it of report.newItems.sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name))) { + const price = it.price ? paint(it.price, C.cyan) : paint("(no price)", C.gray); + const sku = normalizeCspc(it.sku || ""); + const cheapTag = cheaperAtInline(it.catLabel, sku, it.price || ""); + ln( + `${paint("+", C.green)} ${padRight(it.catLabel, reportLabelW)} | ${paint(it.name, C.bold)}${skuInline(sku)} ${price}${cheapTag}` + ); + ln(` ${paint(it.url, C.dim)}`); + } + ln(""); + } else { + ln(paint("NEW LISTINGS (0)", C.bold)); + ln(""); + } + + if (report.restoredItems.length) { + ln(paint(`RESTORED (${report.restoredItems.length})`, C.bold + C.green)); + for (const it of report.restoredItems.sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name))) { + const price = it.price ? 
paint(it.price, C.cyan) : paint("(no price)", C.gray); + const sku = normalizeCspc(it.sku || ""); + const cheapTag = cheaperAtInline(it.catLabel, sku, it.price || ""); + ln( + `${paint("R", C.green)} ${padRight(it.catLabel, reportLabelW)} | ${paint(it.name, C.bold)}${skuInline(sku)} ${price}${cheapTag}` + ); + ln(` ${paint(it.url, C.dim)}`); + } + ln(""); + } else { + ln(paint("RESTORED (0)", C.bold)); + ln(""); + } + + if (report.removedItems.length) { + ln(paint(`REMOVED (${report.removedItems.length})`, C.bold + C.yellow)); + for (const it of report.removedItems.sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name))) { + const price = it.price ? paint(it.price, C.cyan) : paint("(no price)", C.gray); + const sku = normalizeCspc(it.sku || ""); + const availTag = availableAtInline(it.catLabel, sku); + ln( + `${paint("-", C.yellow)} ${padRight(it.catLabel, reportLabelW)} | ${paint(it.name, C.bold)}${skuInline(sku)} ${price}${availTag}` + ); + ln(` ${paint(it.url, C.dim)}`); + } + ln(""); + } else { + ln(paint("REMOVED (0)", C.bold)); + ln(""); + } + + if (report.updatedItems.length) { + ln(paint(`PRICE CHANGES (${report.updatedItems.length})`, C.bold + C.cyan)); + + for (const u of report.updatedItems.sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name))) { + const oldRaw = u.oldPrice || ""; + const newRaw = u.newPrice || ""; + + const oldN = priceToNumber(oldRaw); + const newN = priceToNumber(newRaw); + + const oldP = oldRaw ? paint(oldRaw, C.yellow) : paint("(no price)", C.gray); + + let newP = newRaw ? 
newRaw : "(no price)"; + let offTag = ""; + + if (Number.isFinite(oldN) && Number.isFinite(newN)) { + if (newN > oldN) { + newP = paint(newP, C.red); // increase + } else if (newN < oldN) { + newP = paint(newP, C.green); // decrease + const pct = salePctOff(oldRaw, newRaw); + if (pct !== null) offTag = " " + paint(`[${pct}% Off]`, C.green); + } else { + newP = paint(newP, C.cyan); + } + } else { + newP = paint(newP, C.cyan); + } + + const sku = normalizeCspc(u.sku || ""); + const cheapTag = cheaperAtInline(u.catLabel, sku, newRaw || ""); + + ln( + `${paint("~", C.cyan)} ${padRight(u.catLabel, reportLabelW)} | ${paint(u.name, C.bold)}${skuInline(sku)} ${oldP} ${paint("->", C.gray)} ${newP}${offTag}${cheapTag}` + ); + ln(` ${paint(u.url, C.dim)}`); + } + + ln(""); + } else { + ln(paint("PRICE CHANGES (0)", C.bold)); + ln(""); + } + + ln(paint("======== END REPORT ========", C.bold)); + + return out; +} + +module.exports = { createReport, addCategoryResultToReport, renderFinalReport }; diff --git a/src/tracker/run_all.js b/src/tracker/run_all.js new file mode 100644 index 0000000..c79d0e7 --- /dev/null +++ b/src/tracker/run_all.js @@ -0,0 +1,72 @@ +"use strict"; + +const { createReport } = require("./report"); +const { parallelMapStaggered } = require("../utils/async"); + +const { + makeCatPrefixers, + buildCategoryContext, + loadCategoryDb, + discoverAndScanCategory, +} = require("./category_scan"); + +// Some sites will intermittently 403/429. We don't want a single category/store +// to abort the entire run. Log and continue. 
/**
 * Render any thrown value as a loggable string.
 *
 * Preference order: the value itself when it is a string, its `stack`, its
 * `message` (prefixed with `name` when present), its own string conversion,
 * and finally a JSON dump. The last two steps exist so that plain objects
 * thrown by HTTP helpers (e.g. `{ status: 403 }`) do not show up in the logs
 * as the useless "[object Object]".
 *
 * @param {unknown} e - the caught value
 * @returns {string} never empty for truthy input; "Unknown error" for falsy input
 */
function formatErr(e) {
  if (!e) return "Unknown error";
  if (typeof e === "string") return e;
  if (e.stack) return String(e.stack);

  if (typeof e.message === "string" && e.message) {
    const hasName = typeof e.name === "string" && e.name;
    return hasName ? `${e.name}: ${e.message}` : e.message;
  }

  const text = String(e);
  if (text !== "[object Object]") return text;

  try {
    // JSON.stringify throws on cycles/BigInt and may yield undefined via toJSON.
    return JSON.stringify(e) ?? text;
  } catch {
    return text;
  }
}
/**
 * Parse `v` as a base-10 integer and clamp it into [min, max].
 *
 * @param {*} v - raw value (typically a CLI argument string); null/undefined mean "absent"
 * @param {number|null} def - fallback when `v` does not parse; when `def` is
 *   null and `v` is absent the result is null
 * @param {number} min - inclusive lower bound
 * @param {number} max - inclusive upper bound
 * @returns {number|null}
 */
function clampInt(v, def, min, max) {
  const absent = v === null || v === undefined;
  if (absent && def === null) return null;

  const parsed = Number.parseInt(absent ? "" : v, 10);
  if (!Number.isFinite(parsed)) {
    return def;
  }

  const capped = Math.min(max, parsed);
  return Math.max(min, capped);
}
/**
 * Map `fn` over `arr` with a bounded number of concurrent workers and an
 * optional start-up/per-item delay.
 *
 * Results are written at the index of their input, so the output order always
 * matches `arr` regardless of completion order. A rejection from `fn`
 * propagates to the caller (via Promise.all); the other workers are not
 * cancelled.
 *
 * @param {Array} arr - inputs
 * @param {number} concurrency - maximum workers; values that are missing, NaN
 *   or below 1 are treated as 1 so the work is never skipped silently
 * @param {number} staggerMs - when > 0: worker k starts after (k-1)*staggerMs,
 *   and every item except index 0 is preceded by a staggerMs pause
 * @param {(item: *, index: number) => Promise<*>} fn
 * @returns {Promise<Array>} results aligned with `arr`
 */
async function parallelMapStaggered(arr, concurrency, staggerMs, fn) {
  const out = new Array(arr.length);
  if (arr.length === 0) return out;

  // Previously a concurrency of 0/NaN/undefined spawned zero workers, so no
  // item was processed and an array of holes came back without any error.
  const requested = Number(concurrency);
  const limit = Number.isNaN(requested) || requested < 1 ? 1 : Math.ceil(requested);
  const workerCount = Math.min(limit, arr.length);

  let next = 0;

  async function worker(workerId) {
    if (staggerMs > 0 && workerId > 1) await sleep(staggerMs * (workerId - 1));
    while (true) {
      // Claiming the index is synchronous, so two workers never take the same item.
      const i = next++;
      if (i >= arr.length) return;
      if (staggerMs > 0 && i > 0) await sleep(staggerMs);
      out[i] = await fn(arr[i], i);
    }
  }

  const workers = [];
  for (let id = 1; id <= workerCount; id++) workers.push(worker(id));
  await Promise.all(workers);
  return out;
}
/**
 * Decode the small set of HTML entities that show up in scraped listings:
 * decimal and hexadecimal numeric references plus the named entities
 * amp, quot, apos, lt, gt, nbsp, laquo and raquo.
 *
 * The whole string is decoded in ONE pass. Chained replace() calls decode
 * twice (an escaped "amp" followed by "lt;" would first become an "lt"
 * entity and then a bare angle bracket), which corrupts text that was
 * escaped on purpose.
 *
 * Numeric references use String.fromCodePoint so code points above U+FFFF
 * (emoji etc.) survive; references beyond U+10FFFF are left untouched
 * instead of throwing.
 *
 * NOTE(review): nbsp is mapped to a regular space here; confirm that matches
 * the intended original mapping (cleanText collapses whitespace afterwards).
 *
 * @param {*} s - value to decode; coerced with String()
 * @returns {string}
 */
function decodeHtml(s) {
  const named = {
    amp: "&",
    quot: '"',
    apos: "'",
    lt: "<",
    gt: ">",
    nbsp: " ",
    laquo: "\u00ab",
    raquo: "\u00bb",
  };
  const entityRe = /&(?:#(\d+)|#x([0-9a-fA-F]+)|(amp|quot|apos|lt|gt|nbsp|laquo|raquo));/g;

  return String(s).replace(entityRe, (whole, dec, hex, name) => {
    if (name !== undefined) return named[name];
    const codePoint = dec !== undefined ? Number.parseInt(dec, 10) : Number.parseInt(hex, 16);
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
  });
}
/**
 * Convert a displayed price such as "$1,299.00" to a number.
 *
 * Everything except digits and "." is discarded before parsing. When no digit
 * remains (empty string, null, "N/A", "Sold out") the result is NaN:
 * Number("") is 0, which used to make a missing price look like a real,
 * finite price of zero to callers that gate on Number.isFinite.
 *
 * @param {*} p - raw price text (or number)
 * @returns {number} the parsed value, or NaN when the input holds no parsable number
 */
function priceToNumber(p) {
  const numeric = String(p ?? "").replace(/[^0-9.]/g, "");
  if (!/\d/.test(numeric)) return NaN;
  const n = Number(numeric);
  return Number.isFinite(n) ? n : NaN;
}
/**
 * Left-align `s` in a field of `n` characters by appending spaces.
 * Values already `n` characters or longer are returned as-is (never truncated).
 *
 * @param {*} s - value to pad; coerced with String()
 * @param {number} n - target width in UTF-16 code units
 * @returns {string}
 */
function padRight(s, n) {
  const text = String(s);
  const missing = n - text.length;
  if (missing > 0) {
    return text + " ".repeat(missing);
  }
  return text;
}
/**
 * Reduce a category URL to its canonical "page 1" form:
 * no fragment, no `page` query parameter, a trailing slash on the path and
 * no trailing `/page/<n>/` path segment.
 *
 * @param {string} startUrl - absolute URL
 * @returns {string} the normalized URL, or `startUrl` unchanged when it cannot be parsed
 */
function normalizeBaseUrl(startUrl) {
  try {
    const parsed = new URL(startUrl);
    parsed.hash = "";

    // Drop the pager parameter, then re-serialize whatever query is left.
    parsed.searchParams.delete("page");
    const query = parsed.searchParams.toString();
    parsed.search = query ? `?${query}` : "";

    // Guarantee the trailing slash first so the pager regex sees one shape only.
    const slashed = parsed.pathname.endsWith("/") ? parsed.pathname : `${parsed.pathname}/`;
    parsed.pathname = slashed.replace(/\/page\/\d+\/?$/, "/");

    return parsed.href;
  } catch {
    return startUrl;
  }
}
/**
 * Extracts the *effective* price from a WooCommerce listing block.
 *  - If a sale is present, the last <ins> (sale price) is used.
 *  - Otherwise the regular price's <bdi>/<span> content is used.
 *
 * Fallback order inside the chosen scope: last <bdi> text, then
 * "currency symbol + first number", then the first "$<number>" in the text.
 *
 * @param {string} block - HTML of one product tile
 * @returns {string} price such as "$49.99", or "" when none is found
 */
function extractPriceFromTmbBlock(block) {
  const span = matchFirstPriceSpan(block);
  if (!span) return "";

  // A sale renders <del>old</del> <ins>new</ins>; the last <ins> is what the shopper pays.
  const insMatches = [...span.matchAll(/<ins\b[^>]*>([\s\S]*?)<\/ins>/gi)];
  const scope = insMatches.length ? insMatches[insMatches.length - 1][1] : span;

  const bdis = [...scope.matchAll(/<bdi\b[^>]*>([\s\S]*?)<\/bdi>/gi)];
  if (bdis.length) {
    const raw = cleanText(decodeHtml(stripTags(bdis[bdis.length - 1][1]))).replace(/\s+/g, "");
    if (raw) return raw.startsWith("$") ? raw : `$${raw}`;
  }

  const text = cleanText(decodeHtml(stripTags(scope)));

  const sym = scope.match(/woocommerce-Price-currencySymbol[^>]*>\s*([^<\s]+)/i);
  const num = text.match(/(\d+(?:\.\d{2})?)/);
  if (sym && num) return `${sym[1].trim()}${num[1]}`;

  const m = text.match(/\$\s*\d+(?:\.\d{2})?/);
  return m ? m[0].replace(/\s+/g, "") : "";
}

/**
 * Return the inner HTML of the first <span> whose class list contains the
 * word "price", honouring nested <span> elements.
 *
 * A regex cannot balance nested tags, so the opening tag is located by regex
 * and the matching close is found by depth counting.
 *
 * @param {string} html
 * @returns {string} inner HTML, or "" when there is no such span or it is unclosed
 */
function matchFirstPriceSpan(html) {
  const OPEN = "<span";
  const CLOSE = "</span>";

  const re = /<span\b[^>]*class=["'][^"']*\bprice\b[^"']*["'][^>]*>/i;
  const m = re.exec(html);
  if (!m) return "";
  const start = m.index + m[0].length;

  let i = start;
  let depth = 1;
  while (i < html.length) {
    const nextOpen = html.indexOf(OPEN, i);
    const nextClose = html.indexOf(CLOSE, i);
    if (nextClose === -1) break;

    if (nextOpen !== -1 && nextOpen < nextClose) {
      depth++;
      i = nextOpen + OPEN.length;
      continue;
    }
    depth--;
    if (depth === 0) return html.slice(start, nextClose);
    i = nextClose + CLOSE.length;
  }
  return "";
}
/**
 * List the `.json` files directly inside `dir` (non-recursive).
 *
 * @param {string} dir - directory to scan
 * @returns {string[]} `dir` joined with each matching file name; an empty
 *   array when the directory cannot be read
 */
function listJsonFiles(dir) {
  try {
    return fs
      .readdirSync(dir, { withFileTypes: true })
      .filter((entry) => entry.isFile() && String(entry.name || "").endsWith(".json"))
      .map((entry) => path.join(dir, entry.name));
  } catch {
    // A missing or unreadable directory simply means "no DB files yet".
    return [];
  }
}
obj.items : []; + for (const it of arr) { + if (!it || it.removed) continue; + + const sku = String(it.sku || "").trim(); + const name = String(it.name || "").trim(); + const price = String(it.price || "").trim(); + const url = String(it.url || "").trim(); + const img = String(it.img || it.image || it.thumb || "").trim(); + + items.push({ + sku, + name, + price, + url, + img, + store, + storeLabel, + category, + categoryLabel, + source, + updatedAt, + dbFile, + }); + } + } + + items.sort((a, b) => { + const ak = `${a.sku}|${a.storeLabel}|${a.name}|${a.url}`; + const bk = `${b.sku}|${b.storeLabel}|${b.name}|${b.url}`; + return ak.localeCompare(bk); + }); + + const outObj = { + generatedAt: new Date().toISOString(), + count: items.length, + items, + }; + + fs.writeFileSync(outFile, JSON.stringify(outObj, null, 2) + "\n", "utf8"); + process.stdout.write(`Wrote ${path.relative(repoRoot, outFile)} (${items.length} rows)\n`); +} + +module.exports = { main }; + +if (require.main === module) { + main(); +} diff --git a/tools/build_viz_recent.js b/tools/build_viz_recent.js new file mode 100755 index 0000000..457a173 --- /dev/null +++ b/tools/build_viz_recent.js @@ -0,0 +1,393 @@ +#!/usr/bin/env node +"use strict"; + +const { execFileSync } = require("child_process"); +const fs = require("fs"); +const path = require("path"); + +function runGit(args) { + return execFileSync("git", args, { encoding: "utf8" }).trimEnd(); +} + +function gitShowJson(sha, filePath) { + try { + const txt = execFileSync("git", ["show", `${sha}:${filePath}`], { + encoding: "utf8", + stdio: ["ignore", "pipe", "pipe"], // silence git fatal spam + }); + return JSON.parse(txt); + } catch { + return null; + } +} + + +function gitListTreeFiles(sha, dirRel) { + try { + const out = runGit(["ls-tree", "-r", "--name-only", sha, dirRel]); + return out.split(/\r?\n/).map((s) => s.trim()).filter(Boolean); + } catch { + return []; + } +} + +function readJsonFileOrNull(filePath) { + try { + return 
/**
 * Convert a displayed price string to a number for up/down comparison.
 *
 * Everything except digits and "." is discarded. When no digit remains the
 * result is null: Number("") is 0, which used to turn a missing price into a
 * real 0 and made "no price -> $20" register as a price increase.
 *
 * @param {*} v - raw price text
 * @returns {number|null} parsed value, or null when the input holds no parsable number
 */
function priceToNumber(v) {
  const numeric = String(v ?? "").replace(/[^0-9.]/g, "");
  if (!/\d/.test(numeric)) return null;
  const n = Number(numeric);
  return Number.isFinite(n) ? n : null;
}
const b = normPriceStr(now.price); + if (a === b) continue; + + const aN = priceToNumber(a); + const bN = priceToNumber(b); + + let kind = "price_change"; + if (aN !== null && bN !== null) { + if (bN < aN) kind = "price_down"; + else if (bN > aN) kind = "price_up"; + else kind = "price_change"; + } + + priceChanges.push({ + kind, + sku, + name: now.name || was.name || "", + oldPrice: a, + newPrice: b, + url: now.url || was.url || "", + }); + } + + return { newItems, restoredItems, removedItems, priceChanges }; +} + +function getHeadShaOrEmpty() { + try { + return runGit(["rev-parse", "--verify", "HEAD"]); + } catch { + return ""; + } +} + +function firstParentSha(sha) { + try { + const out = runGit(["rev-list", "--parents", "-n", "1", sha]); + const parts = out.split(/\s+/).filter(Boolean); + // parts[0] is sha, parts[1] is first parent (if any) + return parts.length >= 2 ? parts[1] : ""; + } catch { + return ""; + } +} + +function listChangedDbFiles(fromSha, toSha) { + // toSha can be "WORKTREE" + if (!fromSha && toSha && toSha !== "WORKTREE") { + return gitListTreeFiles(toSha, "data/db"); + } + + if (!fromSha && toSha === "WORKTREE") { + // Fall back: list files on disk + try { + return fs + .readdirSync(path.join(process.cwd(), "data", "db"), { withFileTypes: true }) + .filter((e) => e.isFile() && e.name.endsWith(".json")) + .map((e) => path.posix.join("data/db", e.name)); + } catch { + return []; + } + } + + try { + if (toSha === "WORKTREE") { + const out = runGit(["diff", "--name-only", fromSha, "--", "data/db"]); + return out.split(/\r?\n/).map((s) => s.trim()).filter(Boolean); + } + const out = runGit(["diff", "--name-only", fromSha, toSha, "--", "data/db"]); + return out.split(/\r?\n/).map((s) => s.trim()).filter(Boolean); + } catch { + return []; + } +} + +function logDbCommitsSince(sinceIso) { + try { + const out = runGit(["log", `--since=${sinceIso}`, "--format=%H %cI", "--", "data/db"]); + const lines = out.split(/\r?\n/).map((s) => 
s.trim()).filter(Boolean); + const arr = []; + for (const line of lines) { + const m = line.match(/^([0-9a-f]{7,40})\s+(.+)$/i); + if (!m) continue; + const sha = m[1]; + const ts = m[2]; + const d = dateOnly(ts); + arr.push({ sha, ts, date: d }); + } + // newest -> oldest from git; convert to oldest -> newest + arr.reverse(); + return arr; + } catch { + return []; + } +} + +function main() { + const repoRoot = process.cwd(); + const outDir = path.join(repoRoot, "viz", "data"); + const outFile = path.join(outDir, "recent.json"); + fs.mkdirSync(outDir, { recursive: true }); + + const windowDays = Math.max(1, Number(process.env.RECENT_DAYS || 3)); + const maxItems = Math.max(1, Number(process.env.RECENT_MAX_ITEMS || 500)); + + const now = new Date(); + const since = new Date(now.getTime() - windowDays * 24 * 3600 * 1000); + const sinceIso = since.toISOString(); + + const headSha = getHeadShaOrEmpty(); + const items = []; + + // Collect committed runs in the last N days (touching data/db) + const commits = headSha ? 
logDbCommitsSince(sinceIso) : []; + + // Build diff pairs: + // parent(of first in window) -> first + // then each consecutive commit -> next + // then HEAD -> WORKTREE (so this run shows up before the commit exists) + const pairs = []; + + if (commits.length) { + const first = commits[0]; + const parent = firstParentSha(first.sha); + pairs.push({ + fromSha: parent || "", + toSha: first.sha, + ts: first.ts, + date: first.date, + }); + + for (let i = 1; i < commits.length; i++) { + pairs.push({ + fromSha: commits[i - 1].sha, + toSha: commits[i].sha, + ts: commits[i].ts, + date: commits[i].date, + }); + } + } + + if (headSha) { + pairs.push({ + fromSha: headSha, + toSha: "WORKTREE", + ts: now.toISOString(), + date: dateOnly(now.toISOString()), + }); + } + + for (const p of pairs) { + const fromSha = p.fromSha; + const toSha = p.toSha; + const ts = p.ts; + const d = p.date; + + const files = listChangedDbFiles(fromSha, toSha); + if (!files.length) continue; + + for (const file of files) { + let prevObj = null; + let nextObj = null; + + if (toSha === "WORKTREE") { + prevObj = fromSha ? gitShowJson(fromSha, file) : null; + nextObj = readJsonFileOrNull(path.join(repoRoot, file)); + } else { + prevObj = fromSha ? 
gitShowJson(fromSha, file) : null; + nextObj = gitShowJson(toSha, file); + } + + if (!prevObj && !nextObj) continue; + + const storeLabel = String( + nextObj?.storeLabel || nextObj?.store || prevObj?.storeLabel || prevObj?.store || "" + ); + const categoryLabel = String( + nextObj?.categoryLabel || nextObj?.category || prevObj?.categoryLabel || prevObj?.category || "" + ); + + const { newItems, restoredItems, removedItems, priceChanges } = diffDb(prevObj, nextObj); + + for (const it of newItems) { + items.push({ + ts, + date: d, + fromSha: fromSha || "", + toSha, + kind: "new", + sku: it.sku, + name: it.name, + storeLabel, + categoryLabel, + price: normPriceStr(it.price), + url: it.url, + dbFile: file, + }); + } + + for (const it of restoredItems) { + items.push({ + ts, + date: d, + fromSha: fromSha || "", + toSha, + kind: "restored", + sku: it.sku, + name: it.name, + storeLabel, + categoryLabel, + price: normPriceStr(it.price), + url: it.url, + dbFile: file, + }); + } + + for (const it of removedItems) { + items.push({ + ts, + date: d, + fromSha: fromSha || "", + toSha, + kind: "removed", + sku: it.sku, + name: it.name, + storeLabel, + categoryLabel, + price: normPriceStr(it.price), + url: it.url, + dbFile: file, + }); + } + + for (const u of priceChanges) { + items.push({ + ts, + date: d, + fromSha: fromSha || "", + toSha, + kind: u.kind, + sku: u.sku, + name: u.name, + storeLabel, + categoryLabel, + oldPrice: normPriceStr(u.oldPrice), + newPrice: normPriceStr(u.newPrice), + url: u.url, + dbFile: file, + }); + } + } + } + + // Newest first + items.sort((a, b) => String(b.ts).localeCompare(String(a.ts))); + + // Keep file size under control (but still allows multiple runs/day over the window) + const trimmed = items.slice(0, maxItems); + + const payload = { + generatedAt: now.toISOString(), + windowDays, + since: sinceIso, + headSha, + count: trimmed.length, + items: trimmed, + }; + + fs.writeFileSync(outFile, JSON.stringify(payload, null, 2) + "\n", "utf8"); + 
process.stdout.write(`Wrote ${outFile} (${trimmed.length} items)\n`); +} + +main(); diff --git a/tools/diff_report.js b/tools/diff_report.js new file mode 100755 index 0000000..a6ef8f0 --- /dev/null +++ b/tools/diff_report.js @@ -0,0 +1,318 @@ +#!/usr/bin/env node +"use strict"; + +const { execFileSync } = require("child_process"); +const fs = require("fs"); +const path = require("path"); + +const { C, color } = require("../src/utils/ansi"); +const { padLeft, padRight } = require("../src/utils/string"); +const { normalizeCspc } = require("../src/utils/sku"); +const { priceToNumber, salePctOff, normPrice } = require("../src/utils/price"); +const { isoTimestampFileSafe } = require("../src/utils/time"); + +function runGit(args) { + return execFileSync("git", args, { encoding: "utf8" }).trimEnd(); +} + +function gitShowText(sha, filePath) { + try { + return execFileSync("git", ["show", `${sha}:${filePath}`], { encoding: "utf8" }); + } catch { + return null; + } +} + +function gitListDbFiles(sha, dbDirRel) { + const out = runGit(["ls-tree", "-r", "--name-only", sha, dbDirRel]); + const lines = out.split(/\r?\n/).map((s) => s.trim()).filter(Boolean); + return new Set(lines); +} + +function parseJsonOrNull(txt) { + if (txt == null) return null; + try { + return JSON.parse(txt); + } catch { + return null; + } +} + +function mapItemsByUrl(obj) { + const m = new Map(); + const items = Array.isArray(obj?.items) ? 
obj.items : []; + for (const it of items) { + if (!it || typeof it.url !== "string" || !it.url.startsWith("http")) continue; + m.set(it.url, { + name: String(it.name || ""), + price: String(it.price || ""), + sku: String(it.sku || ""), + url: it.url, + removed: Boolean(it.removed), + }); + } + return m; +} + +function buildDiffForDb(prevObj, nextObj) { + const prev = mapItemsByUrl(prevObj); + const next = mapItemsByUrl(nextObj); + + const urls = new Set([...prev.keys(), ...next.keys()]); + + const newItems = []; + const restoredItems = []; + const removedItems = []; + const updatedItems = []; + + for (const url of urls) { + const a = prev.get(url); + const b = next.get(url); + + const aExists = Boolean(a); + const bExists = Boolean(b); + + const aRemoved = Boolean(a?.removed); + const bRemoved = Boolean(b?.removed); + + if (!aExists && bExists && !bRemoved) { + newItems.push({ ...b }); + continue; + } + + if (aExists && aRemoved && bExists && !bRemoved) { + restoredItems.push({ ...b }); + continue; + } + + if (aExists && !aRemoved && (!bExists || bRemoved)) { + removedItems.push({ ...a }); + continue; + } + + if (aExists && bExists && !aRemoved && !bRemoved) { + const aP = normPrice(a.price); + const bP = normPrice(b.price); + if (aP !== bP) { + updatedItems.push({ + name: b.name || a.name || "", + sku: normalizeCspc(b.sku || a.sku || ""), + oldPrice: a.price || "", + newPrice: b.price || "", + url, + }); + } + } + } + + return { newItems, restoredItems, removedItems, updatedItems }; +} + +function parseArgs(argv) { + const flags = new Set(); + const kv = new Map(); + const positional = []; + + for (let i = 0; i < argv.length; i++) { + const a = argv[i]; + if (!a.startsWith("-")) { + positional.push(a); + continue; + } + if (a === "--no-color") { + flags.add("no-color"); + continue; + } + if (a === "--color") { + flags.add("color"); + continue; + } + if ((a === "--db-dir" || a === "--out") && argv[i + 1] && !argv[i + 1].startsWith("-")) { + kv.set(a, argv[i + 1]); 
+ i++; + continue; + } + flags.add(a); + } + + const fromSha = positional[0] || ""; + const toSha = positional[1] || ""; + const dbDir = kv.get("--db-dir") || "data/db"; + const outFile = kv.get("--out") || ""; + + return { fromSha, toSha, dbDir, outFile, flags }; +} + +function renderDiffReport(diffReport, { fromSha, toSha, colorize }) { + const paint = (s, code) => color(s, code, colorize); + + let out = ""; + const ln = (s = "") => { + out += String(s) + "\n"; + }; + + ln(paint("========== DIFF REPORT ==========", C.bold)); + ln(`${paint("From", C.bold)} ${fromSha} ${paint("to", C.bold)} ${toSha}`); + ln( + `${paint("Totals", C.bold)} | Categories=${diffReport.categories.length} | New=${diffReport.totals.newCount} | Restored=${diffReport.totals.restoredCount} | Removed=${diffReport.totals.removedCount} | PriceChanges=${diffReport.totals.updatedCount}` + ); + ln(""); + + const rows = diffReport.categories; + const catW = Math.min(56, Math.max(...rows.map((r) => r.catLabel.length), 12)); + + ln(paint("Per-category summary:", C.bold)); + ln(`${padRight("Store | Category", catW)} ${padLeft("New", 4)} ${padLeft("Res", 4)} ${padLeft("Rem", 4)} ${padLeft("Upd", 4)}`); + ln(`${"-".repeat(catW)} ---- ---- ---- ----`); + for (const r of rows) { + ln(`${padRight(r.catLabel, catW)} ${padLeft(r.newCount, 4)} ${padLeft(r.restoredCount, 4)} ${padLeft(r.removedCount, 4)} ${padLeft(r.updatedCount, 4)}`); + } + ln(""); + + const labelW = Math.max(16, ...diffReport.newItems.map((x) => x.catLabel.length), ...diffReport.restoredItems.map((x) => x.catLabel.length), ...diffReport.removedItems.map((x) => x.catLabel.length), ...diffReport.updatedItems.map((x) => x.catLabel.length)); + + const skuInline = (sku) => { + const s = normalizeCspc(sku); + return s ? 
paint(` ${s}`, C.gray) : ""; + }; + + if (diffReport.newItems.length) { + ln(paint(`NEW (${diffReport.newItems.length})`, C.bold + C.green)); + for (const it of diffReport.newItems.sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name))) { + const price = it.price ? paint(it.price, C.cyan) : paint("(no price)", C.gray); + ln(`${paint("+", C.green)} ${padRight(it.catLabel, labelW)} | ${paint(it.name, C.bold)}${skuInline(it.sku)} ${price}`); + ln(` ${paint(it.url, C.dim)}`); + } + ln(""); + } + + if (diffReport.restoredItems.length) { + ln(paint(`RESTORED (${diffReport.restoredItems.length})`, C.bold + C.green)); + for (const it of diffReport.restoredItems.sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name))) { + const price = it.price ? paint(it.price, C.cyan) : paint("(no price)", C.gray); + ln(`${paint("R", C.green)} ${padRight(it.catLabel, labelW)} | ${paint(it.name, C.bold)}${skuInline(it.sku)} ${price}`); + ln(` ${paint(it.url, C.dim)}`); + } + ln(""); + } + + if (diffReport.removedItems.length) { + ln(paint(`REMOVED (${diffReport.removedItems.length})`, C.bold + C.yellow)); + for (const it of diffReport.removedItems.sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name))) { + const price = it.price ? paint(it.price, C.cyan) : paint("(no price)", C.gray); + ln(`${paint("-", C.yellow)} ${padRight(it.catLabel, labelW)} | ${paint(it.name, C.bold)}${skuInline(it.sku)} ${price}`); + ln(` ${paint(it.url, C.dim)}`); + } + ln(""); + } + + if (diffReport.updatedItems.length) { + ln(paint(`PRICE CHANGES (${diffReport.updatedItems.length})`, C.bold + C.cyan)); + + for (const u of diffReport.updatedItems.sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name))) { + const oldRaw = u.oldPrice || ""; + const newRaw = u.newPrice || ""; + + const oldN = priceToNumber(oldRaw); + const newN = priceToNumber(newRaw); + + const oldP = oldRaw ? 
paint(oldRaw, C.yellow) : paint("(no price)", C.gray); + + let newP = newRaw ? newRaw : "(no price)"; + let offTag = ""; + + if (Number.isFinite(oldN) && Number.isFinite(newN)) { + if (newN > oldN) newP = paint(newP, C.red); + else if (newN < oldN) { + newP = paint(newP, C.green); + const pct = salePctOff(oldRaw, newRaw); + if (pct !== null) offTag = " " + paint(`[${pct}% Off]`, C.green); + } else newP = paint(newP, C.cyan); + } else newP = paint(newP, C.cyan); + + ln( + `${paint("~", C.cyan)} ${padRight(u.catLabel, labelW)} | ${paint(u.name, C.bold)}${skuInline(u.sku)} ${oldP} ${paint("->", C.gray)} ${newP}${offTag}` + ); + ln(` ${paint(u.url, C.dim)}`); + } + + ln(""); + } + + ln(paint("======== END DIFF REPORT ========", C.bold)); + + return out; +} + +async function main() { + const { fromSha, toSha, dbDir, outFile, flags } = parseArgs(process.argv.slice(2)); + + if (!fromSha || !toSha) { + console.error(`Usage: ${path.basename(process.argv[1])} [--db-dir data/db] [--out reports/.txt] [--no-color]`); + process.exitCode = 2; + return; + } + + // If user provides short SHAs, git accepts them. + const colorize = flags.has("no-color") ? 
false : Boolean(process.stdout && process.stdout.isTTY); + + const filesA = gitListDbFiles(fromSha, dbDir); + const filesB = gitListDbFiles(toSha, dbDir); + const files = new Set([...filesA, ...filesB]); + + const diffReport = { + categories: [], + totals: { newCount: 0, updatedCount: 0, removedCount: 0, restoredCount: 0 }, + newItems: [], + restoredItems: [], + removedItems: [], + updatedItems: [], + }; + + for (const file of [...files].sort()) { + const prevObj = parseJsonOrNull(gitShowText(fromSha, file)); + const nextObj = parseJsonOrNull(gitShowText(toSha, file)); + + const storeLabel = String(nextObj?.storeLabel || prevObj?.storeLabel || nextObj?.store || prevObj?.store || "?"); + const catLabel = String(nextObj?.categoryLabel || prevObj?.categoryLabel || nextObj?.category || prevObj?.category || path.basename(file)); + const catLabelFull = `${storeLabel} | ${catLabel}`; + + const { newItems, restoredItems, removedItems, updatedItems } = buildDiffForDb(prevObj, nextObj); + + diffReport.categories.push({ + catLabel: catLabelFull, + newCount: newItems.length, + restoredCount: restoredItems.length, + removedCount: removedItems.length, + updatedCount: updatedItems.length, + }); + + diffReport.totals.newCount += newItems.length; + diffReport.totals.restoredCount += restoredItems.length; + diffReport.totals.removedCount += removedItems.length; + diffReport.totals.updatedCount += updatedItems.length; + + for (const it of newItems) diffReport.newItems.push({ catLabel: catLabelFull, ...it }); + for (const it of restoredItems) diffReport.restoredItems.push({ catLabel: catLabelFull, ...it }); + for (const it of removedItems) diffReport.removedItems.push({ catLabel: catLabelFull, ...it }); + for (const u of updatedItems) diffReport.updatedItems.push({ catLabel: catLabelFull, ...u }); + } + + const reportText = renderDiffReport(diffReport, { fromSha, toSha, colorize }); + process.stdout.write(reportText); + + const outPath = outFile + ? (path.isAbsolute(outFile) ? 
outFile : path.join(process.cwd(), outFile)) + : ""; + + if (outPath) { + fs.mkdirSync(path.dirname(outPath), { recursive: true }); + fs.writeFileSync(outPath, renderDiffReport(diffReport, { fromSha, toSha, colorize: false }), "utf8"); + } +} + +main().catch((e) => { + const msg = e && e.stack ? e.stack : String(e); + console.error(msg); + process.exitCode = 1; +}); diff --git a/tracker.js b/tracker.js new file mode 100755 index 0000000..8b7bb90 --- /dev/null +++ b/tracker.js @@ -0,0 +1,10 @@ +#!/usr/bin/env node +"use strict"; + +const { main } = require("./src/main"); + +main().catch((e) => { + const msg = e && e.stack ? e.stack : String(e); + console.error(msg); + process.exitCode = 1; +}); diff --git a/viz/app.js b/viz/app.js new file mode 100644 index 0000000..5b962ba --- /dev/null +++ b/viz/app.js @@ -0,0 +1,841 @@ +"use strict"; + +/** + * Hash routes: + * #/ search + * #/item/ detail + */ + +const $app = document.getElementById("app"); + +function esc(s) { + return String(s ?? "").replace( + /[&<>"']/g, + (c) => + ({ + "&": "&", + "<": "<", + ">": ">", + '"': """, + "'": "'", + }[c]) + ); +} + +function parsePriceToNumber(v) { + const s = String(v ?? "").replace(/[^0-9.]/g, ""); + const n = Number(s); + return Number.isFinite(n) ? n : null; +} + +function dateOnly(iso) { + const m = String(iso ?? "").match(/^(\d{4}-\d{2}-\d{2})/); + return m ? m[1] : ""; +} + +function prettyTs(iso) { + const s = String(iso || ""); + if (!s) return ""; + return s.replace("T", " "); +} + +function makeUnknownSku(r) { + const store = String(r?.storeLabel || r?.store || "store").toLowerCase().replace(/[^a-z0-9]+/g, "-"); + const url = String(r?.url || ""); + const h = url ? 
btoa(unescape(encodeURIComponent(url))).replace(/=+$/g, "").slice(0, 16) : "no-url"; + return `unknown:${store}:${h}`; +} +function fnv1a32(str) { + let h = 0x811c9dc5; // offset basis + for (let i = 0; i < str.length; i++) { + h ^= str.charCodeAt(i); + h = Math.imul(h, 0x01000193); // FNV prime + } + // unsigned -> 8 hex chars + return (h >>> 0).toString(16).padStart(8, "0"); +} + +function makeSyntheticSku(r) { + const store = String(r?.storeLabel || r?.store || "store"); + const url = String(r?.url || ""); + const key = `${store}|${url}`; + return `u:${fnv1a32(key)}`; // stable per store+url +} + +function keySkuForRow(r) { + const real = String(r?.sku || "").trim(); + return real ? real : makeSyntheticSku(r); +} + +function displaySku(key) { + return String(key || "").startsWith("u:") ? "unknown" : String(key || ""); +} + + + +// Normalize for search: lowercase, punctuation -> space, collapse spaces +function normSearchText(s) { + return String(s ?? "") + .toLowerCase() + .replace(/[^a-z0-9]+/g, " ") + .replace(/\s+/g, " ") + .trim(); +} + +function tokenizeQuery(q) { + const n = normSearchText(q); + return n ? n.split(" ").filter(Boolean) : []; +} + +function inferGithubOwnerRepo() { + const host = location.hostname || ""; + const m = host.match(/^([a-z0-9-]+)\.github\.io$/i); + if (m) { + const owner = m[1]; + const parts = (location.pathname || "/").split("/").filter(Boolean); + const repo = parts.length >= 1 ? 
parts[0] : `${owner}.github.io`; + return { owner, repo }; + } + return { owner: "brennanwilkes", repo: "spirit-tracker" }; +} + +async function fetchJson(url) { + const res = await fetch(url, { cache: "no-store" }); + if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`); + return await res.json(); +} + +async function fetchText(url) { + const res = await fetch(url, { cache: "no-store" }); + if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`); + return await res.text(); +} + +function route() { + const h = location.hash || "#/"; + const parts = h.replace(/^#\/?/, "").split("/").filter(Boolean); + if (parts.length === 0) return renderSearch(); + if (parts[0] === "item" && parts[1]) return renderItem(parts[1]); + return renderSearch(); +} + +/* ---------------- Search ---------------- */ + +let INDEX = null; +let RECENT = null; + +// persist search box value across navigation +const Q_LS_KEY = "stviz:v1:search:q"; +function loadSavedQuery() { + try { + return localStorage.getItem(Q_LS_KEY) || ""; + } catch { + return ""; + } +} +function saveQuery(v) { + try { + localStorage.setItem(Q_LS_KEY, String(v ?? 
"")); + } catch {} +} + +async function loadIndex() { + if (INDEX) return INDEX; + INDEX = await fetchJson("./data/index.json"); + return INDEX; +} + +async function loadRecent() { + if (RECENT) return RECENT; + try { + RECENT = await fetchJson("./data/recent.json"); + } catch { + RECENT = { count: 0, items: [] }; + } + return RECENT; +} + +function normImg(s) { + const v = String(s || "").trim(); + if (!v) return ""; + if (/^data:/i.test(v)) return ""; + return v; +} + +// Build one row per SKU + combined searchable text across all listings of that SKU +function aggregateBySku(listings) { + const bySku = new Map(); + + for (const r of listings) { + + const sku = keySkuForRow(r); + + const name = String(r?.name || ""); + const url = String(r?.url || ""); + const storeLabel = String(r?.storeLabel || r?.store || ""); + + const img = normImg(r?.img || r?.image || r?.thumb || ""); + + const pNum = parsePriceToNumber(r?.price); + const pStr = String(r?.price || ""); + + let agg = bySku.get(sku); + if (!agg) { + agg = { + sku, + name: name || "", + img: "", + cheapestPriceStr: pStr || "", + cheapestPriceNum: pNum, + cheapestStoreLabel: storeLabel || "", + stores: new Set(), + sampleUrl: url || "", + _searchParts: [], + searchText: "", // normalized blob + + _imgByName: new Map(), // name -> img + _imgAny: "", + }; + bySku.set(sku, agg); + } + + if (storeLabel) agg.stores.add(storeLabel); + if (!agg.sampleUrl && url) agg.sampleUrl = url; + + // Keep the first non-empty name (existing behavior), but make sure img matches that chosen name + if (!agg.name && name) { + agg.name = name; + if (img) agg.img = img; + } else if (agg.name && name === agg.name && img && !agg.img) { + agg.img = img; + } + + if (img) { + if (!agg._imgAny) agg._imgAny = img; + if (name) agg._imgByName.set(name, img); + } + + // cheapest + if (pNum !== null) { + if (agg.cheapestPriceNum === null || pNum < agg.cheapestPriceNum) { + agg.cheapestPriceNum = pNum; + agg.cheapestPriceStr = pStr || ""; + 
agg.cheapestStoreLabel = storeLabel || agg.cheapestStoreLabel; + } + } + + // search parts (include everything we might want to match) + agg._searchParts.push(sku); + if (name) agg._searchParts.push(name); + if (url) agg._searchParts.push(url); + if (storeLabel) agg._searchParts.push(storeLabel); + } + + const out = [...bySku.values()]; + + for (const it of out) { + // Ensure thumbnail matches chosen name when possible + if (!it.img) { + const m = it._imgByName; + if (it.name && m && m.has(it.name)) it.img = m.get(it.name) || ""; + else it.img = it._imgAny || ""; + } + + delete it._imgByName; + delete it._imgAny; + + // Ensure at least these are in the blob even if index rows are already aggregated + it._searchParts.push(it.sku); + it._searchParts.push(it.name || ""); + it._searchParts.push(it.sampleUrl || ""); + it._searchParts.push(it.cheapestStoreLabel || ""); + + it.searchText = normSearchText(it._searchParts.join(" | ")); + delete it._searchParts; + } + + out.sort((a, b) => (String(a.name) + a.sku).localeCompare(String(b.name) + b.sku)); + return out; +} + +function matchesAllTokens(hayNorm, tokens) { + if (!tokens.length) return true; + for (const t of tokens) { + if (!hayNorm.includes(t)) return false; + } + return true; +} + +function renderThumbHtml(imgUrl, cls = "thumb") { + const img = normImg(imgUrl); + if (!img) return `
`; + return ``; +} + +function renderSearch() { + $app.innerHTML = ` +
+
+

Spirit Tracker Viz

+
Search name / url / sku (word AND)
+
+ +
+ +
+
+
+ `; + + const $q = document.getElementById("q"); + const $results = document.getElementById("results"); + + $q.value = loadSavedQuery(); + + let aggBySku = new Map(); + + function renderAggregates(items) { + if (!items.length) { + $results.innerHTML = `
No matches.
`; + return; + } + + const limited = items.slice(0, 80); + $results.innerHTML = limited + .map((it) => { + const storeCount = it.stores.size || 0; + const plus = storeCount > 1 ? ` +${storeCount - 1}` : ""; + const price = it.cheapestPriceStr ? it.cheapestPriceStr : "(no price)"; + const store = it.cheapestStoreLabel || ([...it.stores][0] || "Store"); + + return ` +
+
+
+ ${renderThumbHtml(it.img)} +
+
+
+
${esc(it.name || "(no name)")}
+ ${esc(displaySku(it.sku))} +
+
+ ${esc(price)} + ${esc(store)}${esc(plus)} +
+
+ ${esc(it.sampleUrl || "")} +
+
+
+
+ `; + }) + .join(""); + + for (const el of Array.from($results.querySelectorAll(".item"))) { + el.addEventListener("click", () => { + const sku = el.getAttribute("data-sku") || ""; + if (!sku) return; + saveQuery($q.value); + location.hash = `#/item/${encodeURIComponent(sku)}`; + }); + } + } + + function renderRecent(recent) { + const items = Array.isArray(recent?.items) ? recent.items : []; + if (!items.length) { + $results.innerHTML = `
Type to search…
`; + return; + } + + const days = Number.isFinite(Number(recent?.windowDays)) ? Number(recent.windowDays) : 3; + const limited = items.slice(0, 140); + + $results.innerHTML = + `
Recently changed (last ${esc(days)} day(s)):
` + + limited + .map((r) => { + const kind = + r.kind === "new" + ? "NEW" + : r.kind === "restored" + ? "RESTORED" + : r.kind === "removed" + ? "REMOVED" + : r.kind === "price_down" + ? "PRICE ↓" + : r.kind === "price_up" + ? "PRICE ↑" + : r.kind === "price_change" + ? "PRICE" + : "CHANGE"; + + const priceLine = + r.kind === "new" || r.kind === "restored" || r.kind === "removed" + ? `${esc(r.price || "")}` + : `${esc(r.oldPrice || "")} → ${esc(r.newPrice || "")}`; + + const when = r.ts ? prettyTs(r.ts) : r.date || ""; + + const sku = String(r.sku || ""); + const img = aggBySku.get(sku)?.img || ""; + + return ` +
+
+
+ ${renderThumbHtml(img)} +
+
+
+
${esc(r.name || "(no name)")}
+ ${esc(displaySku(it.sku))} +
+
+ ${esc(kind)} + ${esc(r.storeLabel || "")} + ${esc(priceLine)} +
+
+ ${esc(when)} +
+
+ ${esc(r.url || "")} +
+
+
+
+ `; + }) + .join(""); + + for (const el of Array.from($results.querySelectorAll(".item"))) { + el.addEventListener("click", () => { + const sku = el.getAttribute("data-sku") || ""; + if (!sku) return; + saveQuery($q.value); + location.hash = `#/item/${encodeURIComponent(sku)}`; + }); + } + } + + let allAgg = []; + let indexReady = false; + + function applySearch() { + if (!indexReady) return; + + const tokens = tokenizeQuery($q.value); + if (!tokens.length) { + loadRecent() + .then(renderRecent) + .catch(() => { + $results.innerHTML = `
Type to search…
`; + }); + return; + } + + const matches = allAgg.filter((it) => matchesAllTokens(it.searchText, tokens)); + renderAggregates(matches); + } + + $results.innerHTML = `
Loading index…
`; + + loadIndex() + .then((idx) => { + const listings = Array.isArray(idx.items) ? idx.items : []; + allAgg = aggregateBySku(listings); + aggBySku = new Map(allAgg.map((x) => [String(x.sku || ""), x])); + indexReady = true; + $q.focus(); + applySearch(); + return loadRecent(); + }) + .then((recent) => { + if (!tokenizeQuery($q.value).length) renderRecent(recent); + }) + .catch((e) => { + $results.innerHTML = `
Failed to load: ${esc(e.message)}
`; + }); + + let t = null; + $q.addEventListener("input", () => { + saveQuery($q.value); + + if (t) clearTimeout(t); + t = setTimeout(applySearch, 50); + }); +} + +/* ---------------- Detail (chart) ---------------- */ + +let CHART = null; + +function destroyChart() { + if (CHART) { + CHART.destroy(); + CHART = null; + } +} + +async function githubListCommits({ owner, repo, branch, path }) { + const base = `https://api.github.com/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}/commits`; + const u1 = `${base}?sha=${encodeURIComponent(branch)}&path=${encodeURIComponent(path)}&per_page=100&page=1`; + const page1 = await fetchJson(u1); + + if (Array.isArray(page1) && page1.length === 100) { + const u2 = `${base}?sha=${encodeURIComponent(branch)}&path=${encodeURIComponent(path)}&per_page=100&page=2`; + const page2 = await fetchJson(u2); + return [...page1, ...(Array.isArray(page2) ? page2 : [])]; + } + + return Array.isArray(page1) ? page1 : []; +} + +async function githubFetchFileAtSha({ owner, repo, sha, path }) { + const raw = `https://raw.githubusercontent.com/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}/${encodeURIComponent( + sha + )}/${path}`; + const txt = await fetchText(raw); + return JSON.parse(txt); +} + +function findItemBySkuInDb(obj, sku) { + const items = Array.isArray(obj?.items) ? 
obj.items : []; + for (const it of items) { + if (!it || it.removed) continue; + const s = String(it.sku || ""); + if (s === sku) return it; + } + return null; +} + +function computeSuggestedY(values) { + const nums = values.filter((v) => Number.isFinite(v)); + if (!nums.length) return { suggestedMin: undefined, suggestedMax: undefined }; + let min = nums[0], + max = nums[0]; + for (const n of nums) { + if (n < min) min = n; + if (n > max) max = n; + } + if (min === max) return { suggestedMin: min * 0.95, suggestedMax: max * 1.05 }; + const pad = (max - min) * 0.08; + return { suggestedMin: Math.max(0, min - pad), suggestedMax: max + pad }; +} + +// Collapse commit list down to 1 commit per day (keep the most recent commit for that day) +function collapseCommitsToDaily(commits) { + // commits should be oldest -> newest. + const byDate = new Map(); + for (const c of commits) { + const d = String(c?.date || ""); + const sha = String(c?.sha || ""); + if (!d || !sha) continue; + byDate.set(d, { sha, date: d, ts: String(c?.ts || "") }); + } + return [...byDate.values()]; +} + +function cacheKeySeries(sku, dbFile, cacheBust) { + return `stviz:v2:series:${cacheBust}:${sku}:${dbFile}`; +} + +function loadSeriesCache(sku, dbFile, cacheBust) { + try { + const raw = localStorage.getItem(cacheKeySeries(sku, dbFile, cacheBust)); + if (!raw) return null; + const obj = JSON.parse(raw); + if (!obj || !Array.isArray(obj.points)) return null; + const savedAt = Number(obj.savedAt || 0); + if (!Number.isFinite(savedAt) || Date.now() - savedAt > 7 * 24 * 3600 * 1000) return null; + return obj; + } catch { + return null; + } +} + +function saveSeriesCache(sku, dbFile, cacheBust, points) { + try { + localStorage.setItem(cacheKeySeries(sku, dbFile, cacheBust), JSON.stringify({ savedAt: Date.now(), points })); + } catch {} +} + +let DB_COMMITS = null; + +async function loadDbCommitsManifest() { + if (DB_COMMITS) return DB_COMMITS; + try { + DB_COMMITS = await 
fetchJson("./data/db_commits.json"); + return DB_COMMITS; + } catch { + DB_COMMITS = null; + return null; + } +} + +/* renderItem(sku): render the detail view for one SKU. Calls destroyChart(), replaces the markup of $app, loads the index via loadIndex() and keeps the rows whose trimmed sku (or the makeUnknownSku fallback) equals the requested sku; when none match it shows a not-found message and returns. Otherwise it sets the title to the most frequent non-empty name among those rows, picks a thumbnail (preferring a row with that name), lists the rows as store links sorted by storeLabel, and builds one price series per distinct dbFile: from the localStorage series cache when it has points, else from the commit manifest (loadDbCommitsManifest) or, when the manifest has no entry for the file, from githubListCommits. Commits are collapsed to one per day and limited to the newest 260; each file version is read with githubFetchFileAtSha. The current index price is added under the index date, the series is written to the cache, and a line Chart is created and stored in CHART. NOTE(review): the template below interpolates it.sku, but no binding named it is visible in this function at that point - confirm where it comes from. NOTE(review): a commit whose fetch throws is skipped, yet the resulting series is still written to the cache - confirm that caching partial history is intended. NOTE(review): the final status text depends only on whether the manifest loaded, not on whether series came from the cache. */ async function renderItem(sku) { + destroyChart(); + + $app.innerHTML = ` +
+
+ + ${esc(displaySku(it.sku))} +
+ +
+
+
+
+
Loading…
+ +
+
+
+ +
+ +
+
+
+ `; + + document.getElementById("back").addEventListener("click", () => { + location.hash = "#/"; + }); + + const $title = document.getElementById("title"); + const $links = document.getElementById("links"); + const $status = document.getElementById("status"); + const $canvas = document.getElementById("chart"); + const $thumbBox = document.getElementById("thumbBox"); + + /* Collect the current index rows that match this SKU. */ const idx = await loadIndex(); + const all = Array.isArray(idx.items) ? idx.items : []; + const cur = all.filter((x) => (String(x.sku || "").trim() || makeUnknownSku(x)) === String(sku || "")); + if (!cur.length) { + $title.textContent = "Item not found in current index"; + $status.textContent = "Tip: index.json only includes current (non-removed) items."; + if ($thumbBox) $thumbBox.innerHTML = `
`; + return; + } + + /* Count how often each non-empty name occurs; the first name with the highest count wins. */ const nameCounts = new Map(); + for (const r of cur) { + const n = String(r.name || ""); + if (!n) continue; + nameCounts.set(n, (nameCounts.get(n) || 0) + 1); + } + let bestName = cur[0].name || `(SKU ${sku})`; + let bestCount = -1; + for (const [n, c] of nameCounts.entries()) { + if (c > bestCount) { + bestName = n; + bestCount = c; + } + } + $title.textContent = bestName; + + // Pick image that matches the picked name (fallback: any) + let bestImg = ""; + for (const r of cur) { + if (String(r?.name || "") === String(bestName || "") && normImg(r?.img)) { + bestImg = normImg(r.img); + break; + } + } + if (!bestImg) { + for (const r of cur) { + if (normImg(r?.img)) { + bestImg = normImg(r.img); + break; + } + } + } + if ($thumbBox) { + $thumbBox.innerHTML = bestImg ? renderThumbHtml(bestImg, "detailThumb") : `
`; + } + + $links.innerHTML = cur + .slice() + .sort((a, b) => String(a.storeLabel || "").localeCompare(String(b.storeLabel || ""))) + .map( + (r) => + `${esc(r.storeLabel || r.store || "Store")}` + ) + .join(""); + + const gh = inferGithubOwnerRepo(); + const owner = gh.owner; + const repo = gh.repo; + const branch = "data"; + + const byDbFile = new Map(); + for (const r of cur) { + if (!r.dbFile) continue; + if (!byDbFile.has(r.dbFile)) byDbFile.set(r.dbFile, r); + } + const dbFiles = [...byDbFile.keys()].sort(); + + $status.textContent = `Loading history for ${dbFiles.length} store file(s)…`; + + const manifest = await loadDbCommitsManifest(); + + const allDatesSet = new Set(); + const series = []; + + const fileJsonCache = new Map(); + + const cacheBust = String(idx.generatedAt || new Date().toISOString()); + const today = dateOnly(idx.generatedAt || new Date().toISOString()); + + for (const dbFile of dbFiles) { + const row = byDbFile.get(dbFile); + const storeLabel = String(row.storeLabel || row.store || dbFile); + + const cached = loadSeriesCache(sku, dbFile, cacheBust); + if (cached && Array.isArray(cached.points) && cached.points.length) { + const points = new Map(); + const values = []; + for (const p of cached.points) { + const d = String(p.date || ""); + const v = p.price === null ? null : Number(p.price); + if (!d) continue; + points.set(d, Number.isFinite(v) ? 
v : null); + if (Number.isFinite(v)) values.push(v); + allDatesSet.add(d); + } + series.push({ label: storeLabel, points, values }); + continue; + } + + let commits = []; + if (manifest && manifest.files && Array.isArray(manifest.files[dbFile])) { + commits = manifest.files[dbFile]; + } else { + try { + let apiCommits = await githubListCommits({ owner, repo, branch, path: dbFile }); + apiCommits = apiCommits.slice().reverse(); // oldest -> newest + commits = apiCommits + .map((c) => { + const sha = String(c?.sha || ""); + const dIso = c?.commit?.committer?.date || c?.commit?.author?.date || ""; + const d = dateOnly(dIso); + return sha && d ? { sha, date: d, ts: String(dIso || "") } : null; + }) + .filter(Boolean); + } catch { + commits = []; + } + } + + commits = collapseCommitsToDaily(commits); + + const points = new Map(); + const values = []; + const compactPoints = []; + + const MAX_POINTS = 260; // daily points (~8-9 months) + if (commits.length > MAX_POINTS) commits = commits.slice(commits.length - MAX_POINTS); + + for (const c of commits) { + const sha = String(c.sha || ""); + const d = String(c.date || ""); + if (!sha || !d) continue; + + const ck = `${sha}|${dbFile}`; + let obj = fileJsonCache.get(ck) || null; + if (!obj) { + try { + obj = await githubFetchFileAtSha({ owner, repo, sha, path: dbFile }); + fileJsonCache.set(ck, obj); + } catch { + continue; + } + } + + const it = findItemBySkuInDb(obj, sku); + const pNum = it ? 
parsePriceToNumber(it.price) : null; + + points.set(d, pNum); + if (pNum !== null) values.push(pNum); + allDatesSet.add(d); + + compactPoints.push({ date: d, price: pNum }); + } + + // Always add "today" from the current index + const curP = parsePriceToNumber(row.price); + if (curP !== null) { + points.set(today, curP); + values.push(curP); + allDatesSet.add(today); + compactPoints.push({ date: today, price: curP }); + } + + saveSeriesCache(sku, dbFile, cacheBust, compactPoints); + series.push({ label: storeLabel, points, values }); + } + + const labels = [...allDatesSet].sort(); + if (!labels.length) { + $status.textContent = "No historical points found."; + return; + } + + const allVals = []; + for (const s of series) for (const v of s.values) allVals.push(v); + const ySug = computeSuggestedY(allVals); + + const datasets = series.map((s) => ({ + label: s.label, + data: labels.map((d) => (s.points.has(d) ? s.points.get(d) : null)), + spanGaps: false, + tension: 0.15, + })); + + const ctx = $canvas.getContext("2d"); + CHART = new Chart(ctx, { + type: "line", + data: { labels, datasets }, + options: { + responsive: true, + maintainAspectRatio: false, + interaction: { mode: "nearest", intersect: false }, + plugins: { + legend: { display: true }, + tooltip: { + callbacks: { + label: (ctx) => { + const v = ctx.parsed?.y; + if (!Number.isFinite(v)) return `${ctx.dataset.label}: (no data)`; + return `${ctx.dataset.label}: $${v.toFixed(2)}`; + }, + }, + }, + }, + scales: { + x: { + ticks: { maxRotation: 0, autoSkip: true, maxTicksLimit: 12 }, + grid: { display: false }, + }, + y: { + ...ySug, + ticks: { callback: (v) => `$${Number(v).toFixed(0)}` }, + }, + }, + }, + }); + + $status.textContent = manifest + ? `History loaded from prebuilt manifest (1 point/day) + current run. Points=${labels.length}.` + : `History loaded (GitHub API fallback; 1 point/day) + current run. 
Points=${labels.length}.`; +} + +/* ---------------- boot ---------------- */ + +window.addEventListener("hashchange", route); +route(); diff --git a/viz/index.html b/viz/index.html new file mode 100644 index 0000000..6b49329 --- /dev/null +++ b/viz/index.html @@ -0,0 +1,16 @@ + + + + + + Spirit Tracker Viz + + + +
/**
 * Map a request URL path to an absolute file path inside the served directory.
 *
 * The query string is dropped, percent-escapes are decoded and backslashes are
 * treated as forward slashes before the path is joined onto the base directory
 * and normalized.
 *
 * @param {string} urlPath - Raw request path, e.g. "/data/index.json?x=1".
 * @param {string} [baseDir=root] - Directory requests are confined to; defaults
 *   to the module-level `root`.
 * @returns {(string|null)} The normalized absolute path, or null when the path
 *   has malformed percent-encoding, contains a NUL byte, or resolves outside
 *   the base directory.
 */
function safePath(urlPath, baseDir = root) {
  let decoded;
  try {
    decoded = decodeURIComponent(String(urlPath).split("?")[0]);
  } catch {
    // decodeURIComponent throws URIError on malformed escapes such as "%E0%A4%A";
    // reject the request instead of letting the exception escape the handler.
    return null;
  }
  // fs functions throw on paths containing NUL bytes, so refuse them up front.
  if (decoded.includes("\0")) return null;

  const rel = decoded.replace(/\\/g, "/");
  const base = path.resolve(baseDir);
  const norm = path.normalize(path.join(base, rel));

  // Compare against the base plus a separator: a bare startsWith(base) would
  // also accept sibling directories that merely share the prefix
  // (e.g. "/srv/viz-secret" when the base is "/srv/viz").
  const prefix = base.endsWith(path.sep) ? base : base + path.sep;
  if (norm !== base && !norm.startsWith(prefix)) return null;
  return norm;
}
Helvetica, Arial, "Apple Color Emoji", "Segoe UI Emoji"; + background: var(--bg); + color: var(--text); +} + +/* Links use the accent color and are underlined only on hover. */ a { color: var(--accent); text-decoration: none; } +a:hover { text-decoration: underline; } + +/* Horizontally centered page wrapper, capped at 980px wide. */ .container { + max-width: 980px; + margin: 0 auto; + padding: 18px; +} + +.header { + display: flex; + gap: 12px; + align-items: center; + justify-content: space-between; + margin-bottom: 14px; +} + +.h1 { + font-size: 18px; + font-weight: 700; + margin: 0; +} + +/* Bordered, rounded panel on the --panel background. */ .card { + background: var(--panel); + border: 1px solid var(--border); + border-radius: 12px; + padding: 14px; +} + +.input { + width: 100%; + padding: 12px 12px; + border-radius: 10px; + border: 1px solid var(--border); + background: #0f1318; + color: var(--text); + outline: none; +} + +.input:focus { border-color: #37566b; } + +/* Vertical stack of .item rows. */ .list { + margin-top: 12px; + display: flex; + flex-direction: column; + gap: 10px; +} + +.item { + border: 1px solid var(--border); + border-radius: 12px; + padding: 12px; + background: #0f1318; + cursor: pointer; +} + +.item:hover { border-color: #2f3a46; } + +.itemRow { + display: flex; + gap: 12px; + align-items: flex-start; +} + +/* Fixed 64x64 thumbnail frame; flex: 0 0 64px keeps it from growing or shrinking in the row. */ .thumbBox { + width: 64px; + height: 64px; + border-radius: 12px; + overflow: hidden; + border: 1px solid var(--border); + background: #0b0d10; + flex: 0 0 64px; + display: flex; + align-items: center; + justify-content: center; +} + +.thumb { + width: 100%; + height: 100%; + object-fit: cover; + display: block; +} + +.thumbPlaceholder { + width: 100%; + height: 100%; + background: #0b0d10; +} + +/* min-width: 0 lets this flex child shrink below its content width. */ .itemBody { + flex: 1; + min-width: 0; +} + +.itemTop { + display: flex; + justify-content: space-between; + gap: 10px; + align-items: baseline; +} + +.itemName { + font-weight: 700; + font-size: 14px; +} + +/* Small pill-shaped label that never wraps. */ .badge { + font-size: 12px; + color: var(--muted); + border: 1px solid var(--border); + padding: 2px 8px; + border-radius: 999px; + white-space: nowrap; +} + +.meta { + margin-top: 6px; + display: flex; + gap: 10px; + flex-wrap: wrap; + color: 
var(--muted); + font-size: 12px; +} + +/* Monospace font stack. */ .mono { font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; } + +.topbar { + display: flex; + align-items: center; + gap: 10px; + margin-bottom: 12px; +} + +/* Button styling; the border lightens on hover. */ .btn { + border: 1px solid var(--border); + background: #0f1318; + color: var(--text); + border-radius: 10px; + padding: 10px 10px; + cursor: pointer; +} + +.btn:hover { border-color: #2f3a46; } + +/* Wrapping row of links. */ .links { + display: flex; + gap: 10px; + flex-wrap: wrap; + margin: 8px 0 14px; +} + +.small { + color: var(--muted); + font-size: 12px; +} + +/* Detail view sizing */ +.detailCard { + display: flex; + flex-direction: column; + gap: 10px; +} + +.detailHeader { + display: flex; + gap: 12px; + align-items: flex-start; +} + +/* Larger (96x96) counterpart of .thumbBox. */ .detailThumbBox { + width: 96px; + height: 96px; + border-radius: 14px; + overflow: hidden; + border: 1px solid var(--border); + background: #0b0d10; + flex: 0 0 96px; + display: flex; + align-items: center; + justify-content: center; +} + +.detailThumb { + width: 100%; + height: 100%; + object-fit: cover; + display: block; +} + +.detailHeaderText { + flex: 1; + min-width: 0; +} + +/* Chart fills most of viewport, but stays usable on mobile */ +.chartBox { + width: 100%; + height: min(72vh, 720px); + min-height: 320px; + border: 1px solid var(--border); + border-radius: 12px; + background: #0f1318; + padding: 10px; +} + +/* Viewports up to 640px wide: tighter padding, smaller thumbnails, shorter chart. */ @media (max-width: 640px) { + .container { padding: 14px; } + .thumbBox { width: 56px; height: 56px; flex: 0 0 56px; } + .detailThumbBox { width: 84px; height: 84px; flex: 0 0 84px; } + + .chartBox { + height: 58vh; + min-height: 260px; + padding: 8px; + } +} + +/* Force the canvas to fill .chartBox; !important presumably overrides sizing set on the canvas by the chart library - confirm. */ .chartBox canvas { + width: 100% !important; + height: 100% !important; +}