chore: initial code

This commit is contained in:
Brennan Wilkes (Text Groove) 2026-01-19 20:00:40 -08:00
commit 470eb8ca56
46 changed files with 6127 additions and 0 deletions

14
.gitignore vendored Normal file
View file

@ -0,0 +1,14 @@
node_modules/
*.log
# Data & reports live on the data branch
/data/
/reports/
.worktrees/
# Generated viz artifacts live on the data branch
viz/data/
# Keep cron log out of git even on data branch
reports/cron.log

10
bin/tracker.js Executable file
View file

@ -0,0 +1,10 @@
#!/usr/bin/env node
"use strict";

// CLI entry point: delegates to src/main and converts a rejected run into a
// non-zero exit code after printing the error.
const { main } = require("../src/main");

/** Print the failure (stack when available) and mark the process as failed. */
function reportFatal(e) {
  console.error(e && e.stack ? e.stack : String(e));
  process.exitCode = 1;
}

main().catch(reportFatal);

87
scripts/bootstrap_clone.sh Executable file
View file

@ -0,0 +1,87 @@
#!/usr/bin/env bash
# Bootstrap a clone of the repository:
#   1. fetch origin and make sure local MAIN_BRANCH / DATA_BRANCH exist and track origin,
#   2. create (or repair) a git worktree for DATA_BRANCH at WORKTREE_DIR,
#   3. fast-forward the worktree to origin and merge MAIN_BRANCH into it (best effort),
#   4. optionally run scripts/run_daily.sh when RUN_DAILY=1.
#
# Environment overrides: MAIN_BRANCH, DATA_BRANCH, DATA_WORKTREE_DIR, RUN_DAILY, NODE_BIN.
# Must be run from the repository root (paths below are relative to the cwd).
set -euo pipefail

MAIN_BRANCH="${MAIN_BRANCH:-main}"
DATA_BRANCH="${DATA_BRANCH:-data}"
WORKTREE_DIR="${DATA_WORKTREE_DIR:-.worktrees/data}"
RUN_DAILY="${RUN_DAILY:-0}" # set RUN_DAILY=1 to run at the end

# must be in a git repo root-ish
git rev-parse --is-inside-work-tree >/dev/null

# ensure we have origin
if ! git remote get-url origin >/dev/null 2>&1; then
  echo "ERROR: remote 'origin' not configured" >&2
  exit 1
fi

echo "[bootstrap] fetching..."
git fetch --prune origin

# ensure local main exists and tracks origin/main (best effort)
if git show-ref --verify --quiet "refs/remotes/origin/$MAIN_BRANCH"; then
  if git show-ref --verify --quiet "refs/heads/$MAIN_BRANCH"; then
    git checkout -q "$MAIN_BRANCH"
    git merge -q --ff-only "origin/$MAIN_BRANCH" || true
  else
    git checkout -q -b "$MAIN_BRANCH" "origin/$MAIN_BRANCH"
  fi
  git branch --set-upstream-to="origin/$MAIN_BRANCH" "$MAIN_BRANCH" >/dev/null 2>&1 || true
fi

# ensure local data branch exists (from origin/data)
#
# The data branch is deliberately never checked out in the main working tree:
# once the worktree below exists, git refuses to check out a branch that is
# already checked out in another worktree, which made re-running this script
# abort under `set -e`. An existing local branch is fast-forwarded inside the
# worktree further down instead.
if git show-ref --verify --quiet "refs/remotes/origin/$DATA_BRANCH"; then
  if ! git show-ref --verify --quiet "refs/heads/$DATA_BRANCH"; then
    git branch -q "$DATA_BRANCH" "origin/$DATA_BRANCH"
  fi
  git branch --set-upstream-to="origin/$DATA_BRANCH" "$DATA_BRANCH" >/dev/null 2>&1 || true
else
  echo "ERROR: origin/$DATA_BRANCH not found. Did you push the data branch?" >&2
  exit 1
fi

# make sure the main tree is on main (so run_daily can merge main->data in the worktree cleanly)
git checkout -q "$MAIN_BRANCH" || true

echo "[bootstrap] preparing worktree..."
git worktree prune >/dev/null 2>&1 || true

# if dir exists but isn't a valid worktree checkout, remove it
if [[ -e "$WORKTREE_DIR" && ! -e "$WORKTREE_DIR/.git" ]]; then
  rm -rf "$WORKTREE_DIR"
fi

# ensure worktree exists for data branch
if [[ ! -e "$WORKTREE_DIR/.git" ]]; then
  mkdir -p "$(dirname "$WORKTREE_DIR")"
  git worktree add -f -q "$WORKTREE_DIR" "$DATA_BRANCH"
fi

# keep worktree data branch in a reasonable state (subshell so the cwd change is local)
(
  cd "$WORKTREE_DIR"
  git fetch -q --prune origin || true
  git merge -q --ff-only "origin/$DATA_BRANCH" || true
  # merge main into data if main exists (best effort, matches your run_daily behavior)
  if git show-ref --verify --quiet "refs/heads/$MAIN_BRANCH"; then
    git merge -q --no-edit "$MAIN_BRANCH" || true
  fi
)

echo "[bootstrap] done."
echo " main repo: $(pwd)"
echo " data worktree: $(cd "$WORKTREE_DIR" && pwd)"

if [[ "$RUN_DAILY" == "1" ]]; then
  echo "[bootstrap] running daily..."
  NODE_BIN="${NODE_BIN:-$(command -v node || true)}"
  if [[ -z "$NODE_BIN" ]]; then
    echo "ERROR: node not found in PATH" >&2
    exit 1
  fi
  # Best effort: a failed daily run does not fail the bootstrap itself.
  NODE_BIN="$NODE_BIN" bash scripts/run_daily.sh || true
fi

31
scripts/cron_setup.sh Executable file
View file

@ -0,0 +1,31 @@
#!/usr/bin/env bash
# Install (or replace) the crontab entry that runs scripts/run_daily.sh on a
# schedule and appends its output to reports/cron.log.
# Environment overrides: MAIN_BRANCH, DATA_BRANCH, NODE_BIN, CRON_SCHEDULE.
set -euo pipefail
# Repo root is the parent of the directory containing this script.
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
MAIN_BRANCH="${MAIN_BRANCH:-main}"
DATA_BRANCH="${DATA_BRANCH:-data}"
# Resolve the node binary now so its path can be written into the cron line:
# honor NODE_BIN when given, otherwise look node up on PATH.
NODE_BIN="${NODE_BIN:-}"
if [[ -z "$NODE_BIN" ]]; then
NODE_BIN="$(command -v node || true)"
fi
if [[ -z "$NODE_BIN" ]]; then
echo "ERROR: node not found in PATH" >&2
exit 1
fi
# The cron line redirects into reports/cron.log, so the directory must exist.
mkdir -p "$REPO_ROOT/reports"
# Default: run 4 times/day (every 6 hours). Override via:
# CRON_SCHEDULE="15 */4 * * *" (example)
CRON_SCHEDULE="${CRON_SCHEDULE:-0 */6 * * *}"
# Use a stable marker so we can replace old lines (including the previous "daily" one).
MARKER="# spirit-tracker"
# Full crontab line: schedule, env assignments, the command, log redirection, marker.
CRON_LINE="$CRON_SCHEDULE NODE_BIN=$NODE_BIN MAIN_BRANCH=$MAIN_BRANCH DATA_BRANCH=$DATA_BRANCH bash \"$REPO_ROOT/scripts/run_daily.sh\" >> \"$REPO_ROOT/reports/cron.log\" 2>&1 $MARKER"
# Install (idempotent): remove any previous line with the marker, then append.
# `|| true` keeps the pipeline alive when there is no crontab yet or grep matches nothing.
{ crontab -l 2>/dev/null | grep -vF "$MARKER" || true; echo "$CRON_LINE"; } | crontab -
echo "Installed cron job:"
echo "$CRON_LINE"

7
scripts/repo_reset.sh Executable file
View file

@ -0,0 +1,7 @@
#!/usr/bin/env bash
# DESTRUCTIVE: wipe the local git history, worktrees, DB snapshots and reports,
# re-create the repository via scripts/repo_setup.sh, then force-push both
# branches to origin.
#
# Environment overrides:
#   ORIGIN_URL   remote to push to (defaults to the project's GitHub repo)
#   MAIN_BRANCH  code branch name (default: main)
#   DATA_BRANCH  data branch name (default: data)
#
# Strict mode matters here: without it a failed repo_setup.sh would still be
# followed by the force pushes.
set -euo pipefail

ORIGIN_URL="${ORIGIN_URL:-git@github.com:brennanwilkes/spirit-tracker.git}"
MAIN_BRANCH="${MAIN_BRANCH:-main}"
DATA_BRANCH="${DATA_BRANCH:-data}"

# Always operate on the repository that contains this script, never on
# whatever directory the caller happened to be in.
cd "$(dirname "${BASH_SOURCE[0]}")/.."

rm -rf .git .worktrees
rm -rf data/db reports

# Pass the branch names explicitly so setup and the pushes below agree.
MAIN_BRANCH="$MAIN_BRANCH" DATA_BRANCH="$DATA_BRANCH" bash scripts/repo_setup.sh --force

git remote add origin "$ORIGIN_URL"
git push -u origin "$MAIN_BRANCH" --force
git push -u origin "$DATA_BRANCH" --force

95
scripts/repo_setup.sh Executable file
View file

@ -0,0 +1,95 @@
#!/usr/bin/env bash
# Initialise a fresh git repository in the current directory with two branches:
#   MAIN_BRANCH - code only (data/, reports/, viz/data/ and .worktrees/ are ignored)
#   DATA_BRANCH - same code, but with a .gitignore that no longer ignores those outputs
# Usage: repo_setup.sh [--force]   (--force deletes an existing .git first)
set -euo pipefail
MAIN_BRANCH="${MAIN_BRANCH:-main}"
DATA_BRANCH="${DATA_BRANCH:-data}"
# Only the first positional argument is inspected.
FORCE=0
if [[ "${1:-}" == "--force" ]]; then
FORCE=1
fi
# Refuse to clobber an existing repository unless --force was given.
if [[ -d .git ]]; then
if [[ $FORCE -eq 1 ]]; then
rm -rf .git
else
echo "ERROR: .git already exists. Remove it first or run: $0 --force" >&2
exit 1
fi
fi
git init -q
git branch -M "$MAIN_BRANCH"
mkdir -p data/db reports .worktrees viz/data
# Move existing DB snapshots (e.g. kwm__scotch__2b16b533.json) into data/db so
# they don't end up committed on the main branch.
# nullglob makes the loop a no-op when nothing matches.
shopt -s nullglob
for f in *__*__*.json; do
mv -f "$f" data/db/
done
shopt -u nullglob
# Ensure expected runtime dirs exist (they are ignored on main).
mkdir -p data/db reports viz/data
# Move old root-level DB JSONs into data/db if present.
# Matches names ending in "__<8 hex chars>.json".
shopt -s nullglob
for f in *.json; do
if [[ "$f" =~ __[0-9a-f]{8}\.json$ ]]; then
mv -f "$f" "data/db/$f"
fi
done
shopt -u nullglob
# .gitignore for the main branch (quoted heredoc: contents are written verbatim).
cat > .gitignore <<'GITIGNORE'
node_modules/
*.log
# Data & reports live on the data branch
/data/
/reports/
.worktrees/
# Generated viz artifacts live on the data branch
viz/data/
# Keep cron log out of git even on data branch
reports/cron.log
GITIGNORE
# Make sure scripts/tools are executable (best effort)
chmod +x bin/tracker.js 2>/dev/null || true
chmod +x scripts/*.sh 2>/dev/null || true
chmod +x tools/*.js 2>/dev/null || true
# Initial commit on main; skipped when there is nothing staged.
git add -A
if git diff --cached --quiet; then
echo "Nothing to commit on $MAIN_BRANCH (did you already commit?)" >&2
else
git commit -m "chore: initial code" -q
fi
# Create data branch, un-ignore data and reports (and viz/data).
if git show-ref --verify --quiet "refs/heads/$DATA_BRANCH"; then
echo "Data branch already exists: $DATA_BRANCH" >&2
else
git checkout -b "$DATA_BRANCH" -q
# Replacement .gitignore for the data branch only.
cat > .gitignore <<'GITIGNORE'
node_modules/
*.log
# Keep cron log out of git
reports/cron.log
GITIGNORE
git add .gitignore
git commit -m "chore: enable tracking of data + reports + viz on data branch" -q
# Leave the working tree on main when done.
git checkout "$MAIN_BRANCH" -q
fi
echo "Repo setup complete. Main=$MAIN_BRANCH Data=$DATA_BRANCH"

90
scripts/run_daily.sh Executable file
View file

@ -0,0 +1,90 @@
#!/usr/bin/env bash
# One tracker run: inside the data-branch worktree, merge main, run the tracker
# and the viz builders, then commit and push data/db, reports and viz/data.
# The commit body is the newest reports/*.txt file.
# Environment overrides: MAIN_BRANCH, DATA_BRANCH, DATA_WORKTREE_DIR, NODE_BIN.
set -euo pipefail
# Repo root is the parent of the directory containing this script.
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
MAIN_BRANCH="${MAIN_BRANCH:-main}"
DATA_BRANCH="${DATA_BRANCH:-data}"
WORKTREE_DIR="${DATA_WORKTREE_DIR:-$REPO_ROOT/.worktrees/data}"
# Node binary: honor NODE_BIN when given, otherwise look node up on PATH.
NODE_BIN="${NODE_BIN:-}"
if [[ -z "$NODE_BIN" ]]; then
NODE_BIN="$(command -v node || true)"
fi
if [[ -z "$NODE_BIN" ]]; then
echo "ERROR: node not found in PATH" >&2
exit 1
fi
cd "$REPO_ROOT"
# Fails (and exits via set -e) when REPO_ROOT is not inside a git work tree.
git rev-parse --is-inside-work-tree >/dev/null
# Ensure data branch exists.
if ! git show-ref --verify --quiet "refs/heads/$DATA_BRANCH"; then
echo "ERROR: data branch not found: $DATA_BRANCH" >&2
exit 1
fi
# Create/repair worktree for data branch.
git worktree prune >/dev/null 2>&1 || true
# If the dir exists but isn't a valid worktree checkout, remove it properly.
if [[ -e "$WORKTREE_DIR" && ! -e "$WORKTREE_DIR/.git" ]]; then
rm -rf "$WORKTREE_DIR"
fi
# If the worktree directory is missing, add it (force is safe after prune).
if [[ ! -e "$WORKTREE_DIR/.git" ]]; then
mkdir -p "$(dirname "$WORKTREE_DIR")"
git worktree add -f -q "$WORKTREE_DIR" "$DATA_BRANCH"
fi
# Everything below runs inside the data worktree.
cd "$WORKTREE_DIR"
# Keep data branch up-to-date with main (merge only when main moved).
# Unlike the bootstrap script, a failed merge here aborts the run.
if git show-ref --verify --quiet "refs/heads/$MAIN_BRANCH"; then
if ! git merge -q --no-edit "$MAIN_BRANCH"; then
echo "ERROR: failed to merge $MAIN_BRANCH into $DATA_BRANCH" >&2
exit 1
fi
fi
# Run tracker (writes data/db + a plain report file in reports/)
"$NODE_BIN" bin/tracker.js
# Build viz artifacts on the data branch
"$NODE_BIN" tools/build_viz_index.js
"$NODE_BIN" tools/build_viz_commits.js
"$NODE_BIN" tools/build_viz_recent.js
# Stage only data/report/viz outputs
git add -A data/db reports viz/data
# Nothing staged means nothing changed: exit successfully without committing.
if git diff --cached --quiet; then
echo "No data/report/viz changes to commit." >&2
exit 0
fi
# Commit message: include the latest report as the commit body.
ts="$(date -u +'%Y-%m-%dT%H:%M:%SZ')"
REPORT_FILE=""
# compgen -G tests whether the glob matches anything; ls -1t then picks the newest by mtime.
if compgen -G "reports/*.txt" > /dev/null; then
REPORT_FILE="$(ls -1t reports/*.txt | head -n 1 || true)"
fi
# Build the message in a temp file: "run: <UTC timestamp>", blank line, report text.
MSG_FILE="$(mktemp)"
{
echo "run: ${ts}"
echo
if [[ -n "$REPORT_FILE" && -f "$REPORT_FILE" ]]; then
cat "$REPORT_FILE"
else
echo "(no report file found in reports/*.txt)"
fi
} > "$MSG_FILE"
git commit -F "$MSG_FILE" -q
# NOTE(review): if the commit fails, set -e exits before this cleanup and the temp file is left behind.
rm -f "$MSG_FILE"
git push -q

56
scripts/serve_viz.sh Executable file
View file

@ -0,0 +1,56 @@
#!/usr/bin/env bash
# Serve the visualisation from the data-branch worktree: create/repair the
# worktree, build any missing viz/data JSON artifacts, then exec viz/serve.js.
# Environment overrides: DATA_BRANCH, DATA_WORKTREE_DIR, NODE_BIN.
set -euo pipefail
# Repo root is the parent of the directory containing this script.
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
DATA_BRANCH="${DATA_BRANCH:-data}"
WORKTREE_DIR="${DATA_WORKTREE_DIR:-$REPO_ROOT/.worktrees/data}"
# Node binary: honor NODE_BIN when given, otherwise look node up on PATH.
NODE_BIN="${NODE_BIN:-}"
if [[ -z "$NODE_BIN" ]]; then
NODE_BIN="$(command -v node || true)"
fi
if [[ -z "$NODE_BIN" ]]; then
echo "ERROR: node not found in PATH" >&2
exit 1
fi
cd "$REPO_ROOT"
# Fails (and exits via set -e) when REPO_ROOT is not inside a git work tree.
git rev-parse --is-inside-work-tree >/dev/null
# Ensure data branch exists.
if ! git show-ref --verify --quiet "refs/heads/$DATA_BRANCH"; then
echo "ERROR: data branch not found: $DATA_BRANCH" >&2
exit 1
fi
# Create/repair worktree for data branch.
git worktree prune >/dev/null 2>&1 || true
# If dir exists but isn't a valid worktree checkout, remove it.
if [[ -e "$WORKTREE_DIR" && ! -e "$WORKTREE_DIR/.git" ]]; then
rm -rf "$WORKTREE_DIR"
fi
# If missing, add it.
if [[ ! -e "$WORKTREE_DIR/.git" ]]; then
mkdir -p "$(dirname "$WORKTREE_DIR")"
git worktree add -f -q "$WORKTREE_DIR" "$DATA_BRANCH"
fi
# Everything below runs inside the data worktree.
cd "$WORKTREE_DIR"
# Ensure viz artifacts exist (helpful if you haven't run daily yet)
# Each artifact is only built when its file is missing; existing files are not refreshed.
if [[ ! -f "viz/data/index.json" ]]; then
echo "viz/data/index.json missing; building..." >&2
"$NODE_BIN" tools/build_viz_index.js
fi
if [[ ! -f "viz/data/db_commits.json" ]]; then
echo "viz/data/db_commits.json missing; building..." >&2
"$NODE_BIN" tools/build_viz_commits.js
fi
if [[ ! -f "viz/data/recent.json" ]]; then
echo "viz/data/recent.json missing; building..." >&2
"$NODE_BIN" tools/build_viz_recent.js
fi
# exec replaces this shell with the server process.
exec "$NODE_BIN" viz/serve.js

232
src/core/http.js Normal file
View file

@ -0,0 +1,232 @@
"use strict";
const { setTimeout: sleep } = require("timers/promises");
/**
 * Error subclass used to mark a failure as worth retrying
 * (isRetryable() treats every instance as retryable).
 */
class RetryableError extends Error {
  // Set as an instance field so the name shows up on every instance.
  name = "RetryableError";

  /** @param {string} msg - human-readable failure description */
  constructor(msg) {
    super(msg);
  }
}
/**
 * Decide whether a failed request should be attempted again.
 *
 * Retryable: aborts (timeouts), RetryableError instances, and errors whose
 * message mentions a transient network condition.
 *
 * @param {*} e - the caught value
 * @returns {boolean}
 */
function isRetryable(e) {
  if (!e) return false;
  if (e.name === "AbortError" || e instanceof RetryableError) return true;

  const transientPattern = /ECONNRESET|ENOTFOUND|EAI_AGAIN|ETIMEDOUT|socket hang up|fetch failed/i;
  const description = String(e.message || e);
  return transientPattern.test(description);
}
/**
 * Delay before the next retry: exponential (500ms * 2^attempt) capped at
 * 12000ms, plus a random jitter of 0-399ms.
 *
 * @param {number} attempt - zero-based attempt index
 * @returns {number} delay in milliseconds
 */
function backoffMs(attempt) {
  const CAP_MS = 12000;
  const exponential = 500 * Math.pow(2, attempt);
  const capped = Math.min(CAP_MS, exponential);
  return capped + Math.floor(Math.random() * 400);
}
/**
 * Read a response body as text without ever throwing.
 *
 * @param {{ text: Function }} res - response-like object
 * @returns {Promise<string>} the body, or "" when it cannot be read
 */
async function safeText(res) {
  let body = "";
  try {
    body = await res.text();
  } catch {
    // Unreadable body: fall back to the empty string.
  }
  return body;
}
/* ---------------- Cookies (simple jar) ---------------- */
// host -> Map(cookieName -> "name=value")
/**
 * Create a minimal in-memory cookie jar keyed by hostname.
 *
 * Only the "name=value" part of each Set-Cookie line is kept; attributes such
 * as Path, Expires or Secure are ignored.
 *
 * @returns {{ storeFromResponse: Function, cookieHeaderFor: Function }}
 */
function createCookieJar() {
  // hostname -> Map(cookie name -> "name=value")
  const cookiesByHost = new Map();

  /** Hostname of a URL string, or "" when it cannot be parsed. */
  const hostOf = (u) => {
    try {
      return new URL(u).hostname || "";
    } catch {
      return "";
    }
  };

  /** Parse one Set-Cookie line ("name=value; Path=/; ...") into { name, pair }. */
  const parseLine = (line) => {
    const raw = String(line || "").trim();
    if (!raw) return null;

    const [pairPart = ""] = raw.split(";");
    const eqAt = pairPart.indexOf("=");
    if (eqAt <= 0) return null;

    const name = pairPart.slice(0, eqAt).trim();
    if (!name) return null;
    const value = pairPart.slice(eqAt + 1).trim();
    return { name, pair: `${name}=${value}` };
  };

  /** Collect the individual Set-Cookie lines from a Headers-like object. */
  const setCookieLines = (headers) => {
    // Node/undici may support headers.getSetCookie()
    if (typeof headers?.getSetCookie === "function") {
      try {
        const listed = headers.getSetCookie();
        return Array.isArray(listed) ? listed : [];
      } catch {
        // fall through to the combined-header path
      }
    }

    // Fallback: single combined header (may lose multiples, but better than nothing)
    const combined = headers?.get ? headers.get("set-cookie") : null;
    if (!combined) return [];

    // Best-effort split on commas that start a new "name=" pair; imperfect
    // with unusual Expires values but adequate for typical cookies.
    const pieces = [];
    for (const piece of String(combined).split(/,(?=[^;,]*=)/g)) {
      const trimmed = piece.trim();
      if (trimmed) pieces.push(trimmed);
    }
    return pieces;
  };

  /** Remember every cookie set by `res` under the host of its final URL. */
  function storeFromResponse(url, res) {
    const host = hostOf(res?.url || url);
    if (!host) return;

    const lines = setCookieLines(res?.headers);
    if (lines.length === 0) return;

    if (!cookiesByHost.has(host)) cookiesByHost.set(host, new Map());
    const bucket = cookiesByHost.get(host);

    for (const line of lines) {
      const parsed = parseLine(line);
      if (parsed) bucket.set(parsed.name, parsed.pair);
    }
  }

  /** Build the Cookie header value for `url`, or "" when nothing is stored. */
  function cookieHeaderFor(url) {
    const host = hostOf(url);
    if (!host) return "";

    const bucket = cookiesByHost.get(host);
    if (!bucket || bucket.size === 0) return "";
    return Array.from(bucket.values()).join("; ");
  }

  return { storeFromResponse, cookieHeaderFor };
}
/* ---------------- HTTP client ---------------- */
/**
 * Create an HTTP client built on the global fetch() with retries, a
 * per-attempt timeout, a shared cookie jar and debug logging.
 *
 * @param {object} opts
 * @param {number} opts.maxRetries - extra attempts after the first one
 * @param {number} opts.timeoutMs - per-attempt timeout covering headers AND body
 * @param {string} opts.defaultUa - user-agent used when a call passes none
 * @param {object} [opts.logger] - optional logger; only dbg() and warn() are used
 * @returns {{ fetchTextWithRetry: Function, fetchJsonWithRetry: Function, inflightStr: Function }}
 */
function createHttpClient({ maxRetries, timeoutMs, defaultUa, logger }) {
  let inflight = 0;
  let reqSeq = 0;
  const cookieJar = createCookieJar();

  /** Current number of in-flight attempts, formatted for log lines. */
  function inflightStr() {
    return `inflight=${inflight}`;
  }

  /**
   * Fetch `url`, retrying retryable failures with exponential backoff.
   *
   * Retryable: timeouts/aborts, network errors, HTTP 408/429/5xx, unparsable
   * JSON (json mode) and HTML bodies shorter than 200 characters (text mode).
   * Other HTTP statuses >= 400 throw immediately.
   *
   * @returns {Promise<object>} { text | json, ms, bytes, status, finalUrl }
   */
  async function fetchWithRetry(
    url,
    tag,
    ua,
    { mode = "text", method = "GET", headers = {}, body = null, cookies = true } = {}
  ) {
    for (let attempt = 0; attempt <= maxRetries; attempt++) {
      const reqId = ++reqSeq;
      const start = Date.now();
      inflight++;
      logger?.dbg?.(
        `REQ#${reqId} START ${tag} attempt=${attempt + 1}/${maxRetries + 1} ${url} (${inflightStr()})`
      );
      try {
        const ctrl = new AbortController();
        const t = setTimeout(() => ctrl.abort(), timeoutMs);
        try {
          // A caller-supplied Cookie header always wins over the jar.
          const cookieHdr =
            cookies &&
            !Object.prototype.hasOwnProperty.call(headers, "Cookie") &&
            !Object.prototype.hasOwnProperty.call(headers, "cookie")
              ? cookieJar.cookieHeaderFor(url)
              : "";

          const res = await fetch(url, {
            method,
            redirect: "follow",
            headers: {
              "user-agent": ua || defaultUa,
              "accept-language": "en-US,en;q=0.9",
              ...(mode === "text"
                ? { accept: "text/html,application/xhtml+xml", "cache-control": "no-cache" }
                : { accept: "application/json, text/plain, */*" }),
              ...(cookieHdr ? { cookie: cookieHdr } : {}),
              ...headers,
            },
            body,
            signal: ctrl.signal,
          });

          const status = res.status;
          const finalUrl = res.url || url;

          // capture cookies for subsequent requests to same host
          if (cookies) cookieJar.storeFromResponse(url, res);
          logger?.dbg?.(`REQ#${reqId} HTTP ${status} ${tag} finalUrl=${finalUrl}`);

          if (status === 429 || status === 408 || (status >= 500 && status <= 599)) {
            throw new RetryableError(`HTTP ${status}`);
          }
          if (status >= 400) {
            const bodyTxt = await safeText(res);
            throw new Error(
              `HTTP ${status} bodyHead=${String(bodyTxt).slice(0, 160).replace(/\s+/g, " ")}`
            );
          }

          if (mode === "json") {
            const txt = await res.text();
            const ms = Date.now() - start;
            let json;
            try {
              json = JSON.parse(txt);
            } catch (e) {
              throw new RetryableError(`Bad JSON: ${e?.message || e}`);
            }
            return { json, ms, bytes: txt.length, status, finalUrl };
          }

          const text = await res.text();
          if (!text || text.length < 200) throw new RetryableError(`Short HTML bytes=${text.length}`);
          const ms = Date.now() - start;
          return { text, ms, bytes: text.length, status, finalUrl };
        } finally {
          // The timer stays armed until the body has been consumed (or the
          // attempt failed): fetch() resolves as soon as headers arrive, so
          // clearing it earlier would let a stalled body hang forever.
          clearTimeout(t);
        }
      } catch (e) {
        const retryable = isRetryable(e);
        logger?.dbg?.(
          `REQ#${reqId} ERROR ${tag} retryable=${retryable} err=${e?.message || e} (${inflightStr()})`
        );
        if (!retryable || attempt === maxRetries) throw e;
        const delay = backoffMs(attempt);
        logger?.warn?.(`Request failed, retrying in ${delay}ms (${attempt + 1}/${maxRetries})`);
        await sleep(delay);
      } finally {
        inflight--;
        logger?.dbg?.(`REQ#${reqId} END ${tag} (${inflightStr()})`);
      }
    }
    throw new Error("unreachable");
  }

  /** fetchWithRetry in text mode (opts may still override the mode). */
  function fetchTextWithRetry(url, tag, ua, opts) {
    return fetchWithRetry(url, tag, ua, { mode: "text", ...(opts || {}) });
  }

  /** fetchWithRetry in json mode (opts may still override the mode). */
  function fetchJsonWithRetry(url, tag, ua, opts) {
    return fetchWithRetry(url, tag, ua, { mode: "json", ...(opts || {}) });
  }

  return { fetchTextWithRetry, fetchJsonWithRetry, inflightStr };
}
module.exports = { createHttpClient, RetryableError };

58
src/core/logger.js Normal file
View file

@ -0,0 +1,58 @@
"use strict";
const { C, color } = require("../utils/ansi");
const { ts } = require("../utils/time");
/**
 * Create a console logger with timestamped, optionally coloured prefixes.
 *
 * Colour is only applied when it was requested AND stdout is a TTY.
 * info() and dbg() print only when `debug` is true; err() goes to stderr,
 * everything else to stdout.
 *
 * @param {object} [opts]
 * @param {boolean} [opts.debug=false] - enable info()/dbg() output
 * @param {boolean} [opts.colorize=true] - request ANSI colours
 * @returns {object} logger with ok/warn/err/info/dbg plus dim/bold/color helpers
 */
function createLogger({ debug = false, colorize: wantColor = true } = {}) {
  const isTTY = Boolean(process.stdout && process.stdout.isTTY);
  const enabled = Boolean(wantColor && isTTY);

  // "[LABEL <timestamp>] " painted with the given colour code.
  const prefix = (label, code) => color(`[${label} ${ts()}] `, code, enabled);

  const ok = (msg) => {
    console.log(prefix("OK", C.green) + String(msg));
  };
  const warn = (msg) => {
    console.log(prefix("WARN", C.yellow) + String(msg));
  };
  const err = (msg) => {
    console.error(prefix("ERR", C.red) + String(msg));
  };
  const info = (msg) => {
    if (!debug) return;
    console.log(prefix("INFO", C.cyan) + String(msg));
  };
  const dbg = (msg) => {
    if (!debug) return;
    console.log(prefix("DEBUG", C.gray) + String(msg));
  };

  const paint = (s, code) => color(s, code, enabled);
  const dim = (s) => color(s, C.dim, enabled);
  const bold = (s) => color(s, C.bold, enabled);

  return {
    debug,
    isTTY,
    colorize: enabled,
    C,
    ok,
    warn,
    err,
    info,
    dbg,
    dim,
    bold,
    color: paint,
  };
}
module.exports = { createLogger };

82
src/main.js Normal file
View file

@ -0,0 +1,82 @@
#!/usr/bin/env node
"use strict";
const fs = require("fs");
const path = require("path");
const { parseArgs, clampInt } = require("./utils/args");
const { isoTimestampFileSafe } = require("./utils/time");
const { createLogger } = require("./core/logger");
const { createHttpClient } = require("./core/http");
const { createStores, parseProductsSierra } = require("./stores");
const { runAllStores } = require("./tracker/run_all");
const { renderFinalReport } = require("./tracker/report");
const { ensureDir } = require("./tracker/db");
const DEFAULT_UA =
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0 Safari/537.36";
/**
 * Resolve a directory option to an absolute path.
 *
 * @param {*} p - user-supplied path (may be empty/undefined)
 * @param {*} fallback - returned unchanged when `p` is blank
 * @returns {*} `fallback`, the absolute `p`, or `p` joined onto the cwd
 */
function resolveDir(p, fallback) {
  const trimmed = String(p || "").trim();
  if (trimmed === "") return fallback;
  if (path.isAbsolute(trimmed)) return trimmed;
  return path.join(process.cwd(), trimmed);
}
/**
 * Program entry point: build the configuration from CLI args and environment
 * variables, scan every store, print the final report to stdout and save a
 * plain-text copy under the report directory.
 *
 * @returns {Promise<void>}
 * @throws {Error} when the global fetch() is unavailable
 */
async function main() {
  if (typeof fetch !== "function") {
    throw new Error("Global fetch() not found. Please use Node.js 18+ (or newer). ");
  }

  const args = parseArgs(process.argv.slice(2));
  const logger = createLogger({ debug: args.debug, colorize: true });
  const env = process.env;

  // CLI arguments take precedence over environment variables where both exist.
  const config = {
    debug: args.debug,
    maxPages: args.maxPages,
    concurrency: args.concurrency ?? clampInt(env.CONCURRENCY, 6, 1, 64),
    staggerMs: args.staggerMs ?? clampInt(env.STAGGER_MS, 150, 0, 5000),
    maxRetries: clampInt(env.MAX_RETRIES, 6, 0, 20),
    timeoutMs: clampInt(env.TIMEOUT_MS, 25000, 1000, 120000),
    discoveryGuess: args.guess ?? clampInt(env.DISCOVERY_GUESS, 20, 1, 5000),
    discoveryStep: args.step ?? clampInt(env.DISCOVERY_STEP, 5, 1, 500),
    categoryConcurrency: clampInt(env.CATEGORY_CONCURRENCY, 5, 1, 64),
    defaultUa: DEFAULT_UA,
    defaultParseProducts: parseProductsSierra,
    dbDir: resolveDir(args.dataDir ?? env.DATA_DIR, path.join(process.cwd(), "data", "db")),
    reportDir: resolveDir(args.reportDir ?? env.REPORT_DIR, path.join(process.cwd(), "reports")),
  };
  const { maxRetries, timeoutMs, defaultUa, dbDir, reportDir } = config;

  ensureDir(dbDir);
  ensureDir(reportDir);

  const http = createHttpClient({ maxRetries, timeoutMs, defaultUa, logger });
  const stores = createStores({ defaultUa });
  const report = await runAllStores(stores, { config, logger, http });

  // Console copy (coloured when the logger allows it).
  process.stdout.write(renderFinalReport(report, { dbDir, colorize: logger.colorize }));

  // File copy is always plain text; a failed save is only a warning.
  const reportTextPlain = renderFinalReport(report, { dbDir, colorize: false });
  const file = path.join(reportDir, `${isoTimestampFileSafe(new Date())}.txt`);
  try {
    fs.writeFileSync(file, reportTextPlain, "utf8");
    logger.ok(`Report saved: ${logger.dim(file)}`);
  } catch (e) {
    logger.warn(`Report save failed: ${e?.message || e}`);
  }
}
module.exports = { main };
// When this file is executed directly (not required), run main() and turn a
// rejection into a printed error plus a non-zero exit code.
if (require.main === module) {
  // no logger here; keep simple
  const reportFatal = (e) => {
    console.error(e && e.stack ? e.stack : String(e));
    process.exitCode = 1;
  };
  main().catch(reportFatal);
}

332
src/stores/bcl.js Normal file
View file

@ -0,0 +1,332 @@
"use strict";
const { normalizeCspc } = require("../utils/sku");
const { humanBytes } = require("../utils/bytes");
const { padLeft, padRight } = require("../utils/string");
const { mergeDiscoveredIntoDb } = require("../tracker/merge");
const { buildDbObject, writeJsonAtomic } = require("../tracker/db");
const { addCategoryResultToReport } = require("../tracker/report");
/** Human-readable byte count, left-padded with spaces to 8 characters. */
function kbStr(bytes) {
  const label = humanBytes(bytes);
  return label.padStart(8, " ");
}
/**
 * Format a millisecond duration as seconds, left-padded to 7 characters.
 * Below 10s one decimal is shown ("1.2s"); from 10s up whole seconds ("12s").
 * Non-finite input is treated as 0.
 *
 * @param {number} ms
 * @returns {string}
 */
function secStr(ms) {
  const seconds = Number.isFinite(ms) ? ms / 1000 : 0;
  const roundedToTenth = Math.round(seconds * 10) / 10;
  const label =
    roundedToTenth < 10 ? `${roundedToTenth.toFixed(1)}s` : `${Math.round(seconds)}s`;
  return label.padStart(7, " ");
}
/** "i/total" with `i` left-padded to the width of `total`. */
function pageStr(i, total) {
  const width = String(total).length;
  const current = padLeft(i, width);
  return `${current}/${total}`;
}
/** Whole-number completion percentage, padded to 3 characters plus "%". */
function pctStr(done, total) {
  let pct = 0;
  if (total) pct = Math.floor((done / total) * 100);
  return `${padLeft(pct, 3)}%`;
}
/**
 * Format a value as a dollar amount with two decimals.
 *
 * @param {*} n - anything Number() can convert
 * @returns {string} e.g. "$12.00", or "" when the value is not a finite number
 */
function cad(n) {
  const amount = Number(n);
  return Number.isFinite(amount) ? `$${amount.toFixed(2)}` : "";
}
/**
 * Coerce a price-like value to a number.
 *
 * Numbers are returned unchanged. Strings have everything except digits and
 * "." stripped (so "$1,234.50" -> 1234.5). A string with no digits at all
 * (e.g. "N/A") yields NaN rather than 0, so callers can fall back to another
 * price field instead of reporting a bogus $0.00.
 *
 * @param {*} n
 * @returns {number} the parsed value, or NaN when nothing numeric is present
 */
function asNumber(n) {
  if (n == null) return NaN;
  if (typeof n === "number") return n;

  const digits = String(n).replace(/[^0-9.]/g, "");
  // Number("") is 0, which would make any non-numeric text look like price 0.
  if (!digits) return NaN;
  return Number(digits);
}
/**
 * Total hit count from a browse response.
 * Accepts both `hits.total: <number>` and the ES-style `hits.total.value`.
 *
 * @param {object} json - parsed response body
 * @returns {number} the total, or 0 when it is missing or not numeric
 */
function bclTotalHits(json) {
  const total = json?.hits?.total;
  if (typeof total === "number") return total;
  return typeof total?.value === "number" ? total.value : 0;
}
/**
 * Decide whether a product record is in stock.
 *
 * Explicit status text wins: the first status-like field mentioning
 * "out of stock" / "in stock" decides. Otherwise `availableUnits` is used,
 * and when nothing is conclusive the item is kept (treated as in stock).
 *
 * @param {object} src - product `_source` record
 * @returns {boolean}
 */
function bclIsInStock(src) {
  // Prefer explicit text if present (matches site UI)
  const statusFields = [
    "availability",
    "availabilityText",
    "availabilityStatus",
    "availability_status",
    "stockStatus",
    "stock_status",
    "status",
    "statusText",
  ];

  for (const field of statusFields) {
    const raw = src?.[field];
    const text = raw == null ? "" : String(raw);
    if (!text) continue;
    if (/out of stock/i.test(text)) return false;
    if (/\bin stock\b/i.test(text)) return true;
  }

  // Fallback only: units
  const units = Number(src?.availableUnits);
  // If we can't tell, keep it (better than dropping 90% of a category)
  return Number.isFinite(units) ? units > 0 : true;
}
/**
 * Turn a possibly relative or protocol-relative URL into an absolute one,
 * resolving relative paths against https://www.bcliquorstores.com/.
 *
 * @param {*} raw
 * @returns {string} absolute URL, "" for blank input, or the trimmed input
 *   when it cannot be resolved
 */
function bclNormalizeAbsUrl(raw) {
  const trimmed = String(raw || "").trim();
  if (trimmed === "") return "";

  const isProtocolRelative = trimmed.startsWith("//");
  if (isProtocolRelative) return `https:${trimmed}`;

  const isAbsolute = /^https?:\/\//i.test(trimmed);
  if (isAbsolute) return trimmed;

  let resolved = trimmed;
  try {
    resolved = new URL(trimmed, "https://www.bcliquorstores.com/").toString();
  } catch {
    // keep the trimmed input as-is
  }
  return resolved;
}
/**
 * Pick the best available image URL from a product record.
 *
 * Single-value fields are tried first (in a fixed priority order), then the
 * first entry of each image-array field, which may be a string or an object
 * carrying `src` / `url`.
 *
 * @param {object} src - product `_source` record
 * @returns {string} absolute image URL, or "" when none is found
 */
function bclPickImage(src) {
  const singleValues = [
    src?.imageUrl,
    src?.imageURL,
    src?.image,
    src?.thumbnail,
    src?.thumbnailUrl,
    src?.thumbnailURL,
    src?.primaryImage,
    src?.primaryImageUrl,
  ];
  const direct = singleValues.find((c) => typeof c === "string" && c.trim());
  if (direct !== undefined) return bclNormalizeAbsUrl(direct);

  const lists = [src?.images, src?.imageUrls, src?.image_urls];
  for (const list of lists) {
    if (!Array.isArray(list) || list.length === 0) continue;

    const first = list[0];
    if (typeof first === "string" && first.trim()) return bclNormalizeAbsUrl(first);

    if (first && typeof first === "object") {
      const nested = String(first.src || first.url || "").trim();
      if (nested) return bclNormalizeAbsUrl(nested);
    }
  }
  return "";
}
/**
 * Convert one browse hit into a tracker item.
 *
 * @param {object} hit - search hit; the product record is read from `_source`
 * @returns {{name: string, price: string, url: string, sku: *, img: string} | null}
 *   null when the hit has no source, no SKU, no name, or is out of stock
 */
function bclHitToItem(hit) {
  const src = hit?._source || null;
  if (!src) return null;

  const skuRaw = src.sku == null ? "" : String(src.sku).trim();
  if (!skuRaw) return null;

  // SKU in URL (requested)
  const skuEncoded = encodeURIComponent(skuRaw);
  const url = `https://www.bcliquorstores.com/product/${skuEncoded}`;

  const name = String(src.name || "").trim();
  if (!name) return null;

  // Sale support: pick currentPrice when present; otherwise regularPrice.
  const current = asNumber(src.currentPrice);
  const regular = asNumber(src.regularPrice);
  const price = cad(Number.isFinite(current) ? current : regular);

  const sku = normalizeCspc(url);

  if (!bclIsInStock(src)) return null;

  // BCL appears to serve .jpg (not .jpeg) for these imagecache URLs; use https.
  const img = `https://www.bcliquorstores.com/sites/default/files/imagecache/height400px/${skuEncoded}.jpg`;

  return { name, price, url, sku, img };
}
/**
 * Fetch one page of the BCL ajax/browse JSON endpoint for the category in `ctx`.
 *
 * @param {object} ctx - scan context; uses ctx.cat.bclType, ctx.cat.key,
 *   ctx.store.ua and ctx.http.fetchJsonWithRetry
 * @param {number} page1 - page number sent as the `page` query parameter
 * @param {number} size - page size
 * @returns {Promise<object>} whatever ctx.http.fetchJsonWithRetry resolves to
 */
async function bclFetchBrowsePage(ctx, page1, size) {
  const origin = "https://www.bcliquorstores.com";
  const query = {
    category: "spirits",
    type: ctx.cat.bclType, // e.g. "rum" or "whisky / whiskey"
    sort: "featuredProducts:desc",
  };

  const endpoint = new URL(`${origin}/ajax/browse`);
  for (const [key, value] of Object.entries({ ...query, size: String(size), page: String(page1) })) {
    endpoint.searchParams.set(key, value);
  }

  // The referer mirrors the catalogue page a browser would be on for this request.
  const referer =
    `${origin}/product-catalogue?` +
    `category=${encodeURIComponent(query.category)}` +
    `&type=${encodeURIComponent(query.type)}` +
    `&sort=${encodeURIComponent(query.sort)}` +
    `&page=${encodeURIComponent(String(page1))}`;

  const tag = `bcl:${ctx.cat.key}:p${page1}`;
  const requestOptions = {
    method: "GET",
    headers: {
      Accept: "application/json, text/plain, */*",
      Referer: referer,
      Origin: origin,
    },
  };
  return await ctx.http.fetchJsonWithRetry(endpoint.toString(), tag, ctx.store.ua, requestOptions);
}
/**
 * Append one category's outcome to the run report: a `categories` entry, the
 * running totals and the per-item change lists.
 *
 * Shared by the failure and success paths of scanCategoryBCLAjax so the
 * counters cannot drift apart between the two.
 */
function bclRecordCategoryResult(report, ctx, { scannedPages, discoveredUnique, elapsedMs }, changes) {
  const { newItems, updatedItems, removedItems, restoredItems } = changes;

  report.categories.push({
    store: ctx.store.name,
    label: ctx.cat.label,
    key: ctx.cat.key,
    dbFile: ctx.dbFile,
    scannedPages,
    discoveredUnique,
    newCount: newItems.length,
    updatedCount: updatedItems.length,
    removedCount: removedItems.length,
    restoredCount: restoredItems.length,
    elapsedMs,
  });

  report.totals.newCount += newItems.length;
  report.totals.updatedCount += updatedItems.length;
  report.totals.removedCount += removedItems.length;
  report.totals.restoredCount += restoredItems.length;

  addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems);
}

/**
 * Scan one BCL category through the ajax/browse JSON endpoint, merge the
 * discovered items into the previous DB, write the DB file and record the
 * result in `report`.
 *
 * @param {object} ctx - scan context (store, cat, config, http, logger, dbFile, catPrefixOut)
 * @param {*} prevDb - previous DB state, passed through to mergeDiscoveredIntoDb
 * @param {object} report - run report, mutated in place
 * @returns {Promise<void>}
 */
async function scanCategoryBCLAjax(ctx, prevDb, report) {
  const t0 = Date.now();
  const size = 24;

  let first;
  try {
    first = await bclFetchBrowsePage(ctx, 1, size);
  } catch (e) {
    ctx.logger.warn(`${ctx.catPrefixOut} | BCL browse fetch failed: ${e?.message || e}`);

    // NOTE(review): the merge still runs with an empty discovery set, so the
    // DB is rewritten even though nothing was fetched. How existing items are
    // treated depends on mergeDiscoveredIntoDb - confirm this is intended for
    // transient fetch failures.
    const discovered = new Map();
    const changes = mergeDiscoveredIntoDb(prevDb, discovered);
    const dbObj = buildDbObject(ctx, changes.merged);
    writeJsonAtomic(ctx.dbFile, dbObj);

    bclRecordCategoryResult(
      report,
      ctx,
      { scannedPages: 1, discoveredUnique: 0, elapsedMs: Date.now() - t0 },
      changes
    );
    return;
  }

  const total = bclTotalHits(first?.json);
  const totalPages = Math.max(1, Math.ceil(total / size));
  // `== null` so an unset (undefined) cap means "no cap" just like null;
  // Math.min(undefined, n) would be NaN and silently scan zero pages.
  const scanPages = ctx.config.maxPages == null ? totalPages : Math.min(ctx.config.maxPages, totalPages);

  ctx.logger.ok(`${ctx.catPrefixOut} | Total=${total} Size=${size} Pages: ${scanPages}${scanPages !== totalPages ? ` (cap from ${totalPages})` : ""}`);

  const pageNums = [];
  for (let p = 1; p <= scanPages; p++) pageNums.push(p);

  let donePages = 0;

  const perPageItems = await require("../utils/async").parallelMapStaggered(
    pageNums,
    ctx.config.concurrency,
    ctx.config.staggerMs,
    async (page1, idx) => {
      // Page 1 was already fetched above to learn the total; reuse it.
      const r = page1 === 1 ? first : await bclFetchBrowsePage(ctx, page1, size);
      const hits = Array.isArray(r?.json?.hits?.hits) ? r.json.hits.hits : [];

      const items = [];
      for (const h of hits) {
        const it = bclHitToItem(h);
        if (it) items.push(it);
      }

      donePages++;
      ctx.logger.ok(
        `${ctx.catPrefixOut} | Page ${pageStr(idx + 1, pageNums.length)} | ${String(r.status || "").padEnd(3)} | ${pctStr(donePages, pageNums.length)} | items=${padLeft(
          items.length,
          3
        )} | bytes=${kbStr(r.bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}`
      );

      return items;
    }
  );

  // De-duplicate by URL; a later occurrence replaces an earlier one.
  const discovered = new Map();
  let dups = 0;
  for (const arr of perPageItems) {
    for (const it of arr) {
      if (discovered.has(it.url)) dups++;
      discovered.set(it.url, it);
    }
  }

  ctx.logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}${dups ? ` (${dups} dups)` : ""}`);

  const changes = mergeDiscoveredIntoDb(prevDb, discovered);
  const { merged, newItems, updatedItems, removedItems, restoredItems } = changes;

  const dbObj = buildDbObject(ctx, merged);
  writeJsonAtomic(ctx.dbFile, dbObj);

  ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`);

  const elapsed = Date.now() - t0;
  ctx.logger.ok(
    `${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`
  );

  // restoredCount now comes from restoredItems (it previously reported
  // removedItems.length on this path).
  bclRecordCategoryResult(
    report,
    ctx,
    { scannedPages: scanPages, discoveredUnique: discovered.size, elapsedMs: elapsed },
    changes
  );
}
/**
 * Build the BCL store definition.
 *
 * @param {string} defaultUa - user-agent used for this store's requests
 * @returns {object} store descriptor with its categories and scan function
 */
function createStore(defaultUa) {
  // Catalogue page for a spirits type; `encodedType` is already URL-encoded.
  const catalogueUrl = (encodedType) =>
    `https://www.bcliquorstores.com/product-catalogue?category=spirits&type=${encodedType}&sort=featuredProducts:desc&page=1`;

  const categories = [
    {
      key: "whisky",
      label: "Whisky / Whiskey",
      // informational only; scan uses ajax/browse
      startUrl: catalogueUrl("whisky%20/%20whiskey"),
      bclType: "whisky / whiskey",
    },
    {
      key: "rum",
      label: "Rum",
      startUrl: catalogueUrl("rum"),
      bclType: "rum",
    },
  ];

  return {
    key: "bcl",
    name: "BCL",
    host: "www.bcliquorstores.com",
    ua: defaultUa,
    scanCategory: scanCategoryBCLAjax, // JSON-driven (async browse)
    categories,
  };
}
module.exports = { createStore };

387
src/stores/bsw.js Normal file
View file

@ -0,0 +1,387 @@
"use strict";
const { cleanText } = require("../utils/html");
const { normalizeCspc } = require("../utils/sku");
const { padLeft, padRight } = require("../utils/string");
const { humanBytes } = require("../utils/bytes");
const { mergeDiscoveredIntoDb } = require("../tracker/merge");
const { buildDbObject, writeJsonAtomic } = require("../tracker/db");
const { addCategoryResultToReport } = require("../tracker/report");
const BSW_ALGOLIA_APP_ID = "25TO6MPUL0";
const BSW_ALGOLIA_API_KEY = "1aa0c19fe6a0931340570bd358c2c9d2";
const BSW_ALGOLIA_URL = `https://${BSW_ALGOLIA_APP_ID.toLowerCase()}-dsn.algolia.net/1/indexes/*/queries`;
/**
 * Format a number as a dollar amount with thousands separators and two
 * decimals (en-US), e.g. 1234.5 -> "$1,234.50".
 *
 * @param {number} n
 * @returns {string} formatted amount, or "" when `n` is not a finite number
 */
function usd(n) {
  if (!Number.isFinite(n)) return "";
  const twoDecimals = new Intl.NumberFormat("en-US", {
    minimumFractionDigits: 2,
    maximumFractionDigits: 2,
  });
  return `$${twoDecimals.format(n)}`;
}
/**
 * Find the collection id embedded in a collection page's HTML.
 *
 * Several known spellings are tried in priority order; the first pattern
 * that matches wins. Ids must have at least 6 digits.
 *
 * @param {*} html - page markup
 * @returns {number | null} the id, or null when no pattern matches
 */
function bswExtractCollectionIdFromHtml(html) {
  const markup = String(html || "");
  const idPatterns = [
    /collection_ids%3A(\d{6,})/i,
    /collection_ids\s*:\s*(\d{6,})/i,
    /"collection_ids"\s*:\s*(\d{6,})/i,
    /"collection_id"\s*:\s*(\d{6,})/i,
    /collection_id\s*=\s*(\d{6,})/i,
    /collectionId["']?\s*[:=]\s*["']?(\d{6,})/i,
    /data-collection-id=["'](\d{6,})["']/i,
  ];

  for (const pattern of idPatterns) {
    const found = pattern.exec(markup);
    if (found?.[1]) return Number.parseInt(found[1], 10);
  }
  return null;
}
/**
 * Format a raw price value from a search hit as display text.
 *
 * Strings: text already containing "$" is returned with whitespace removed;
 * otherwise the digits are extracted and formatted via usd(). Text with no
 * usable number (e.g. "N/A") is returned unchanged instead of being turned
 * into "$0.00".
 *
 * Numbers: divided by 100 when `hintCents` is set, or when the value is an
 * integer >= 100000 (heuristic: assumed to be cents), then formatted via usd().
 *
 * @param {*} value - raw price (string, number, null/undefined)
 * @param {boolean} [hintCents] - true when `value` is known to be in cents
 * @returns {string} formatted price, the original text, or ""
 */
function bswFormatPrice(value, hintCents) {
  if (value === null || value === undefined) return "";

  if (typeof value === "string") {
    const t = value.trim();
    if (!t) return "";
    if (t.includes("$")) return t.replace(/\s+/g, "");

    const digits = t.replace(/[^0-9.]/g, "");
    // Number("") is 0, so an all-text value must be caught before converting.
    const n = digits ? Number(digits) : NaN;
    if (!Number.isFinite(n)) return t;
    return usd(n);
  }

  if (typeof value === "number") {
    let n = value;
    if (hintCents) n = n / 100;
    else if (Number.isInteger(n) && n >= 100000) n = n / 100;
    return usd(n);
  }

  return "";
}
/**
 * Pick the first available price field from a hit.
 *
 * Top-level fields are checked in a fixed priority order, then the same
 * cents/price fields on the first variant.
 *
 * @param {object} hit - search hit
 * @returns {{val: *, cents: boolean}} the raw value and whether it is in
 *   cents; `{ val: null, cents: false }` when nothing is found
 */
function bswPickPrice(hit) {
  // [field name, value is in cents]
  const hitFields = [
    ["price_cents", true],
    ["compare_at_price_cents", true],
    ["price", false],
    ["price_min", false],
    ["priceMin", false],
    ["min_price", false],
    ["variants_min_price", false],
  ];
  const variantFields = [
    ["price_cents", true],
    ["compare_at_price_cents", true],
    ["price", false],
  ];

  const firstPresent = (obj, fields) => {
    for (const [field, cents] of fields) {
      const val = obj[field];
      if (val != null) return { val, cents };
    }
    return null;
  };

  if (hit) {
    const fromHit = firstPresent(hit, hitFields);
    if (fromHit) return fromHit;

    const firstVariant = Array.isArray(hit.variants) ? hit.variants[0] : null;
    if (firstVariant) {
      const fromVariant = firstPresent(firstVariant, variantFields);
      if (fromVariant) return fromVariant;
    }
  }
  return { val: null, cents: false };
}
/**
 * Convert one Algolia hit into a tracker item.
 *
 * The URL comes from the hit itself when present, otherwise it is built from
 * the product handle. Hits without a name or URL are dropped.
 *
 * @param {object|null|undefined} hit - Algolia hit.
 * @returns {{name: string, price: string, url: string, sku: *, img: string}|null}
 *   The item, or null when name or URL is missing.
 */
function bswHitToItem(hit) {
  const rawName = hit && (hit.title || hit.name || hit.product_title || hit.product_name || "");
  const name = cleanText(rawName);

  const handle = hit && (hit.handle || hit.product_handle || hit.slug || "");
  let url = (hit && (hit.url || hit.product_url)) || "";
  if (!url && handle) {
    url = `https://www.bswliquor.com/products/${String(handle).replace(/^\/+/, "")}`;
  }

  const picked = bswPickPrice(hit);
  const price = bswFormatPrice(picked.val, picked.cents);
  const sku = normalizeCspc(hit?.sku || hit?.SKU || hit?.cspc || hit?.CSPC || "");
  const img = bswPickImage(hit);

  return name && url ? { name, price, url, sku, img } : null;
}
/**
 * Request one page of in-stock products for a collection from Algolia.
 *
 * @param {object} ctx - Scan context; uses ctx.http.fetchJsonWithRetry,
 *   ctx.cat.key (request label) and ctx.store.ua.
 * @param {number|string} collectionId - Collection id used in the filter.
 * @param {string} ruleContext - Optional rule context; omitted when falsy.
 * @param {number} page0 - Zero-based page index.
 * @param {number} hitsPerPage - Page size.
 * @returns {Promise<*>} Whatever ctx.http.fetchJsonWithRetry resolves to.
 */
async function bswFetchAlgoliaPage(ctx, collectionId, ruleContext, page0, hitsPerPage) {
  const enc = (v) => encodeURIComponent(String(v));
  const filter = `collection_ids:${collectionId} AND (inventory_available:"true")`;

  // Order and encoding of the pairs are kept exactly as the request expects them.
  const pairs = [
    `facets=%5B%22price%22%2C%22*%22%5D`,
    `filters=${encodeURIComponent(filter)}`,
    `hitsPerPage=${enc(hitsPerPage)}`,
    `page=${enc(page0)}`,
    `query=`,
    `clickAnalytics=true`,
    `maxValuesPerFacet=100`,
  ];
  if (ruleContext) pairs.push(`ruleContexts=${enc(ruleContext)}`);

  const payload = { requests: [{ indexName: "shopify_products", params: pairs.join("&") }] };
  const requestLabel = `algolia:${ctx.cat.key}:p${page0}`;
  const response = await ctx.http.fetchJsonWithRetry(BSW_ALGOLIA_URL, requestLabel, ctx.store.ua, {
    method: "POST",
    headers: {
      Accept: "*/*",
      "content-type": "application/x-www-form-urlencoded",
      Origin: "https://www.bswliquor.com",
      Referer: "https://www.bswliquor.com/",
      "x-algolia-api-key": BSW_ALGOLIA_API_KEY,
      "x-algolia-application-id": BSW_ALGOLIA_APP_ID,
    },
    body: JSON.stringify(payload),
  });
  return response;
}
/**
 * Render a byte count via humanBytes, right-aligned in an 8-character column.
 *
 * @param {number} bytes - Byte count.
 * @returns {string} Padded size label.
 */
function kbStr(bytes) {
  const label = humanBytes(bytes);
  return label.padStart(8, " ");
}
/**
 * Render a duration as seconds, right-aligned in a 7-character column.
 *
 * Under 10s (after rounding to tenths) one decimal is shown; otherwise whole
 * seconds. Non-finite input is treated as 0.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} e.g. "   1.2s" or "    12s".
 */
function secStr(ms) {
  const seconds = Number.isFinite(ms) ? ms / 1000 : 0;
  const roundedTenths = Math.round(seconds * 10) / 10;
  const label = roundedTenths < 10 ? `${roundedTenths.toFixed(1)}s` : `${Math.round(seconds)}s`;
  return label.padStart(7, " ");
}
/**
 * Format a "current/total" page counter, padding the current page (via
 * padLeft) to the width of the total.
 *
 * @param {number} i - Current page number.
 * @param {number} total - Total page count.
 * @returns {string} e.g. " 3/12".
 */
function pageStr(i, total) {
  const width = String(total).length;
  const current = padLeft(i, width);
  return `${current}/${total}`;
}
/**
 * Format progress as a whole-number percentage (rounded down), padded to
 * three characters via padLeft. A falsy total yields 0%.
 *
 * @param {number} done - Completed units.
 * @param {number} total - Total units.
 * @returns {string} e.g. " 42%".
 */
function pctStr(done, total) {
  let percent = 0;
  if (total) percent = Math.floor((done / total) * 100);
  return `${padLeft(percent, 3)}%`;
}
/**
 * Make a possibly relative or protocol-relative URL absolute.
 *
 * Protocol-relative values get "https:" prepended, http(s) URLs are returned
 * untouched, and anything else is resolved against the BSW site root.
 *
 * @param {*} raw - Candidate URL.
 * @returns {string} Absolute URL, "" for empty input, or the trimmed input if
 *   it cannot be resolved.
 */
function bswNormalizeAbsUrl(raw) {
  const candidate = String(raw || "").trim();
  if (candidate === "") return "";
  if (candidate.startsWith("//")) return `https:${candidate}`;
  if (/^https?:\/\//i.test(candidate)) return candidate;

  let resolved = candidate;
  try {
    resolved = new URL(candidate, "https://www.bswliquor.com/").toString();
  } catch {
    // Unresolvable: fall back to the trimmed input.
  }
  return resolved;
}
/**
 * Extract an absolute image URL from a string or an image-like object.
 *
 * For objects, a fixed list of common URL properties is checked in order and
 * the first non-blank string wins.
 *
 * @param {*} v - String URL, image object, or anything else.
 * @returns {string} Absolute URL, or "" when nothing usable is found.
 */
function bswNormalizeImg(v) {
  if (!v) return "";
  if (typeof v === "string") return bswNormalizeAbsUrl(v);
  if (typeof v !== "object") return "";

  const urlProps = [
    "src",
    "url",
    "originalSrc",
    "original_src",
    "original",
    "secure_url",
    "large",
    "medium",
    "small",
  ];
  const chosen = urlProps.find((prop) => typeof v[prop] === "string" && v[prop].trim());
  return chosen === undefined ? "" : bswNormalizeAbsUrl(v[chosen]);
}
/**
 * Find the first usable image URL on an Algolia hit.
 *
 * Search order: a fixed list of single-image properties, then the entries of
 * `hit.images`, then the entries of `hit.media`.
 *
 * @param {object|null|undefined} hit - Algolia hit.
 * @returns {string} Absolute image URL or "".
 */
function bswPickImage(hit) {
  const singleImageProps = [
    "image",
    "image_url",
    "imageUrl",
    "imageURL",
    "featured_image",
    "featured_image_url",
    "featuredImage",
    "featuredImageUrl",
    "product_image",
    "product_image_url",
    "productImage",
    "productImageUrl",
    "thumbnail",
    "thumbnail_url",
    "thumbnailUrl",
  ];
  const pools = [
    singleImageProps.map((prop) => hit?.[prop]),
    Array.isArray(hit?.images) ? hit.images : [],
    Array.isArray(hit?.media) ? hit.media : [],
  ];

  for (const pool of pools) {
    for (const candidate of pool) {
      const normalized = bswNormalizeImg(candidate);
      if (normalized) return normalized;
    }
  }
  return "";
}
/**
 * Scan one BSW category through the Algolia search endpoint and record the outcome.
 *
 * Flow: resolve the collection id (ctx.cat.bswCollectionId, else scraped from the
 * category start page), fetch result pages with bswFetchAlgoliaPage, convert hits with
 * bswHitToItem, de-duplicate by URL, merge with prevDb, write ctx.dbFile and add the
 * per-category summary to the report.
 *
 * @param {object} ctx - Scan context; this function reads ctx.cat, ctx.store, ctx.http,
 *   ctx.logger, ctx.config, ctx.dbFile and ctx.catPrefixOut.
 * @param {*} prevDb - Previous DB state, passed unchanged to mergeDiscoveredIntoDb.
 * @param {object} report - Mutated in place: an entry is pushed onto report.categories and
 *   the counters in report.totals are incremented.
 * @returns {Promise<void>}
 */
async function scanCategoryBSWAlgolia(ctx, prevDb, report) {
const t0 = Date.now();
// Prefer an id configured on the category; otherwise try to discover it from the page HTML.
let collectionId = Number.isFinite(ctx.cat.bswCollectionId) ? ctx.cat.bswCollectionId : null;
if (!collectionId) {
try {
const { text: html } = await ctx.http.fetchTextWithRetry(ctx.cat.startUrl, `bsw:html:${ctx.cat.key}`, ctx.store.ua);
collectionId = bswExtractCollectionIdFromHtml(html);
if (collectionId) ctx.logger.ok(`${ctx.catPrefixOut} | BSW discovered collectionId=${collectionId}`);
else ctx.logger.warn(`${ctx.catPrefixOut} | BSW could not discover collectionId from HTML.`);
} catch (e) {
// Discovery is best effort: a failed fetch is logged and handled by the branch below.
ctx.logger.warn(`${ctx.catPrefixOut} | BSW HTML fetch failed for collectionId discovery: ${e?.message || e}`);
}
}
// Without a collection id no Algolia query is made; the run is recorded as one page with
// zero discovered items, and the merge still runs against an empty map.
// NOTE(review): how mergeDiscoveredIntoDb treats previously known items in that case is
// not visible here — confirm this is the intended outcome of a failed discovery.
if (!collectionId) {
ctx.logger.warn(`${ctx.catPrefixOut} | BSW missing collectionId; defaulting to 1 page with 0 items.`);
const discovered = new Map();
const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered);
const dbObj = buildDbObject(ctx, merged);
writeJsonAtomic(ctx.dbFile, dbObj);
ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`);
const elapsed = Date.now() - t0;
report.categories.push({
store: ctx.store.name,
label: ctx.cat.label,
key: ctx.cat.key,
dbFile: ctx.dbFile,
scannedPages: 1,
discoveredUnique: 0,
newCount: newItems.length,
updatedCount: updatedItems.length,
removedCount: removedItems.length,
restoredCount: restoredItems.length,
elapsedMs: elapsed,
});
report.totals.newCount += newItems.length;
report.totals.updatedCount += updatedItems.length;
report.totals.removedCount += removedItems.length;
report.totals.restoredCount += restoredItems.length;
addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems);
return;
}
const ruleContext = ctx.cat.bswRuleContext || "";
const hitsPerPage = 50;
// Page 0 is fetched up front to learn nbPages; its response is reused below rather than refetched.
const first = await bswFetchAlgoliaPage(ctx, collectionId, ruleContext, 0, hitsPerPage);
const result0 = first?.json?.results?.[0] || null;
const nbPages = result0 && Number.isFinite(result0.nbPages) ? result0.nbPages : 1;
const totalPages = Math.max(1, nbPages);
// A maxPages of null means "no cap"; any other value is used as an upper bound.
const scanPages = ctx.config.maxPages === null ? totalPages : Math.min(ctx.config.maxPages, totalPages);
ctx.logger.ok(`${ctx.catPrefixOut} | Pages: ${scanPages}${scanPages !== totalPages ? ` (cap from ${totalPages})` : ""}`);
const pageIdxs = [];
for (let p = 0; p < scanPages; p++) pageIdxs.push(p);
// Counts completed pages for the progress column; incremented as each page callback finishes.
let donePages = 0;
const perPageItems = await require("../utils/async").parallelMapStaggered(pageIdxs, ctx.config.concurrency, ctx.config.staggerMs, async (page0, idx) => {
const pnum = idx + 1;
const r = page0 === 0 ? first : await bswFetchAlgoliaPage(ctx, collectionId, ruleContext, page0, hitsPerPage);
const res0 = r?.json?.results?.[0] || null;
const hits = res0 && Array.isArray(res0.hits) ? res0.hits : [];
const items = [];
for (const h of hits) {
const it = bswHitToItem(h);
if (it) items.push(it);
}
donePages++;
ctx.logger.ok(
`${ctx.catPrefixOut} | Page ${pageStr(pnum, pageIdxs.length)} | ${String(r.status || "").padEnd(3)} | ${pctStr(donePages, pageIdxs.length)} | items=${padLeft(
items.length,
3
)} | bytes=${kbStr(r.bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}`
);
return items;
});
// De-duplicate by URL; a later occurrence overwrites an earlier one and is counted as a dup.
const discovered = new Map();
let dups = 0;
for (const arr of perPageItems) {
for (const it of arr) {
if (discovered.has(it.url)) dups++;
discovered.set(it.url, it);
}
}
ctx.logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}${dups ? ` (${dups} dups)` : ""}`);
const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered);
const dbObj = buildDbObject(ctx, merged);
writeJsonAtomic(ctx.dbFile, dbObj);
ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`);
const elapsed = Date.now() - t0;
ctx.logger.ok(
`${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`
);
// Per-category summary plus running totals for the whole report.
report.categories.push({
store: ctx.store.name,
label: ctx.cat.label,
key: ctx.cat.key,
dbFile: ctx.dbFile,
scannedPages: scanPages,
discoveredUnique: discovered.size,
newCount: newItems.length,
updatedCount: updatedItems.length,
removedCount: removedItems.length,
restoredCount: restoredItems.length,
elapsedMs: elapsed,
});
report.totals.newCount += newItems.length;
report.totals.updatedCount += updatedItems.length;
report.totals.removedCount += removedItems.length;
report.totals.restoredCount += restoredItems.length;
addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems);
}
/**
 * Build the BSW store definition (scanned via Algolia).
 *
 * @param {string} defaultUa - User-Agent stored on the definition as `ua`.
 * @returns {object} Store descriptor with its scan function and categories.
 */
function createStore(defaultUa) {
  // The collection handle doubles as category key and Algolia rule context.
  const collections = [
    { handle: "scotch-whisky", label: "Scotch Whisky" },
    { handle: "rum", label: "Rum" },
  ];
  return {
    key: "bsw",
    name: "BSW",
    host: "www.bswliquor.com",
    ua: defaultUa,
    scanCategory: scanCategoryBSWAlgolia,
    categories: collections.map(({ handle, label }) => ({
      key: handle,
      label,
      startUrl: `https://www.bswliquor.com/collections/${handle}?page=1`,
      bswRuleContext: handle,
    })),
  };
}
module.exports = { createStore };

307
src/stores/craftcellars.js Normal file
View file

@ -0,0 +1,307 @@
"use strict";
const { decodeHtml, stripTags, extractFirstImgUrl } = require("../utils/html");
const { sanitizeName } = require("../utils/text");
const { normalizeCspc } = require("../utils/sku");
const { makePageUrlShopifyQueryPage } = require("../utils/url");
const { mergeDiscoveredIntoDb } = require("../tracker/merge");
const { buildDbObject, writeJsonAtomic } = require("../tracker/db");
const { addCategoryResultToReport } = require("../tracker/report");
/**
 * Detect a Craft Cellars listing page that reports no products.
 *
 * @param {string} html - Page HTML (nullish is treated as "").
 * @returns {boolean} True when an empty-collection marker is present.
 */
function craftCellarsIsEmptyListingPage(html) {
  const page = String(html || "");
  return /collection--empty\b/i.test(page) || /No products found/i.test(page);
}
/**
 * Strip the query string and fragment from a product URL so it can be used as
 * a stable key.
 *
 * @param {*} raw - Absolute URL.
 * @returns {string} Canonical URL, or the input as a string ("" for falsy
 *   input) when it cannot be parsed.
 */
function canonicalizeCraftProductUrl(raw) {
  let parsed;
  try {
    parsed = new URL(String(raw));
  } catch {
    return String(raw || "");
  }
  parsed.search = "";
  parsed.hash = "";
  return parsed.toString();
}
/**
 * Pull the displayed price out of a Shopify product-card HTML fragment.
 *
 * Preference order: the first dollar amount after a "sale price" label, then
 * the first after a "regular price" label, then the first amount anywhere.
 *
 * @param {string} block - Product-card HTML.
 * @returns {string} Price such as "$1,234.00" (whitespace removed) or "".
 */
function extractShopifyCardPrice(block) {
  const card = String(block || "");
  const dollarAmounts = (fragment) =>
    (String(fragment).match(/\$\s*[\d,]+(?:\.\d{2})?/g) || []).map((amount) => amount.replace(/\s+/g, ""));

  for (const label of [/sale price/i, /regular price/i]) {
    // Only the text between the first and second occurrence of the label is inspected.
    const afterLabel = card.split(label)[1] || "";
    const found = dollarAmounts(afterLabel);
    if (found.length) return found[0];
  }
  return dollarAmounts(card)[0] || "";
}
/**
 * Parse products from a Craft Cellars listing page.
 *
 * Tries to narrow the HTML to the product grid first; if the narrowed
 * fragment contains no product link, the whole page is parsed instead.
 *
 * @param {string} html - Listing page HTML.
 * @param {object} [ctx] - Scan context, forwarded to the inner parser.
 * @returns {Array<object>} Parsed items.
 */
function parseProductsCraftCellars(html, ctx) {
  const page = String(html || "");
  const containerGrid = page.match(/<div\b[^>]*id=["']ProductGridContainer["'][^>]*>[\s\S]*?<\/div>/i)?.[0] || "";
  const productGrid = page.match(/<div\b[^>]*id=["']product-grid["'][^>]*>[\s\S]*?<\/div>/i)?.[0] || "";

  // Keep whichever candidate is longer (ties go to product-grid).
  let scope = productGrid;
  if (containerGrid.length > productGrid.length) scope = containerGrid;
  if (!/\/products\//i.test(scope)) scope = page;

  return parseProductsCraftCellarsInner(scope, ctx);
}
/**
 * Extract product items from listing HTML by scanning card-like blocks.
 *
 * Blocks are `<li>` elements; when fewer than five are found, `<div>`
 * elements with a `card` class are used instead. Blocks without a product
 * link or a name are skipped. Items are keyed by canonical URL, so a later
 * block for the same URL replaces an earlier one.
 *
 * @param {string} html - HTML to scan.
 * @param {object} [ctx] - Scan context; ctx.store.host overrides the default host.
 * @returns {Array<{name: string, price: string, url: string, img: *}>} Unique items.
 */
function parseProductsCraftCellarsInner(html, ctx) {
  const markup = String(html || "");
  const host = (ctx && ctx.store && ctx.store.host) || "craftcellars.ca";
  const base = `https://${host}/`;

  let cards = Array.from(markup.matchAll(/<li\b[^>]*>[\s\S]*?<\/li>/gi), (m) => m[0]);
  if (cards.length < 5) {
    cards = Array.from(
      markup.matchAll(/<div\b[^>]*class=["'][^"']*\bcard\b[^"']*["'][^>]*>[\s\S]*?<\/div>/gi),
      (m) => m[0]
    );
  }

  // Name candidates, most specific first.
  const findNameHtml = (card) =>
    card.match(/<a\b[^>]*href=["'][^"']*\/products\/[^"']+["'][^>]*>\s*<[^>]*>\s*([^<]{2,200}?)\s*</i)?.[1] ||
    card.match(/<h[23]\b[^>]*>[\s\S]*?<a\b[^>]*\/products\/[^"']+[^>]*>([\s\S]*?)<\/a>[\s\S]*?<\/h[23]>/i)?.[1] ||
    card.match(/<a\b[^>]*href=["'][^"']*\/products\/[^"']+["'][^>]*>([\s\S]*?)<\/a>/i)?.[1];

  const byUrl = new Map();
  for (const card of cards) {
    const href =
      card.match(/<a\b[^>]*href=["']([^"']*\/products\/[^"']+)["']/i)?.[1] ||
      card.match(/href=["']([^"']*\/products\/[^"']+)["']/i)?.[1];
    if (!href) continue;

    let absolute;
    try {
      absolute = new URL(decodeHtml(href), base).toString();
    } catch {
      continue;
    }
    const url = canonicalizeCraftProductUrl(absolute);

    const name = sanitizeName(stripTags(decodeHtml(findNameHtml(card) || "")));
    if (!name) continue;

    const price = extractShopifyCardPrice(card);
    const img = extractFirstImgUrl(card, base);
    byUrl.set(url, { name, price, url, img });
  }
  return [...byUrl.values()];
}
/**
 * Normalize a Shopify price string (e.g. "1299.5") to "$1,299.50".
 *
 * @param {*} s - Raw price; anything other than digits and "." is ignored.
 * @returns {string} Formatted price, or "" when the input holds no usable number.
 */
function usdFromShopifyPriceStr(s) {
  const digits = String(s || "").replace(/[^0-9.]/g, "");
  // Number("") is 0, so input without any digit used to come back as "$0.00".
  if (!/\d/.test(digits)) return "";
  const n = Number(digits);
  if (!Number.isFinite(n)) return "";
  return `$${n.toLocaleString("en-US", { minimumFractionDigits: 2, maximumFractionDigits: 2 })}`;
}
/**
 * Scan one Craft Cellars category and record the outcome.
 *
 * Craft Cellars:
 * - HTML listing with ?filter.v.availability=1 is the allowlist (prevents OOS leaking in)
 * - Shopify products.json is used only to enrich SKU (and optionally price) for those allowed URLs
 *
 * @param {object} ctx - Scan context; this function reads ctx.cat, ctx.store, ctx.http,
 *   ctx.logger, ctx.config, ctx.dbFile and ctx.catPrefixOut.
 * @param {*} prevDb - Previous DB state, passed unchanged to mergeDiscoveredIntoDb.
 * @param {object} report - Mutated in place: an entry is pushed onto report.categories and
 *   the counters in report.totals are incremented.
 * @returns {Promise<void>}
 * @throws {Error} When the collection handle cannot be extracted from ctx.cat.startUrl
 *   (only checked when the HTML scan found at least one item).
 */
async function scanCategoryCraftCellars(ctx, prevDb, report) {
const t0 = Date.now();
// 1) HTML scan: allowlist of in-stock listing URLs
const htmlMap = new Map(); // url -> {name, price, url, img}
// Hard ceiling of 200 listing pages, even when maxPages is null (no configured cap).
const maxPages = ctx.config.maxPages === null ? 200 : Math.min(ctx.config.maxPages, 200);
let htmlPagesFetched = 0;
let emptyStreak = 0;
for (let p = 1; p <= maxPages; p++) {
const pageUrl = makePageUrlShopifyQueryPage(ctx.cat.startUrl, p);
const { text: html } = await ctx.http.fetchTextWithRetry(pageUrl, `craft:html:${ctx.cat.key}:p${p}`, ctx.store.ua);
htmlPagesFetched++;
// An explicit "empty collection" page ends pagination immediately.
if (craftCellarsIsEmptyListingPage(html)) break;
const items = parseProductsCraftCellars(html, ctx);
// A single page that parses to nothing is tolerated; two in a row ends the scan.
if (!items.length) {
emptyStreak++;
if (emptyStreak >= 2) break;
continue;
}
emptyStreak = 0;
for (const it of items) {
const url = canonicalizeCraftProductUrl(it.url);
if (!url) continue;
htmlMap.set(url, { name: it.name || "", price: it.price || "", url, img: it.img || "" });
}
}
// If HTML returns nothing, don't let JSON invent a category
if (!htmlMap.size) {
ctx.logger.warn(
`${ctx.catPrefixOut} | HTML listing returned 0 items; refusing to use products.json as source of truth.`
);
}
// 2) JSON scan: build SKU index (but do NOT add new URLs from JSON)
const jsonMap = new Map(); // url -> { sku, price, img }
if (htmlMap.size) {
const start = new URL(ctx.cat.startUrl);
const m = start.pathname.match(/^\/collections\/([^/]+)/i);
if (!m) throw new Error(`CraftCellars: couldn't extract collection handle from ${ctx.cat.startUrl}`);
const collectionHandle = m[1];
const limit = 250;
let jsonPage = 1;
let jsonPagesFetched = 0;
while (true) {
const url = `https://${ctx.store.host}/collections/${collectionHandle}/products.json?limit=${limit}&page=${jsonPage}`;
const r = await ctx.http.fetchJsonWithRetry(url, `craft:coljson:${ctx.cat.key}:p${jsonPage}`, ctx.store.ua);
const products = Array.isArray(r?.json?.products) ? r.json.products : [];
jsonPagesFetched++;
if (!products.length) break;
for (const p of products) {
const handle = String(p?.handle || "");
if (!handle) continue;
// The JSON entry is matched to the HTML allowlist via a URL built from the product handle.
const prodUrl = canonicalizeCraftProductUrl(`https://${ctx.store.host}/products/${handle}`);
// Only enrich if it's on the HTML allowlist
if (!htmlMap.has(prodUrl)) continue;
const variants = Array.isArray(p?.variants) ? p.variants : [];
// Prefer the first variant flagged available; otherwise fall back to the first variant.
const v = variants.find((x) => x && x.available === true) || variants[0] || null;
const sku = normalizeCspc(v?.sku || "");
const price = v?.price ? usdFromShopifyPriceStr(v.price) : "";
// Product image (best effort)
let img = "";
const images = Array.isArray(p?.images) ? p.images : [];
if (images[0]) {
if (typeof images[0] === "string") img = images[0];
else img = String(images[0]?.src || images[0]?.url || "");
}
if (!img && p?.image) img = String(p.image?.src || p.image?.url || p.image || "");
img = String(img || "").trim();
// Make protocol-relative and site-relative image paths absolute.
if (img.startsWith("//")) img = `https:${img}`;
if (img && !/^https?:\/\//i.test(img)) {
try {
img = new URL(img, `https://${ctx.store.host}/`).toString();
} catch {
// keep as-is
}
}
jsonMap.set(prodUrl, { sku, price, img });
}
// A short page means this was the last one.
if (products.length < limit) break;
jsonPage++;
if (jsonPage > 200) break; // safety
}
ctx.logger.ok(`${ctx.catPrefixOut} | HTML pages=${htmlPagesFetched} JSON pages=${jsonPagesFetched}`);
} else {
ctx.logger.ok(`${ctx.catPrefixOut} | HTML pages=${htmlPagesFetched} JSON pages=0`);
}
// 3) Final discovered: HTML allowlist, enriched by JSON
const discovered = new Map();
for (const [url, it] of htmlMap.entries()) {
const j = jsonMap.get(url);
discovered.set(url, {
name: it.name || "",
// Prefer JSON price (normalized) when present, else keep HTML price (already formatted)
price: j?.price || it.price || "",
url,
sku: j?.sku || "",
img: j?.img || it.img || "",
});
}
ctx.logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}`);
const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered);
const dbObj = buildDbObject(ctx, merged);
writeJsonAtomic(ctx.dbFile, dbObj);
ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`);
const elapsed = Date.now() - t0;
// Per-category summary plus running totals for the whole report.
report.categories.push({
store: ctx.store.name,
label: ctx.cat.label,
key: ctx.cat.key,
dbFile: ctx.dbFile,
scannedPages: htmlPagesFetched,
discoveredUnique: discovered.size,
newCount: newItems.length,
updatedCount: updatedItems.length,
removedCount: removedItems.length,
restoredCount: restoredItems.length,
elapsedMs: elapsed,
});
report.totals.newCount += newItems.length;
report.totals.updatedCount += updatedItems.length;
report.totals.removedCount += removedItems.length;
report.totals.restoredCount += restoredItems.length;
addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems);
}
/**
 * Build the Craft Cellars store definition.
 *
 * @param {string} defaultUa - User-Agent stored on the definition as `ua`.
 * @returns {object} Store descriptor with scan/parse hooks and categories.
 */
function createStore(defaultUa) {
  // [collection handle (also the category key), label, discoveryStartPage]
  const collections = [
    ["whisky", "Whisky", 10],
    ["rum", "Rum", 5],
  ];
  return {
    key: "craftcellars",
    name: "Craft Cellars",
    host: "craftcellars.ca",
    ua: defaultUa,
    // Custom scan: HTML allowlist + JSON enrichment
    scanCategory: scanCategoryCraftCellars,
    // HTML parser hooks are kept for debugging
    parseProducts: parseProductsCraftCellars,
    makePageUrl: makePageUrlShopifyQueryPage,
    isEmptyListingPage: craftCellarsIsEmptyListingPage,
    categories: collections.map(([key, label, discoveryStartPage]) => ({
      key,
      label,
      startUrl: `https://craftcellars.ca/collections/${key}?filter.v.availability=1`,
      discoveryStartPage,
    })),
  };
}
module.exports = { createStore };

25
src/stores/index.js Normal file
View file

@ -0,0 +1,25 @@
"use strict";
const { createStore: createSierra, parseProductsSierra } = require("./sierrasprings");
const { createStore: createBSW } = require("./bsw");
const { createStore: createKWM } = require("./kwm");
const { createStore: createKegNCork } = require("./kegncork");
const { createStore: createMaltsAndGrains } = require("./maltsandgrains");
const { createStore: createCraftCellars } = require("./craftcellars");
const { createStore: createBCL } = require("./bcl");
const { createStore: createStrath } = require("./strath");
/**
 * Instantiate every supported store definition.
 *
 * @param {{defaultUa?: string}} [options] - `defaultUa` is handed to each store factory.
 * @returns {Array<object>} Store descriptors, in a fixed order.
 */
function createStores({ defaultUa } = {}) {
  const factories = [
    createSierra,
    createBSW,
    createKWM,
    createKegNCork,
    createMaltsAndGrains,
    createCraftCellars,
    createBCL,
    createStrath,
  ];
  return factories.map((factory) => factory(defaultUa));
}
module.exports = { createStores, parseProductsSierra };

78
src/stores/kegncork.js Normal file
View file

@ -0,0 +1,78 @@
"use strict";
const { decodeHtml, cleanText, stripTags, extractFirstImgUrl } = require("../utils/html");
const { makePageUrlQueryParam } = require("../utils/url");
/**
 * Build the URL of a Keg N Cork listing page using the `page` query parameter.
 *
 * @param {string} baseUrl - Category start URL.
 * @param {number} pageNum - Page number to request.
 * @returns {*} Whatever makePageUrlQueryParam returns for the "page" parameter.
 */
function makePageUrlKegNCork(baseUrl, pageNum) {
  const pagedUrl = makePageUrlQueryParam(baseUrl, "page", pageNum);
  return pagedUrl;
}
/**
 * Parse products from a Keg N Cork listing page.
 *
 * The page is split on `<li class="... product ...">` openings. Each block
 * needs a card-title link with an absolute http(s) URL and a non-empty name.
 * Price comes from the `data-product-price-without-tax` element, else from the
 * first dollar amount in the price section.
 *
 * @param {string} html - Listing page HTML.
 * @param {object} [ctx] - Scan context; optional. ctx.store.host overrides the
 *   default host and ctx.logger.dbg (if present) receives a debug line.
 * @returns {Array<{name: string, price: string, url: string, img: *}>} Items,
 *   unique by URL (last occurrence wins).
 */
function parseProductsKegNCork(html, ctx) {
  const s = String(html || "");
  const items = [];
  const base = `https://${(ctx && ctx.store && ctx.store.host) || "kegncork.com"}/`;
  const blocks = s.split(/<li\b[^>]*class=["'][^"']*\bproduct\b[^"']*["'][^>]*>/i);
  // ctx is optional (see the host fallback above), so the debug hook must not assume it exists.
  ctx?.logger?.dbg?.(`parseProductsKegNCork: li.product blocks=${Math.max(0, blocks.length - 1)} bytes=${s.length}`);
  // blocks[0] is whatever precedes the first product <li>, so start at 1.
  for (let i = 1; i < blocks.length; i++) {
    const block = "<li" + blocks[i];
    const mTitle = block.match(
      /<h4\b[^>]*class=["'][^"']*\bcard-title\b[^"']*["'][^>]*>[\s\S]*?<a\b[^>]*href=["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>/i
    );
    if (!mTitle) continue;
    const url = decodeHtml(mTitle[1]).trim();
    const name = cleanText(decodeHtml(mTitle[2]));
    if (!url || !/^https?:\/\//i.test(url) || !name) continue;
    let price = "";
    const mPrice = block.match(/data-product-price-without-tax[^>]*>\s*([^<]+)\s*</i);
    if (mPrice && mPrice[1]) {
      const p = cleanText(decodeHtml(mPrice[1])).replace(/\s+/g, "");
      if (p) price = p.startsWith("$") ? p : `$${p}`;
    } else {
      const priceSection = block.match(/data-test-info-type=["']price["'][\s\S]*?<\/div>\s*<\/div>/i)?.[0] || "";
      // Allow thousands separators so "$1,299.00" is not cut down to "$1".
      const mDollar = cleanText(decodeHtml(stripTags(priceSection))).match(/\$\s*[\d,]+(?:\.\d{2})?/);
      if (mDollar) price = mDollar[0].replace(/\s+/g, "");
    }
    const img = extractFirstImgUrl(block, base);
    items.push({ name, price, url, img });
  }
  const uniq = new Map();
  for (const it of items) uniq.set(it.url, it);
  return [...uniq.values()];
}
/**
 * Build the Keg N Cork store definition.
 *
 * @param {string} defaultUa - User-Agent stored on the definition as `ua`.
 * @returns {object} Store descriptor with parse/pagination hooks and categories.
 */
function createStore(defaultUa) {
  // [URL path segment (also the category key), label, discoveryStartPage]
  const sections = [
    ["whisky", "Whisky", 5],
    ["rum", "Rum", 1],
  ];
  return {
    key: "kegncork",
    name: "Keg N Cork",
    host: "kegncork.com",
    ua: defaultUa,
    parseProducts: parseProductsKegNCork,
    makePageUrl: makePageUrlKegNCork,
    categories: sections.map(([key, label, discoveryStartPage]) => ({
      key,
      label,
      startUrl: `https://kegncork.com/${key}/?page=1`,
      discoveryStartPage,
    })),
  };
}
module.exports = { createStore };

189
src/stores/kwm.js Normal file
View file

@ -0,0 +1,189 @@
"use strict";
const { decodeHtml, stripTags, cleanText, extractHtmlAttr, escapeRe, extractFirstImgUrl } = require("../utils/html");
const { sanitizeName } = require("../utils/text");
const { normalizeCspc } = require("../utils/sku");
const { normalizeBaseUrl } = require("../utils/url");
/**
 * Build the URL of a KWM listing page.
 *
 * Page 1 (or lower) is addressed without a `page` parameter; later pages set
 * `page=N`. Any fragment is removed.
 *
 * @param {string} baseUrl - Category start URL (passed through normalizeBaseUrl).
 * @param {number} pageNum - Page number.
 * @returns {string} Page URL.
 */
function makePageUrlKWM(baseUrl, pageNum) {
  const target = new URL(normalizeBaseUrl(baseUrl));
  target.hash = "";

  if (pageNum <= 1) target.searchParams.delete("page");
  else target.searchParams.set("page", String(pageNum));

  // Re-assign the serialized query explicitly; it is empty only when no parameters remain.
  const query = target.searchParams.toString();
  target.search = query ? `?${query}` : "";
  return target.toString();
}
/**
 * Collect the full HTML of every `<div>` carrying a given class, including
 * nested divs, by tracking open/close depth.
 *
 * After a block is captured the search resumes after its closing tag, so
 * matching divs nested inside a captured block are not returned separately.
 * A div whose closing tag cannot be found is skipped.
 *
 * @param {string} html - HTML to scan (nullish is treated as "").
 * @param {string} className - Class name to look for (escaped for the regex).
 * @param {number} maxBlocks - Upper bound on the number of blocks returned.
 * @returns {string[]} Matching blocks, each from its opening tag through its closing `</div>`.
 */
function extractDivBlocksByExactClass(html, className, maxBlocks) {
  const text = String(html || "");
  const openTagRe = new RegExp(`<div\\b[^>]*class=["'][^"']*\\b${escapeRe(className)}\\b[^"']*["'][^>]*>`, "gi");
  const CLOSE_TAG = "</div>";
  const blocks = [];

  for (let opening = openTagRe.exec(text); opening; opening = openTagRe.exec(text)) {
    if (blocks.length >= maxBlocks) break;

    let cursor = opening.index + opening[0].length;
    let depth = 1;
    while (cursor < text.length) {
      const closeAt = text.indexOf(CLOSE_TAG, cursor);
      if (closeAt === -1) break;

      const openAt = text.indexOf("<div", cursor);
      if (openAt !== -1 && openAt < closeAt) {
        // A nested div opens before the next close: go one level deeper.
        depth += 1;
        cursor = openAt + 4;
        continue;
      }

      depth -= 1;
      const afterClose = closeAt + CLOSE_TAG.length;
      if (depth === 0) {
        blocks.push(text.slice(opening.index, afterClose));
        openTagRe.lastIndex = afterClose;
        break;
      }
      cursor = afterClose;
    }
  }
  return blocks;
}
/**
 * Find the href of the `product-link` anchor in a KWM product block.
 *
 * An empty anchor (`<a ...></a>`) is preferred; otherwise any anchor with the
 * class is accepted. Both attribute orders (class/href, href/class) are handled.
 *
 * @param {string} block - Product block HTML.
 * @returns {string} Trimmed href or "".
 */
function kwmExtractProductLinkHref(block) {
  const patternsByPriority = [
    /<a\b[^>]*class=["'][^"']*\bproduct-link\b[^"']*["'][^>]*href=["']([^"']+)["'][^>]*>\s*<\/a>/i,
    /<a\b[^>]*href=["']([^"']+)["'][^>]*class=["'][^"']*\bproduct-link\b[^"']*["'][^>]*>\s*<\/a>/i,
    /<a\b[^>]*class=["'][^"']*\bproduct-link\b[^"']*["'][^>]*href=["']([^"']+)["']/i,
    /<a\b[^>]*href=["']([^"']+)["'][^>]*class=["'][^"']*\bproduct-link\b[^"']*["']/i,
  ];
  for (const pattern of patternsByPriority) {
    const found = block.match(pattern);
    if (found && found[1]) return found[1].trim();
  }
  return "";
}
/**
 * Get the product name from a KWM product block.
 *
 * The `data-item` attribute is preferred; the text of the first `<h6>` is the fallback.
 *
 * @param {string} block - Product block HTML.
 * @returns {*} Result of sanitizeName, or "" when no name source exists.
 */
function kwmExtractName(block) {
  const fromAttribute = extractHtmlAttr(block, "data-item");
  if (fromAttribute) return sanitizeName(fromAttribute);

  const heading = block.match(/<h6\b[^>]*>\s*([\s\S]*?)\s*<\/h6>/i);
  return heading && heading[1] ? sanitizeName(stripTags(heading[1])) : "";
}
/**
 * Return the inner HTML of the first `<div>` carrying a given class, honoring
 * nested divs by tracking open/close depth.
 *
 * @param {string} html - HTML to scan.
 * @param {string} className - Class name to look for (escaped for the regex).
 * @returns {string} Inner HTML (without the wrapping tags), or "" when the div
 *   is absent or never closed.
 */
function kwmExtractFirstDivByClass(html, className) {
  const openTagRe = new RegExp(`<div\\b[^>]*class=["'][^"']*\\b${escapeRe(className)}\\b[^"']*["'][^>]*>`, "i");
  const opening = openTagRe.exec(html);
  if (!opening) return "";

  const innerStart = opening.index + opening[0].length;
  let depth = 1;
  for (let cursor = innerStart; cursor < html.length; ) {
    const closeAt = html.indexOf("</div>", cursor);
    if (closeAt === -1) break;

    const openAt = html.indexOf("<div", cursor);
    if (openAt !== -1 && openAt < closeAt) {
      // A nested div opens before the next close: go one level deeper.
      depth += 1;
      cursor = openAt + 4;
      continue;
    }

    depth -= 1;
    if (depth === 0) return html.slice(innerStart, closeAt);
    cursor = closeAt + 6;
  }
  return "";
}
/**
 * Extract the current price from a KWM product block.
 *
 * The `data-price` attribute is preferred. Otherwise the first dollar amount
 * in the `product-price` div is used, after removing struck-through prices.
 *
 * @param {string} block - Product block HTML.
 * @returns {string} Price such as "$45.00", or "" when none is found.
 */
function kwmExtractPrice(block) {
  const attr = block.match(/\bdata-price=["']([^"']+)["']/i);
  if (attr && attr[1]) {
    const digits = String(attr[1]).trim().replace(/[^0-9.]/g, "");
    const amount = Number(digits);
    // A malformed attribute such as "1.2.3" falls through to the visible price instead of yielding "$NaN".
    if (digits && Number.isFinite(amount)) return `$${amount.toFixed(2)}`;
  }
  const priceDiv = kwmExtractFirstDivByClass(block, "product-price");
  if (!priceDiv) return "";
  // Drop struck-through (old) prices so the remaining amount is the current one.
  const cleaned = String(priceDiv).replace(/<span\b[^>]*class=["'][^"']*\bstrike\b[^"']*["'][^>]*>[\s\S]*?<\/span>/gi, " ");
  const txt = cleanText(decodeHtml(stripTags(cleaned)));
  // Allow thousands separators so "$1,299.00" is not cut down to "$1".
  const dollars = [...txt.matchAll(/\$\s*[\d,]+(?:\.\d{2})?/g)];
  if (dollars.length) return dollars[0][0].replace(/\s+/g, "");
  return "";
}
/**
 * Parse products from a Kensington Wine Market listing page.
 *
 * Each `product-wrap` div is one candidate. Blocks containing "OUT OF STOCK",
 * or lacking a product link or name, are skipped. The SKU is derived from the
 * product URL via normalizeCspc.
 *
 * @param {string} html - Listing page HTML.
 * @param {object} [ctx] - Scan context; optional. ctx.store.host overrides the
 *   default host and ctx.logger.dbg (if present) receives a debug line.
 * @returns {Array<{name: *, price: string, url: string, sku: *, img: *}>}
 *   Items, unique by URL (last occurrence wins).
 */
function parseProductsKWM(html, ctx) {
  const s = String(html || "");
  const base = `https://${(ctx && ctx.store && ctx.store.host) || "kensingtonwinemarket.com"}/`;
  const blocks = extractDivBlocksByExactClass(s, "product-wrap", 5000);
  // ctx is optional (see the host fallback above), so the debug hook must not assume it exists.
  ctx?.logger?.dbg?.(`parseProductsKWM: productWrapBlocks=${blocks.length} bytes=${s.length}`);
  const items = [];
  for (const block of blocks) {
    if (/OUT OF STOCK/i.test(block)) continue;
    const href = kwmExtractProductLinkHref(block);
    if (!href) continue;
    let url;
    try {
      url = new URL(decodeHtml(href), base).toString();
    } catch {
      continue;
    }
    const name = kwmExtractName(block);
    if (!name) continue;
    const price = kwmExtractPrice(block);
    const sku = normalizeCspc(url);
    const img = extractFirstImgUrl(block, base);
    items.push({ name, price, url, sku, img });
  }
  const uniq = new Map();
  for (const it of items) uniq.set(it.url, it);
  return [...uniq.values()];
}
/**
 * Build the Kensington Wine Market store definition.
 *
 * @param {string} defaultUa - User-Agent stored on the definition as `ua`.
 * @returns {object} Store descriptor with parse/pagination hooks and categories.
 */
function createStore(defaultUa) {
  const scotch = {
    key: "scotch",
    label: "Scotch",
    startUrl: "https://kensingtonwinemarket.com/products/scotch/",
    discoveryStartPage: 200,
  };
  const rum = {
    key: "rum",
    label: "Rum",
    startUrl: "https://kensingtonwinemarket.com/products/liqu/rum/",
    discoveryStartPage: 20,
  };
  return {
    key: "kwm",
    name: "Kensington Wine Market",
    host: "kensingtonwinemarket.com",
    ua: defaultUa,
    parseProducts: parseProductsKWM,
    makePageUrl: makePageUrlKWM,
    categories: [scotch, rum],
  };
}
module.exports = { createStore };

View file

@ -0,0 +1,107 @@
"use strict";
const { decodeHtml, stripTags, cleanText, extractHtmlAttr, extractFirstImgUrl } = require("../utils/html");
const { normalizeCspc } = require("../utils/sku");
const { extractPriceFromTmbBlock } = require("../utils/woocommerce");
/**
 * Item filter for Malts & Grains: reject out-of-stock items and anything
 * categorized as gin, tequila or mezcal.
 *
 * @param {object|null|undefined} item - Parsed item; reads `inStock` and `cats`.
 * @returns {boolean} True when the item should be kept.
 */
function allowMaltsExcludeGinTequilaMezcal(item) {
  if (item && item.inStock === false) return false;

  const categories = Array.isArray(item?.cats) ? item.cats : [];
  // Whole-word match, so e.g. "ginger-beer" is not treated as gin.
  const excluded = /\b(?:gin|tequila|mezcal)\b/i;
  return !categories.some((category) => excluded.test(String(category || "")));
}
/**
 * Parse in-stock products from a Malts & Grains (WooCommerce) listing page.
 *
 * Each `<li class="... product ...">` is one candidate. Out-of-stock blocks
 * and blocks without a link or title are skipped. Category slugs are taken
 * from `product_cat-*` classes so callers can filter on them.
 *
 * @param {string} html - Listing page HTML.
 * @param {object} [ctx] - Scan context; optional. ctx.store.host overrides the
 *   default host and ctx.logger.dbg (if present) receives a debug line.
 * @returns {Array<object>} Items `{ name, price, url, sku, img, cats, inStock }`,
 *   unique by URL (last occurrence wins).
 */
function parseProductsMaltsAndGrains(html, ctx) {
  const s = String(html || "");
  const items = [];
  const re = /<li\b[^>]*class=["'][^"']*\bproduct\b[^"']*["'][^>]*>[\s\S]*?<\/li>/gi;
  const blocks = [...s.matchAll(re)].map((m) => m[0] || "");
  // ctx is optional (see the host fallback below), so the debug hook must not assume it exists.
  ctx?.logger?.dbg?.(`parseProductsMaltsAndGrains: li.product blocks=${blocks.length} bytes=${s.length}`);
  const base = `https://${(ctx && ctx.store && ctx.store.host) || "maltsandgrains.store"}/`;
  for (const block of blocks) {
    const classAttr = extractHtmlAttr(block, "class");
    const isOut =
      /\boutofstock\b/i.test(classAttr) ||
      /ast-shop-product-out-of-stock/i.test(block) ||
      />\s*out of stock\s*</i.test(block);
    if (isOut) continue;
    // Category slugs are encoded as product_cat-<slug> classes.
    const cats = [];
    for (const m of String(classAttr || "").matchAll(/\bproduct_cat-([a-z0-9_-]+)\b/gi)) {
      const v = String(m[1] || "").trim().toLowerCase();
      if (v) cats.push(v);
    }
    // Prefer the loop-product link (either attribute order), else any /product/ link.
    const href =
      block.match(
        /<a\b[^>]*href=["']([^"']+)["'][^>]*class=["'][^"']*\b(woocommerce-LoopProduct-link|woocommerce-loop-product__link|ast-loop-product__link)\b/i
      )?.[1] ||
      block.match(
        /<a\b[^>]*class=["'][^"']*\b(woocommerce-LoopProduct-link|woocommerce-loop-product__link|ast-loop-product__link)\b[^"']*["'][^>]*href=["']([^"']+)["']/i
      )?.[2] ||
      block.match(/<a\b[^>]*href=["']([^"']*\/product\/[^"']+)["']/i)?.[1];
    if (!href) continue;
    let url = "";
    try {
      url = new URL(decodeHtml(href), base).toString();
    } catch {
      continue;
    }
    if (!/^https?:\/\//i.test(url)) continue;
    const mTitle = block.match(
      /<h2\b[^>]*class=["'][^"']*\bwoocommerce-loop-product__title\b[^"']*["'][^>]*>([\s\S]*?)<\/h2>/i
    );
    const name = mTitle && mTitle[1] ? cleanText(decodeHtml(stripTags(mTitle[1]))) : "";
    if (!name) continue;
    const price = extractPriceFromTmbBlock(block);
    const sku = normalizeCspc(
      block.match(/\bdata-product_sku=["']([^"']+)["']/i)?.[1] ||
        block.match(/\bSKU[:\s]*([0-9]{6})\b/i)?.[1] ||
        ""
    );
    const img = extractFirstImgUrl(block, base);
    items.push({ name, price, url, sku, img, cats, inStock: true });
  }
  const uniq = new Map();
  for (const it of items) uniq.set(it.url, it);
  return [...uniq.values()];
}
/**
 * Build the Malts & Grains store definition.
 *
 * @param {string} defaultUa - User-Agent stored on the definition as `ua`.
 * @returns {object} Store descriptor with its parser and single category.
 */
function createStore(defaultUa) {
  // One catch-all listing; gin/tequila/mezcal are filtered out per item.
  const allSpirits = {
    key: "all-minus-gin-tequila-mezcal",
    label: "All Spirits",
    startUrl: "https://maltsandgrains.store/shop/page/1/",
    discoveryStartPage: 15,
    allowUrl: allowMaltsExcludeGinTequilaMezcal,
  };
  return {
    key: "maltsandgrains",
    name: "Malts & Grains",
    host: "maltsandgrains.store",
    ua: defaultUa,
    parseProducts: parseProductsMaltsAndGrains,
    categories: [allSpirits],
  };
}
module.exports = { createStore };

View file

@ -0,0 +1,91 @@
"use strict";
const { decodeHtml, cleanText, extractFirstImgUrl } = require("../utils/html");
const { normalizeCspc } = require("../utils/sku");
const { extractPriceFromTmbBlock } = require("../utils/woocommerce");
/**
 * Item filter for Sierra Springs' "Spirits / Liquor" category: keep only
 * product URLs under /shop/spirits-liquor/ whose path mentions rum, whisky or
 * whiskey. Matching is case-insensitive (the URL is lower-cased first).
 *
 * @param {object|null|undefined} item - Parsed item; reads `url`.
 * @returns {boolean} True when the item should be kept.
 */
function allowSierraSpiritsLiquorUrlRumWhisky(item) {
  const rawUrl = item && item.url ? item.url : "";
  const url = String(rawUrl || "").toLowerCase();

  const isSpiritsProductUrl = /^https?:\/\/sierraspringsliquor\.ca\/shop\/spirits-liquor\/.+\/$/.test(url);
  return isSpiritsProductUrl && /\/shop\/spirits-liquor\/.*(rum|whisk(?:e)?y).*/.test(url);
}
/**
 * Parse products from a Sierra Springs listing page.
 *
 * The page is split on `<div class="tmb` openings; each block needs a
 * `t-entry-title` heading containing the product link and name.
 *
 * @param {string} html - Listing page HTML.
 * @param {object} [ctx] - Scan context; optional. ctx.store.host overrides the
 *   default host and ctx.logger.dbg (if present) receives a debug line.
 * @returns {Array<{name: *, price: *, url: string, sku: *, img: *}>} Items,
 *   unique by URL (last occurrence wins).
 */
function parseProductsSierra(html, ctx) {
  const items = [];
  const blocks = String(html || "").split(/<div class="tmb\b/i);
  // ctx is optional (see the host fallback below), so the debug hook must not assume it exists.
  ctx?.logger?.dbg?.(
    `parseProductsSierra: tmbBlocks=${Math.max(0, blocks.length - 1)} bytes=${String(html || "").length}`
  );
  const base = `https://${(ctx && ctx.store && ctx.store.host) || "sierraspringsliquor.ca"}/`;
  // blocks[0] is whatever precedes the first tmb div, so start at 1.
  for (let i = 1; i < blocks.length; i++) {
    const block = '<div class="tmb' + blocks[i];
    const titleMatch = block.match(
      /<h3\b[^>]*class=["'][^"']*t-entry-title[^"']*["'][^>]*>\s*<a\b[^>]*href=["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>\s*<\/h3>/i
    );
    if (!titleMatch) continue;
    let url;
    try {
      url = new URL(decodeHtml(titleMatch[1]), base).toString();
    } catch {
      // One unparseable href should skip that product, not abort the whole page.
      continue;
    }
    const name = cleanText(decodeHtml(titleMatch[2]));
    if (!name) continue;
    const price = extractPriceFromTmbBlock(block);
    const sku = normalizeCspc(
      block.match(/\bdata-product_sku=["']([^"']+)["']/i)?.[1] ||
        block.match(/\bSKU[:\s]*([0-9]{6})\b/i)?.[1] ||
        ""
    );
    const img = extractFirstImgUrl(block, base);
    items.push({ name, price, url, sku, img });
  }
  const uniq = new Map();
  for (const it of items) uniq.set(it.url, it);
  return [...uniq.values()];
}
/**
 * Build the Sierra Springs store descriptor.
 *
 * @param {string} defaultUa - User-Agent assigned to the store's `ua` field.
 * @returns {object} Store descriptor with key, name, host, ua, the listing
 *   parser and the list of categories to scan.
 */
function createStore(defaultUa) {
  const categories = [
    {
      key: "whisky",
      label: "Whisky",
      startUrl: "https://sierraspringsliquor.ca/product-category/whisky-2/",
      discoveryStartPage: 20,
    },
    {
      key: "fine-rare",
      label: "Fine & Rare",
      startUrl: "https://sierraspringsliquor.ca/product-category/fine-rare/",
      discoveryStartPage: 1,
    },
    {
      // Broad category: narrowed to rum/whisky product URLs by allowUrl.
      key: "spirits-liquor",
      label: "Spirits / Liquor",
      startUrl: "https://sierraspringsliquor.ca/product-category/spirits-liquor/page/2/",
      discoveryStartPage: 15,
      allowUrl: allowSierraSpiritsLiquorUrlRumWhisky,
    },
    {
      key: "spirits",
      label: "Spirits",
      startUrl: "https://sierraspringsliquor.ca/product-category/spirits/",
      discoveryStartPage: 1,
    },
  ];

  return {
    key: "sierrasprings",
    name: "Sierra Springs",
    host: "sierraspringsliquor.ca",
    ua: defaultUa,
    parseProducts: parseProductsSierra,
    categories,
  };
}
module.exports = { createStore, parseProductsSierra };

495
src/stores/strath.js Normal file
View file

@ -0,0 +1,495 @@
"use strict";
const { decodeHtml, stripTags, cleanText, extractFirstImgUrl } = require("../utils/html");
const { normalizeCspc } = require("../utils/sku");
const { humanBytes } = require("../utils/bytes");
const { padLeft, padRight } = require("../utils/string");
const { mergeDiscoveredIntoDb } = require("../tracker/merge");
const { buildDbObject, writeJsonAtomic } = require("../tracker/db");
const { addCategoryResultToReport } = require("../tracker/report");
/**
 * Format a byte count with `humanBytes`, left-padded to 8 columns for log alignment.
 * @param {number} bytes
 * @returns {string}
 */
function kbStr(bytes) {
  const label = humanBytes(bytes);
  return label.padStart(8, " ");
}
/**
 * Format a millisecond duration as seconds, left-padded to 7 columns.
 * Durations that round below 10s show one decimal ("1.2s"); longer ones are
 * rounded to whole seconds ("12s"). Non-finite input is treated as 0.
 * @param {number} ms
 * @returns {string}
 */
function secStr(ms) {
  const seconds = Number.isFinite(ms) ? ms / 1000 : 0;
  const roundedTenths = Math.round(seconds * 10) / 10;
  const label = roundedTenths < 10 ? `${roundedTenths.toFixed(1)}s` : `${Math.round(seconds)}s`;
  return label.padStart(7, " ");
}
/**
 * Render "i/total", padding `i` to the printed width of `total`.
 * @param {number} i - Current page number.
 * @param {number} total - Total number of pages.
 * @returns {string}
 */
function pageStr(i, total) {
  const width = String(total).length;
  const current = padLeft(i, width);
  return `${current}/${total}`;
}
/**
 * Render done/total as a floored whole percentage padded to 3 columns plus "%".
 * A falsy total yields 0%.
 * @param {number} done
 * @param {number} total
 * @returns {string}
 */
function pctStr(done, total) {
  let pct = 0;
  if (total) pct = Math.floor((done / total) * 100);
  return `${padLeft(pct, 3)}%`;
}
/**
 * Split HTML into chunks that each start at an `<article` tag.
 * Text before the first tag is dropped; each chunk is re-prefixed with a
 * lower-case "<article" regardless of the original casing.
 * @param {string} html
 * @returns {string[]} One string per article; empty when none are found.
 */
function extractArticles(html) {
  const [, ...tails] = String(html || "").split(/<article\b/i);
  return tails.map((tail) => "<article" + tail);
}
/**
 * Extract the first dollar amount from free text and normalise it to
 * "$<digits>[.cc]" with whitespace and thousands separators removed.
 *
 * The grouped form ("1,299") is only accepted when at least one ",ddd" group
 * is present; otherwise the whole digit run is taken. This keeps amounts
 * written without separators (e.g. "$1299.99") from being cut off after the
 * first three digits.
 *
 * @param {string} str - Text that may contain a price.
 * @returns {string} Normalised price, or "" when no "$" amount is found.
 */
function normalizePrice(str) {
  const text = String(str || "");
  const m = text.match(/\$\s*(?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d{2})?/);
  if (!m) return "";
  return m[0].replace(/[\s,]+/g, "");
}
/**
 * Choose the price to record for one product article.
 *
 * Any `whiskyfolk-price` div is blanked out first (presumably a members-only
 * price — confirm against the site). The remaining HTML is searched in order:
 *   1. the content of an `<ins>` element,
 *   2. text following a `regular-price-card` class,
 *   3. the `product-price` div, falling back to the whole article.
 *
 * @param {string} articleHtml
 * @returns {string} Normalised price, or "" when none is found.
 */
function pickPriceFromArticle(articleHtml) {
  const memberPriceRe =
    /<div\b[^>]*class=["'][^"']*\bwhiskyfolk-price\b[^"']*["'][^>]*>[\s\S]*?<\/div>/gi;
  const publicHtml = String(articleHtml || "").replace(memberPriceRe, " ");

  const saleMatch = publicHtml.match(/<ins\b[^>]*>[\s\S]*?(\$[\s\S]{0,32}?)<\/ins>/i);
  if (saleMatch && saleMatch[1]) return normalizePrice(saleMatch[1]);

  const regularMatch = publicHtml.match(
    /class=["'][^"']*\bregular-price-card\b[^"']*["'][^>]*>\s*([^<]+)/i
  );
  if (regularMatch && regularMatch[1]) return normalizePrice(regularMatch[1]);

  const priceDivMatch = publicHtml.match(
    /<div\b[^>]*class=["'][^"']*\bproduct-price\b[^"']*["'][^>]*>([\s\S]*?)<\/div>/i
  );
  const hasPriceDiv = Boolean(priceDivMatch && priceDivMatch[1]);
  return normalizePrice(hasPriceDiv ? priceDivMatch[1] : publicHtml);
}
/**
 * Find a numeric product id in an article's HTML.
 * Sources are tried in order: the `<article id="…">` attribute, a `post-<id>`
 * token, then a `data-product_id` attribute.
 * @param {string} articleHtml
 * @returns {number} The id, or 0 when none is found.
 */
function extractProductIdFromArticle(articleHtml) {
  const article = String(articleHtml || "");
  const idPatterns = [
    /<article\b[^>]*\bid=["'](\d{1,10})["']/i,
    /\bpost-(\d{1,10})\b/i,
    /\bdata-product_id=["'](\d{1,10})["']/i,
  ];
  for (const pattern of idPatterns) {
    const found = article.match(pattern);
    if (found && found[1]) return Number(found[1]);
  }
  return 0;
}
/**
 * Find a six-digit SKU in an article's HTML.
 * A `data-product_sku` attribute is preferred; otherwise a six-digit number
 * within 20 non-digit characters after the word "SKU" is used.
 * @param {string} articleHtml
 * @returns {string} The six-digit SKU, or "" when none is found.
 */
function extractSkuFromArticle(articleHtml) {
  const article = String(articleHtml || "");
  const skuPatterns = [
    /\bdata-product_sku=["'](\d{6})["']/i,
    /\bSKU\b[^0-9]{0,20}(\d{6})\b/i,
  ];
  for (const pattern of skuPatterns) {
    const found = article.match(pattern);
    if (found && found[1]) return found[1];
  }
  return "";
}
/**
 * Decide from listing HTML whether a product appears to be purchasable.
 *
 * Negative markers ("outofstock", "Currently Unavailable") win over positive
 * ones. Positive markers are the "instock" token, "Bottles
 * Remaining/Available" (which also covers "10+ Bottles Available") and
 * "Only N Bottle Left". With no marker at all the product is treated as
 * unavailable.
 *
 * @param {string} articleHtml
 * @returns {boolean}
 */
function looksInStock(articleHtml) {
  const a = String(articleHtml || "");
  if (/\boutofstock\b/i.test(a) || /Currently\s+Unavailable/i.test(a)) return false;
  return (
    /\binstock\b/i.test(a) ||
    /\bBottles\s+(?:Remaining|Available)\b/i.test(a) ||
    /Only\s+\d+\s+Bottle\s+Left/i.test(a)
  );
}
/**
 * Turn one listing `<article>` into a product record.
 *
 * Returns null when the article does not look in stock, has no usable link,
 * or yields no name. The name is "<product-title> - <product-subtitle>" with
 * empty parts omitted. The SKU comes from the HTML when present, otherwise
 * from `normalizeCspc(url)`.
 *
 * @param {string} articleHtml
 * @returns {{name: string, price: string, url: string, sku: string,
 *   productId: number, img: *}|null}
 */
function parseProductFromArticle(articleHtml) {
  const article = String(articleHtml || "");
  if (!looksInStock(article)) return null;

  // The first anchor in the article is taken as the product link.
  const linkMatch = article.match(/<a\b[^>]*href=["']([^"']+)["']/i);
  if (!(linkMatch && linkMatch[1])) return null;

  let url;
  try {
    url = new URL(decodeHtml(linkMatch[1]), "https://www.strathliquor.com/").toString();
  } catch {
    return null;
  }

  const headingText = (match) => cleanText(decodeHtml(stripTags((match && match[1]) || "")));
  const title = headingText(
    article.match(/<h2\b[^>]*class=["'][^"']*\bproduct-title\b[^"']*["'][^>]*>([\s\S]*?)<\/h2>/i)
  );
  const sub = headingText(
    article.match(/<h3\b[^>]*class=["'][^"']*\bproduct-subtitle\b[^"']*["'][^>]*>([\s\S]*?)<\/h3>/i)
  );
  const name = cleanText([title, sub].filter(Boolean).join(" - "));
  if (!name) return null;

  const price = pickPriceFromArticle(article);
  const productId = extractProductIdFromArticle(article);
  const skuFromHtml = extractSkuFromArticle(article);
  const fallbackSku = normalizeCspc(url) || "";
  const img = extractFirstImgUrl(article, "https://www.strathliquor.com/");

  return { name, price, url, sku: skuFromHtml || fallbackSku, productId, img };
}
/* ---------------- Store API paging ---------------- */
/**
 * Derive the WooCommerce Store API products URL (newest first) for the host
 * of a category listing URL, carrying over the listing's stock and price
 * filters where they can be read.
 *
 * NOTE(review): `searchParams.get` decodes "+" as a space, so a listing query
 * such as `_sfm__regular_price=0+6000` arrives here as "0 6000" and does not
 * match the "min+max" pattern below; only a percent-encoded "%2B" does. The
 * Store API documents min_price/max_price in the currency's smallest unit, so
 * confirm the intended units before relaxing the pattern.
 *
 * @param {string} startUrl - Category listing URL.
 * @returns {URL} Store API URL without paging parameters.
 */
function buildStoreApiBaseUrlFromCategoryUrl(startUrl) {
  const listing = new URL(startUrl);
  const query = listing.searchParams;

  const api = new URL(`https://${listing.hostname}/wp-json/wc/store/v1/products`);
  api.searchParams.set("order", "desc");
  api.searchParams.set("orderby", "date");

  const stockFilter = query.get("_sfm__stock_status");
  if (stockFilter && /instock/i.test(stockFilter)) {
    api.searchParams.set("stock_status", "instock");
  }

  const priceRange = query.get("_sfm__regular_price");
  const bounds = priceRange ? String(priceRange).match(/^\s*([0-9]+)\s*\+\s*([0-9]+)\s*$/) : null;
  if (bounds) {
    const [, minPrice, maxPrice] = bounds;
    api.searchParams.set("min_price", minPrice);
    api.searchParams.set("max_price", maxPrice);
  }
  return api;
}
/**
 * Check whether a Store API product lists a category with the given slug.
 * Comparison is trimmed and case-insensitive; an empty `wanted` matches
 * every product.
 * @param {{categories?: Array<{slug?: string}>}} p - Store API product.
 * @param {string} wanted - Category slug to look for.
 * @returns {boolean}
 */
function hasCategorySlug(p, wanted) {
  const normalize = (value) => String(value || "").trim().toLowerCase();
  const target = normalize(wanted);
  if (!target) return true;
  const categories = Array.isArray(p?.categories) ? p.categories : [];
  return categories.some((category) => normalize(category?.slug) === target);
}
/**
 * Read a product's page URL from `permalink` (or `link`), trimmed.
 * @param {{permalink?: string, link?: string}} p - Store API product.
 * @returns {string} The URL when it starts with "http", otherwise "".
 */
function normalizeProductUrl(p) {
  const link = String(p?.permalink || p?.link || "").trim();
  if (!link) return "";
  return link.startsWith("http") ? link : "";
}
/**
 * Produce a plain-text product name from a Store API product.
 * @param {{name?: string}} p - Store API product.
 * @returns {string} Name with markup stripped, entities decoded and text cleaned.
 */
function normalizeProductName(p) {
  // Store API "name" can contain HTML entities like &#8211; and sometimes markup like <em>
  const rawName = String(p?.name || "");
  const withoutMarkup = stripTags(rawName);
  return cleanText(decodeHtml(withoutMarkup));
}
/**
 * Pick an image URL for a Store API product.
 *
 * `images` entries are scanned in order; an entry may be a string or an
 * object, from which the first non-empty string among `src`, `thumbnail` and
 * `url` is taken. Entries that trim to "" are skipped. If no entry
 * qualifies, `image`, `image_url` or `imageUrl` on the product is used.
 * Protocol-relative URLs ("//host/…") are given an "https:" prefix.
 *
 * @param {object} p - Store API product.
 * @returns {string} Image URL, or "" when none is available.
 */
function normalizeProductImage(p) {
  const withScheme = (link) => (link.startsWith("//") ? `https:${link}` : link);
  const isFilledString = (value) => typeof value === "string" && value !== "";

  const images = Array.isArray(p?.images) ? p.images : [];
  for (const entry of images) {
    if (!entry) continue;
    const candidates = typeof entry === "string" ? [entry] : [entry.src, entry.thumbnail, entry.url];
    const picked = candidates.find(isFilledString);
    const link = String(picked || "").trim();
    if (link) return withScheme(link);
  }

  const direct = String(p?.image || p?.image_url || p?.imageUrl || "").trim();
  return direct ? withScheme(direct) : "";
}
/**
 * Convert an integer amount in minor units to a decimal string, e.g.
 * ("11035", 2) -> "110.35" and ("5", 2) -> "0.05".
 *
 * Works on the digit string rather than on floats, so no rounding occurs.
 *
 * @param {string|number} valueStr - Amount in minor units; digits only.
 * @param {number|string} minorUnit - Number of fractional digits. Must be an
 *   integer from 0 to 6; anything else (including fractions such as 1.5) is
 *   rejected rather than producing a malformed string.
 * @returns {string} Decimal string, or "" when either input is invalid.
 */
function toMoneyStringFromMinorUnits(valueStr, minorUnit) {
  const mu = Number(minorUnit);
  if (!Number.isInteger(mu) || mu < 0 || mu > 6) return "";
  const digits = String(valueStr || "").trim();
  if (!/^\d+$/.test(digits)) return "";
  if (mu === 0) return digits;
  // Guarantee at least one digit before the decimal point ("5" -> "005" for mu=2).
  const padded = digits.padStart(mu + 1, "0");
  const cut = padded.length - mu;
  return `${padded.slice(0, cut)}.${padded.slice(cut)}`;
}
/**
 * Produce a "$d.cc" price string for a Store API product.
 *
 * When `p.prices` is present, the sale price is preferred over the regular
 * price. An all-digit value accompanied by `currency_minor_unit` is converted
 * from minor units first. If that yields no finite non-negative number, the
 * function falls back to scanning `p.price` / `p.price_html` for a "$" amount.
 *
 * @param {object} p - Store API product.
 * @returns {string} Price such as "$110.35", or "" when none is found.
 */
function normalizeProductPrice(p) {
  const prices = p?.prices;
  // Woo store API commonly returns minor units (e.g., "11035" with minor_unit=2 => 110.35)
  if (prices && typeof prices === "object") {
    const minor = prices.currency_minor_unit;
    const sale = String(prices.sale_price || "").trim();
    const regular = String(prices.regular_price || "").trim();
    const candidate = sale || regular;
    if (candidate) {
      const inMinorUnits = /^\d+$/.test(candidate) && minor !== undefined && minor !== null;
      // An empty conversion result keeps the raw value, as a failed conversion should.
      const major = (inMinorUnits && toMoneyStringFromMinorUnits(candidate, minor)) || candidate;
      const amount = Number(major);
      if (Number.isFinite(amount) && amount >= 0) return `$${amount.toFixed(2)}`;
    }
  }
  const fallbackText = String(p?.price || p?.price_html || "").trim();
  return normalizePrice(fallbackText);
}
/**
 * Return the product's SKU only when it is exactly six digits.
 * @param {{sku?: string|number}} p - Store API product.
 * @returns {string} Six-digit SKU, or "".
 */
function normalizeProductSku(p) {
  const candidate = String(p?.sku || "").trim();
  return /^\d{6}$/.test(candidate) ? candidate : "";
}
/**
 * Read the product id as a number.
 * @param {{id?: number|string}} p - Store API product.
 * @returns {number} The numeric id, or 0 when it is missing or not finite.
 */
function normalizeProductId(p) {
  const parsed = Number(p?.id);
  if (!Number.isFinite(parsed)) return 0;
  return parsed;
}
/**
 * Fetch one page of Store API results.
 *
 * The base URL is copied before `page` / `per_page` are set, so the caller's
 * URL object is not modified. The request is made through
 * `ctx.http.fetchJsonWithRetry` with the category's start URL as Referer.
 *
 * @param {object} ctx - Scan context (`http`, `cat`, `store` are read).
 * @param {URL|string} apiBaseUrl - Store API URL without paging parameters.
 * @param {number} page - 1-based page number.
 * @param {number} perPage - Page size.
 * @returns {Promise<*>} Whatever `fetchJsonWithRetry` resolves to.
 */
async function fetchStoreApiPage(ctx, apiBaseUrl, page, perPage) {
  const pageUrl = new URL(apiBaseUrl.toString());
  pageUrl.searchParams.set("page", String(page));
  pageUrl.searchParams.set("per_page", String(perPage));

  const tag = `strath:storeapi:${ctx.cat.key}:p${page}`;
  const requestOptions = {
    method: "GET",
    headers: {
      Accept: "application/json",
      Referer: ctx.cat.startUrl,
    },
  };
  return await ctx.http.fetchJsonWithRetry(pageUrl.toString(), tag, ctx.store.ua, requestOptions);
}
/**
 * Guard against a partial scan wiping out the stored catalogue.
 *
 * If this run found fewer than 60% of the previously active items, every
 * previously active item that was not rediscovered is copied into
 * `discovered`, so the subsequent merge does not mark it removed.
 *
 * Items already flagged `removed` are neither counted nor copied: copying a
 * tombstone into `discovered` would make the merge report it as restored.
 *
 * @param {Map<string, object>|{byUrl: Map<string, object>}} prevDb - Previous
 *   items keyed by URL, either as a bare Map or as the `{ byUrl }` object the
 *   merge step works with.
 * @param {Map<string, object>} discovered - Items found this run; mutated in
 *   place when the guard triggers.
 * @param {object} ctx - Scan context (`logger.warn`, `catPrefixOut` are read).
 * @param {string} reason - Free text appended to the warning.
 * @returns {boolean} True when previous items were carried over.
 */
function avoidMassRemoval(prevDb, discovered, ctx, reason) {
  const prevMap = prevDb && prevDb.byUrl instanceof Map ? prevDb.byUrl : prevDb;
  if (!prevMap || typeof prevMap.entries !== "function") return false;
  if (!discovered || typeof discovered.set !== "function") return false;

  const activePrev = [];
  for (const [url, item] of prevMap.entries()) {
    if (!item || !item.removed) activePrev.push([url, item]);
  }

  const prevSize = activePrev.length;
  const discSize = typeof discovered.size === "number" ? discovered.size : 0;
  if (prevSize <= 0 || discSize <= 0) return false;
  if (discSize / prevSize >= 0.6) return false;

  ctx.logger.warn?.(
    `${ctx.catPrefixOut} | Strath partial scan (${discSize}/${prevSize}); preserving DB to avoid removals (${reason}).`
  );
  for (const [url, item] of activePrev) {
    if (!discovered.has(url)) discovered.set(url, item);
  }
  return true;
}
/**
 * Scan one Strath category and persist the result.
 *
 * Steps:
 *   1. Fetch the HTML listing once and seed `discovered` from its articles
 *      (a failed fetch is logged and the scan continues with the API only).
 *   2. Page through the Store API, keeping in-stock products that match the
 *      category's `apiCategorySlug` when one is configured.
 *   3. If the run looks partial, carry over previously active items so they
 *      are not reported as removed.
 *   4. Merge into the previous DB, write it, and append to the report.
 *
 * @param {object} ctx - Scan context (`cat`, `store`, `http`, `logger`,
 *   `config`, `dbFile`, `catPrefixOut` are read).
 * @param {{byUrl: Map<string, object>}} prevDb - Previously stored items.
 * @param {object} report - Run report; `categories` and `totals` are updated.
 * @returns {Promise<void>}
 */
async function scanCategoryStrath(ctx, prevDb, report) {
  const t0 = Date.now();

  // Listing HTML (seed + sanity)
  let html = "";
  let listingFinalUrl = ctx.cat.startUrl;
  let listingStatus = 0;
  let listingBytes = 0;
  let listingMs = 0;
  try {
    const r = await ctx.http.fetchTextWithRetry(ctx.cat.startUrl, `strath:html:${ctx.cat.key}`, ctx.store.ua);
    html = r.text || "";
    listingFinalUrl = r.finalUrl || ctx.cat.startUrl;
    listingStatus = r.status || 0;
    listingBytes = r.bytes || 0;
    listingMs = r.ms || 0;
  } catch (e) {
    ctx.logger.warn(`${ctx.catPrefixOut} | Strath listing HTML fetch failed: ${e?.message || e}`);
  }

  const discovered = new Map();
  const listingArticles = extractArticles(html);
  let listingItems = 0;
  for (const art of listingArticles) {
    const it = parseProductFromArticle(art);
    if (it) {
      discovered.set(it.url, it);
      listingItems++;
    }
  }
  ctx.logger.ok(
    `${ctx.catPrefixOut} | Page ${pageStr(1, 1)} | ${String(listingStatus || "").padEnd(3)} | ${pctStr(1, 1)} | items=${padLeft(
      listingItems,
      3
    )} | bytes=${kbStr(listingBytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(listingMs)}`
  );

  const apiBase = buildStoreApiBaseUrlFromCategoryUrl(listingFinalUrl || ctx.cat.startUrl);
  const perPage = 100;
  const maxPagesCap = ctx.config.maxPages === null ? 5000 : ctx.config.maxPages;
  const wantedSlug = String(ctx.cat.apiCategorySlug || "").trim().toLowerCase();
  let donePages = 0;
  let emptyMatchPages = 0;

  for (let page = 1; page <= maxPagesCap; page++) {
    let r;
    try {
      r = await fetchStoreApiPage(ctx, apiBase, page, perPage);
    } catch (e) {
      // Stop paging on the first failure; the partial-scan guard below limits the damage.
      ctx.logger.warn?.(`${ctx.catPrefixOut} | Strath Store API page ${page} failed: ${e?.message || e}`);
      break;
    }
    const arr = Array.isArray(r?.json) ? r.json : [];
    donePages++;
    if (!arr.length) break;

    let kept = 0;
    for (const p of arr) {
      const stock = String(p?.stock_status || "").toLowerCase();
      if (stock && stock !== "instock") continue;
      if (wantedSlug && !hasCategorySlug(p, wantedSlug)) continue;
      const url = normalizeProductUrl(p);
      if (!url) continue;
      const name = normalizeProductName(p);
      if (!name) continue;
      const price = normalizeProductPrice(p);
      const sku = normalizeProductSku(p);
      const productId = normalizeProductId(p);
      const fallbackSku = sku || normalizeCspc(url) || "";
      // Keep the image scraped from the listing HTML when the API has none.
      const prev = discovered.get(url) || null;
      const img = normalizeProductImage(p) || (prev && prev.img) || "";
      discovered.set(url, {
        name,
        price,
        url,
        sku: sku || fallbackSku,
        productId,
        img,
      });
      kept++;
    }

    ctx.logger.ok(
      `${ctx.catPrefixOut} | API Page ${pageStr(donePages, donePages)} | ${(r?.status || "").toString().padEnd(3)} | kept=${padLeft(
        kept,
        3
      )} | bytes=${kbStr(r.bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}`
    );

    if (wantedSlug) {
      if (kept === 0) emptyMatchPages++;
      else emptyMatchPages = 0;
      // If filter is tight (rum), stop after 2 empty pages in a row.
      if (emptyMatchPages >= 2) break;
    }
    if (arr.length < perPage) break;
  }

  // prevDb is the `{ byUrl: Map }` object that mergeDiscoveredIntoDb reads
  // below, so the guard must be fed the Map itself. Only active entries are
  // passed: carrying a removed entry into `discovered` would make the merge
  // report it as restored.
  const prevByUrl = prevDb?.byUrl;
  if (prevByUrl instanceof Map) {
    const activePrev = new Map();
    for (const [k, v] of prevByUrl.entries()) {
      if (!v || !v.removed) activePrev.set(k, v);
    }
    avoidMassRemoval(activePrev, discovered, ctx, `storeapi pages=${donePages} slug=${wantedSlug || "none"}`);
  }

  ctx.logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}`);
  const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered);
  const dbObj = buildDbObject(ctx, merged);
  writeJsonAtomic(ctx.dbFile, dbObj);
  ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`);

  const elapsed = Date.now() - t0;
  ctx.logger.ok(
    `${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`
  );

  report.categories.push({
    store: ctx.store.name,
    label: ctx.cat.label,
    key: ctx.cat.key,
    dbFile: ctx.dbFile,
    // One HTML listing page plus the API pages fetched.
    scannedPages: 1 + Math.max(0, donePages),
    discoveredUnique: discovered.size,
    newCount: newItems.length,
    updatedCount: updatedItems.length,
    removedCount: removedItems.length,
    restoredCount: restoredItems.length,
    elapsedMs: elapsed,
  });
  report.totals.newCount += newItems.length;
  report.totals.updatedCount += updatedItems.length;
  report.totals.removedCount += removedItems.length;
  report.totals.restoredCount += restoredItems.length;
  addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems);
}
/**
 * Build the Strath Liquor store descriptor.
 *
 * This store supplies its own `scanCategory` instead of a page parser. Each
 * category carries the `apiCategorySlug` used to filter Store API results.
 *
 * @param {string} defaultUa - User-Agent assigned to the store's `ua` field.
 * @returns {object} Store descriptor.
 */
function createStore(defaultUa) {
  const whisky = {
    key: "whisky",
    label: "Whisky",
    apiCategorySlug: "whisky",
    startUrl:
      "https://www.strathliquor.com/whisky/?_sfm__stock_status=instock&_sfm__regular_price=0+6000&_sfm_product_abv=20+75&orderby=date",
  };
  const rum = {
    key: "spirits-rum",
    label: "Spirits - Rum",
    apiCategorySlug: "rum",
    startUrl:
      "https://www.strathliquor.com/spirits/?_sfm__stock_status=instock&_sfm__regular_price=0+600&_sfm_product_type=Rum&_sfm_product_abv=10+75&orderby=date",
  };

  return {
    key: "strath",
    name: "Strath Liquor",
    host: "www.strathliquor.com",
    ua: defaultUa,
    scanCategory: scanCategoryStrath,
    categories: [whisky, rum],
  };
}
module.exports = { createStore };

View file

@ -0,0 +1,292 @@
"use strict";
const { humanBytes } = require("../utils/bytes");
const { padLeft, padRight, padLeftV, padRightV } = require("../utils/string");
const { normalizeBaseUrl, makePageUrlForCtx } = require("../utils/url");
const { parallelMapStaggered } = require("../utils/async");
const { ensureDir, dbPathFor, readDb, writeJsonAtomic, buildDbObject } = require("./db");
const { mergeDiscoveredIntoDb } = require("./merge");
const { addCategoryResultToReport } = require("./report");
const ACTION_W = 24;
const STATUS_W = 4;
const PROG_W = 4;
/**
 * Human-readable byte count (via `humanBytes`), right-aligned in 8 columns.
 * @param {number} bytes
 * @returns {string}
 */
function kbStr(bytes) {
  const WIDTH = 8;
  const text = humanBytes(bytes);
  return text.padStart(WIDTH, " ");
}
/**
 * Render a duration given in milliseconds as a 7-column seconds label:
 * one decimal while the value rounds below 10s, whole seconds otherwise.
 * Non-finite input counts as 0.
 * @param {number} ms
 * @returns {string}
 */
function secStr(ms) {
  const WIDTH = 7;
  const s = Number.isFinite(ms) ? ms / 1000 : 0;
  const tenths = Math.round(s * 10) / 10;
  if (tenths < 10) return `${tenths.toFixed(1)}s`.padStart(WIDTH, " ");
  return `${Math.round(s)}s`.padStart(WIDTH, " ");
}
/**
 * Whole-number percentage of done/total (floored), padded to 3 columns and
 * followed by "%". Returns 0% when `total` is falsy.
 * @param {number} done
 * @param {number} total
 * @returns {string}
 */
function pctStr(done, total) {
  const fraction = total ? done / total : 0;
  const pct = total ? Math.floor(fraction * 100) : 0;
  return `${padLeft(pct, 3)}%`;
}
/**
 * "i/total" with `i` padded to as many columns as `total` prints in.
 * @param {number} i
 * @param {number} total
 * @returns {string}
 */
function pageStr(i, total) {
  const totalText = String(total);
  return `${padLeft(i, totalText.length)}/${total}`;
}
/**
 * Fit an action label into the fixed-width action column (ACTION_W).
 * @param {*} s - Label; converted with String().
 * @returns {string}
 */
function actionCell(s) {
  const label = String(s);
  return padRightV(label, ACTION_W);
}
/**
 * Fit a status into the status column (STATUS_W) and colour it: green when
 * `okBool` is truthy, yellow otherwise. A falsy status yields the padded,
 * uncoloured cell.
 * @param {object} logger - Provides `color(text, code)` and the `C` palette.
 * @param {*} statusRaw - Status text or code.
 * @param {boolean} okBool
 * @returns {string}
 */
function statusCell(logger, statusRaw, okBool) {
  const padded = padRightV(String(statusRaw || ""), STATUS_W);
  if (!statusRaw) return padded;
  const tint = okBool ? logger.C.green : logger.C.yellow;
  return logger.color(padded, tint);
}
/**
 * Fit a progress value into the progress column (PROG_W); null/undefined
 * render as "----".
 * @param {*} v
 * @returns {string}
 */
function progCell(v) {
  return padLeftV(String(v ?? "----"), PROG_W);
}
/**
 * Emit one aligned progress row via `logger.ok`:
 * "<category prefix> | <action> | <status> | <progress> | <rest>".
 * @param {object} logger
 * @param {object} ctx - Supplies `catPrefixOut`.
 * @param {string} action
 * @param {*} statusRaw
 * @param {boolean} statusOk
 * @param {*} progVal
 * @param {string} rest - Pre-formatted trailing columns.
 * @returns {void}
 */
function logProgressLine(logger, ctx, action, statusRaw, statusOk, progVal, rest) {
  const actionCol = actionCell(action);
  const statusCol = statusCell(logger, statusRaw, statusOk);
  const progCol = progCell(progVal);
  logger.ok(`${ctx.catPrefixOut} | ${actionCol} | ${statusCol} | ${progCol} | ${rest}`);
}
/**
 * Create formatters for the "<store> | <category>" log prefix, with both
 * columns padded to the longest store name / category label across `stores`
 * (minimum width 1).
 *
 * @param {Array<{name?: string, categories?: Array<{label?: string}>}>} stores
 * @param {object} logger - Provides `bold(text)`.
 * @returns {{catPrefixRaw: Function, catPrefixOut: Function, width: number, catW: number}}
 *   `catPrefixRaw` returns the plain prefix, `catPrefixOut` the bold one;
 *   `width` is the store column width and `catW` the category column width.
 */
function makeCatPrefixers(stores, logger) {
  const nameLengths = stores.map((s) => String(s.name || "").length);
  const labelLengths = stores.flatMap((s) =>
    (s.categories || []).map((c) => String(c.label || "").length)
  );
  const storeW = Math.max(1, ...nameLengths);
  const catW = Math.max(1, ...labelLengths);

  const catPrefixRaw = (store, cat) =>
    `${padRight(String(store.name || ""), storeW)} | ${padRight(String(cat.label || ""), catW)}`;
  const catPrefixOut = (store, cat) => logger.bold(catPrefixRaw(store, cat));

  return { catPrefixRaw, catPrefixOut, width: storeW, catW };
}
/**
 * Assemble the per-category context: the store, the category, its normalised
 * base URL, the DB file path (keyed by "<store.key>__<cat.key>" plus the base
 * URL) and the rendered log prefix.
 *
 * @param {object} store
 * @param {object} cat
 * @param {Function} catPrefixOutFn - `(store, cat) => string`.
 * @param {{dbDir: string}} config
 * @returns {{store: object, cat: object, baseUrl: *, dbFile: string, catPrefixOut: string}}
 */
function buildCategoryContext(store, cat, catPrefixOutFn, config) {
  const baseUrl = normalizeBaseUrl(cat.startUrl);
  const dbKey = `${store.key}__${cat.key}`;
  const dbFile = dbPathFor(dbKey, baseUrl, config.dbDir);
  const catPrefixOut = catPrefixOutFn(store, cat);
  return { store, cat, baseUrl, dbFile, catPrefixOut };
}
/**
 * Read the category's stored DB and log how many URLs it holds.
 * @param {object} logger - Provides `ok` and `dim`.
 * @param {{dbFile: string, catPrefixOut: string}} ctx
 * @returns {{byUrl: Map<string, object>}} The DB as returned by `readDb`.
 */
function loadCategoryDb(logger, ctx) {
  const prevDb = readDb(ctx.dbFile);
  const countCol = padLeft(prevDb.byUrl.size, 5);
  logger.ok(`${ctx.catPrefixOut} | DB loaded: ${countCol} | ${logger.dim(ctx.dbFile)}`);
  return prevDb;
}
/**
 * Apply the category's optional `allowUrl` filter to an item.
 * @param {object} ctx - Scan context; `ctx.cat.allowUrl` is consulted.
 * @param {string} finalUrl - URL the listing page resolved to.
 * @param {object} item - Parsed product.
 * @returns {*} True when no filter is configured, otherwise the filter's
 *   result for `(item, ctx, finalUrl)`.
 */
function shouldTrackItem(ctx, finalUrl, item) {
  const predicate = ctx?.cat?.allowUrl;
  return typeof predicate === "function" ? predicate(item, ctx, finalUrl) : true;
}
/**
 * Probe a listing URL and report whether it contains products.
 *
 * The page counts as empty when the store's optional `isEmptyListingPage`
 * hook says so or when the parser returns no items. The parser receives the
 * same `(html, ctx, finalUrl)` arguments as during the real page scan.
 *
 * Any failure (fetch or parse) is reported as an empty page so discovery can
 * continue, but it is sent to the debug log so that a failed probe can be
 * told apart from a genuinely empty page.
 *
 * @param {object} ctx - Scan context (`http`, `config`, `logger`, `store`).
 * @param {string} url - Listing page URL to probe.
 * @returns {Promise<{ok: boolean, items: number}>}
 */
async function pageHasProducts(ctx, url) {
  const { http, config, logger } = ctx;
  try {
    const { text, finalUrl } = await http.fetchTextWithRetry(url, "discover", ctx.store.ua);
    if (typeof ctx.store.isEmptyListingPage === "function" && ctx.store.isEmptyListingPage(text, ctx, url)) {
      return { ok: false, items: 0 };
    }
    const parser = ctx.store.parseProducts || config.defaultParseProducts;
    const items = parser(text, ctx, finalUrl).length;
    return { ok: items > 0, items };
  } catch (e) {
    logger?.dbg?.(`pageHasProducts: probe failed for ${url}: ${e?.message || e}`);
    return { ok: false, items: 0 };
  }
}
/**
 * Probe one page number during discovery and log the outcome as a progress row.
 * @param {object} ctx - Scan context.
 * @param {*} baseUrl - Category base URL passed to `makePageUrlForCtx`.
 * @param {number} pageNum - Page to probe.
 * @param {object} state - Discovery state, used only for the progress column.
 * @returns {Promise<{ok: boolean, items: number}>} Result of `pageHasProducts`.
 */
async function probePage(ctx, baseUrl, pageNum, state) {
  const target = makePageUrlForCtx(ctx, baseUrl, pageNum);
  const startedAt = Date.now();
  const outcome = await pageHasProducts(ctx, target);
  const elapsedMs = Date.now() - startedAt;

  const progress = discoverProg(state);
  const action = `Discover probe page=${padLeftV(pageNum, 4)}`;
  // Probes do not track response size, so the bytes column is left blank.
  const rest = `items=${padLeftV(outcome.items, 3)} | bytes=${padLeftV("", 8)} | ${padRightV(ctx.http.inflightStr(), 11)} | ${secStr(elapsedMs)}`;
  logProgressLine(ctx.logger, ctx, action, outcome.ok ? "OK" : "MISS", Boolean(outcome.ok), progress, rest);
  return outcome;
}
/**
 * Progress label for page discovery.
 *
 * Outside the binary-search phase the label is a fixed zero. During the
 * binary search it is the share of the initial [loOk, hiMiss] span that has
 * been eliminated, floored and clamped to 0–100.
 *
 * @param {{phase: string, loOk: number, hiMiss: number, binInitialSpan: number}} [state]
 * @returns {string}
 */
function discoverProg(state) {
  const inBinaryPhase = Boolean(state) && state.phase === "binary";
  if (!inBinaryPhase) return " 0%";

  const initial = Math.max(1, state.binInitialSpan);
  if (initial <= 1) return "100%";

  const span = Math.max(1, state.hiMiss - state.loOk);
  const remaining = Math.max(0, span - 1);
  const total = Math.max(1, initial - 1);
  const ratio = (total - remaining) / total;
  const pct = Math.min(100, Math.max(0, Math.floor(ratio * 100)));
  return `${padLeft(pct, 3)}%`;
}
/**
 * Binary-search for the last page that has products, given a page known to
 * have them (`loOk`) and a later page known not to (`hiMiss`).
 *
 * `state` is updated before each probe and once more at the end so progress
 * logging reflects the current bounds.
 *
 * @param {object} ctx - Scan context.
 * @param {*} baseUrl - Category base URL.
 * @param {number} loOk - Highest page known to have products.
 * @param {number} hiMiss - Lowest page known to be empty.
 * @param {object} state - Discovery state; mutated.
 * @returns {Promise<number>} The last page with products.
 */
async function binaryFindLastOk(ctx, baseUrl, loOk, hiMiss, state) {
  let lo = loOk;
  let hi = hiMiss;
  state.phase = "binary";
  state.loOk = lo;
  state.hiMiss = hi;
  state.binInitialSpan = Math.max(1, hi - lo);

  while (hi - lo > 1) {
    const mid = lo + Math.floor((hi - lo) / 2);
    state.loOk = lo;
    state.hiMiss = hi;
    const probe = await probePage(ctx, baseUrl, mid, state);
    if (probe.ok) {
      lo = mid;
    } else {
      hi = mid;
    }
  }

  state.loOk = lo;
  state.hiMiss = hi;
  return lo;
}
/**
 * Find the number of listing pages with few probes.
 *
 * Page 1 is probed first; if it has no products the category is treated as a
 * single page. Then the guessed page is probed: on a miss the answer is
 * binary-searched between 1 and the guess, on a hit the search strides
 * forward by `step` until a miss and binary-searches the last stride.
 *
 * `guess` and `step` are sanitised because a zero step would re-probe the
 * same page forever and a NaN or negative one would produce invalid page
 * numbers.
 *
 * @param {object} ctx - Scan context.
 * @param {*} baseUrl - Category base URL.
 * @param {number} guess - First page to try beyond page 1 (at least 2).
 * @param {number} step - Stride for forward probing (at least 1).
 * @returns {Promise<number>} Last page with products (1 when page 1 is empty).
 */
async function discoverTotalPagesFast(ctx, baseUrl, guess, step) {
  const SAFETY_CAP = 5000;
  const state = { phase: "pre", loOk: 1, hiMiss: 2, binInitialSpan: 0 };

  const p1 = await probePage(ctx, baseUrl, 1, state);
  if (!p1.ok) {
    ctx.logger.warn(`${ctx.store.name} | ${ctx.cat.label} | Page 1 did not look like a listing. Defaulting to 1.`);
    return 1;
  }

  const g = Number.isFinite(guess) ? Math.max(2, Math.floor(guess)) : 2;
  const stride = Number.isFinite(step) && step >= 1 ? Math.floor(step) : 1;

  const pg = await probePage(ctx, baseUrl, g, state);
  if (!pg.ok) return await binaryFindLastOk(ctx, baseUrl, 1, g, state);

  let lastOk = g;
  while (true) {
    const probe = lastOk + stride;
    const pr = await probePage(ctx, baseUrl, probe, state);
    if (!pr.ok) return await binaryFindLastOk(ctx, baseUrl, lastOk, probe, state);
    lastOk = probe;
    if (lastOk > SAFETY_CAP) {
      ctx.logger.warn(`${ctx.store.name} | ${ctx.cat.label} | Discovery hit safety cap at ${lastOk}. Using that as total pages.`);
      return lastOk;
    }
  }
}
/**
 * Scan one category end to end: discover how many pages it has, fetch and
 * parse each page, merge the result into the stored DB, write the DB file and
 * record the outcome in `report`.
 *
 * Stores that define `scanCategory` handle the whole scan themselves.
 *
 * @param {object} ctx - Scan context (`logger`, `config`, `store`, `cat`,
 *   `baseUrl`, `http`, `dbFile`, `catPrefixOut` are read).
 * @param {{byUrl: Map<string, object>}} prevDb - Previously stored items.
 * @param {object} report - Run report; `categories` and `totals` are updated.
 * @returns {Promise<void>}
 */
async function discoverAndScanCategory(ctx, prevDb, report) {
const { logger, config } = ctx;
// A store-specific scanner replaces the generic discover-and-scan flow.
if (typeof ctx.store.scanCategory === "function") {
await ctx.store.scanCategory(ctx, prevDb, report);
return;
}
const t0 = Date.now();
// The category's own starting guess wins over the global default.
const guess = Number.isFinite(ctx.cat.discoveryStartPage) ? ctx.cat.discoveryStartPage : config.discoveryGuess;
const step = config.discoveryStep;
const totalPages = await discoverTotalPagesFast(ctx, ctx.baseUrl, guess, step);
// maxPages === null means "no cap".
const scanPages = config.maxPages === null ? totalPages : Math.min(config.maxPages, totalPages);
logger.ok(`${ctx.catPrefixOut} | Pages: ${scanPages}${scanPages !== totalPages ? ` (cap from ${totalPages})` : ""}`);
const pages = [];
for (let p = 1; p <= scanPages; p++) pages.push(makePageUrlForCtx(ctx, ctx.baseUrl, p));
// Counts completed pages for the progress column, independent of page order.
let donePages = 0;
const perPageItems = await parallelMapStaggered(pages, config.concurrency, config.staggerMs, async (pageUrl, idx) => {
const pnum = idx + 1;
const { text: html, ms, bytes, status, finalUrl } = await ctx.http.fetchTextWithRetry(
pageUrl,
`page:${ctx.store.key}:${ctx.cat.key}:${pnum}`,
ctx.store.ua
);
const parser = ctx.store.parseProducts || config.defaultParseProducts;
const itemsRaw = parser(html, ctx, finalUrl);
// Drop items rejected by the category's optional allowUrl filter.
const items = [];
for (const it of itemsRaw) {
if (shouldTrackItem(ctx, finalUrl, it)) items.push(it);
}
donePages++;
logProgressLine(
logger,
ctx,
`Page ${pageStr(pnum, pages.length)}`,
status ? String(status) : "",
status >= 200 && status < 400,
pctStr(donePages, pages.length),
`items=${padLeft(items.length, 3)} | bytes=${kbStr(bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(ms)}`
);
return items;
});
// De-duplicate by URL across pages; a later occurrence replaces an earlier one.
const discovered = new Map();
let dups = 0;
for (const arr of perPageItems) {
for (const it of arr) {
if (discovered.has(it.url)) dups++;
discovered.set(it.url, it);
}
}
logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}${dups ? ` (${dups} dups)` : ""}`);
const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered);
const dbObj = buildDbObject(ctx, merged);
writeJsonAtomic(ctx.dbFile, dbObj);
logger.ok(`${ctx.catPrefixOut} | DB saved: ${logger.dim(ctx.dbFile)} (${dbObj.count} items)`);
const elapsed = Date.now() - t0;
logger.ok(
`${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`
);
// Per-category summary row plus run-wide totals and item lists.
report.categories.push({
store: ctx.store.name,
label: ctx.cat.label,
key: ctx.cat.key,
dbFile: ctx.dbFile,
scannedPages: scanPages,
discoveredUnique: discovered.size,
newCount: newItems.length,
updatedCount: updatedItems.length,
removedCount: removedItems.length,
restoredCount: restoredItems.length,
elapsedMs: elapsed,
});
report.totals.newCount += newItems.length;
report.totals.updatedCount += updatedItems.length;
report.totals.removedCount += removedItems.length;
report.totals.restoredCount += restoredItems.length;
addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems);
}
module.exports = { makeCatPrefixers, buildCategoryContext, loadCategoryDb, discoverAndScanCategory };

128
src/tracker/db.js Normal file
View file

@ -0,0 +1,128 @@
"use strict";
const fs = require("fs");
const path = require("path");
const crypto = require("crypto");
const { normalizeCspc } = require("../utils/sku");
const { priceToNumber } = require("../utils/price");
/**
 * Create a directory and any missing parents; a no-op when it already exists.
 * @param {string} dir
 * @returns {void}
 */
function ensureDir(dir) {
  const options = { recursive: true };
  fs.mkdirSync(dir, options);
}
/**
 * Compute the DB file path for a store/category key, creating `dbDir` on the way.
 *
 * The file name is "<sanitised key>__<first 8 hex chars of sha1(baseUrl)>.json";
 * runs of characters other than letters, digits, "_" and "-" in the key
 * become "-".
 *
 * @param {string} key
 * @param {string} baseUrl
 * @param {string} dbDir
 * @returns {string}
 */
function dbPathFor(key, baseUrl, dbDir) {
  ensureDir(dbDir);
  const digest = crypto.createHash("sha1").update(String(baseUrl)).digest("hex");
  const shortHash = digest.slice(0, 8);
  const fileStem = String(key).replace(/[^a-zA-Z0-9_-]+/g, "-");
  return path.join(dbDir, `${fileStem}__${shortHash}.json`);
}
/**
 * Load a category DB file into a URL-keyed Map.
 *
 * Only entries with a string `url` starting with "http" are kept. Fields are
 * coerced to strings (`img` falls back to `image` or `thumb`) and `removed`
 * to a boolean. A missing or unparsable file yields an empty DB.
 *
 * @param {string} file - Path to the JSON DB file.
 * @returns {{byUrl: Map<string, {name: string, price: string, sku: string,
 *   url: string, img: string, removed: boolean}>}}
 */
function readDb(file) {
  const byUrl = new Map();
  const isTrackable = (it) => Boolean(it) && typeof it.url === "string" && it.url.startsWith("http");
  try {
    const parsed = JSON.parse(fs.readFileSync(file, "utf8"));
    const items = parsed && Array.isArray(parsed.items) ? parsed.items : [];
    for (const it of items.filter(isTrackable)) {
      byUrl.set(it.url, {
        name: String(it.name || ""),
        price: String(it.price || ""),
        sku: String(it.sku || ""),
        url: it.url,
        img: String(it.img || it.image || it.thumb || "").trim(),
        removed: Boolean(it.removed),
      });
    }
  } catch {
    // ignore missing or parse errors
  }
  return { byUrl };
}
/**
 * Write `obj` as pretty-printed JSON (2-space indent, trailing newline).
 * The data is written to "<file>.tmp" and then renamed over `file`, so a
 * reader does not see a half-written file. The parent directory is created
 * if needed.
 * @param {string} file - Destination path.
 * @param {*} obj - Value to serialise.
 * @returns {void}
 */
function writeJsonAtomic(file, obj) {
  ensureDir(path.dirname(file));
  const payload = `${JSON.stringify(obj, null, 2)}\n`;
  const tmp = `${file}.tmp`;
  fs.writeFileSync(tmp, payload, "utf8");
  fs.renameSync(tmp, file);
}
/**
 * Build the JSON-serialisable DB document for one category.
 *
 * Items are sorted by name (`localeCompare`) and reduced to the persisted
 * fields; the SKU is passed through `normalizeCspc`. `count` is the size of
 * `merged`, so it includes entries flagged as removed.
 *
 * @param {object} ctx - Scan context (`store.host`, `store.name`, `cat.key`,
 *   `cat.label`, `baseUrl` are read).
 * @param {Map<string, object>} merged - Merged items keyed by URL.
 * @returns {object} Versioned DB document stamped with the current time.
 */
function buildDbObject(ctx, merged) {
  const byName = (a, b) => (a.name || "").localeCompare(b.name || "");
  const toRecord = (it) => ({
    name: it.name,
    price: it.price || "",
    sku: normalizeCspc(it.sku) || "",
    url: it.url,
    img: String(it.img || "").trim(),
    removed: Boolean(it.removed),
  });
  const { store, cat, baseUrl } = ctx;

  return {
    version: 6,
    store: store.host,
    storeLabel: store.name,
    category: cat.key,
    categoryLabel: cat.label,
    source: baseUrl,
    updatedAt: new Date().toISOString(),
    count: merged.size,
    items: [...merged.values()].sort(byName).map(toRecord),
  };
}
/**
 * List the ".json" files directly inside `dbDir` (non-recursive).
 * @param {string} dbDir
 * @returns {string[]} Paths joined with `dbDir`; empty when the directory
 *   cannot be read.
 */
function listDbFiles(dbDir) {
  try {
    return fs
      .readdirSync(dbDir, { withFileTypes: true })
      .filter((ent) => ent.isFile() && (ent.name || "").endsWith(".json"))
      .map((ent) => path.join(dbDir, ent.name || ""));
  } catch {
    // ignore
    return [];
  }
}
/**
 * Scan every DB file in `dbDir` and record, per SKU, the lowest positive
 * price seen and the store offering it.
 *
 * Removed items, items without a normalisable SKU and items without a
 * positive finite price are ignored. On a price tie the first store seen is
 * kept. A file that cannot be read or parsed stops contributing at that point.
 *
 * @param {string} dbDir
 * @returns {Map<string, {storeLabel: string, priceNum: number}>}
 */
function buildCheapestSkuIndexFromAllDbs(dbDir) {
  const cheapest = new Map(); // sku -> { storeLabel, priceNum }
  for (const file of listDbFiles(dbDir)) {
    try {
      const doc = JSON.parse(fs.readFileSync(file, "utf8"));
      const storeLabel = String(doc?.storeLabel || doc?.store || "");
      const items = Array.isArray(doc?.items) ? doc.items : [];
      for (const it of items) {
        if (it?.removed) continue;
        const sku = normalizeCspc(it?.sku || "");
        if (!sku) continue;
        const priceNum = priceToNumber(it?.price || "");
        const isUsablePrice = Number.isFinite(priceNum) && priceNum > 0;
        if (!isUsablePrice) continue;
        const best = cheapest.get(sku);
        const isCheaper = !best || priceNum < best.priceNum;
        if (isCheaper) cheapest.set(sku, { storeLabel, priceNum });
      }
    } catch {
      // ignore parse errors
    }
  }
  return cheapest;
}
module.exports = {
ensureDir,
dbPathFor,
readDb,
writeJsonAtomic,
buildDbObject,
listDbFiles,
buildCheapestSkuIndexFromAllDbs,
};

100
src/tracker/merge.js Normal file
View file

@ -0,0 +1,100 @@
"use strict";
const { normalizeCspc } = require("../utils/sku");
const { normPrice } = require("../utils/price");
/**
 * Normalise an image reference: trimmed string, with empty values and inline
 * `data:` URIs collapsed to "".
 * @param {*} v
 * @returns {string}
 */
function normImg(v) {
  const trimmed = String(v || "").trim();
  const usable = trimmed !== "" && !/^data:/i.test(trimmed);
  return usable ? trimmed : "";
}
/**
 * Merge this run's discoveries into the previous DB and classify the changes.
 *
 * For each discovered URL:
 *   - not in the previous DB            -> new item;
 *   - previously flagged removed        -> restored (missing SKU/image are
 *                                          filled from the old record);
 *   - otherwise                         -> record replaced when name, price,
 *                                          SKU or image changed; only price
 *                                          changes are reported as updates.
 * Previous items that were not rediscovered and were not already removed are
 * flagged `removed`.
 *
 * @param {{byUrl: Map<string, object>}} prevDb - Previous DB.
 * @param {Map<string, object>} discovered - Items found this run, by URL.
 * @returns {{merged: Map<string, object>, newItems: object[],
 *   updatedItems: object[], removedItems: object[], restoredItems: object[]}}
 */
function mergeDiscoveredIntoDb(prevDb, discovered) {
  const previous = prevDb.byUrl;
  const merged = new Map(previous);
  const newItems = [];
  const updatedItems = [];
  const removedItems = [];
  const restoredItems = [];

  for (const [url, seen] of discovered.entries()) {
    const before = previous.get(url);

    if (!before) {
      const fresh = {
        ...seen,
        sku: normalizeCspc(seen.sku),
        img: normImg(seen.img),
        removed: false,
      };
      newItems.push(fresh);
      merged.set(url, fresh);
      continue;
    }

    if (before.removed) {
      const revived = {
        ...seen,
        sku: normalizeCspc(seen.sku) || normalizeCspc(before.sku),
        img: normImg(seen.img) || normImg(before.img),
        removed: false,
      };
      restoredItems.push({
        url,
        name: revived.name || before.name || "",
        price: revived.price || before.price || "",
        sku: revived.sku || "",
      });
      merged.set(url, revived);
      continue;
    }

    // Still listed: compare normalised fields, keeping the old SKU/image when
    // the new scrape did not provide one.
    const prevPrice = normPrice(before.price);
    const nowPrice = normPrice(seen.price);
    const prevSku = normalizeCspc(before.sku);
    const nowSku = normalizeCspc(seen.sku) || prevSku;
    const prevImg = normImg(before.img);
    const nowImg = normImg(seen.img) || prevImg;

    const priceChanged = prevPrice !== nowPrice;
    const anythingChanged =
      String(before.name || "") !== String(seen.name || "") ||
      priceChanged ||
      prevSku !== nowSku ||
      prevImg !== nowImg;

    if (anythingChanged) {
      merged.set(url, { ...seen, sku: nowSku, img: nowImg, removed: false });
    }
    if (priceChanged) {
      updatedItems.push({
        url,
        name: seen.name || before.name || "",
        sku: nowSku || "",
        oldPrice: before.price || "",
        newPrice: seen.price || "",
      });
    }
  }

  for (const [url, before] of previous.entries()) {
    if (discovered.has(url) || before.removed) continue;
    merged.set(url, { ...before, removed: true });
    removedItems.push({
      url,
      name: before.name || "",
      price: before.price || "",
      sku: normalizeCspc(before.sku) || "",
    });
  }

  return { merged, newItems, updatedItems, removedItems, restoredItems };
}
module.exports = { mergeDiscoveredIntoDb };

240
src/tracker/report.js Normal file
View file

@ -0,0 +1,240 @@
"use strict";
const { C, color } = require("../utils/ansi");
const { padLeft, padRight } = require("../utils/string");
const { normalizeCspc } = require("../utils/sku");
const { priceToNumber, salePctOff } = require("../utils/price");
const { buildCheapestSkuIndexFromAllDbs } = require("./db");
/**
 * Seconds label for a millisecond duration, right-aligned in 7 columns.
 * Shows tenths while the rounded value is under 10s, whole seconds otherwise;
 * non-finite input is shown as 0.0s.
 * @param {number} ms
 * @returns {string}
 */
function secStr(ms) {
  let seconds = 0;
  if (Number.isFinite(ms)) seconds = ms / 1000;
  const tenths = Math.round(seconds * 10) / 10;
  const digits = tenths < 10 ? tenths.toFixed(1) : String(Math.round(seconds));
  return `${digits}s`.padStart(7, " ");
}
/**
 * Create an empty run report stamped with the current time.
 * @returns {{startedAt: Date, categories: object[], totals: {newCount: number,
 *   updatedCount: number, removedCount: number, restoredCount: number},
 *   newItems: object[], updatedItems: object[], removedItems: object[],
 *   restoredItems: object[]}}
 */
function createReport() {
  const totals = { newCount: 0, updatedCount: 0, removedCount: 0, restoredCount: 0 };
  const report = { startedAt: new Date(), categories: [], totals };
  report.newItems = [];
  report.updatedItems = [];
  report.removedItems = [];
  report.restoredItems = [];
  return report;
}
/**
 * Append one category's changes to the run-wide item lists, tagging each row
 * with "<storeName> | <catLabel>".
 *
 * @param {object} report - Run report; its four item arrays are appended to.
 * @param {string} storeName
 * @param {string} catLabel
 * @param {object[]} newItems
 * @param {object[]} updatedItems - Rows carrying `oldPrice` / `newPrice`.
 * @param {object[]} removedItems
 * @param {object[]} restoredItems
 * @returns {void}
 */
function addCategoryResultToReport(report, storeName, catLabel, newItems, updatedItems, removedItems, restoredItems) {
  const reportCatLabel = `${storeName} | ${catLabel}`;
  const listingRow = (it) => ({
    catLabel: reportCatLabel,
    name: it.name,
    price: it.price || "",
    sku: it.sku || "",
    url: it.url,
  });
  const priceChangeRow = (u) => ({
    catLabel: reportCatLabel,
    name: u.name,
    sku: u.sku || "",
    oldPrice: u.oldPrice,
    newPrice: u.newPrice,
    url: u.url,
  });

  for (const it of newItems) report.newItems.push(listingRow(it));
  for (const it of restoredItems) report.restoredItems.push(listingRow(it));
  for (const u of updatedItems) report.updatedItems.push(priceChangeRow(u));
  for (const it of removedItems) report.removedItems.push(listingRow(it));
}
/**
 * Render the end-of-run report as one multi-line string.
 *
 * Layout: a totals line, a per-category table, then four detail sections
 * (NEW LISTINGS, RESTORED, REMOVED, PRICE CHANGES). Each detail row is followed
 * by an indented line containing the item's URL.
 *
 * Side effect: each of `report.newItems`, `report.restoredItems`,
 * `report.removedItems` and `report.updatedItems` is sorted IN PLACE (by
 * `catLabel + name`) when it is non-empty.
 *
 * @param {object} report - Reads `startedAt`, `categories`, `totals` and the
 *   four item arrays.
 * @param {object} [options]
 * @param {string} [options.dbDir] - Forwarded to buildCheapestSkuIndexFromAllDbs.
 * @param {boolean} [options.colorize] - Emit colour codes; defaults to whether
 *   `process.stdout` is a TTY.
 * @returns {string} The rendered report, newline-terminated.
 */
function renderFinalReport(report, { dbDir, colorize = Boolean(process.stdout && process.stdout.isTTY) } = {}) {
// Every coloured fragment goes through `paint`, so `colorize` is passed to `color` in one place.
const paint = (s, code) => color(s, code, colorize);
// NOTE(review): helper is defined elsewhere; below it is used as a Map-like
// lookup from normalized SKU to an entry with `storeLabel` and `priceNum`.
const cheapestSku = buildCheapestSkuIndexFromAllDbs(dbDir);
const endedAt = new Date();
const durMs = endedAt - report.startedAt;
const storesSet = new Set(report.categories.map((c) => c.store));
// Non-finite per-category counts contribute 0 to the total.
const totalUnique = report.categories.reduce((acc, c) => acc + (Number.isFinite(c.discoveredUnique) ? c.discoveredUnique : 0), 0);
let out = "";
// Append one line (plus newline) to the output buffer.
const ln = (s = "") => {
out += String(s) + "\n";
};
ln("");
ln(paint("========== REPORT ==========", C.bold));
ln(
paint("[OK] ", C.green) +
`Totals | Stores=${storesSet.size} | Categories=${report.categories.length} | Unique=${totalUnique} | New=${report.totals.newCount} | Restored=${report.totals.restoredCount} | Removed=${report.totals.removedCount} | PriceChanges=${report.totals.updatedCount} | Runtime=${secStr(
durMs
)}`
);
ln("");
ln(paint("Per-category summary:", C.bold));
const rows = report.categories.map((c) => ({
cat: `${c.store} | ${c.label}`,
pages: c.scannedPages,
uniq: c.discoveredUnique,
newC: c.newCount,
resC: c.restoredCount,
remC: c.removedCount,
updC: c.updatedCount,
ms: c.elapsedMs,
}));
// First-column width: longest label, at least 8 and at most 48. padRight does
// not truncate, so labels longer than 48 push their row's columns right.
const catW = Math.min(48, Math.max(...rows.map((r) => r.cat.length), 8));
ln(`${padRight("Store | Category", catW)} ${padLeft("Pages", 5)} ${padLeft("Unique", 6)} ${padLeft("New", 4)} ${padLeft("Res", 4)} ${padLeft("Rem", 4)} ${padLeft("Upd", 4)} ${padLeft("Sec", 7)}`);
ln(`${"-".repeat(catW)} ----- ------ ---- ---- ---- ---- -------`);
for (const r of rows) {
// NOTE(review): the last column is not padded here, unlike the "Sec" header;
// presumably secStr returns a fixed-width string — confirm.
ln(
`${padRight(r.cat, catW)} ${padLeft(r.pages, 5)} ${padLeft(r.uniq, 6)} ${padLeft(r.newC, 4)} ${padLeft(r.resC, 4)} ${padLeft(r.remC, 4)} ${padLeft(r.updC, 4)} ${secStr(r.ms)}`
);
}
ln("");
// Shared label-column width for the detail sections (minimum 16).
const reportLabelW = Math.max(
16,
...report.newItems.map((x) => x.catLabel.length),
...report.restoredItems.map((x) => x.catLabel.length),
...report.updatedItems.map((x) => x.catLabel.length),
...report.removedItems.map((x) => x.catLabel.length)
);
// The store name is the part of "<store> | <category>" before the first " | ".
function storeFromCatLabel(catLabel) {
return String(catLabel || "").split(" | ")[0] || "";
}
// Gray " <sku>" suffix, or "" when no 6-digit code can be extracted.
function skuInline(sku) {
const s = normalizeCspc(sku);
return s ? paint(` ${s}`, C.gray) : "";
}
// " (Cheaper at X)" only when the index entry for this SKU belongs to a
// different store and its price is strictly below the current price.
function cheaperAtInline(catLabel, sku, currentPriceStr) {
const s = normalizeCspc(sku);
if (!s) return "";
const best = cheapestSku.get(s);
if (!best || !best.storeLabel) return "";
const curStore = storeFromCatLabel(catLabel);
if (!curStore || best.storeLabel === curStore) return "";
const curP = priceToNumber(currentPriceStr);
if (!Number.isFinite(curP)) return "";
if (best.priceNum >= curP) return "";
return paint(` (Cheaper at ${best.storeLabel})`, C.gray);
}
// " (Available at X)" for removed items whose index entry points at another store.
function availableAtInline(catLabel, sku) {
const s = normalizeCspc(sku);
if (!s) return "";
const best = cheapestSku.get(s);
if (!best || !best.storeLabel) return "";
const curStore = storeFromCatLabel(catLabel);
if (curStore && best.storeLabel === curStore) return "";
return paint(` (Available at ${best.storeLabel})`, C.gray);
}
// ---- NEW LISTINGS ----
if (report.newItems.length) {
ln(paint(`NEW LISTINGS (${report.newItems.length})`, C.bold + C.green));
// .sort() reorders report.newItems itself.
for (const it of report.newItems.sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name))) {
const price = it.price ? paint(it.price, C.cyan) : paint("(no price)", C.gray);
const sku = normalizeCspc(it.sku || "");
const cheapTag = cheaperAtInline(it.catLabel, sku, it.price || "");
ln(
`${paint("+", C.green)} ${padRight(it.catLabel, reportLabelW)} | ${paint(it.name, C.bold)}${skuInline(sku)} ${price}${cheapTag}`
);
ln(` ${paint(it.url, C.dim)}`);
}
ln("");
} else {
ln(paint("NEW LISTINGS (0)", C.bold));
ln("");
}
// ---- RESTORED ----
if (report.restoredItems.length) {
ln(paint(`RESTORED (${report.restoredItems.length})`, C.bold + C.green));
for (const it of report.restoredItems.sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name))) {
const price = it.price ? paint(it.price, C.cyan) : paint("(no price)", C.gray);
const sku = normalizeCspc(it.sku || "");
const cheapTag = cheaperAtInline(it.catLabel, sku, it.price || "");
ln(
`${paint("R", C.green)} ${padRight(it.catLabel, reportLabelW)} | ${paint(it.name, C.bold)}${skuInline(sku)} ${price}${cheapTag}`
);
ln(` ${paint(it.url, C.dim)}`);
}
ln("");
} else {
ln(paint("RESTORED (0)", C.bold));
ln("");
}
// ---- REMOVED ----
if (report.removedItems.length) {
ln(paint(`REMOVED (${report.removedItems.length})`, C.bold + C.yellow));
for (const it of report.removedItems.sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name))) {
const price = it.price ? paint(it.price, C.cyan) : paint("(no price)", C.gray);
const sku = normalizeCspc(it.sku || "");
const availTag = availableAtInline(it.catLabel, sku);
ln(
`${paint("-", C.yellow)} ${padRight(it.catLabel, reportLabelW)} | ${paint(it.name, C.bold)}${skuInline(sku)} ${price}${availTag}`
);
ln(` ${paint(it.url, C.dim)}`);
}
ln("");
} else {
ln(paint("REMOVED (0)", C.bold));
ln("");
}
// ---- PRICE CHANGES ----
if (report.updatedItems.length) {
ln(paint(`PRICE CHANGES (${report.updatedItems.length})`, C.bold + C.cyan));
for (const u of report.updatedItems.sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name))) {
const oldRaw = u.oldPrice || "";
const newRaw = u.newPrice || "";
const oldN = priceToNumber(oldRaw);
const newN = priceToNumber(newRaw);
const oldP = oldRaw ? paint(oldRaw, C.yellow) : paint("(no price)", C.gray);
let newP = newRaw ? newRaw : "(no price)";
let offTag = "";
// New price colour: red = increase, green = decrease (with a "% Off" tag when
// salePctOff returns a value), cyan = equal or not comparable.
if (Number.isFinite(oldN) && Number.isFinite(newN)) {
if (newN > oldN) {
newP = paint(newP, C.red); // increase
} else if (newN < oldN) {
newP = paint(newP, C.green); // decrease
const pct = salePctOff(oldRaw, newRaw);
if (pct !== null) offTag = " " + paint(`[${pct}% Off]`, C.green);
} else {
newP = paint(newP, C.cyan);
}
} else {
newP = paint(newP, C.cyan);
}
const sku = normalizeCspc(u.sku || "");
const cheapTag = cheaperAtInline(u.catLabel, sku, newRaw || "");
ln(
`${paint("~", C.cyan)} ${padRight(u.catLabel, reportLabelW)} | ${paint(u.name, C.bold)}${skuInline(sku)} ${oldP} ${paint("->", C.gray)} ${newP}${offTag}${cheapTag}`
);
ln(` ${paint(u.url, C.dim)}`);
}
ln("");
} else {
ln(paint("PRICE CHANGES (0)", C.bold));
ln("");
}
ln(paint("======== END REPORT ========", C.bold));
return out;
}
module.exports = { createReport, addCategoryResultToReport, renderFinalReport };

72
src/tracker/run_all.js Normal file
View file

@ -0,0 +1,72 @@
"use strict";
const { createReport } = require("./report");
const { parallelMapStaggered } = require("../utils/async");
const {
makeCatPrefixers,
buildCategoryContext,
loadCategoryDb,
discoverAndScanCategory,
} = require("./category_scan");
// Some sites will intermittently 403/429. We don't want a single category/store
// to abort the entire run. Log and continue.
/**
 * Turn any thrown value into a loggable string.
 *
 * @param {*} e - Error, string, or anything else that was thrown.
 * @returns {string} "Unknown error" for falsy input, the string itself for
 *   strings, `e.stack` when present, otherwise `String(e)`.
 */
function formatErr(e) {
  if (!e) return "Unknown error";
  return typeof e === "string" ? e : e.stack || String(e);
}
/**
 * Scan every category of every store and collect the results in one report.
 *
 * Previous DB state is loaded for all categories up front; the scans then run
 * through `parallelMapStaggered`. A failing category is logged as a warning
 * and does not abort the run.
 *
 * @param {Array<object>} stores - Store definitions, each with `categories`.
 * @param {object} deps
 * @param {object} deps.config - Run configuration (concurrency, retries, ...).
 * @param {object} deps.logger - Logger exposing `info` and `warn`.
 * @param {object} deps.http - HTTP client handed to every category context.
 * @returns {Promise<object>} The report created by `createReport()`.
 */
async function runAllStores(stores, { config, logger, http }) {
  const report = createReport();
  const { catPrefixOut } = makeCatPrefixers(stores, logger);

  logger.info(`Debug=on`);
  logger.info(
    `Concurrency=${config.concurrency} StaggerMs=${config.staggerMs} Retries=${config.maxRetries} TimeoutMs=${config.timeoutMs}`
  );
  logger.info(`DiscoveryGuess=${config.discoveryGuess} DiscoveryStep=${config.discoveryStep}`);
  logger.info(`MaxPages=${config.maxPages === null ? "none" : config.maxPages}`);
  logger.info(`CategoryConcurrency=${config.categoryConcurrency}`);

  // One work item per (store, category): scan context plus previous DB state.
  const workItems = [];
  for (const store of stores) {
    for (const cat of store.categories) {
      const ctx = { ...buildCategoryContext(store, cat, catPrefixOut, config), config, logger, http };
      workItems.push({ ctx, prevDb: loadCategoryDb(logger, ctx) });
    }
  }

  // Scan a single category; errors are logged loudly but never propagated.
  const scanOne = async (work) => {
    try {
      await discoverAndScanCategory(work.ctx, work.prevDb, report);
    } catch (err) {
      const store = work?.ctx?.store;
      const cat = work?.ctx?.cat;
      const storeName = store?.name || store?.host || "unknown-store";
      const catLabel = cat?.label || cat?.key || "unknown-category";
      logger.warn(`Category failed (continuing): ${storeName} | ${catLabel}\n${formatErr(err)}`);
      // Failures could also be surfaced in the final report by pushing a
      // "failed category" record onto report.categories here.
    }
    return null;
  };

  const workerCount = Math.min(config.categoryConcurrency, workItems.length);
  await parallelMapStaggered(workItems, workerCount, 0, scanOne);
  return report;
}
module.exports = { runAllStores };

19
src/utils/ansi.js Normal file
View file

@ -0,0 +1,19 @@
"use strict";
// ANSI SGR escape sequences used for terminal styling.
const C = {
  reset: "\x1b[0m",
  dim: "\x1b[2m",
  bold: "\x1b[1m",
  red: "\x1b[31m",
  green: "\x1b[32m",
  yellow: "\x1b[33m",
  cyan: "\x1b[36m",
  gray: "\x1b[90m",
};

/**
 * Wrap text in an ANSI style and a trailing reset.
 *
 * @param {*} s - Value to render (stringified).
 * @param {string} code - Escape sequence(s) to prepend; a falsy value adds no prefix.
 * @param {boolean} enabled - When falsy, the plain string is returned unstyled.
 * @returns {string}
 */
function color(s, code, enabled) {
  return enabled ? `${String(code || "")}${String(s)}${C.reset}` : String(s);
}
module.exports = { C, color };

86
src/utils/args.js Normal file
View file

@ -0,0 +1,86 @@
"use strict";
/**
 * Parse a base-10 integer and clamp it into [min, max].
 *
 * @param {*} v - Raw value (typically a CLI string).
 * @param {number|null} def - Returned when `v` does not parse as an integer.
 * @param {number} min - Lower bound (inclusive).
 * @param {number} max - Upper bound (inclusive).
 * @returns {number|null} The clamped integer, or `def` when unparseable.
 */
function clampInt(v, def, min, max) {
  const isMissing = v === null || v === undefined;
  if (isMissing && def === null) return null;

  const parsed = Number.parseInt(v ?? "", 10);
  if (!Number.isFinite(parsed)) return def;

  const cappedAbove = Math.min(max, parsed);
  return Math.max(min, cappedAbove);
}
/**
 * Parse the tracker's command-line arguments.
 *
 * Recognised: `--debug`/`-d`, `--max-pages N`, `--concurrency N`,
 * `--stagger-ms N` (alias `--staggerMs`), `--guess N`, `--step N`,
 * `--data-dir P` (alias `--dataDir`), `--report-dir P` (alias `--reportDir`).
 * A value flag only consumes the next argument when that argument exists and
 * does not start with "-". If `--max-pages` was not set, the first all-digit
 * positional argument is used instead (capped at 5000, ignored unless > 0).
 *
 * @param {string[]} argv - Arguments without the node/script prefix.
 * @returns {{maxPages: ?number, debug: boolean, concurrency: ?number,
 *   staggerMs: ?number, guess: ?number, step: ?number, dataDir: ?string,
 *   reportDir: ?string}}
 */
function parseArgs(argv) {
  const parsed = {
    maxPages: null,
    debug: false,
    concurrency: null,
    staggerMs: null,
    guess: null,
    step: null,
    dataDir: null,
    reportDir: null,
  };

  const setStaggerMs = (raw) => {
    parsed.staggerMs = clampInt(raw, null, 0, 5000);
  };
  const setDataDir = (raw) => {
    parsed.dataDir = String(raw);
  };
  const setReportDir = (raw) => {
    parsed.reportDir = String(raw);
  };

  // Flag -> setter for every option that takes a value.
  const valueFlags = new Map([
    ["--max-pages", (raw) => { parsed.maxPages = clampInt(raw, null, 1, 5000); }],
    ["--concurrency", (raw) => { parsed.concurrency = clampInt(raw, null, 1, 64); }],
    ["--stagger-ms", setStaggerMs],
    ["--staggerMs", setStaggerMs],
    ["--guess", (raw) => { parsed.guess = clampInt(raw, null, 1, 5000); }],
    ["--step", (raw) => { parsed.step = clampInt(raw, null, 1, 500); }],
    ["--data-dir", setDataDir],
    ["--dataDir", setDataDir],
    ["--report-dir", setReportDir],
    ["--reportDir", setReportDir],
  ]);

  const positional = [];
  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    if (arg === "--debug" || arg === "-d") {
      parsed.debug = true;
      continue;
    }

    const applyValue = valueFlags.get(arg);
    if (applyValue && argv[i + 1] && !argv[i + 1].startsWith("-")) {
      applyValue(argv[i + 1]);
      i++; // the value has been consumed
      continue;
    }

    if (!String(arg).startsWith("-")) positional.push(arg);
  }

  // Bare-number fallback: `tracker 10` behaves like `--max-pages 10`.
  if (parsed.maxPages === null) {
    const candidate = positional.find((x) => /^\d+$/.test(String(x)));
    if (candidate) {
      const pages = Number.parseInt(candidate, 10);
      if (Number.isFinite(pages) && pages > 0) parsed.maxPages = Math.min(pages, 5000);
    }
  }

  return parsed;
}
module.exports = { clampInt, parseArgs };

26
src/utils/async.js Normal file
View file

@ -0,0 +1,26 @@
"use strict";
const { setTimeout: sleep } = require("timers/promises");
/**
 * Map `fn` over `arr` with a bounded number of concurrent workers, optionally
 * staggering start times.
 *
 * Workers pull the next unclaimed index from a shared counter, so results are
 * stored at the index of their input regardless of completion order. With
 * `staggerMs > 0`, worker k first waits `staggerMs * (k - 1)` ms and every
 * item except index 0 is preceded by a `staggerMs` ms pause.
 *
 * A `concurrency` below 1 (0, negative, NaN, undefined) is treated as 1, so a
 * misconfigured value can no longer return an array in which nothing was
 * processed.
 *
 * @param {Array} arr - Input items.
 * @param {number} concurrency - Maximum number of simultaneous workers.
 * @param {number} staggerMs - Delay unit in milliseconds; 0 disables delays.
 * @param {function(*, number): Promise<*>} fn - Async mapper `(item, index)`.
 * @returns {Promise<Array>} Results in input order. Rejects with the first
 *   error thrown by `fn`.
 */
async function parallelMapStaggered(arr, concurrency, staggerMs, fn) {
  const out = new Array(arr.length);
  let next = 0;

  async function worker(workerId) {
    if (staggerMs > 0 && workerId > 1) await sleep(staggerMs * (workerId - 1));
    while (true) {
      const i = next++;
      if (i >= arr.length) return;
      if (staggerMs > 0 && i > 0) await sleep(staggerMs);
      out[i] = await fn(arr[i], i);
    }
  }

  // ceil keeps the historical worker count for fractional values (2.5 -> 3);
  // anything that is not >= 1 (including NaN) falls back to one worker.
  const requested = Math.ceil(Number(concurrency));
  const workerCount = Math.min(requested >= 1 ? requested : 1, arr.length);

  const workers = [];
  for (let id = 1; id <= workerCount; id++) workers.push(worker(id));
  await Promise.all(workers);
  return out;
}
module.exports = { parallelMapStaggered };

12
src/utils/bytes.js Normal file
View file

@ -0,0 +1,12 @@
"use strict";
/**
 * Format a byte count as a short human-readable string.
 *
 * @param {number} n - Byte count.
 * @returns {string} "0B" for non-finite or non-positive input, otherwise the
 *   value in B (as-is), KB or MB (one decimal; MB is the largest unit).
 */
function humanBytes(n) {
  const UNIT = 1024;
  if (!Number.isFinite(n) || n <= 0) return "0B";
  if (n < UNIT) return `${n}B`;

  const kilobytes = n / UNIT;
  return kilobytes < UNIT ? `${kilobytes.toFixed(1)}KB` : `${(kilobytes / UNIT).toFixed(1)}MB`;
}
module.exports = { humanBytes };

111
src/utils/html.js Normal file
View file

@ -0,0 +1,111 @@
"use strict";
/**
 * Remove everything that looks like an HTML tag (`<...>`), leaving the text
 * between tags untouched (no whitespace is inserted).
 *
 * @param {*} s - HTML fragment (stringified).
 * @returns {string}
 */
function stripTags(s) {
  const textPieces = String(s).split(/<[^>]*>/);
  return textPieces.join("");
}
/**
 * Reduce an HTML fragment to single-spaced plain text: each non-empty tag
 * becomes a space, whitespace runs collapse to one space, ends are trimmed.
 *
 * @param {*} s - HTML fragment (stringified).
 * @returns {string}
 */
function cleanText(s) {
  const withoutTags = String(s).split(/<[^>]+>/).join(" ");
  // Splitting on whitespace and dropping empties collapses runs and trims in one go.
  const words = withoutTags.split(/\s+/).filter(Boolean);
  return words.join(" ");
}
// Named entities understood by decodeHtml (case-sensitive). &nbsp; maps to a
// regular space so downstream whitespace handling treats it as one.
const HTML_NAMED_ENTITIES = new Map([
  ["amp", "&"],
  ["quot", '"'],
  ["apos", "'"],
  ["lt", "<"],
  ["gt", ">"],
  ["nbsp", " "],
  ["laquo", "«"],
  ["raquo", "»"],
]);

/**
 * Decode decimal (`&#39;`), hexadecimal (`&#x27;`) and a small set of named
 * HTML entities.
 *
 * Decoding is a single pass, so text produced by one entity is never decoded
 * again (`&amp;lt;` becomes `&lt;`, not `<`). Numeric entities go through
 * `String.fromCodePoint`, so code points above U+FFFF (e.g. emoji) decode
 * correctly. Unknown names and out-of-range numbers are left as written.
 *
 * @param {*} s - Text containing entities (stringified).
 * @returns {string}
 */
function decodeHtml(s) {
  return String(s).replace(/&(?:#(\d+)|#x([0-9a-fA-F]+)|([A-Za-z]+));/g, (entity, dec, hex, name) => {
    if (name !== undefined) {
      return HTML_NAMED_ENTITIES.has(name) ? HTML_NAMED_ENTITIES.get(name) : entity;
    }
    const codePoint = dec !== undefined ? Number.parseInt(dec, 10) : Number.parseInt(hex, 16);
    // fromCodePoint throws above U+10FFFF; keep such entities verbatim instead.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
  });
}
/**
 * Escape a string so it can be embedded literally in a RegExp source.
 *
 * @param {*} s - Text to escape (stringified).
 * @returns {string}
 */
function escapeRe(s) {
  return String(s).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Extract the value of the first `attrName=...` attribute in an HTML snippet.
 *
 * Handles double-quoted, single-quoted and unquoted values; the attribute name
 * is matched case-insensitively. The name must not be preceded by a word
 * character, "-", ":" or ".", so asking for `src` does not match `data-src`
 * (and `srcset` does not match `data-srcset`).
 *
 * @param {string} html - Tag or HTML fragment to search.
 * @param {string} attrName - Attribute name, e.g. "src".
 * @returns {string} The raw (still entity-encoded) value, or "" when absent.
 */
function extractHtmlAttr(html, attrName) {
  const re = new RegExp(
    `(?<![\\w:.-])${escapeRe(attrName)}\\s*=\\s*(?:"([^"]*)"|'([^']*)'|([^\\s>]+))`,
    "i"
  );
  const m = re.exec(html);
  if (!m) return "";
  return m[1] ?? m[2] ?? m[3] ?? "";
}
/**
 * Return the URL of the first candidate in a `srcset` attribute value.
 *
 * @param {*} srcset - e.g. "a.png 1x, b.png 2x".
 * @returns {string} The first URL with any surrounding quote removed, or "".
 */
function pickFirstUrlFromSrcset(srcset) {
  const value = String(srcset || "").trim();
  if (!value) return "";

  // Candidates are comma-separated; each is "<url> [descriptor]".
  const [firstCandidate = ""] = value.split(",");
  const [url = ""] = firstCandidate.trim().split(/\s+/);
  return url.trim().replace(/^["']|["']$/g, "");
}
/**
 * Resolve a possibly relative or protocol-relative URL to an absolute one.
 *
 * @param {*} raw - URL as found in the markup.
 * @param {string} [baseUrl] - Base used to resolve relative URLs.
 * @returns {string} The absolute URL; "" for empty input; the (https-prefixed)
 *   input unchanged when it cannot be parsed.
 */
function normalizeMaybeRelativeUrl(raw, baseUrl) {
  const trimmed = String(raw || "").trim();
  if (!trimmed) return "";

  // Protocol-relative URLs ("//cdn...") are assumed to be https.
  const candidate = trimmed.startsWith("//") ? `https:${trimmed}` : trimmed;
  try {
    const parsed = baseUrl ? new URL(candidate, baseUrl) : new URL(candidate);
    return parsed.toString();
  } catch {
    return candidate;
  }
}
/**
 * Best-effort thumbnail extractor for listing HTML blocks.
 *
 * Looks only at the first `<img>` tag and tries lazy-loading attributes before
 * the plain `srcset` / `src`. `data:` URIs are skipped.
 *
 * @param {*} html - HTML block that may contain an image.
 * @param {string} [baseUrl] - When provided, the result is made absolute.
 * @returns {string} Image URL, or "" when nothing usable is found.
 */
function extractFirstImgUrl(html, baseUrl) {
  const imgMatch = /<img\b[^>]*>/i.exec(String(html || ""));
  if (!imgMatch) return "";
  const imgTag = imgMatch[0];

  // Checked in priority order.
  const candidateAttrs = ["data-src", "data-lazy-src", "data-original", "data-srcset", "srcset", "src"];

  for (const attr of candidateAttrs) {
    const rawValue = extractHtmlAttr(imgTag, attr);
    if (!rawValue) continue;

    let value = decodeHtml(String(rawValue)).trim();
    if (value && attr.toLowerCase().includes("srcset")) {
      value = pickFirstUrlFromSrcset(value);
    }
    value = String(value || "").trim();

    // Nothing left, or an inline data URI: try the next attribute.
    if (!value || /^data:/i.test(value)) continue;

    const absolute = normalizeMaybeRelativeUrl(value, baseUrl);
    if (absolute) return absolute;
  }
  return "";
}
module.exports = {
stripTags,
cleanText,
decodeHtml,
escapeRe,
extractHtmlAttr,
extractFirstImgUrl,
};

21
src/utils/price.js Normal file
View file

@ -0,0 +1,21 @@
"use strict";
/**
 * Normalize a price string by removing all whitespace.
 *
 * @param {*} p - Raw price text; falsy values become "".
 * @returns {string}
 */
function normPrice(p) {
  const raw = String(p || "");
  return raw.split(/\s+/).join("");
}
/**
 * Convert a display price such as "$1,234.50" to a number.
 *
 * Everything except digits and "." is stripped before parsing. Input with no
 * digits at all ("", "(no price)", null) yields NaN rather than 0, so a
 * missing price is never mistaken for a price of zero.
 *
 * @param {*} p - Price text or number.
 * @returns {number} The parsed value, or NaN when there is no parseable number.
 */
function priceToNumber(p) {
  const digits = String(p ?? "").replace(/[^0-9.]/g, "");
  // Number("") is 0, which would make "no price" look like "free".
  if (!/\d/.test(digits)) return NaN;
  const n = Number(digits);
  return Number.isFinite(n) ? n : NaN;
}
/**
 * Percentage discount from an old price to a new, lower price.
 *
 * @param {string} oldPriceStr - Previous price text.
 * @param {string} newPriceStr - Current price text.
 * @returns {number|null} Whole-number percent off, or null when either price
 *   is not a finite number, the old price is not positive, or the price did
 *   not go down.
 */
function salePctOff(oldPriceStr, newPriceStr) {
  const before = priceToNumber(oldPriceStr);
  const after = priceToNumber(newPriceStr);

  const comparable = Number.isFinite(before) && Number.isFinite(after) && before > 0;
  if (!comparable || after >= before) return null;

  const fractionOff = (before - after) / before;
  return Math.round(fractionOff * 100);
}
module.exports = { normPrice, priceToNumber, salePctOff };

9
src/utils/sku.js Normal file
View file

@ -0,0 +1,9 @@
"use strict";
/**
 * Extract the Alberta CSPC / product code from a value.
 * The code is 6 digits; some stores label it "SKU".
 *
 * @param {*} v - Raw SKU text or number.
 * @returns {string} The first standalone 6-digit run, or "" when none exists.
 */
function normalizeCspc(v) {
  const found = /\b(\d{6})\b/.exec(String(v ?? ""));
  return found === null ? "" : found[1];
}
module.exports = { normalizeCspc };

29
src/utils/string.js Normal file
View file

@ -0,0 +1,29 @@
"use strict";
/**
 * Right-pad a value with spaces to at least `n` characters (never truncates).
 *
 * @param {*} s - Value to pad (stringified).
 * @param {number} n - Target width.
 * @returns {string}
 */
function padRight(s, n) {
  return String(s).padEnd(n, " ");
}
/**
 * Left-pad a value with spaces to at least `n` characters (never truncates).
 *
 * @param {*} s - Value to pad (stringified).
 * @param {number} n - Target width.
 * @returns {string}
 */
function padLeft(s, n) {
  return String(s).padStart(n, " ");
}
/**
 * Remove ANSI SGR sequences (`ESC [ ... m`) from a string.
 *
 * @param {*} s - Possibly styled text (stringified).
 * @returns {string} The text without colour/style codes.
 */
function stripAnsi(s) {
  const visibleParts = String(s).split(/\x1b\[[0-9;]*m/);
  return visibleParts.join("");
}
/**
 * Right-pad to `n` visible characters: ANSI colour codes in `s` are kept in
 * the output but do not count towards the width.
 *
 * @param {*} s - Possibly styled text (stringified).
 * @param {number} n - Target visible width.
 * @returns {string}
 */
function padRightV(s, n) {
  const text = String(s);
  const missing = n - stripAnsi(text).length;
  return missing > 0 ? text + " ".repeat(missing) : text;
}
/**
 * Left-pad to `n` visible characters: ANSI colour codes in `s` are kept in
 * the output but do not count towards the width.
 *
 * @param {*} s - Possibly styled text (stringified).
 * @param {number} n - Target visible width.
 * @returns {string}
 */
function padLeftV(s, n) {
  const text = String(s);
  const missing = n - stripAnsi(text).length;
  return missing > 0 ? " ".repeat(missing) + text : text;
}
module.exports = { padRight, padLeft, stripAnsi, padRightV, padLeftV };

13
src/utils/text.js Normal file
View file

@ -0,0 +1,13 @@
"use strict";
const { cleanText, decodeHtml } = require("./html");
/**
 * Produce a plain, comparable product name from scraped markup.
 *
 * Steps: decode HTML entities, strip tags and collapse whitespace, drop quote
 * and apostrophe characters, drop anything that is not a letter, digit,
 * whitespace or one of `- & ( ) . , /`, then collapse whitespace again.
 *
 * @param {*} s - Raw name; falsy values become "".
 * @returns {string}
 */
function sanitizeName(s) {
  const plainText = cleanText(decodeHtml(String(s || "")));
  const withoutQuotes = plainText.replace(/['"’“”`´]/g, "");
  const allowedCharsOnly = withoutQuotes.replace(/[^\p{L}\p{N}\s\-&().,/]/gu, "");
  return allowedCharsOnly.replace(/\s+/g, " ").trim();
}
module.exports = { sanitizeName };

16
src/utils/time.js Normal file
View file

@ -0,0 +1,16 @@
"use strict";
/**
 * Format a date's local wall-clock time as "HH:MM:SS.mmm".
 *
 * @param {Date} [d] - Defaults to the current time.
 * @returns {string}
 */
function ts(d = new Date()) {
  const twoDigits = (value) => String(value).padStart(2, "0");
  const clock = [d.getHours(), d.getMinutes(), d.getSeconds()].map((part) => twoDigits(part)).join(":");
  const millis = String(d.getMilliseconds()).padStart(3, "0");
  return `${clock}.${millis}`;
}
/**
 * UTC ISO timestamp that is safe to use in file names,
 * e.g. "2026-01-16T21-27-01Z" (no colons, no milliseconds).
 *
 * @param {Date} [d] - Defaults to the current time.
 * @returns {string}
 */
function isoTimestampFileSafe(d = new Date()) {
  const withoutMillis = d.toISOString().replace(/\.\d{3}Z$/, "Z");
  return withoutMillis.split(":").join("-");
}
module.exports = { ts, isoTimestampFileSafe };

50
src/utils/url.js Normal file
View file

@ -0,0 +1,50 @@
"use strict";
/**
 * Canonicalize a category start URL to its page-1 form.
 *
 * Drops the fragment and any `page` query parameter, ensures a trailing slash
 * and removes a trailing `/page/<n>/` path segment.
 *
 * @param {string} startUrl - Category URL as configured.
 * @returns {string} The normalized URL, or `startUrl` unchanged when it cannot
 *   be parsed.
 */
function normalizeBaseUrl(startUrl) {
  try {
    const url = new URL(startUrl);
    url.hash = "";

    if (url.searchParams && url.searchParams.has("page")) {
      url.searchParams.delete("page");
    }
    const query = url.searchParams.toString();
    url.search = query ? `?${query}` : "";

    // Add the trailing slash first so the /page/N/ pattern also matches "/page/N".
    if (!url.pathname.endsWith("/")) url.pathname += "/";
    url.pathname = url.pathname.replace(/\/page\/\d+\/?$/, "/");

    return url.toString();
  } catch {
    return startUrl;
  }
}
/**
 * Build the URL of page `pageNum` using path-style pagination (`.../page/<n>/`).
 *
 * @param {string} baseUrl - Category URL; an existing trailing `/page/<n>/` is replaced.
 * @param {number} pageNum - 1-based page number; values <= 1 return the
 *   normalized base URL.
 * @returns {string}
 * @throws {TypeError} When `pageNum > 1` and `baseUrl` is not a valid URL.
 */
function makePageUrl(baseUrl, pageNum) {
  if (pageNum <= 1) return normalizeBaseUrl(baseUrl);

  const url = new URL(baseUrl);
  if (!url.pathname.endsWith("/")) url.pathname += "/";
  const firstPagePath = url.pathname.replace(/\/page\/\d+\/?$/, "/");
  url.pathname = firstPagePath;
  url.pathname = `${url.pathname}page/${pageNum}/`;
  url.hash = "";
  return url.toString();
}
/**
 * Build a page URL for a category context, using the store's own
 * `makePageUrl` when it defines one and the default path-style builder otherwise.
 *
 * @param {object} ctx - Category context; `ctx.store.makePageUrl` is optional.
 * @param {string} baseUrl - Category base URL.
 * @param {number} pageNum - 1-based page number.
 * @returns {string}
 */
function makePageUrlForCtx(ctx, baseUrl, pageNum) {
  const storeBuilder = ctx?.store?.makePageUrl;
  if (typeof storeBuilder === "function") {
    // Deliberately invoked as a plain function (no `this` binding).
    return storeBuilder(baseUrl, pageNum);
  }
  return makePageUrl(baseUrl, pageNum);
}
/**
 * Build a page URL for stores that paginate through a query parameter.
 *
 * @param {string} baseUrl - Category URL (normalized first).
 * @param {string} paramName - Name of the page parameter, e.g. "p".
 * @param {number} pageNum - Page number; values <= 1 are written as "1".
 * @returns {string}
 */
function makePageUrlQueryParam(baseUrl, paramName, pageNum) {
  const url = new URL(normalizeBaseUrl(baseUrl));
  url.hash = "";

  const pageValue = pageNum <= 1 ? "1" : String(pageNum);
  url.searchParams.set(paramName, pageValue);
  url.search = `?${url.searchParams.toString()}`;

  return url.toString();
}
/**
 * Build a `?page=<n>` URL, with page numbers below 1 clamped to 1.
 *
 * @param {string} baseUrl - Collection URL (normalized first).
 * @param {number} pageNum - Page number.
 * @returns {string}
 */
function makePageUrlShopifyQueryPage(baseUrl, pageNum) {
  const url = new URL(normalizeBaseUrl(baseUrl));
  url.hash = "";

  const page = Math.max(1, pageNum);
  url.searchParams.set("page", String(page));
  url.search = `?${url.searchParams.toString()}`;

  return url.toString();
}
module.exports = { normalizeBaseUrl, makePageUrl, makePageUrlForCtx, makePageUrlQueryParam, makePageUrlShopifyQueryPage };

57
src/utils/woocommerce.js Normal file
View file

@ -0,0 +1,57 @@
"use strict";
const { decodeHtml, stripTags, cleanText } = require("./html");
/**
 * Extracts the *effective* price from a Woo price block.
 * - If a sale <ins> exists, the last <ins> (sale price) is used.
 * - Otherwise the normal price bdi/span content is used.
 *
 * @param {string} block - HTML of one product listing.
 * @returns {string} Price text without whitespace (a "$" is prepended to
 *   <bdi> content that lacks one), or "" when no price is found.
 */
function extractPriceFromTmbBlock(block) {
  const priceHtml = matchFirstPriceSpan(block);
  if (!priceHtml) return "";

  const toPlainText = (html) => cleanText(decodeHtml(stripTags(html)));

  // Prefer the last <ins> (sale price) when the block has one.
  const saleBlocks = [...priceHtml.matchAll(/<ins\b[^>]*>([\s\S]*?)<\/ins>/gi)];
  const scope = saleBlocks.length > 0 ? saleBlocks[saleBlocks.length - 1][1] : priceHtml;

  // Usual case: the amount is wrapped in <bdi>.
  const bdiBlocks = [...scope.matchAll(/<bdi\b[^>]*>([\s\S]*?)<\/bdi>/gi)];
  if (bdiBlocks.length > 0) {
    const compact = toPlainText(bdiBlocks[bdiBlocks.length - 1][1]).replace(/\s+/g, "");
    if (compact) return compact.startsWith("$") ? compact : `$${compact}`;
  }

  // Fallback 1: explicit currency-symbol element plus the first number in the text.
  const symbol = scope.match(/woocommerce-Price-currencySymbol[^>]*>\s*([^<\s]+)/i);
  const scopeText = toPlainText(scope);
  const amount = scopeText.match(/(\d+(?:\.\d{2})?)/);
  if (symbol && amount) return `${symbol[1].trim()}${amount[1]}`;

  // Fallback 2: any "$<number>" in the text.
  const dollarAmount = scopeText.match(/\$\s*\d+(?:\.\d{2})?/);
  return dollarAmount ? dollarAmount[0].replace(/\s+/g, "") : "";
}
/**
 * Return the inner HTML of the first `<span class="... price ...">`, honouring
 * nested spans by counting `<span` / `</span>` pairs.
 *
 * @param {string} html - HTML to search.
 * @returns {string} Content between the opening tag and its matching
 *   `</span>`, or "" when there is no such span or it is never closed.
 */
function matchFirstPriceSpan(html) {
  const opener = /<span\b[^>]*class=["'][^"']*\bprice\b[^"']*["'][^>]*>/i.exec(html);
  if (!opener) return "";

  const OPEN_TOKEN = "<span";
  const CLOSE_TOKEN = "</span>";
  const contentStart = opener.index + opener[0].length;

  let depth = 1; // already inside the price span
  let cursor = contentStart;
  while (cursor < html.length) {
    const closeAt = html.indexOf(CLOSE_TOKEN, cursor);
    if (closeAt === -1) break;

    const openAt = html.indexOf(OPEN_TOKEN, cursor);
    const nestedSpanFirst = openAt !== -1 && openAt < closeAt;
    if (nestedSpanFirst) {
      depth += 1;
      cursor = openAt + OPEN_TOKEN.length;
      continue;
    }

    depth -= 1;
    if (depth === 0) return html.slice(contentStart, closeAt);
    cursor = closeAt + CLOSE_TOKEN.length;
  }
  return "";
}
module.exports = { extractPriceFromTmbBlock };

93
tools/build_viz_commits.js Executable file
View file

@ -0,0 +1,93 @@
#!/usr/bin/env node
"use strict";
const { execFileSync } = require("child_process");
const fs = require("fs");
const path = require("path");
/**
 * Run `git` synchronously and return its stdout without trailing whitespace.
 *
 * @param {string[]} args - Arguments passed to git.
 * @returns {string}
 * @throws When git cannot be started or exits with a non-zero status.
 */
function runGit(args) {
  const stdout = execFileSync("git", args, { encoding: "utf8" });
  return stdout.trimEnd();
}
/**
 * List the `.json` files directly inside `dbDir` (no recursion).
 *
 * @param {string} dbDir - Directory to scan.
 * @returns {string[]} Paths joined onto `dbDir`; [] when the directory cannot be read.
 */
function listDbFiles(dbDir) {
  try {
    const jsonPaths = [];
    for (const entry of fs.readdirSync(dbDir, { withFileTypes: true })) {
      if (entry.isFile() && entry.name.endsWith(".json")) {
        jsonPaths.push(path.join(dbDir, entry.name));
      }
    }
    return jsonPaths;
  } catch {
    return [];
  }
}
/**
 * Take the leading `YYYY-MM-DD` part of an ISO-like timestamp.
 *
 * @param {*} iso - Timestamp text.
 * @returns {string} The date part, or "" when the input does not start with one.
 */
function dateOnly(iso) {
  const leadingDate = /^(\d{4}-\d{2}-\d{2})/.exec(String(iso ?? ""));
  return leadingDate === null ? "" : leadingDate[1];
}
/**
 * Collapse `git log --format="%H %cI"` output (newest first) to one commit per
 * calendar date, keeping the newest commit of each date.
 *
 * @param {string} logText - Raw git log output.
 * @returns {Array<{sha: string, date: string, ts: string}>} Ordered oldest -> newest.
 */
function newestCommitPerDay(logText) {
  const newestByDate = new Map();
  for (const rawLine of logText.split(/\r?\n/)) {
    const parsed = rawLine.trim().match(/^([0-9a-f]{7,40})\s+(.+)$/i);
    if (!parsed) continue;
    const [, sha, ts] = parsed;
    const date = dateOnly(ts);
    // Log order is newest -> oldest, so the first commit seen for a date wins.
    if (date && !newestByDate.has(date)) newestByDate.set(date, { sha, date, ts });
  }
  return [...newestByDate.values()].reverse();
}

/**
 * Write viz/data/db_commits.json: for every data/db/*.json file, the list of
 * commits (one per day, oldest -> newest) that touched it.
 *
 * Paths are resolved against the current working directory. Files whose
 * `git log` call fails are omitted from the output.
 */
function main() {
  // With several runs per day (~4), read a larger raw window and collapse it
  // to one point per day afterwards.
  const MAX_RAW_PER_FILE = 2400; // ~600 days @ 4 runs/day
  const MAX_DAYS_PER_FILE = 600; // daily points kept after collapsing

  const repoRoot = process.cwd();
  const dbDir = path.join(repoRoot, "data", "db");
  const outDir = path.join(repoRoot, "viz", "data");
  const outFile = path.join(outDir, "db_commits.json");
  fs.mkdirSync(outDir, { recursive: true });

  const relFiles = listDbFiles(dbDir)
    .map((abs) => path.posix.join("data/db", path.basename(abs)))
    .sort();

  const payload = {
    generatedAt: new Date().toISOString(),
    branch: "data",
    files: {},
  };

  for (const rel of relFiles) {
    let logText;
    try {
      // %H = sha, %cI = committer date, strict ISO 8601 (includes time + tz)
      logText = runGit(["log", "--format=%H %cI", `-${MAX_RAW_PER_FILE}`, "--", rel]);
    } catch {
      continue;
    }

    const daily = newestCommitPerDay(logText);
    // Keep only the newest MAX_DAYS_PER_FILE entries (still oldest -> newest).
    payload.files[rel] = daily.length > MAX_DAYS_PER_FILE ? daily.slice(-MAX_DAYS_PER_FILE) : daily;
  }

  fs.writeFileSync(outFile, JSON.stringify(payload, null, 2) + "\n", "utf8");
  process.stdout.write(`Wrote ${outFile} (${Object.keys(payload.files).length} files)\n`);
}
main();

105
tools/build_viz_index.js Executable file
View file

@ -0,0 +1,105 @@
#!/usr/bin/env node
"use strict";
const fs = require("fs");
const path = require("path");
/**
 * Create `dir` and any missing parent directories; an existing directory is fine.
 *
 * @param {string} dir - Directory path.
 */
function ensureDir(dir) {
  const options = { recursive: true };
  fs.mkdirSync(dir, options);
}
/**
 * List the `.json` files directly inside `dir` (no recursion).
 *
 * @param {string} dir - Directory to scan.
 * @returns {string[]} Paths joined onto `dir`; [] when the directory cannot be read.
 */
function listJsonFiles(dir) {
  const found = [];
  try {
    const entries = fs.readdirSync(dir, { withFileTypes: true });
    for (const entry of entries) {
      const isJsonFile = entry.isFile() && String(entry.name || "").endsWith(".json");
      if (isJsonFile) found.push(path.join(dir, entry.name));
    }
  } catch {
    // Missing or unreadable directory: return whatever was collected.
  }
  return found;
}
/**
 * Read and parse a JSON file.
 *
 * @param {string} file - Path to the file.
 * @returns {*} The parsed value, or null when the file cannot be read or parsed.
 */
function readJson(file) {
  try {
    const text = fs.readFileSync(file, "utf8");
    return JSON.parse(text);
  } catch {
    return null;
  }
}
/**
 * Build viz/data/index.json: one flat row per non-removed item across every
 * data/db/*.json file, sorted by sku, store label, name and URL.
 *
 * Paths are resolved relative to the repository root (the parent of this
 * script's directory). Unreadable or invalid DB files are skipped.
 */
function main() {
  const repoRoot = path.resolve(__dirname, "..");
  const dbDir = path.join(repoRoot, "data", "db");
  const outDir = path.join(repoRoot, "viz", "data");
  const outFile = path.join(outDir, "index.json");
  ensureDir(outDir);

  const asText = (value) => String(value || "");
  const asTrimmedText = (value) => String(value || "").trim();

  const items = [];
  for (const file of listJsonFiles(dbDir)) {
    const db = readJson(file);
    if (!db) continue;

    // Fields shared by every row that comes from this DB file.
    const store = asText(db.store);
    const fileFields = {
      store,
      storeLabel: String(db.storeLabel || store || ""),
      category: asText(db.category),
      categoryLabel: asText(db.categoryLabel),
      source: asText(db.source),
      updatedAt: asText(db.updatedAt),
      // Forward slashes so the path also works in raw GitHub URLs on Windows.
      dbFile: path.relative(repoRoot, file).replace(/\\/g, "/"),
    };

    const dbItems = Array.isArray(db.items) ? db.items : [];
    for (const it of dbItems) {
      if (!it || it.removed) continue;
      items.push({
        sku: asTrimmedText(it.sku),
        name: asTrimmedText(it.name),
        price: asTrimmedText(it.price),
        url: asTrimmedText(it.url),
        img: String(it.img || it.image || it.thumb || "").trim(),
        ...fileFields,
      });
    }
  }

  const sortKey = (row) => `${row.sku}|${row.storeLabel}|${row.name}|${row.url}`;
  items.sort((a, b) => sortKey(a).localeCompare(sortKey(b)));

  const outObj = {
    generatedAt: new Date().toISOString(),
    count: items.length,
    items,
  };
  fs.writeFileSync(outFile, JSON.stringify(outObj, null, 2) + "\n", "utf8");
  process.stdout.write(`Wrote ${path.relative(repoRoot, outFile)} (${items.length} rows)\n`);
}
module.exports = { main };
if (require.main === module) {
main();
}

393
tools/build_viz_recent.js Executable file
View file

@ -0,0 +1,393 @@
#!/usr/bin/env node
"use strict";
const { execFileSync } = require("child_process");
const fs = require("fs");
const path = require("path");
/**
 * Run `git` synchronously and return its stdout without trailing whitespace.
 *
 * @param {string[]} args - Arguments passed to git.
 * @returns {string}
 * @throws When git cannot be started or exits with a non-zero status.
 */
function runGit(args) {
  const stdout = execFileSync("git", args, { encoding: "utf8" });
  return stdout.trimEnd();
}
/**
 * Read a file as it existed at a given commit and parse it as JSON.
 *
 * @param {string} sha - Commit-ish.
 * @param {string} filePath - Repository-relative path.
 * @returns {*} The parsed JSON, or null when the file is missing at that
 *   commit, git fails, or the content is not valid JSON.
 */
function gitShowJson(sha, filePath) {
  const objectSpec = `${sha}:${filePath}`;
  const execOptions = {
    encoding: "utf8",
    stdio: ["ignore", "pipe", "pipe"], // silence git fatal spam
  };
  try {
    return JSON.parse(execFileSync("git", ["show", objectSpec], execOptions));
  } catch {
    return null;
  }
}
/**
 * List all files under a directory at a given commit.
 *
 * @param {string} sha - Commit-ish.
 * @param {string} dirRel - Repository-relative directory.
 * @returns {string[]} Repository-relative file paths; [] when git fails.
 */
function gitListTreeFiles(sha, dirRel) {
  try {
    const listing = runGit(["ls-tree", "-r", "--name-only", sha, dirRel]);
    const paths = [];
    for (const line of listing.split(/\r?\n/)) {
      const trimmed = line.trim();
      if (trimmed) paths.push(trimmed);
    }
    return paths;
  } catch {
    return [];
  }
}
/**
 * Read and parse a JSON file from disk.
 *
 * @param {string} filePath - Path to the file.
 * @returns {*} The parsed value, or null when the file cannot be read or parsed.
 */
function readJsonFileOrNull(filePath) {
  try {
    const text = fs.readFileSync(filePath, "utf8");
    return JSON.parse(text);
  } catch {
    return null;
  }
}
/**
 * Extract the first standalone 6-digit product code from a value.
 *
 * @param {*} v - Raw SKU text or number.
 * @returns {string} The 6-digit code, or "" when none exists.
 */
function normalizeCspc(v) {
  const found = /\b(\d{6})\b/.exec(String(v ?? ""));
  return found === null ? "" : found[1];
}
/**
 * Stringify a price and trim surrounding whitespace.
 *
 * @param {*} p - Raw price; null/undefined become "".
 * @returns {string}
 */
function normPriceStr(p) {
  const text = p == null ? "" : String(p);
  return text.trim();
}
/**
 * Convert a display price such as "$1,234.50" to a number.
 *
 * Everything except digits and "." is stripped before parsing. Input with no
 * digits at all ("", "N/A", null) yields null instead of 0, so a missing
 * price is not compared as if it were zero (which would, for example,
 * classify "" -> "$10" as a price increase).
 *
 * @param {*} v - Price text or number.
 * @returns {number|null} The parsed value, or null when there is no parseable number.
 */
function priceToNumber(v) {
  const digits = String(v ?? "").replace(/[^0-9.]/g, "");
  // Number("") is 0; treat "no digits" as "no price".
  if (!/\d/.test(digits)) return null;
  const n = Number(digits);
  return Number.isFinite(n) ? n : null;
}
/**
 * Take the leading `YYYY-MM-DD` part of an ISO-like timestamp.
 *
 * @param {*} iso - Timestamp text.
 * @returns {string} The date part, or "" when the input does not start with one.
 */
function dateOnly(iso) {
  const text = String(iso ?? "");
  const leadingDate = /^(\d{4}-\d{2}-\d{2})/.exec(text);
  if (leadingDate === null) return "";
  return leadingDate[1];
}
/**
 * Index a DB object's items by normalized 6-digit SKU.
 *
 * Items without a recognisable SKU are skipped. When a SKU occurs more than
 * once, the last occurrence wins.
 *
 * @param {object|null} obj - DB object with an `items` array (anything else yields an empty map).
 * @param {object} [options]
 * @param {boolean} [options.includeRemoved=false] - Also index items flagged `removed`.
 * @returns {Map<string, {sku: string, name: string, price: string, url: string, removed: boolean}>}
 */
function mapBySku(obj, { includeRemoved } = { includeRemoved: false }) {
  const bySku = new Map();
  const entries = obj?.items;
  if (!Array.isArray(entries)) return bySku;

  for (const entry of entries) {
    const sku = entry ? normalizeCspc(entry.sku) : "";
    if (!sku) continue;

    const removed = Boolean(entry.removed);
    if (removed && !includeRemoved) continue;

    bySku.set(sku, {
      sku,
      name: String(entry.name || ""),
      price: String(entry.price || ""),
      url: String(entry.url || ""),
      removed,
    });
  }
  return bySku;
}
/**
 * Compare two snapshots of one DB file, matching items by SKU.
 *
 * - new: live now, SKU unknown before (even as a removed entry)
 * - restored: live now, previous entry was flagged removed
 * - removed: live before, now absent or flagged removed
 * - price change: live in both snapshots with different trimmed price text;
 *   `kind` is "price_down"/"price_up" when both prices parse to different
 *   numbers, otherwise "price_change"
 *
 * @param {object|null} prevObj - Older snapshot.
 * @param {object|null} nextObj - Newer snapshot.
 * @returns {{newItems: object[], restoredItems: object[], removedItems: object[], priceChanges: object[]}}
 */
function diffDb(prevObj, nextObj) {
  const prevAll = mapBySku(prevObj, { includeRemoved: true });
  const nextAll = mapBySku(nextObj, { includeRemoved: true });
  const prevLive = mapBySku(prevObj, { includeRemoved: false });
  const nextLive = mapBySku(nextObj, { includeRemoved: false });

  const newItems = [];
  const restoredItems = [];
  const removedItems = [];
  const priceChanges = [];

  // Direction of a price move; "price_change" when it cannot be determined.
  const classifyChange = (oldText, newText) => {
    const oldNum = priceToNumber(oldText);
    const newNum = priceToNumber(newText);
    if (oldNum === null || newNum === null) return "price_change";
    if (newNum < oldNum) return "price_down";
    if (newNum > oldNum) return "price_up";
    return "price_change";
  };

  for (const [sku, now] of nextLive.entries()) {
    // NEW / RESTORED are decided against every previous entry, removed or not.
    const previousAny = prevAll.get(sku);
    if (!previousAny) {
      newItems.push({ ...now });
    } else if (previousAny.removed) {
      restoredItems.push({ ...now });
    }

    // PRICE CHANGES only apply to items that were live before as well.
    const was = prevLive.get(sku);
    if (!was) continue;
    const oldPrice = normPriceStr(was.price);
    const newPrice = normPriceStr(now.price);
    if (oldPrice === newPrice) continue;

    priceChanges.push({
      kind: classifyChange(oldPrice, newPrice),
      sku,
      name: now.name || was.name || "",
      oldPrice,
      newPrice,
      url: now.url || was.url || "",
    });
  }

  // REMOVED
  for (const [sku, was] of prevLive.entries()) {
    const current = nextAll.get(sku);
    if (!current || current.removed) removedItems.push({ ...was });
  }

  return { newItems, restoredItems, removedItems, priceChanges };
}
/**
 * Resolve the current HEAD commit.
 *
 * @returns {string} The HEAD sha, or "" when git cannot resolve it.
 */
function getHeadShaOrEmpty() {
  let headSha = "";
  try {
    headSha = runGit(["rev-parse", "--verify", "HEAD"]);
  } catch {
    headSha = "";
  }
  return headSha;
}
/**
 * Return the first parent of a commit.
 *
 * @param {string} sha - Commit to inspect.
 * @returns {string} The first parent's sha, or "" for a commit without
 *   parents or when git fails.
 */
function firstParentSha(sha) {
  try {
    // Output is "<sha> <parent1> <parent2> ..."; the second token is the first parent.
    const tokens = runGit(["rev-list", "--parents", "-n", "1", sha]).split(/\s+/).filter(Boolean);
    const [, parent = ""] = tokens;
    return parent;
  } catch {
    return "";
  }
}
/**
 * List the data/db files that differ between two states.
 *
 * @param {string} fromSha - Older commit; "" means "no previous state", in
 *   which case every DB file of the target state is returned.
 * @param {string} toSha - Newer commit, or the literal "WORKTREE" for the
 *   files currently on disk.
 * @returns {string[]} Repository-relative paths; [] on any git/fs failure.
 */
function listChangedDbFiles(fromSha, toSha) {
  const isWorktree = toSha === "WORKTREE";

  if (!fromSha) {
    if (isWorktree) {
      // No baseline: fall back to listing the files on disk.
      try {
        const entries = fs.readdirSync(path.join(process.cwd(), "data", "db"), { withFileTypes: true });
        return entries
          .filter((e) => e.isFile() && e.name.endsWith(".json"))
          .map((e) => path.posix.join("data/db", e.name));
      } catch {
        return [];
      }
    }
    if (toSha) return gitListTreeFiles(toSha, "data/db");
  }

  try {
    // Against the worktree, git diff takes a single revision.
    const revisions = isWorktree ? [fromSha] : [fromSha, toSha];
    const diffOutput = runGit(["diff", "--name-only", ...revisions, "--", "data/db"]);
    return diffOutput
      .split(/\r?\n/)
      .map((s) => s.trim())
      .filter(Boolean);
  } catch {
    return [];
  }
}
/**
 * List the commits touching data/db since a point in time.
 *
 * @param {string} sinceIso - Value passed to `git log --since`.
 * @returns {Array<{sha: string, ts: string, date: string}>} Commits ordered
 *   oldest -> newest; [] when git fails.
 */
function logDbCommitsSince(sinceIso) {
  try {
    const logOutput = runGit(["log", `--since=${sinceIso}`, "--format=%H %cI", "--", "data/db"]);
    const commits = [];
    for (const rawLine of logOutput.split(/\r?\n/)) {
      const parsed = rawLine.trim().match(/^([0-9a-f]{7,40})\s+(.+)$/i);
      if (!parsed) continue;
      const [, sha, ts] = parsed;
      commits.push({ sha, ts, date: dateOnly(ts) });
    }
    // git prints newest -> oldest; callers want oldest -> newest.
    return commits.reverse();
  } catch {
    return [];
  }
}
/**
 * Read a numeric setting from the environment, clamped to at least 1.
 * Unset, empty or non-numeric values (including "Infinity") fall back to
 * `fallback` instead of producing NaN.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value used when the variable is unusable.
 * @returns {number}
 */
function envNumberAtLeastOne(name, fallback) {
  const raw = process.env[name];
  if (!raw) return fallback;
  const n = Number(raw);
  return Number.isFinite(n) ? Math.max(1, n) : fallback;
}

/**
 * Sort comparator: newest event first, compared by actual instant.
 * Commit timestamps carry the committer's UTC offset while the worktree
 * timestamp is in UTC ("Z"), so plain string comparison can misorder them.
 * Falls back to string comparison when a timestamp cannot be parsed or the
 * instants are equal.
 */
function compareEventsNewestFirst(a, b) {
  const aMs = Date.parse(a.ts);
  const bMs = Date.parse(b.ts);
  if (Number.isFinite(aMs) && Number.isFinite(bMs) && aMs !== bMs) return bMs - aMs;
  return String(b.ts).localeCompare(String(a.ts));
}

/**
 * Build viz/data/recent.json: the new / restored / removed / price-change
 * events of the last RECENT_DAYS days (default 3), newest first, capped at
 * RECENT_MAX_ITEMS entries (default 500).
 *
 * Events are derived by diffing data/db files between consecutive commits in
 * the window, plus HEAD -> working tree so the current run shows up before it
 * is committed. Paths are resolved against the current working directory.
 */
function main() {
  const repoRoot = process.cwd();
  const outDir = path.join(repoRoot, "viz", "data");
  const outFile = path.join(outDir, "recent.json");
  fs.mkdirSync(outDir, { recursive: true });

  const windowDays = envNumberAtLeastOne("RECENT_DAYS", 3);
  const maxItems = envNumberAtLeastOne("RECENT_MAX_ITEMS", 500);

  const now = new Date();
  const since = new Date(now.getTime() - windowDays * 24 * 3600 * 1000);
  const sinceIso = since.toISOString();

  const headSha = getHeadShaOrEmpty();
  const items = [];

  // Committed runs in the last N days (touching data/db), oldest -> newest.
  const commits = headSha ? logDbCommitsSince(sinceIso) : [];

  // Diff pairs:
  //   parent(of first in window) -> first
  //   then each consecutive commit -> next
  //   then HEAD -> WORKTREE (so this run shows up before the commit exists)
  const pairs = [];
  if (commits.length) {
    const first = commits[0];
    const parent = firstParentSha(first.sha);
    pairs.push({ fromSha: parent || "", toSha: first.sha, ts: first.ts, date: first.date });
    for (let i = 1; i < commits.length; i++) {
      pairs.push({
        fromSha: commits[i - 1].sha,
        toSha: commits[i].sha,
        ts: commits[i].ts,
        date: commits[i].date,
      });
    }
  }
  if (headSha) {
    pairs.push({
      fromSha: headSha,
      toSha: "WORKTREE",
      ts: now.toISOString(),
      date: dateOnly(now.toISOString()),
    });
  }

  for (const { fromSha, toSha, ts, date } of pairs) {
    const files = listChangedDbFiles(fromSha, toSha);
    if (!files.length) continue;

    for (const file of files) {
      const prevObj = fromSha ? gitShowJson(fromSha, file) : null;
      const nextObj =
        toSha === "WORKTREE" ? readJsonFileOrNull(path.join(repoRoot, file)) : gitShowJson(toSha, file);
      if (!prevObj && !nextObj) continue;

      const storeLabel = String(
        nextObj?.storeLabel || nextObj?.store || prevObj?.storeLabel || prevObj?.store || ""
      );
      const categoryLabel = String(
        nextObj?.categoryLabel || nextObj?.category || prevObj?.categoryLabel || prevObj?.category || ""
      );

      // Leading fields shared by every event row; key order is kept stable so
      // the generated JSON does not churn.
      const eventHead = (kind, src) => ({
        ts,
        date,
        fromSha: fromSha || "",
        toSha,
        kind,
        sku: src.sku,
        name: src.name,
        storeLabel,
        categoryLabel,
      });
      const pushListingEvents = (kind, list) => {
        for (const it of list) {
          items.push({ ...eventHead(kind, it), price: normPriceStr(it.price), url: it.url, dbFile: file });
        }
      };

      const { newItems, restoredItems, removedItems, priceChanges } = diffDb(prevObj, nextObj);
      pushListingEvents("new", newItems);
      pushListingEvents("restored", restoredItems);
      pushListingEvents("removed", removedItems);
      for (const u of priceChanges) {
        items.push({
          ...eventHead(u.kind, u),
          oldPrice: normPriceStr(u.oldPrice),
          newPrice: normPriceStr(u.newPrice),
          url: u.url,
          dbFile: file,
        });
      }
    }
  }

  // Newest first
  items.sort(compareEventsNewestFirst);

  // Keep file size under control (still allows multiple runs/day over the window)
  const trimmed = items.slice(0, maxItems);

  const payload = {
    generatedAt: now.toISOString(),
    windowDays,
    since: sinceIso,
    headSha,
    count: trimmed.length,
    items: trimmed,
  };
  fs.writeFileSync(outFile, JSON.stringify(payload, null, 2) + "\n", "utf8");
  process.stdout.write(`Wrote ${outFile} (${trimmed.length} items)\n`);
}
main();

318
tools/diff_report.js Executable file
View file

@ -0,0 +1,318 @@
#!/usr/bin/env node
"use strict";
const { execFileSync } = require("child_process");
const fs = require("fs");
const path = require("path");
const { C, color } = require("../src/utils/ansi");
const { padLeft, padRight } = require("../src/utils/string");
const { normalizeCspc } = require("../src/utils/sku");
const { priceToNumber, salePctOff, normPrice } = require("../src/utils/price");
const { isoTimestampFileSafe } = require("../src/utils/time");
/**
 * Run a git subcommand synchronously and return its stdout as text.
 *
 * @param {string[]} args - Arguments passed to the `git` executable.
 * @returns {string} Captured stdout with trailing whitespace removed.
 * @throws Whatever `execFileSync` throws (e.g. on a non-zero exit status).
 */
function runGit(args) {
  const stdout = execFileSync("git", args, { encoding: "utf8" });
  return stdout.trimEnd();
}
/**
 * Read the contents of a file as it exists at a given commit (`git show <sha>:<path>`).
 *
 * @param {string} sha - Commit-ish to read from.
 * @param {string} filePath - Repository-relative path of the file.
 * @returns {string|null} File contents, or null when git cannot produce them
 *   (for example the path does not exist at that commit).
 */
function gitShowText(sha, filePath) {
  try {
    return execFileSync("git", ["show", `${sha}:${filePath}`], {
      encoding: "utf8",
      // The default maxBuffer is 1 MiB; a larger DB file would throw here and be
      // misreported as "file missing", so raise the ceiling well above any DB size.
      maxBuffer: 256 * 1024 * 1024,
      // A missing path is an expected outcome (files added/removed between commits);
      // keep git's "fatal: path ... does not exist" noise out of the report output.
      stdio: ["ignore", "pipe", "ignore"],
    });
  } catch {
    return null;
  }
}
/**
 * List every file tracked under a directory at a given commit.
 *
 * @param {string} sha - Commit-ish to inspect.
 * @param {string} dbDirRel - Repository-relative directory to list recursively.
 * @returns {Set<string>} Trimmed, non-empty paths printed by `git ls-tree -r --name-only`.
 */
function gitListDbFiles(sha, dbDirRel) {
  const listing = runGit(["ls-tree", "-r", "--name-only", sha, dbDirRel]);
  const names = new Set();
  for (const rawLine of listing.split(/\r?\n/)) {
    const name = rawLine.trim();
    if (name) names.add(name);
  }
  return names;
}
/**
 * Parse JSON text, returning null instead of throwing.
 *
 * @param {string|null|undefined} txt - JSON source; null/undefined short-circuits to null.
 * @returns {*} The parsed value, or null when the input is absent or not valid JSON.
 */
function parseJsonOrNull(txt) {
  if (txt === null || txt === undefined) return null;
  let parsed;
  try {
    parsed = JSON.parse(txt);
  } catch {
    return null;
  }
  return parsed;
}
/**
 * Index a DB object's items by URL.
 *
 * Only rows whose `url` is a string starting with "http" are kept; when a URL
 * repeats, the later row replaces the earlier one.
 *
 * @param {{items?: Array<object>}|null|undefined} obj - Parsed DB file (may be null).
 * @returns {Map<string, {name: string, price: string, sku: string, url: string, removed: boolean}>}
 */
function mapItemsByUrl(obj) {
  const rows = Array.isArray(obj?.items) ? obj.items : [];
  const byUrl = new Map();
  rows.forEach((row) => {
    const usable = row && typeof row.url === "string" && row.url.startsWith("http");
    if (!usable) return;
    byUrl.set(row.url, {
      name: String(row.name || ""),
      price: String(row.price || ""),
      sku: String(row.sku || ""),
      url: row.url,
      removed: Boolean(row.removed),
    });
  });
  return byUrl;
}
/**
 * Diff two snapshots of one DB file, keyed by item URL.
 *
 * Classification per URL:
 *  - new:      absent before, present and not removed now
 *  - restored: present-but-removed before, present and not removed now
 *  - removed:  live before, now absent or flagged removed
 *  - updated:  live in both snapshots with a different normalized price
 *
 * @param {object|null} prevObj - Parsed DB at the older commit (null if missing).
 * @param {object|null} nextObj - Parsed DB at the newer commit (null if missing).
 * @returns {{newItems: object[], restoredItems: object[], removedItems: object[], updatedItems: object[]}}
 */
function buildDiffForDb(prevObj, nextObj) {
  const before = mapItemsByUrl(prevObj);
  const after = mapItemsByUrl(nextObj);
  const result = { newItems: [], restoredItems: [], removedItems: [], updatedItems: [] };

  // Old URLs first, then URLs that only exist in the new snapshot.
  for (const url of new Set([...before.keys(), ...after.keys()])) {
    const oldItem = before.get(url);
    const newItem = after.get(url);
    const wasLive = Boolean(oldItem) && !oldItem.removed;
    const isLive = Boolean(newItem) && !newItem.removed;

    if (!oldItem) {
      if (isLive) result.newItems.push({ ...newItem });
    } else if (!wasLive) {
      if (isLive) result.restoredItems.push({ ...newItem });
    } else if (!isLive) {
      result.removedItems.push({ ...oldItem });
    } else if (normPrice(oldItem.price) !== normPrice(newItem.price)) {
      result.updatedItems.push({
        name: newItem.name || oldItem.name || "",
        sku: normalizeCspc(newItem.sku || oldItem.sku || ""),
        oldPrice: oldItem.price || "",
        newPrice: newItem.price || "",
        url,
      });
    }
  }
  return result;
}
/**
 * Parse the command-line arguments of the diff report tool.
 *
 * Supported forms:
 *   <fromSha> <toSha>            positional commits
 *   --db-dir <dir> | --db-dir=<dir>
 *   --out <file>   | --out=<file>
 *   --color / --no-color         stored in `flags` as "color" / "no-color"
 * Any other dash-prefixed argument is stored verbatim in `flags`.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{fromSha: string, toSha: string, dbDir: string, outFile: string, flags: Set<string>}}
 */
function parseArgs(argv) {
  const valueOptions = new Set(["--db-dir", "--out"]);
  const flags = new Set();
  const kv = new Map();
  const positional = [];

  for (let i = 0; i < argv.length; i++) {
    const a = argv[i];
    if (!a.startsWith("-")) {
      positional.push(a);
      continue;
    }
    if (a === "--no-color") {
      flags.add("no-color");
      continue;
    }
    if (a === "--color") {
      flags.add("color");
      continue;
    }
    // `--out=file` form: previously this fell through to `flags` and was silently ignored.
    const eq = a.indexOf("=");
    if (eq > 0 && valueOptions.has(a.slice(0, eq))) {
      kv.set(a.slice(0, eq), a.slice(eq + 1));
      continue;
    }
    // `--out file` form: the value must exist and must not look like another option.
    if (valueOptions.has(a) && argv[i + 1] && !argv[i + 1].startsWith("-")) {
      kv.set(a, argv[i + 1]);
      i++;
      continue;
    }
    flags.add(a);
  }

  const fromSha = positional[0] || "";
  const toSha = positional[1] || "";
  const dbDir = kv.get("--db-dir") || "data/db";
  const outFile = kv.get("--out") || "";
  return { fromSha, toSha, dbDir, outFile, flags };
}
/**
 * Render a diff report as text: header, totals, per-category table, then the
 * NEW / RESTORED / REMOVED / PRICE CHANGES sections (each omitted when empty).
 *
 * The input report is not modified: item lists are copied before sorting.
 *
 * @param {object} diffReport - `{categories, totals, newItems, restoredItems, removedItems, updatedItems}`.
 * @param {{fromSha: string, toSha: string, colorize: boolean}} opts - Commit labels for the
 *   header and whether to emit ANSI colors.
 * @returns {string} The full report, newline-terminated.
 */
function renderDiffReport(diffReport, { fromSha, toSha, colorize }) {
  const paint = (s, code) => color(s, code, colorize);
  let out = "";
  const ln = (s = "") => {
    out += String(s) + "\n";
  };

  // Widest `catLabel` across the given lists, never below `floor`. A loop is used
  // instead of Math.max(...spread) so very large reports cannot exceed the
  // engine's argument-count limit.
  const widestLabel = (floor, ...lists) => {
    let width = floor;
    for (const list of lists) {
      for (const entry of list) {
        if (entry.catLabel.length > width) width = entry.catLabel.length;
      }
    }
    return width;
  };
  const byLabelThenName = (a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name);
  // Sort a copy so rendering never reorders the caller's arrays.
  const sorted = (list) => [...list].sort(byLabelThenName);

  ln(paint("========== DIFF REPORT ==========", C.bold));
  ln(`${paint("From", C.bold)} ${fromSha} ${paint("to", C.bold)} ${toSha}`);
  ln(
    `${paint("Totals", C.bold)} | Categories=${diffReport.categories.length} | New=${diffReport.totals.newCount} | Restored=${diffReport.totals.restoredCount} | Removed=${diffReport.totals.removedCount} | PriceChanges=${diffReport.totals.updatedCount}`
  );
  ln("");

  const rows = diffReport.categories;
  const catW = Math.min(56, widestLabel(12, rows));
  ln(paint("Per-category summary:", C.bold));
  ln(`${padRight("Store | Category", catW)} ${padLeft("New", 4)} ${padLeft("Res", 4)} ${padLeft("Rem", 4)} ${padLeft("Upd", 4)}`);
  ln(`${"-".repeat(catW)} ---- ---- ---- ----`);
  for (const r of rows) {
    ln(`${padRight(r.catLabel, catW)} ${padLeft(r.newCount, 4)} ${padLeft(r.restoredCount, 4)} ${padLeft(r.removedCount, 4)} ${padLeft(r.updatedCount, 4)}`);
  }
  ln("");

  const labelW = widestLabel(
    16,
    diffReport.newItems,
    diffReport.restoredItems,
    diffReport.removedItems,
    diffReport.updatedItems
  );
  const skuInline = (sku) => {
    const s = normalizeCspc(sku);
    return s ? paint(` ${s}`, C.gray) : "";
  };

  // NEW / RESTORED / REMOVED share one layout; only the title, marker and colors differ.
  const itemSection = (title, titleCode, marker, markerCode, list) => {
    if (!list.length) return;
    ln(paint(`${title} (${list.length})`, titleCode));
    for (const it of sorted(list)) {
      const price = it.price ? paint(it.price, C.cyan) : paint("(no price)", C.gray);
      ln(`${paint(marker, markerCode)} ${padRight(it.catLabel, labelW)} | ${paint(it.name, C.bold)}${skuInline(it.sku)} ${price}`);
      ln(` ${paint(it.url, C.dim)}`);
    }
    ln("");
  };
  itemSection("NEW", C.bold + C.green, "+", C.green, diffReport.newItems);
  itemSection("RESTORED", C.bold + C.green, "R", C.green, diffReport.restoredItems);
  itemSection("REMOVED", C.bold + C.yellow, "-", C.yellow, diffReport.removedItems);

  if (diffReport.updatedItems.length) {
    ln(paint(`PRICE CHANGES (${diffReport.updatedItems.length})`, C.bold + C.cyan));
    for (const u of sorted(diffReport.updatedItems)) {
      const oldRaw = u.oldPrice || "";
      const newRaw = u.newPrice || "";
      const oldN = priceToNumber(oldRaw);
      const newN = priceToNumber(newRaw);
      const oldP = oldRaw ? paint(oldRaw, C.yellow) : paint("(no price)", C.gray);
      let newP = newRaw ? newRaw : "(no price)";
      let offTag = "";
      if (Number.isFinite(oldN) && Number.isFinite(newN) && newN > oldN) {
        // Increase: red.
        newP = paint(newP, C.red);
      } else if (Number.isFinite(oldN) && Number.isFinite(newN) && newN < oldN) {
        // Decrease: green, plus a percent-off tag when one can be computed.
        newP = paint(newP, C.green);
        const pct = salePctOff(oldRaw, newRaw);
        if (pct !== null) offTag = " " + paint(`[${pct}% Off]`, C.green);
      } else {
        // Numerically equal, or one side is not a comparable number.
        newP = paint(newP, C.cyan);
      }
      ln(
        `${paint("~", C.cyan)} ${padRight(u.catLabel, labelW)} | ${paint(u.name, C.bold)}${skuInline(u.sku)} ${oldP} ${paint("->", C.gray)} ${newP}${offTag}`
      );
      ln(` ${paint(u.url, C.dim)}`);
    }
    ln("");
  }

  ln(paint("======== END DIFF REPORT ========", C.bold));
  return out;
}
/**
 * CLI entry point: diff every DB file between two commits, print the report
 * to stdout and optionally write an uncolored copy to `--out`.
 *
 * Color selection: `--no-color` wins, then `--color` forces color on,
 * otherwise color is used only when stdout is a TTY.
 *
 * Sets `process.exitCode = 2` and returns when either commit argument is missing.
 */
async function main() {
  const { fromSha, toSha, dbDir, outFile, flags } = parseArgs(process.argv.slice(2));
  if (!fromSha || !toSha) {
    console.error(`Usage: ${path.basename(process.argv[1])} <fromSha> <toSha> [--db-dir data/db] [--out reports/<file>.txt] [--color|--no-color]`);
    process.exitCode = 2;
    return;
  }

  // `--color` used to be parsed but never consulted, so it could not force
  // color when piping (e.g. into `less -R`).
  let colorize = Boolean(process.stdout && process.stdout.isTTY);
  if (flags.has("color")) colorize = true;
  if (flags.has("no-color")) colorize = false;

  // Short SHAs are passed straight through to git. Files present on either side are diffed.
  const filesA = gitListDbFiles(fromSha, dbDir);
  const filesB = gitListDbFiles(toSha, dbDir);
  const files = new Set([...filesA, ...filesB]);

  const diffReport = {
    categories: [],
    totals: { newCount: 0, updatedCount: 0, removedCount: 0, restoredCount: 0 },
    newItems: [],
    restoredItems: [],
    removedItems: [],
    updatedItems: [],
  };

  for (const file of [...files].sort()) {
    const prevObj = parseJsonOrNull(gitShowText(fromSha, file));
    const nextObj = parseJsonOrNull(gitShowText(toSha, file));
    const storeLabel = String(nextObj?.storeLabel || prevObj?.storeLabel || nextObj?.store || prevObj?.store || "?");
    const catLabel = String(nextObj?.categoryLabel || prevObj?.categoryLabel || nextObj?.category || prevObj?.category || path.basename(file));
    const catLabelFull = `${storeLabel} | ${catLabel}`;
    const { newItems, restoredItems, removedItems, updatedItems } = buildDiffForDb(prevObj, nextObj);

    diffReport.categories.push({
      catLabel: catLabelFull,
      newCount: newItems.length,
      restoredCount: restoredItems.length,
      removedCount: removedItems.length,
      updatedCount: updatedItems.length,
    });
    diffReport.totals.newCount += newItems.length;
    diffReport.totals.restoredCount += restoredItems.length;
    diffReport.totals.removedCount += removedItems.length;
    diffReport.totals.updatedCount += updatedItems.length;

    // Tag each entry with its "Store | Category" label for the combined sections.
    for (const it of newItems) diffReport.newItems.push({ catLabel: catLabelFull, ...it });
    for (const it of restoredItems) diffReport.restoredItems.push({ catLabel: catLabelFull, ...it });
    for (const it of removedItems) diffReport.removedItems.push({ catLabel: catLabelFull, ...it });
    for (const u of updatedItems) diffReport.updatedItems.push({ catLabel: catLabelFull, ...u });
  }

  const reportText = renderDiffReport(diffReport, { fromSha, toSha, colorize });
  process.stdout.write(reportText);

  const outPath = outFile
    ? (path.isAbsolute(outFile) ? outFile : path.join(process.cwd(), outFile))
    : "";
  if (outPath) {
    // The file copy is always uncolored; reuse the stdout text when it already is.
    const plainText = colorize ? renderDiffReport(diffReport, { fromSha, toSha, colorize: false }) : reportText;
    fs.mkdirSync(path.dirname(outPath), { recursive: true });
    fs.writeFileSync(outPath, plainText, "utf8");
  }
}
// Run the CLI; on any rejection print the stack (or the stringified value) and exit non-zero.
main().catch((err) => {
  console.error(err?.stack || String(err));
  process.exitCode = 1;
});

10
tracker.js Executable file
View file

@ -0,0 +1,10 @@
#!/usr/bin/env node
"use strict";
const { main } = require("./src/main");
// Start the tracker; report a failure with its stack when available and exit non-zero.
main().catch((err) => {
  console.error(err?.stack || String(err));
  process.exitCode = 1;
});

841
viz/app.js Normal file
View file

@ -0,0 +1,841 @@
"use strict";
/**
* Hash routes:
* #/ search
* #/item/<sku> detail
*/
// Root element that every view renders into (the element with id "app").
const $app = document.getElementById("app");
/**
 * Escape a value for safe interpolation into HTML text or attribute values.
 * null/undefined become the empty string; everything else is stringified first.
 */
function esc(s) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  const text = String(s ?? "");
  return text.replace(/[&<>"']/g, (ch) => entities[ch]);
}
/**
 * Extract a numeric price from a display string such as "$1,299.99".
 *
 * Everything except digits and "." is dropped before conversion.
 *
 * @param {*} v - Price text (or anything stringifiable).
 * @returns {number|null} The parsed number, or null when the input holds no
 *   digits or does not form a single valid number (e.g. "$10 - $20").
 */
function parsePriceToNumber(v) {
  const digits = String(v ?? "").replace(/[^0-9.]/g, "");
  // Number("") is 0, so a missing price must be rejected explicitly; otherwise
  // it is treated as a real $0 price (wins "cheapest", plots as 0 on the chart).
  if (!digits) return null;
  const n = Number(digits);
  return Number.isFinite(n) ? n : null;
}
/**
 * Return the leading `YYYY-MM-DD` part of a timestamp string, or "" when the
 * value does not start with that pattern.
 */
function dateOnly(iso) {
  const leadingDate = /^(\d{4}-\d{2}-\d{2})/.exec(String(iso ?? ""));
  if (leadingDate === null) return "";
  return leadingDate[1];
}
/**
 * Make a timestamp slightly friendlier for display by replacing the first "T"
 * with a space. Falsy input yields "".
 */
function prettyTs(iso) {
  const text = String(iso || "");
  return text === "" ? "" : text.replace("T", " ");
}
/**
 * Build an `unknown:<store-slug>:<url-tag>` key for a row that has no SKU.
 *
 * The store slug is the lowercased store label with non-alphanumeric runs
 * replaced by "-"; the URL tag is the first 16 characters of the unpadded
 * base64 of the UTF-8 URL, or "no-url" when the row has no URL.
 */
function makeUnknownSku(r) {
  const storeSlug = String(r?.storeLabel || r?.store || "store")
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-");
  const url = String(r?.url || "");

  let urlTag = "no-url";
  if (url) {
    // encodeURIComponent + unescape yields a byte string that btoa accepts.
    const base64 = btoa(unescape(encodeURIComponent(url)));
    urlTag = base64.replace(/=+$/g, "").slice(0, 16);
  }
  return `unknown:${storeSlug}:${urlTag}`;
}
/**
 * 32-bit FNV-1a hash over the string's UTF-16 code units.
 *
 * @param {string} str - Text to hash.
 * @returns {string} The unsigned hash as exactly 8 lowercase hex digits.
 */
function fnv1a32(str) {
  const FNV_PRIME = 0x01000193;
  let hash = 0x811c9dc5; // FNV offset basis
  for (let idx = 0; idx < str.length; idx += 1) {
    // XOR the code unit in, then multiply with 32-bit wraparound.
    hash = Math.imul(hash ^ str.charCodeAt(idx), FNV_PRIME);
  }
  // `>>> 0` reinterprets the signed 32-bit result as unsigned before formatting.
  return (hash >>> 0).toString(16).padStart(8, "0");
}
/**
 * Derive a stand-in SKU of the form `u:<hash>` for a row without a real SKU.
 * The hash input is "<store label>|<url>", so the key is stable per store + URL.
 */
function makeSyntheticSku(r) {
  const storeName = String(r?.storeLabel || r?.store || "store");
  const link = String(r?.url || "");
  const digest = fnv1a32(`${storeName}|${link}`);
  return `u:${digest}`;
}
/**
 * Grouping key for a listing row: its trimmed SKU when it has one, otherwise a
 * synthetic `u:` key derived from store + URL.
 */
function keySkuForRow(r) {
  const realSku = String(r?.sku || "").trim();
  if (realSku) return realSku;
  return makeSyntheticSku(r);
}
/**
 * Human-facing form of a SKU key: synthetic `u:` keys are shown as "unknown",
 * anything else is shown as-is (falsy keys become "").
 */
function displaySku(key) {
  const text = String(key || "");
  return text.startsWith("u:") ? "unknown" : text;
}
/**
 * Normalize text for searching: lowercase it and reduce it to runs of
 * [a-z0-9] separated by single spaces, with no leading/trailing space.
 */
function normSearchText(s) {
  const lowered = String(s ?? "").toLowerCase();
  // Splitting on every non-alphanumeric run and re-joining with one space is the
  // same as replacing those runs with a space, collapsing and trimming.
  return lowered
    .split(/[^a-z0-9]+/)
    .filter((word) => word !== "")
    .join(" ");
}
/**
 * Split a raw search query into normalized tokens.
 *
 * @returns {string[]} Non-empty tokens; an empty array when the normalized query is empty.
 */
function tokenizeQuery(q) {
  const normalized = normSearchText(q);
  if (!normalized) return [];
  return normalized.split(" ").filter((token) => token !== "");
}
/**
 * Work out which GitHub repository this page belongs to.
 *
 * On a `<owner>.github.io` host the owner comes from the hostname and the repo
 * from the first path segment (or `<owner>.github.io` when the path is empty).
 * On any other host the hard-coded default repository is returned.
 *
 * @returns {{owner: string, repo: string}}
 */
function inferGithubOwnerRepo() {
  const pagesHost = /^([a-z0-9-]+)\.github\.io$/i.exec(location.hostname || "");
  if (!pagesHost) {
    return { owner: "brennanwilkes", repo: "spirit-tracker" };
  }
  const owner = pagesHost[1];
  const segments = (location.pathname || "/").split("/").filter(Boolean);
  const repo = segments.length >= 1 ? segments[0] : `${owner}.github.io`;
  return { owner, repo };
}
/**
 * Fetch a URL, bypassing the HTTP cache, and parse the body as JSON.
 *
 * @throws {Error} `HTTP <status> for <url>` when the response is not ok.
 */
async function fetchJson(url) {
  const response = await fetch(url, { cache: "no-store" });
  if (response.ok) {
    return await response.json();
  }
  throw new Error(`HTTP ${response.status} for ${url}`);
}
/**
 * Fetch a URL, bypassing the HTTP cache, and return the body as text.
 *
 * @throws {Error} `HTTP <status> for <url>` when the response is not ok.
 */
async function fetchText(url) {
  const response = await fetch(url, { cache: "no-store" });
  if (response.ok) {
    return await response.text();
  }
  throw new Error(`HTTP ${response.status} for ${url}`);
}
/**
 * Dispatch on the current location hash:
 *   #/             -> search view
 *   #/item/<sku>   -> detail view for that SKU key
 * Anything else falls back to the search view.
 *
 * @returns Whatever the selected view renderer returns.
 */
function route() {
  const h = location.hash || "#/";
  const parts = h.replace(/^#\/?/, "").split("/").filter(Boolean);
  if (parts[0] === "item" && parts[1]) {
    // Links are built with encodeURIComponent(sku), so the segment must be decoded
    // here; otherwise keys such as "u:abcd" arrive as "u%3Aabcd" and match nothing.
    let sku = parts[1];
    try {
      sku = decodeURIComponent(parts[1]);
    } catch {
      // Malformed escape sequence (hand-edited URL): fall back to the raw segment.
    }
    return renderItem(sku);
  }
  return renderSearch();
}
/* ---------------- Search ---------------- */
// In-memory copy of ./data/index.json, filled on first use by loadIndex().
let INDEX = null;
// In-memory copy of ./data/recent.json (or an empty stand-in), filled by loadRecent().
let RECENT = null;
// localStorage key used to persist the search box value across navigation.
const Q_LS_KEY = "stviz:v1:search:q";
/**
 * Read the persisted search query.
 *
 * @returns {string} The stored query, or "" when nothing is stored or
 *   localStorage access throws.
 */
function loadSavedQuery() {
  let saved = "";
  try {
    saved = localStorage.getItem(Q_LS_KEY) || "";
  } catch {
    saved = "";
  }
  return saved;
}
/**
 * Persist the search query (null/undefined are stored as ""). Failures to
 * write to localStorage are ignored on purpose: persistence is best-effort.
 */
function saveQuery(v) {
  try {
    const text = String(v ?? "");
    localStorage.setItem(Q_LS_KEY, text);
  } catch {
    // Storage unavailable or full: the query simply is not remembered.
  }
}
/**
 * Return the listing index, fetching ./data/index.json on first use and
 * serving the in-memory copy afterwards. A failed fetch rejects and leaves
 * the cache empty so a later call retries.
 */
async function loadIndex() {
  if (!INDEX) {
    INDEX = await fetchJson("./data/index.json");
  }
  return INDEX;
}
/**
 * Return the recent-changes feed, fetching ./data/recent.json on first use.
 * If the fetch fails an empty feed `{count: 0, items: []}` is cached instead,
 * so this function never rejects.
 */
async function loadRecent() {
  if (!RECENT) {
    try {
      RECENT = await fetchJson("./data/recent.json");
    } catch {
      RECENT = { count: 0, items: [] };
    }
  }
  return RECENT;
}
/**
 * Normalize an image URL: trim it and reject empty values and inline `data:`
 * URIs (both yield "").
 */
function normImg(s) {
  const src = String(s || "").trim();
  const isInlineData = /^data:/i.test(src);
  return src && !isInlineData ? src : "";
}
/**
 * Build one row per SKU key plus a combined searchable text across all listings of that SKU.
 *
 * Each returned row has: `sku`, `name` (first non-empty name seen), `img`,
 * `cheapestPriceStr` / `cheapestPriceNum` / `cheapestStoreLabel`, `stores`
 * (Set of store labels), `sampleUrl` (first non-empty URL) and `searchText`
 * (normalized blob of sku, names, urls and store labels). Temporary
 * underscore-prefixed fields are deleted before returning.
 *
 * @param {Array<object>} listings - Rows from the index (one per store listing).
 * @returns {Array<object>} Aggregated rows sorted by name, then sku.
 */
function aggregateBySku(listings) {
const bySku = new Map();
for (const r of listings) {
// Rows without a real SKU are grouped under a synthetic key.
const sku = keySkuForRow(r);
const name = String(r?.name || "");
const url = String(r?.url || "");
const storeLabel = String(r?.storeLabel || r?.store || "");
const img = normImg(r?.img || r?.image || r?.thumb || "");
const pNum = parsePriceToNumber(r?.price);
const pStr = String(r?.price || "");
let agg = bySku.get(sku);
if (!agg) {
// First listing for this key seeds the aggregate, including the initial "cheapest" values.
agg = {
sku,
name: name || "",
img: "",
cheapestPriceStr: pStr || "",
cheapestPriceNum: pNum,
cheapestStoreLabel: storeLabel || "",
stores: new Set(),
sampleUrl: url || "",
_searchParts: [],
searchText: "", // normalized blob
_imgByName: new Map(), // name -> img
_imgAny: "",
};
bySku.set(sku, agg);
}
if (storeLabel) agg.stores.add(storeLabel);
if (!agg.sampleUrl && url) agg.sampleUrl = url;
// Keep the first non-empty name (existing behavior), but make sure img matches that chosen name
if (!agg.name && name) {
agg.name = name;
if (img) agg.img = img;
} else if (agg.name && name === agg.name && img && !agg.img) {
agg.img = img;
}
// Remember fallback images: the first one seen, and the latest one per listing name.
if (img) {
if (!agg._imgAny) agg._imgAny = img;
if (name) agg._imgByName.set(name, img);
}
// cheapest
if (pNum !== null) {
if (agg.cheapestPriceNum === null || pNum < agg.cheapestPriceNum) {
agg.cheapestPriceNum = pNum;
agg.cheapestPriceStr = pStr || "";
agg.cheapestStoreLabel = storeLabel || agg.cheapestStoreLabel;
}
}
// search parts (include everything we might want to match)
agg._searchParts.push(sku);
if (name) agg._searchParts.push(name);
if (url) agg._searchParts.push(url);
if (storeLabel) agg._searchParts.push(storeLabel);
}
const out = [...bySku.values()];
for (const it of out) {
// Ensure thumbnail matches chosen name when possible
if (!it.img) {
const m = it._imgByName;
if (it.name && m && m.has(it.name)) it.img = m.get(it.name) || "";
else it.img = it._imgAny || "";
}
// Drop the temporary image bookkeeping so it does not leak to callers.
delete it._imgByName;
delete it._imgAny;
// Ensure at least these are in the blob even if index rows are already aggregated
it._searchParts.push(it.sku);
it._searchParts.push(it.name || "");
it._searchParts.push(it.sampleUrl || "");
it._searchParts.push(it.cheapestStoreLabel || "");
it.searchText = normSearchText(it._searchParts.join(" | "));
delete it._searchParts;
}
// Stable display order: by name, with the sku appended as a tie-breaker.
out.sort((a, b) => (String(a.name) + a.sku).localeCompare(String(b.name) + b.sku));
return out;
}
/**
 * AND-match: true when every token occurs as a substring of the normalized
 * haystack. An empty token list matches everything.
 */
function matchesAllTokens(hayNorm, tokens) {
  return tokens.every((token) => hayNorm.includes(token));
}
/**
 * HTML for a thumbnail: a lazy-loading `<img>` that hides itself on load
 * error, or a placeholder `<div>` when there is no usable image URL.
 *
 * @param {string} imgUrl - Candidate image URL (normalized via normImg).
 * @param {string} [cls="thumb"] - CSS class for the `<img>` element.
 * @returns {string} HTML markup.
 */
function renderThumbHtml(imgUrl, cls = "thumb") {
  const src = normImg(imgUrl);
  return src
    ? `<img class="${esc(cls)}" src="${esc(src)}" alt="" loading="lazy" onerror="this.style.display='none'" />`
    : `<div class="thumbPlaceholder"></div>`;
}
/**
 * Render the search view into `$app`.
 *
 * Behaviour:
 *  - restores the last query from localStorage and saves it on every input;
 *  - with an empty query, shows the "recently changed" feed;
 *  - otherwise shows up to 80 aggregated items whose search text contains
 *    every query token;
 *  - clicking a result saves the query and navigates to `#/item/<sku>`.
 */
function renderSearch() {
  $app.innerHTML = `
    <div class="container">
      <div class="header">
        <h1 class="h1">Spirit Tracker Viz</h1>
        <div class="small">Search name / url / sku (word AND)</div>
      </div>
      <div class="card">
        <input id="q" class="input" placeholder="e.g. bowmore sherry, 303821, sierrasprings..." autocomplete="off" />
        <div id="results" class="list"></div>
      </div>
    </div>
  `;

  const $q = document.getElementById("q");
  const $results = document.getElementById("results");
  $q.value = loadSavedQuery();

  const TYPE_TO_SEARCH_HTML = `<div class="small">Type to search…</div>`;
  // Badge text per change kind; unknown kinds fall back to "CHANGE".
  const KIND_LABELS = new Map([
    ["new", "NEW"],
    ["restored", "RESTORED"],
    ["removed", "REMOVED"],
    ["price_down", "PRICE ↓"],
    ["price_up", "PRICE ↑"],
    ["price_change", "PRICE"],
  ]);

  let aggBySku = new Map(); // sku key -> aggregate row, used for thumbnails in the recent feed
  let allAgg = [];
  let indexReady = false;

  // Make every rendered result navigate to its detail view (rows without a sku are inert).
  function wireItemClicks() {
    for (const el of Array.from($results.querySelectorAll(".item"))) {
      el.addEventListener("click", () => {
        const sku = el.getAttribute("data-sku") || "";
        if (!sku) return;
        saveQuery($q.value);
        location.hash = `#/item/${encodeURIComponent(sku)}`;
      });
    }
  }

  // Render search matches (first 80 only).
  function renderAggregates(items) {
    if (!items.length) {
      $results.innerHTML = `<div class="small">No matches.</div>`;
      return;
    }
    const limited = items.slice(0, 80);
    $results.innerHTML = limited
      .map((it) => {
        const storeCount = it.stores.size || 0;
        const plus = storeCount > 1 ? ` +${storeCount - 1}` : "";
        const price = it.cheapestPriceStr ? it.cheapestPriceStr : "(no price)";
        const store = it.cheapestStoreLabel || ([...it.stores][0] || "Store");
        return `
          <div class="item" data-sku="${esc(it.sku)}">
            <div class="itemRow">
              <div class="thumbBox">
                ${renderThumbHtml(it.img)}
              </div>
              <div class="itemBody">
                <div class="itemTop">
                  <div class="itemName">${esc(it.name || "(no name)")}</div>
                  <span class="badge mono">${esc(displaySku(it.sku))}</span>
                </div>
                <div class="meta">
                  <span class="mono">${esc(price)}</span>
                  <span class="badge">${esc(store)}${esc(plus)}</span>
                </div>
                <div class="meta">
                  <span class="mono">${esc(it.sampleUrl || "")}</span>
                </div>
              </div>
            </div>
          </div>
        `;
      })
      .join("");
    wireItemClicks();
  }

  // Render the "recently changed" feed (first 140 entries).
  function renderRecent(recent) {
    const items = Array.isArray(recent?.items) ? recent.items : [];
    if (!items.length) {
      $results.innerHTML = TYPE_TO_SEARCH_HTML;
      return;
    }
    const days = Number.isFinite(Number(recent?.windowDays)) ? Number(recent.windowDays) : 3;
    const limited = items.slice(0, 140);
    $results.innerHTML =
      `<div class="small">Recently changed (last ${esc(days)} day(s)):</div>` +
      limited
        .map((r) => {
          const kind = KIND_LABELS.get(r.kind) || "CHANGE";
          const isListingEvent = r.kind === "new" || r.kind === "restored" || r.kind === "removed";
          // Kept raw here and escaped exactly once in the template below. Price
          // changes get an arrow between the old and new price.
          const priceLine = isListingEvent
            ? String(r.price || "")
            : `${r.oldPrice || ""} → ${r.newPrice || ""}`;
          const when = r.ts ? prettyTs(r.ts) : r.date || "";
          const sku = String(r.sku || "");
          const img = aggBySku.get(sku)?.img || "";
          return `
            <div class="item" data-sku="${esc(sku)}">
              <div class="itemRow">
                <div class="thumbBox">
                  ${renderThumbHtml(img)}
                </div>
                <div class="itemBody">
                  <div class="itemTop">
                    <div class="itemName">${esc(r.name || "(no name)")}</div>
                    <span class="badge mono">${esc(displaySku(sku))}</span>
                  </div>
                  <div class="meta">
                    <span class="badge">${esc(kind)}</span>
                    <span class="badge">${esc(r.storeLabel || "")}</span>
                    <span class="mono">${esc(priceLine)}</span>
                  </div>
                  <div class="meta">
                    <span class="mono">${esc(when)}</span>
                  </div>
                  <div class="meta">
                    <span class="mono">${esc(r.url || "")}</span>
                  </div>
                </div>
              </div>
            </div>
          `;
        })
        .join("");
    wireItemClicks();
  }

  // Re-render results for the current query (no-op until the index has loaded).
  function applySearch() {
    if (!indexReady) return;
    const tokens = tokenizeQuery($q.value);
    if (tokens.length) {
      renderAggregates(allAgg.filter((it) => matchesAllTokens(it.searchText, tokens)));
      return;
    }
    loadRecent()
      .then((recent) => {
        // The user may have typed while the feed was loading; never overwrite real results.
        if (!tokenizeQuery($q.value).length) renderRecent(recent);
      })
      .catch(() => {
        $results.innerHTML = TYPE_TO_SEARCH_HTML;
      });
  }

  $results.innerHTML = `<div class="small">Loading index…</div>`;
  loadIndex()
    .then((idx) => {
      const listings = Array.isArray(idx.items) ? idx.items : [];
      allAgg = aggregateBySku(listings);
      aggBySku = new Map(allAgg.map((x) => [String(x.sku || ""), x]));
      indexReady = true;
      $q.focus();
      // Covers both cases: a restored query, or the recent feed when the box is empty.
      applySearch();
    })
    .catch((e) => {
      $results.innerHTML = `<div class="small">Failed to load: ${esc(e?.message ?? e)}</div>`;
    });

  // Debounce typing slightly so fast input does not re-render on every keystroke.
  let t = null;
  $q.addEventListener("input", () => {
    saveQuery($q.value);
    if (t) clearTimeout(t);
    t = setTimeout(applySearch, 50);
  });
}
/* ---------------- Detail (chart) ---------------- */
// The chart instance currently shown on the detail view, if any.
let CHART = null;

/** Destroy the active chart (if there is one) and forget it. */
function destroyChart() {
  if (!CHART) return;
  CHART.destroy();
  CHART = null;
}
/**
 * List commits that touched a path on a branch, via the GitHub REST API.
 *
 * Pages of 100 commits are requested in order until a page comes back short,
 * a response is not an array, or `maxPages` pages have been read.
 *
 * @param {object} opts
 * @param {string} opts.owner - Repository owner.
 * @param {string} opts.repo - Repository name.
 * @param {string} opts.branch - Branch (or SHA) to start listing from.
 * @param {string} opts.path - File path to filter commits by.
 * @param {number} [opts.maxPages=2] - Upper bound on API requests (100 commits each).
 * @returns {Promise<object[]>} Commit objects in the order the API returned them.
 * @throws {Error} When a request fails (propagated from fetchJson).
 */
async function githubListCommits({ owner, repo, branch, path, maxPages = 2 }) {
  const perPage = 100;
  const base = `https://api.github.com/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}/commits`;
  const query = `sha=${encodeURIComponent(branch)}&path=${encodeURIComponent(path)}&per_page=${perPage}`;

  const commits = [];
  for (let page = 1; page <= maxPages; page++) {
    const batch = await fetchJson(`${base}?${query}&page=${page}`);
    // A non-array body (e.g. an error object) ends the listing with what we have.
    if (!Array.isArray(batch)) break;
    commits.push(...batch);
    // A short page means there is nothing further to fetch.
    if (batch.length < perPage) break;
  }
  return commits;
}
/**
 * Download a JSON file as it exists at a specific commit, from raw.githubusercontent.com.
 *
 * @param {object} opts
 * @param {string} opts.owner - Repository owner.
 * @param {string} opts.repo - Repository name.
 * @param {string} opts.sha - Commit SHA (or ref) to read from.
 * @param {string} opts.path - Repository-relative file path using "/" separators.
 * @returns {Promise<*>} The parsed JSON content.
 * @throws {Error} When the download fails or the body is not valid JSON.
 */
async function githubFetchFileAtSha({ owner, repo, sha, path }) {
  // Encode each path segment separately: "/" must stay a separator, while
  // characters such as " ", "#", "?" and "%" would otherwise corrupt the URL.
  const encodedPath = String(path).split("/").map(encodeURIComponent).join("/");
  const raw = `https://raw.githubusercontent.com/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}/${encodeURIComponent(
    sha
  )}/${encodedPath}`;
  const txt = await fetchText(raw);
  return JSON.parse(txt);
}
/**
 * Find the first non-removed item in a DB object whose stringified SKU equals `sku`.
 *
 * @param {{items?: Array<object>}|null} obj - Parsed DB file.
 * @param {string} sku - SKU to look for (compared with strict equality).
 * @returns {object|null} The matching item, or null when there is none.
 */
function findItemBySkuInDb(obj, sku) {
  const rows = Array.isArray(obj?.items) ? obj.items : [];
  const hit = rows.find((row) => row && !row.removed && String(row.sku || "") === sku);
  return hit ?? null;
}
/**
 * Suggest y-axis bounds for a set of prices.
 *
 * Non-finite values are ignored. With a single distinct value the range is
 * ±5% of it; otherwise the range is padded by 8% of the spread on each side,
 * with the lower bound clamped at 0. With no finite values both bounds are undefined.
 *
 * @param {number[]} values
 * @returns {{suggestedMin: number|undefined, suggestedMax: number|undefined}}
 */
function computeSuggestedY(values) {
  const finite = values.filter((v) => Number.isFinite(v));
  if (finite.length === 0) {
    return { suggestedMin: undefined, suggestedMax: undefined };
  }
  const lo = finite.reduce((best, n) => (n < best ? n : best));
  const hi = finite.reduce((best, n) => (n > best ? n : best));
  if (lo === hi) {
    return { suggestedMin: lo * 0.95, suggestedMax: hi * 1.05 };
  }
  const margin = (hi - lo) * 0.08;
  return { suggestedMin: Math.max(0, lo - margin), suggestedMax: hi + margin };
}
/**
 * Collapse a commit list to one entry per date.
 *
 * Input is expected oldest -> newest, so the last commit seen for a date is
 * the one kept. Entries missing a date or sha are skipped. Output order
 * follows the first appearance of each date.
 *
 * @param {Array<{sha?: string, date?: string, ts?: string}>} commits
 * @returns {Array<{sha: string, date: string, ts: string}>}
 */
function collapseCommitsToDaily(commits) {
  const latestPerDay = new Map();
  for (const commit of commits) {
    const day = String(commit?.date || "");
    const sha = String(commit?.sha || "");
    if (day && sha) {
      latestPerDay.set(day, { sha, date: day, ts: String(commit?.ts || "") });
    }
  }
  return Array.from(latestPerDay.values());
}
/**
 * localStorage key for a cached price series: namespaced by cache-bust token,
 * then sku, then DB file.
 */
function cacheKeySeries(sku, dbFile, cacheBust) {
  const suffix = `${cacheBust}:${sku}:${dbFile}`;
  return `stviz:v2:series:${suffix}`;
}
/**
 * Read a cached price series from localStorage.
 *
 * @returns {object|null} The stored `{savedAt, points}` object, or null when
 *   nothing is stored, the entry is malformed, it is older than 7 days, or
 *   storage access / JSON parsing throws.
 */
function loadSeriesCache(sku, dbFile, cacheBust) {
  const MAX_AGE_MS = 7 * 24 * 3600 * 1000;
  try {
    const raw = localStorage.getItem(cacheKeySeries(sku, dbFile, cacheBust));
    const entry = raw ? JSON.parse(raw) : null;
    if (!entry || !Array.isArray(entry.points)) return null;
    const savedAt = Number(entry.savedAt || 0);
    const isFresh = Number.isFinite(savedAt) && Date.now() - savedAt <= MAX_AGE_MS;
    return isFresh ? entry : null;
  } catch {
    return null;
  }
}
/**
 * Store a price series in localStorage together with the time it was saved.
 * Write failures are ignored on purpose: the cache is best-effort.
 */
function saveSeriesCache(sku, dbFile, cacheBust, points) {
  try {
    const key = cacheKeySeries(sku, dbFile, cacheBust);
    const payload = JSON.stringify({ savedAt: Date.now(), points });
    localStorage.setItem(key, payload);
  } catch {
    // Storage unavailable or full: the series is recomputed next time.
  }
}
// In-memory copy of ./data/db_commits.json once it has been fetched successfully.
let DB_COMMITS = null;

/**
 * Return the prebuilt per-file commit manifest, fetching it on first use.
 * A failed fetch yields null and leaves the cache empty, so a later call retries.
 *
 * @returns {Promise<object|null>}
 */
async function loadDbCommitsManifest() {
  if (!DB_COMMITS) {
    try {
      DB_COMMITS = await fetchJson("./data/db_commits.json");
    } catch {
      DB_COMMITS = null;
    }
  }
  return DB_COMMITS;
}
/**
 * Render the detail view for one SKU key: title, thumbnail, store links and a
 * per-store price history chart.
 *
 * History for each store DB file comes from, in order of preference:
 *   1. the localStorage series cache,
 *   2. the prebuilt commit manifest (./data/db_commits.json),
 *   3. the GitHub commits API as a fallback.
 * Commits are collapsed to one per day, capped to the most recent 260 days,
 * and the current index price is always added as today's point.
 *
 * @param {string} sku - SKU key as produced by keySkuForRow (real SKU or synthetic `u:` key).
 * @returns {Promise<void>}
 */
async function renderItem(sku) {
  destroyChart();
  // The badge shows the requested key. (The template previously referenced an
  // undefined `it`, which threw a ReferenceError before anything rendered.)
  $app.innerHTML = `
    <div class="container">
      <div class="topbar">
        <button id="back" class="btn"> Back</button>
        <span class="badge mono">${esc(displaySku(sku))}</span>
      </div>
      <div class="card detailCard">
        <div class="detailHeader">
          <div id="thumbBox" class="detailThumbBox"></div>
          <div class="detailHeaderText">
            <div id="title" class="h1">Loading</div>
            <div id="links" class="links"></div>
            <div class="small" id="status"></div>
          </div>
        </div>
        <div class="chartBox">
          <canvas id="chart"></canvas>
        </div>
      </div>
    </div>
  `;
  document.getElementById("back").addEventListener("click", () => {
    location.hash = "#/";
  });

  const $title = document.getElementById("title");
  const $links = document.getElementById("links");
  const $status = document.getElementById("status");
  const $canvas = document.getElementById("chart");
  const $thumbBox = document.getElementById("thumbBox");

  const idx = await loadIndex();
  const all = Array.isArray(idx.items) ? idx.items : [];
  // Match rows with the same key function the search view uses to build its
  // links, so rows without a real SKU (synthetic "u:" keys) can be opened too.
  const wantedKey = String(sku || "");
  const cur = all.filter((x) => keySkuForRow(x) === wantedKey);
  if (!cur.length) {
    $title.textContent = "Item not found in current index";
    $status.textContent = "Tip: index.json only includes current (non-removed) items.";
    if ($thumbBox) $thumbBox.innerHTML = `<div class="thumbPlaceholder"></div>`;
    return;
  }

  // Title: the most frequent non-empty name across this SKU's listings.
  const nameCounts = new Map();
  for (const r of cur) {
    const n = String(r.name || "");
    if (!n) continue;
    nameCounts.set(n, (nameCounts.get(n) || 0) + 1);
  }
  let bestName = cur[0].name || `(SKU ${sku})`;
  let bestCount = -1;
  for (const [n, c] of nameCounts.entries()) {
    if (c > bestCount) {
      bestName = n;
      bestCount = c;
    }
  }
  $title.textContent = bestName;

  // Pick image that matches the picked name (fallback: any)
  let bestImg = "";
  for (const r of cur) {
    if (String(r?.name || "") === String(bestName || "") && normImg(r?.img)) {
      bestImg = normImg(r.img);
      break;
    }
  }
  if (!bestImg) {
    for (const r of cur) {
      if (normImg(r?.img)) {
        bestImg = normImg(r.img);
        break;
      }
    }
  }
  if ($thumbBox) {
    $thumbBox.innerHTML = bestImg ? renderThumbHtml(bestImg, "detailThumb") : `<div class="thumbPlaceholder"></div>`;
  }

  // One outbound link per listing, ordered by store label.
  $links.innerHTML = cur
    .slice()
    .sort((a, b) => String(a.storeLabel || "").localeCompare(String(b.storeLabel || "")))
    .map(
      (r) =>
        `<a href="${esc(r.url)}" target="_blank" rel="noopener noreferrer">${esc(r.storeLabel || r.store || "Store")}</a>`
    )
    .join("");

  const gh = inferGithubOwnerRepo();
  const owner = gh.owner;
  const repo = gh.repo;
  const branch = "data";

  // One chart series per DB file; the first listing seen for a file represents it.
  const byDbFile = new Map();
  for (const r of cur) {
    if (!r.dbFile) continue;
    if (!byDbFile.has(r.dbFile)) byDbFile.set(r.dbFile, r);
  }
  const dbFiles = [...byDbFile.keys()].sort();
  $status.textContent = `Loading history for ${dbFiles.length} store file(s)…`;

  const manifest = await loadDbCommitsManifest();
  const allDatesSet = new Set();
  const series = [];
  const fileJsonCache = new Map();
  // Cached series are keyed by the index generation time, so a new index invalidates them.
  const cacheBust = String(idx.generatedAt || new Date().toISOString());
  const today = dateOnly(idx.generatedAt || new Date().toISOString());

  for (const dbFile of dbFiles) {
    const row = byDbFile.get(dbFile);
    const storeLabel = String(row.storeLabel || row.store || dbFile);

    // 1) Cached series, if present and non-empty.
    const cached = loadSeriesCache(sku, dbFile, cacheBust);
    if (cached && Array.isArray(cached.points) && cached.points.length) {
      const points = new Map();
      const values = [];
      for (const p of cached.points) {
        const d = String(p.date || "");
        const v = p.price === null ? null : Number(p.price);
        if (!d) continue;
        points.set(d, Number.isFinite(v) ? v : null);
        if (Number.isFinite(v)) values.push(v);
        allDatesSet.add(d);
      }
      series.push({ label: storeLabel, points, values });
      continue;
    }

    // 2) Commit list from the manifest, else 3) from the GitHub API.
    let commits = [];
    if (manifest && manifest.files && Array.isArray(manifest.files[dbFile])) {
      commits = manifest.files[dbFile];
    } else {
      try {
        let apiCommits = await githubListCommits({ owner, repo, branch, path: dbFile });
        apiCommits = apiCommits.slice().reverse(); // oldest -> newest
        commits = apiCommits
          .map((c) => {
            const sha = String(c?.sha || "");
            const dIso = c?.commit?.committer?.date || c?.commit?.author?.date || "";
            const d = dateOnly(dIso);
            return sha && d ? { sha, date: d, ts: String(dIso || "") } : null;
          })
          .filter(Boolean);
      } catch {
        // No history available for this file; the "today" point is still added below.
        commits = [];
      }
    }
    commits = collapseCommitsToDaily(commits);

    const points = new Map();
    const values = [];
    const compactPoints = [];
    const MAX_POINTS = 260; // daily points (~8-9 months)
    if (commits.length > MAX_POINTS) commits = commits.slice(commits.length - MAX_POINTS);

    for (const c of commits) {
      const sha = String(c.sha || "");
      const d = String(c.date || "");
      if (!sha || !d) continue;
      const ck = `${sha}|${dbFile}`;
      let obj = fileJsonCache.get(ck) || null;
      if (!obj) {
        try {
          obj = await githubFetchFileAtSha({ owner, repo, sha, path: dbFile });
          fileJsonCache.set(ck, obj);
        } catch {
          // Snapshot unavailable at this commit: skip the day rather than fail the chart.
          continue;
        }
      }
      const it = findItemBySkuInDb(obj, sku);
      const pNum = it ? parsePriceToNumber(it.price) : null;
      points.set(d, pNum);
      if (pNum !== null) values.push(pNum);
      allDatesSet.add(d);
      compactPoints.push({ date: d, price: pNum });
    }

    // Always add "today" from the current index
    const curP = parsePriceToNumber(row.price);
    if (curP !== null) {
      points.set(today, curP);
      values.push(curP);
      allDatesSet.add(today);
      compactPoints.push({ date: today, price: curP });
    }
    saveSeriesCache(sku, dbFile, cacheBust, compactPoints);
    series.push({ label: storeLabel, points, values });
  }

  const labels = [...allDatesSet].sort();
  if (!labels.length) {
    $status.textContent = "No historical points found.";
    return;
  }

  const allVals = [];
  for (const s of series) for (const v of s.values) allVals.push(v);
  const ySug = computeSuggestedY(allVals);

  // Align every series on the shared date axis; days without data become gaps.
  const datasets = series.map((s) => ({
    label: s.label,
    data: labels.map((d) => (s.points.has(d) ? s.points.get(d) : null)),
    spanGaps: false,
    tension: 0.15,
  }));

  const ctx = $canvas.getContext("2d");
  CHART = new Chart(ctx, {
    type: "line",
    data: { labels, datasets },
    options: {
      responsive: true,
      maintainAspectRatio: false,
      interaction: { mode: "nearest", intersect: false },
      plugins: {
        legend: { display: true },
        tooltip: {
          callbacks: {
            label: (tip) => {
              const v = tip.parsed?.y;
              if (!Number.isFinite(v)) return `${tip.dataset.label}: (no data)`;
              return `${tip.dataset.label}: $${v.toFixed(2)}`;
            },
          },
        },
      },
      scales: {
        x: {
          ticks: { maxRotation: 0, autoSkip: true, maxTicksLimit: 12 },
          grid: { display: false },
        },
        y: {
          ...ySug,
          ticks: { callback: (v) => `$${Number(v).toFixed(0)}` },
        },
      },
    },
  });

  $status.textContent = manifest
    ? `History loaded from prebuilt manifest (1 point/day) + current run. Points=${labels.length}.`
    : `History loaded (GitHub API fallback; 1 point/day) + current run. Points=${labels.length}.`;
}
/* ---------------- boot ---------------- */
// Re-render whenever the hash changes, and render once for the initial URL.
window.addEventListener("hashchange", route);
route();

16
viz/index.html Normal file
View file

@ -0,0 +1,16 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width,initial-scale=1" />
<title>Spirit Tracker Viz</title>
<link rel="stylesheet" href="./style.css" />
</head>
<body>
<!-- Mount point: app.js looks this element up by id and renders every view into it -->
<div id="app"></div>
<!-- Chart.js (no build step) -->
<!-- Loaded before app.js, which uses the global `Chart` constructor -->
<script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.1/dist/chart.umd.min.js"></script>
<script src="./app.js"></script>
</body>
</html>

57
viz/serve.js Executable file
View file

@ -0,0 +1,57 @@
#!/usr/bin/env node
"use strict";
const http = require("http");
const fs = require("fs");
const path = require("path");
// Absolute path of the directory containing this script; every served file is
// resolved relative to it.
const root = path.resolve(__dirname);
// Content-Type header values keyed by file extension (leading dot included).
// Extensions not listed here are served as "application/octet-stream".
const MIME = {
".html": "text/html; charset=utf-8",
".js": "application/javascript; charset=utf-8",
".css": "text/css; charset=utf-8",
".json": "application/json; charset=utf-8",
".png": "image/png",
".jpg": "image/jpeg",
".jpeg": "image/jpeg",
".svg": "image/svg+xml",
};
/**
 * Resolve a request URL to an absolute file path inside `root`.
 *
 * The query string is dropped, percent-escapes are decoded, and backslashes
 * are treated as forward slashes before the path is joined onto `root`.
 *
 * @param {string} urlPath - Raw request URL (path plus optional query string).
 * @returns {string|null} Normalized absolute path, or null when the URL is
 *   malformed, contains a NUL byte, or resolves outside `root`.
 */
function safePath(urlPath) {
  let decoded;
  try {
    decoded = decodeURIComponent(urlPath.split("?")[0]);
  } catch (err) {
    // decodeURIComponent throws URIError on malformed escapes such as "%E0%A4%A".
    return null;
  }
  // fs APIs reject NUL bytes by throwing synchronously; refuse them up front.
  if (decoded.includes("\0")) return null;

  const norm = path.normalize(path.join(root, decoded.replace(/\\/g, "/")));

  // Compare against root *with* a trailing separator so a sibling directory
  // that merely shares the prefix (e.g. "/srv/viz-private" next to "/srv/viz")
  // is not mistaken for a path inside root.
  const base = root.endsWith(path.sep) ? root : `${root}${path.sep}`;
  if (norm !== root && !norm.startsWith(base)) return null;
  return norm;
}
/**
 * Static file server for the viz directory.
 *
 * "/" maps to "/index.html"; a request that resolves to a directory is served
 * that directory's index.html. Responses: 400 when the path cannot be resolved
 * safely, 404 when the file cannot be read, otherwise 200 with a Content-Type
 * chosen from MIME by file extension.
 */
const server = http.createServer((req, res) => {
  const u = req.url || "/";

  // Treat any failure to resolve the path (e.g. malformed percent-encoding)
  // as a bad request rather than letting it escape the handler, where an
  // uncaught exception would terminate the process.
  let file = null;
  try {
    file = safePath(u === "/" ? "/index.html" : u);
  } catch (err) {
    file = null;
  }

  // fs calls throw synchronously on paths containing NUL bytes, so reject
  // those before touching the filesystem.
  if (!file || file.includes("\0")) {
    res.writeHead(400);
    res.end("Bad path");
    return;
  }

  // One guarded stat instead of existsSync + statSync: the file may disappear
  // between the two calls, and statSync would then throw.
  let isDir = false;
  try {
    isDir = fs.statSync(file).isDirectory();
  } catch (err) {
    // Missing or unreadable path: readFile below turns this into a 404.
  }
  if (isDir) {
    file = path.join(file, "index.html");
  }

  fs.readFile(file, (err, buf) => {
    if (err) {
      res.writeHead(404);
      res.end("Not found");
      return;
    }
    // MIME keys are lowercase; normalize so "logo.PNG" is still typed correctly.
    const ext = path.extname(file).toLowerCase();
    res.writeHead(200, { "Content-Type": MIME[ext] || "application/octet-stream" });
    res.end(buf);
  });
});
// Port comes from the PORT environment variable, falling back to 8080.
const port = Number(process.env.PORT || 8080);

// Printed once the socket is bound, so the URL shown is actually reachable.
const announce = () => {
  process.stdout.write(`Serving ${root} on http://127.0.0.1:${port}\n`);
};

// Bind to loopback only; the server is not exposed on other interfaces.
server.listen(port, "127.0.0.1", announce);

240
viz/style.css Normal file
View file

@ -0,0 +1,240 @@
/* Dark-theme palette, exposed as custom properties so rules can share colors via var(). */
:root {
--bg: #0b0d10;
--panel: #12161b;
--text: #e7edf3;
--muted: #9aa6b2;
--border: #242c35;
--accent: #7dd3fc;
}
/* Size every element by its border box so padding and borders stay inside declared widths. */
* { box-sizing: border-box; }
/* Page defaults: no body margin, system UI font stack, palette background and text color. */
body {
margin: 0;
font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, "Apple Color Emoji", "Segoe UI Emoji";
background: var(--bg);
color: var(--text);
}
/* Links use the accent color and are underlined only on hover. */
a { color: var(--accent); text-decoration: none; }
a:hover { text-decoration: underline; }
/* Centered page column, capped at 980px wide. */
.container {
max-width: 980px;
margin: 0 auto;
padding: 18px;
}
/* Header row: children vertically centered and pushed to opposite ends. */
.header {
display: flex;
gap: 12px;
align-items: center;
justify-content: space-between;
margin-bottom: 14px;
}
.h1 {
font-size: 18px;
font-weight: 700;
margin: 0;
}
/* Generic bordered panel on the --panel background. */
.card {
background: var(--panel);
border: 1px solid var(--border);
border-radius: 12px;
padding: 14px;
}
/* Full-width form control. */
.input {
width: 100%;
padding: 12px 12px;
border-radius: 10px;
border: 1px solid var(--border);
background: #0f1318;
color: var(--text);
/* The default focus outline is removed; the :focus rule that follows changes the border color instead. */
outline: none;
}
.input:focus { border-color: #37566b; }
/* Vertical stack with a 10px gap between children. */
.list {
margin-top: 12px;
display: flex;
flex-direction: column;
gap: 10px;
}
/* Row with a pointer cursor; its border color changes on hover. */
.item {
border: 1px solid var(--border);
border-radius: 12px;
padding: 12px;
background: #0f1318;
cursor: pointer;
}
.item:hover { border-color: #2f3a46; }
/* Horizontal flex row whose children are aligned to the top. */
.itemRow {
display: flex;
gap: 12px;
align-items: flex-start;
}
/* Fixed 64x64 frame: flex 0 0 64px keeps it from growing or shrinking in the row,
and overflow: hidden clips its content to the rounded corners. */
.thumbBox {
width: 64px;
height: 64px;
border-radius: 12px;
overflow: hidden;
border: 1px solid var(--border);
background: #0b0d10;
flex: 0 0 64px;
display: flex;
align-items: center;
justify-content: center;
}
/* Fills its parent; object-fit: cover crops rather than distorts. */
.thumb {
width: 100%;
height: 100%;
object-fit: cover;
display: block;
}
/* Same footprint as .thumb, filled with a flat background color. */
.thumbPlaceholder {
width: 100%;
height: 100%;
background: #0b0d10;
}
/* Takes the remaining row width; min-width: 0 lets this flex child shrink below its content width. */
.itemBody {
flex: 1;
min-width: 0;
}
/* Row with children at opposite ends, aligned on their text baseline. */
.itemTop {
display: flex;
justify-content: space-between;
gap: 10px;
align-items: baseline;
}
.itemName {
font-weight: 700;
font-size: 14px;
}
/* Pill-shaped label (fully rounded border) that never wraps. */
.badge {
font-size: 12px;
color: var(--muted);
border: 1px solid var(--border);
padding: 2px 8px;
border-radius: 999px;
white-space: nowrap;
}
/* Small muted text laid out in a wrapping row. */
.meta {
margin-top: 6px;
display: flex;
gap: 10px;
flex-wrap: wrap;
color: var(--muted);
font-size: 12px;
}
/* Monospace font stack utility. */
.mono { font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; }
/* Horizontal bar with vertically centered children. */
.topbar {
display: flex;
align-items: center;
gap: 10px;
margin-bottom: 12px;
}
/* Button styling; the border color changes on hover. */
.btn {
border: 1px solid var(--border);
background: #0f1318;
color: var(--text);
border-radius: 10px;
padding: 10px 10px;
cursor: pointer;
}
.btn:hover { border-color: #2f3a46; }
/* Wrapping row with a 10px gap between children. */
.links {
display: flex;
gap: 10px;
flex-wrap: wrap;
margin: 8px 0 14px;
}
/* Small muted text utility. */
.small {
color: var(--muted);
font-size: 12px;
}
/* Detail view sizing */
/* Vertical stack with a 10px gap between sections. */
.detailCard {
display: flex;
flex-direction: column;
gap: 10px;
}
/* Row with children aligned to the top. */
.detailHeader {
display: flex;
gap: 12px;
align-items: flex-start;
}
/* Fixed 96x96 frame: flex 0 0 96px keeps it from growing or shrinking,
and overflow: hidden clips its content to the rounded corners. */
.detailThumbBox {
width: 96px;
height: 96px;
border-radius: 14px;
overflow: hidden;
border: 1px solid var(--border);
background: #0b0d10;
flex: 0 0 96px;
display: flex;
align-items: center;
justify-content: center;
}
/* Fills its parent; object-fit: cover crops rather than distorts. */
.detailThumb {
width: 100%;
height: 100%;
object-fit: cover;
display: block;
}
/* Takes the remaining header width; min-width: 0 lets this flex child shrink below its content width. */
.detailHeaderText {
flex: 1;
min-width: 0;
}
/* Chart fills most of viewport, but stays usable on mobile */
/* Height is 72% of the viewport height, capped at 720px and never below 320px. */
.chartBox {
width: 100%;
height: min(72vh, 720px);
min-height: 320px;
border: 1px solid var(--border);
border-radius: 12px;
background: #0f1318;
padding: 10px;
}
/* Viewports up to 640px wide: tighter padding, smaller thumbnail frames, shorter chart. */
@media (max-width: 640px) {
.container { padding: 14px; }
.thumbBox { width: 56px; height: 56px; flex: 0 0 56px; }
.detailThumbBox { width: 84px; height: 84px; flex: 0 0 84px; }
.chartBox {
height: 58vh;
min-height: 260px;
padding: 8px;
}
}
/* Force the canvas to fill .chartBox. NOTE(review): !important presumably overrides
inline sizes set on the canvas by the chart library; confirm. */
.chartBox canvas {
width: 100% !important;
height: 100% !important;
}