mirror of
https://github.com/samsonjs/spirit-tracker.git
synced 2026-03-25 09:25:51 +00:00
chore: initial code
This commit is contained in:
commit
470eb8ca56
46 changed files with 6127 additions and 0 deletions
14
.gitignore
vendored
Normal file
14
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
node_modules/
|
||||
*.log
|
||||
|
||||
# Data & reports live on the data branch
|
||||
/data/
|
||||
/reports/
|
||||
|
||||
.worktrees/
|
||||
|
||||
# Generated viz artifacts live on the data branch
|
||||
viz/data/
|
||||
|
||||
# Keep cron log out of git even on data branch
|
||||
reports/cron.log
|
||||
10
bin/tracker.js
Executable file
10
bin/tracker.js
Executable file
|
|
@ -0,0 +1,10 @@
|
|||
#!/usr/bin/env node
|
||||
"use strict";
|
||||
|
||||
const { main } = require("../src/main");
|
||||
|
||||
main().catch((e) => {
|
||||
const msg = e && e.stack ? e.stack : String(e);
|
||||
console.error(msg);
|
||||
process.exitCode = 1;
|
||||
});
|
||||
87
scripts/bootstrap_clone.sh
Executable file
87
scripts/bootstrap_clone.sh
Executable file
|
|
@ -0,0 +1,87 @@
|
|||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
MAIN_BRANCH="${MAIN_BRANCH:-main}"
|
||||
DATA_BRANCH="${DATA_BRANCH:-data}"
|
||||
WORKTREE_DIR="${DATA_WORKTREE_DIR:-.worktrees/data}"
|
||||
RUN_DAILY="${RUN_DAILY:-0}" # set RUN_DAILY=1 to run at the end
|
||||
|
||||
# must be in a git repo root-ish
|
||||
git rev-parse --is-inside-work-tree >/dev/null
|
||||
|
||||
# ensure we have origin
|
||||
if ! git remote get-url origin >/dev/null 2>&1; then
|
||||
echo "ERROR: remote 'origin' not configured" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "[bootstrap] fetching..."
|
||||
git fetch --prune origin
|
||||
|
||||
# ensure local main exists and tracks origin/main (best effort)
|
||||
if git show-ref --verify --quiet "refs/remotes/origin/$MAIN_BRANCH"; then
|
||||
if git show-ref --verify --quiet "refs/heads/$MAIN_BRANCH"; then
|
||||
git checkout -q "$MAIN_BRANCH"
|
||||
git merge -q --ff-only "origin/$MAIN_BRANCH" || true
|
||||
else
|
||||
git checkout -q -b "$MAIN_BRANCH" "origin/$MAIN_BRANCH"
|
||||
fi
|
||||
git branch --set-upstream-to="origin/$MAIN_BRANCH" "$MAIN_BRANCH" >/dev/null 2>&1 || true
|
||||
fi
|
||||
|
||||
# ensure local data branch exists (from origin/data)
|
||||
if git show-ref --verify --quiet "refs/remotes/origin/$DATA_BRANCH"; then
|
||||
if git show-ref --verify --quiet "refs/heads/$DATA_BRANCH"; then
|
||||
# fast-forward local data to origin/data when possible; otherwise leave it alone
|
||||
git checkout -q "$DATA_BRANCH"
|
||||
git merge -q --ff-only "origin/$DATA_BRANCH" || true
|
||||
else
|
||||
git checkout -q -b "$DATA_BRANCH" "origin/$DATA_BRANCH"
|
||||
fi
|
||||
git branch --set-upstream-to="origin/$DATA_BRANCH" "$DATA_BRANCH" >/dev/null 2>&1 || true
|
||||
else
|
||||
echo "ERROR: origin/$DATA_BRANCH not found. Did you push the data branch?" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# go back to main (so run_daily can merge main->data in the worktree cleanly)
|
||||
git checkout -q "$MAIN_BRANCH" || true
|
||||
|
||||
echo "[bootstrap] preparing worktree..."
|
||||
git worktree prune >/dev/null 2>&1 || true
|
||||
|
||||
# if dir exists but isn't a valid worktree checkout, remove it
|
||||
if [[ -e "$WORKTREE_DIR" && ! -e "$WORKTREE_DIR/.git" ]]; then
|
||||
rm -rf "$WORKTREE_DIR"
|
||||
fi
|
||||
|
||||
# ensure worktree exists for data branch
|
||||
if [[ ! -e "$WORKTREE_DIR/.git" ]]; then
|
||||
mkdir -p "$(dirname "$WORKTREE_DIR")"
|
||||
git worktree add -f -q "$WORKTREE_DIR" "$DATA_BRANCH"
|
||||
fi
|
||||
|
||||
# keep worktree data branch in a reasonable state
|
||||
(
|
||||
cd "$WORKTREE_DIR"
|
||||
git fetch -q --prune origin || true
|
||||
git merge -q --ff-only "origin/$DATA_BRANCH" || true
|
||||
# merge main into data if main exists (best effort, matches your run_daily behavior)
|
||||
if git show-ref --verify --quiet "refs/heads/$MAIN_BRANCH"; then
|
||||
git merge -q --no-edit "$MAIN_BRANCH" || true
|
||||
fi
|
||||
)
|
||||
|
||||
echo "[bootstrap] done."
|
||||
echo " main repo: $(pwd)"
|
||||
echo " data worktree: $(cd "$WORKTREE_DIR" && pwd)"
|
||||
|
||||
if [[ "$RUN_DAILY" == "1" ]]; then
|
||||
echo "[bootstrap] running daily..."
|
||||
NODE_BIN="${NODE_BIN:-$(command -v node || true)}"
|
||||
if [[ -z "$NODE_BIN" ]]; then
|
||||
echo "ERROR: node not found in PATH" >&2
|
||||
exit 1
|
||||
fi
|
||||
NODE_BIN="$NODE_BIN" bash scripts/run_daily.sh || true
|
||||
fi
|
||||
31
scripts/cron_setup.sh
Executable file
31
scripts/cron_setup.sh
Executable file
|
|
@ -0,0 +1,31 @@
|
|||
#!/usr/bin/env bash
# Install (or refresh) the crontab entry that runs scripts/run_daily.sh on a
# schedule. Idempotent: any existing entry tagged with the marker is replaced.
set -euo pipefail

# Resolve the repo root from this script's own location so the cron line
# works regardless of the caller's working directory.
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
MAIN_BRANCH="${MAIN_BRANCH:-main}"
DATA_BRANCH="${DATA_BRANCH:-data}"

# Pin an absolute node path now — cron's PATH is minimal.
NODE_BIN="${NODE_BIN:-}"
if [[ -z "$NODE_BIN" ]]; then
  NODE_BIN="$(command -v node || true)"
fi
if [[ -z "$NODE_BIN" ]]; then
  echo "ERROR: node not found in PATH" >&2
  exit 1
fi

mkdir -p "$REPO_ROOT/reports"

# Default: run 4 times/day (every 6 hours). Override via:
# CRON_SCHEDULE="15 */4 * * *" (example)
CRON_SCHEDULE="${CRON_SCHEDULE:-0 */6 * * *}"

# Use a stable marker so we can replace old lines (including the previous "daily" one).
MARKER="# spirit-tracker"
CRON_LINE="$CRON_SCHEDULE NODE_BIN=$NODE_BIN MAIN_BRANCH=$MAIN_BRANCH DATA_BRANCH=$DATA_BRANCH bash \"$REPO_ROOT/scripts/run_daily.sh\" >> \"$REPO_ROOT/reports/cron.log\" 2>&1 $MARKER"

# Install (idempotent): remove any previous line with the marker, then append.
{ crontab -l 2>/dev/null | grep -vF "$MARKER" || true; echo "$CRON_LINE"; } | crontab -

echo "Installed cron job:"
echo "$CRON_LINE"
|
||||
7
scripts/repo_reset.sh
Executable file
7
scripts/repo_reset.sh
Executable file
|
|
@ -0,0 +1,7 @@
|
|||
#!/usr/bin/env bash
# Destructive reset: wipe local git state and generated data, rebuild the
# repo from scratch via repo_setup.sh, then force-push both branches.
#
# Fixes vs. original: adds the shebang and `set -euo pipefail` (every sibling
# script has them; without -e a failed repo_setup.sh still force-pushed), and
# pins the working directory to the repo root so `rm -rf .git` can never
# delete the .git of whatever directory the caller happened to be in.
set -euo pipefail

REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$REPO_ROOT"

rm -rf .git .worktrees
rm -rf data/db reports
bash scripts/repo_setup.sh --force

git remote add origin git@github.com:brennanwilkes/spirit-tracker.git
git push -u origin main --force
git push -u origin data --force
|
||||
95
scripts/repo_setup.sh
Executable file
95
scripts/repo_setup.sh
Executable file
|
|
@ -0,0 +1,95 @@
|
|||
#!/usr/bin/env bash
# One-time repo initialization: create a fresh git repo with a main branch
# (runtime dirs ignored) and a data branch (data/reports/viz tracked).
# Pass --force to blow away an existing .git first.
set -euo pipefail

MAIN_BRANCH="${MAIN_BRANCH:-main}"
DATA_BRANCH="${DATA_BRANCH:-data}"

FORCE=0
if [[ "${1:-}" == "--force" ]]; then
  FORCE=1
fi

if [[ -d .git ]]; then
  if [[ $FORCE -eq 1 ]]; then
    rm -rf .git
  else
    echo "ERROR: .git already exists. Remove it first or run: $0 --force" >&2
    exit 1
  fi
fi

git init -q
git branch -M "$MAIN_BRANCH"

mkdir -p data/db reports .worktrees viz/data

# Move existing DB snapshots (e.g. kwm__scotch__2b16b533.json) into data/db so
# they don't end up committed on the main branch.
shopt -s nullglob
for f in *__*__*.json; do
  mv -f "$f" data/db/
done
shopt -u nullglob

# Ensure expected runtime dirs exist (they are ignored on main).
mkdir -p data/db reports viz/data

# Move old root-level DB JSONs into data/db if present.
# (Matches the 8-hex-digit suffix used by the tracker's DB filenames.)
shopt -s nullglob
for f in *.json; do
  if [[ "$f" =~ __[0-9a-f]{8}\.json$ ]]; then
    mv -f "$f" "data/db/$f"
  fi
done
shopt -u nullglob

# Main-branch .gitignore: generated artifacts live on the data branch only.
cat > .gitignore <<'GITIGNORE'
node_modules/
*.log

# Data & reports live on the data branch
/data/
/reports/

.worktrees/

# Generated viz artifacts live on the data branch
viz/data/

# Keep cron log out of git even on data branch
reports/cron.log
GITIGNORE

# Make sure scripts/tools are executable (best effort)
chmod +x bin/tracker.js 2>/dev/null || true
chmod +x scripts/*.sh 2>/dev/null || true
chmod +x tools/*.js 2>/dev/null || true

git add -A
if git diff --cached --quiet; then
  echo "Nothing to commit on $MAIN_BRANCH (did you already commit?)" >&2
else
  git commit -m "chore: initial code" -q
fi

# Create data branch, un-ignore data and reports (and viz/data).
if git show-ref --verify --quiet "refs/heads/$DATA_BRANCH"; then
  echo "Data branch already exists: $DATA_BRANCH" >&2
else
  git checkout -b "$DATA_BRANCH" -q

  # Data-branch .gitignore: narrower, so data/reports/viz get tracked here.
  cat > .gitignore <<'GITIGNORE'
node_modules/
*.log

# Keep cron log out of git
reports/cron.log
GITIGNORE

  git add .gitignore
  git commit -m "chore: enable tracking of data + reports + viz on data branch" -q

  git checkout "$MAIN_BRANCH" -q
fi

echo "Repo setup complete. Main=$MAIN_BRANCH Data=$DATA_BRANCH"
|
||||
90
scripts/run_daily.sh
Executable file
90
scripts/run_daily.sh
Executable file
|
|
@ -0,0 +1,90 @@
|
|||
#!/usr/bin/env bash
# Daily job (run by cron): in the data-branch worktree, merge main, run the
# tracker and viz builders, then commit the data/report/viz outputs with the
# latest report as the commit body and push.
set -euo pipefail

REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
MAIN_BRANCH="${MAIN_BRANCH:-main}"
DATA_BRANCH="${DATA_BRANCH:-data}"
WORKTREE_DIR="${DATA_WORKTREE_DIR:-$REPO_ROOT/.worktrees/data}"

# Resolve node explicitly — cron's PATH usually lacks it.
NODE_BIN="${NODE_BIN:-}"
if [[ -z "$NODE_BIN" ]]; then
  NODE_BIN="$(command -v node || true)"
fi
if [[ -z "$NODE_BIN" ]]; then
  echo "ERROR: node not found in PATH" >&2
  exit 1
fi

cd "$REPO_ROOT"

git rev-parse --is-inside-work-tree >/dev/null

# Ensure data branch exists.
if ! git show-ref --verify --quiet "refs/heads/$DATA_BRANCH"; then
  echo "ERROR: data branch not found: $DATA_BRANCH" >&2
  exit 1
fi

# Create/repair worktree for data branch.
git worktree prune >/dev/null 2>&1 || true

# If the dir exists but isn't a valid worktree checkout, remove it properly.
if [[ -e "$WORKTREE_DIR" && ! -e "$WORKTREE_DIR/.git" ]]; then
  rm -rf "$WORKTREE_DIR"
fi

# If the worktree directory is missing, add it (force is safe after prune).
if [[ ! -e "$WORKTREE_DIR/.git" ]]; then
  mkdir -p "$(dirname "$WORKTREE_DIR")"
  git worktree add -f -q "$WORKTREE_DIR" "$DATA_BRANCH"
fi

cd "$WORKTREE_DIR"

# Merge main into data so code changes ride along with data commits.
# A conflict here is fatal — we must not commit on top of a broken merge.
if git show-ref --verify --quiet "refs/heads/$MAIN_BRANCH"; then
  if ! git merge -q --no-edit "$MAIN_BRANCH"; then
    echo "ERROR: failed to merge $MAIN_BRANCH into $DATA_BRANCH" >&2
    exit 1
  fi
fi

# Run tracker (writes data/db + a plain report file in reports/)
"$NODE_BIN" bin/tracker.js

# Build viz artifacts on the data branch
"$NODE_BIN" tools/build_viz_index.js
"$NODE_BIN" tools/build_viz_commits.js
"$NODE_BIN" tools/build_viz_recent.js

# Stage only data/report/viz outputs
git add -A data/db reports viz/data

if git diff --cached --quiet; then
  echo "No data/report/viz changes to commit." >&2
  exit 0
fi

# Commit message: include the latest report as the commit body.
ts="$(date -u +'%Y-%m-%dT%H:%M:%SZ')"

# Newest report file by mtime, if any exist.
REPORT_FILE=""
if compgen -G "reports/*.txt" > /dev/null; then
  REPORT_FILE="$(ls -1t reports/*.txt | head -n 1 || true)"
fi

MSG_FILE="$(mktemp)"
{
  echo "run: ${ts}"
  echo
  if [[ -n "$REPORT_FILE" && -f "$REPORT_FILE" ]]; then
    cat "$REPORT_FILE"
  else
    echo "(no report file found in reports/*.txt)"
  fi
} > "$MSG_FILE"

git commit -F "$MSG_FILE" -q
rm -f "$MSG_FILE"

git push -q
|
||||
56
scripts/serve_viz.sh
Executable file
56
scripts/serve_viz.sh
Executable file
|
|
@ -0,0 +1,56 @@
|
|||
#!/usr/bin/env bash
# Serve the visualization UI from the data-branch worktree, building any
# missing viz/data artifacts first, then exec'ing viz/serve.js.
set -euo pipefail

REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
DATA_BRANCH="${DATA_BRANCH:-data}"
WORKTREE_DIR="${DATA_WORKTREE_DIR:-$REPO_ROOT/.worktrees/data}"

# Resolve node explicitly so this also works from minimal environments.
NODE_BIN="${NODE_BIN:-}"
if [[ -z "$NODE_BIN" ]]; then
  NODE_BIN="$(command -v node || true)"
fi
if [[ -z "$NODE_BIN" ]]; then
  echo "ERROR: node not found in PATH" >&2
  exit 1
fi

cd "$REPO_ROOT"
git rev-parse --is-inside-work-tree >/dev/null

# Ensure data branch exists.
if ! git show-ref --verify --quiet "refs/heads/$DATA_BRANCH"; then
  echo "ERROR: data branch not found: $DATA_BRANCH" >&2
  exit 1
fi

# Create/repair worktree for data branch.
git worktree prune >/dev/null 2>&1 || true

# If dir exists but isn't a valid worktree checkout, remove it.
if [[ -e "$WORKTREE_DIR" && ! -e "$WORKTREE_DIR/.git" ]]; then
  rm -rf "$WORKTREE_DIR"
fi

# If missing, add it.
if [[ ! -e "$WORKTREE_DIR/.git" ]]; then
  mkdir -p "$(dirname "$WORKTREE_DIR")"
  git worktree add -f -q "$WORKTREE_DIR" "$DATA_BRANCH"
fi

cd "$WORKTREE_DIR"

# Ensure viz artifacts exist (helpful if you haven't run daily yet)
if [[ ! -f "viz/data/index.json" ]]; then
  echo "viz/data/index.json missing; building..." >&2
  "$NODE_BIN" tools/build_viz_index.js
fi
if [[ ! -f "viz/data/db_commits.json" ]]; then
  echo "viz/data/db_commits.json missing; building..." >&2
  "$NODE_BIN" tools/build_viz_commits.js
fi
if [[ ! -f "viz/data/recent.json" ]]; then
  echo "viz/data/recent.json missing; building..." >&2
  "$NODE_BIN" tools/build_viz_recent.js
fi

# Replace this shell with the server process.
exec "$NODE_BIN" viz/serve.js
|
||||
232
src/core/http.js
Normal file
232
src/core/http.js
Normal file
|
|
@ -0,0 +1,232 @@
|
|||
"use strict";
|
||||
|
||||
const { setTimeout: sleep } = require("timers/promises");
|
||||
|
||||
// Error subclass used to tag failures that are safe to retry
// (transient transport errors, 5xx responses, short/invalid bodies).
class RetryableError extends Error {
  constructor(message) {
    super(message);
    this.name = "RetryableError";
  }
}
||||
|
||||
// Decide whether a failed request should be attempted again: explicit
// RetryableError / timeout aborts, or a known transient network message.
function isRetryable(e) {
  if (!e) return false;
  if (e.name === "AbortError" || e instanceof RetryableError) return true;
  const text = String(e.message || e);
  return /ECONNRESET|ENOTFOUND|EAI_AGAIN|ETIMEDOUT|socket hang up|fetch failed/i.test(text);
}
|
||||
|
||||
// Jittered exponential backoff: 500ms doubling per attempt, capped at 12s,
// plus up to 400ms of random jitter to avoid thundering herds.
function backoffMs(attempt) {
  const exponential = 500 * 2 ** attempt;
  const capped = Math.min(12000, exponential);
  return capped + Math.floor(Math.random() * 400);
}
|
||||
|
||||
// Read a response body as text; any read failure (already consumed,
// stream error) degrades to the empty string instead of throwing.
async function safeText(res) {
  try {
    const body = await res.text();
    return body;
  } catch {
    return "";
  }
}
|
||||
|
||||
/* ---------------- Cookies (simple jar) ---------------- */
|
||||
|
||||
// host -> Map(cookieName -> "name=value")
|
||||
// Minimal per-host cookie jar: remembers "name=value" pairs keyed by
// hostname, with later Set-Cookie values replacing earlier ones of the same
// name. Cookie attributes (Path, Expires, Secure, ...) are ignored.
function createCookieJar() {
  // hostname -> Map(cookieName -> "name=value")
  const byHost = new Map();

  // Hostname of a URL string; "" when unparsable.
  const hostOf = (u) => {
    try {
      return new URL(u).hostname || "";
    } catch {
      return "";
    }
  };

  // Reduce one Set-Cookie line to its leading name=value pair.
  // Returns null for blank/malformed lines (no "=" or empty name).
  const parsePair = (line) => {
    const trimmed = String(line || "").trim();
    if (!trimmed) return null;
    const lead = trimmed.split(";")[0] || "";
    const eq = lead.indexOf("=");
    if (eq <= 0) return null;
    const name = lead.slice(0, eq).trim();
    if (!name) return null;
    const value = lead.slice(eq + 1).trim();
    return { name, pair: `${name}=${value}` };
  };

  // Collect Set-Cookie lines from a fetch Headers-like object.
  const setCookieLines = (headers) => {
    // Node/undici exposes getSetCookie() with one entry per header.
    if (headers && typeof headers.getSetCookie === "function") {
      try {
        const arr = headers.getSetCookie();
        return Array.isArray(arr) ? arr : [];
      } catch {
        // fall through to the combined-header fallback
      }
    }

    // Fallback: single combined header (may lose multiples, but better than nothing)
    const combined = headers?.get ? headers.get("set-cookie") : null;
    if (!combined) return [];

    // Best-effort split. This is imperfect with Expires=... commas, but OK for most WP cookies.
    // If this causes issues later, we can replace with a more robust splitter.
    return String(combined)
      .split(/,(?=[^;,]*=)/g)
      .map((x) => x.trim())
      .filter(Boolean);
  };

  // Record any Set-Cookie headers on `res` under the response's host.
  function storeFromResponse(url, res) {
    const host = hostOf(res?.url || url);
    if (!host) return;

    const lines = setCookieLines(res?.headers);
    if (!lines.length) return;

    let cookies = byHost.get(host);
    if (!cookies) {
      cookies = new Map();
      byHost.set(host, cookies);
    }

    for (const line of lines) {
      const parsed = parsePair(line);
      if (parsed) cookies.set(parsed.name, parsed.pair);
    }
  }

  // Build the Cookie request-header value for a URL's host ("" when none).
  function cookieHeaderFor(url) {
    const host = hostOf(url);
    if (!host) return "";
    const cookies = byHost.get(host);
    return cookies && cookies.size > 0 ? [...cookies.values()].join("; ") : "";
  }

  return { storeFromResponse, cookieHeaderFor };
}
|
||||
|
||||
/* ---------------- HTTP client ---------------- */
|
||||
|
||||
/**
 * Build an HTTP client with retry/backoff, per-attempt timeouts, and a
 * shared per-host cookie jar.
 *
 * @param {object} opts
 * @param {number} opts.maxRetries - extra attempts after the first (total = maxRetries + 1)
 * @param {number} opts.timeoutMs  - abort each attempt after this many ms
 * @param {string} opts.defaultUa  - User-Agent used when the caller passes none
 * @param {object} [opts.logger]   - optional logger; dbg/warn are called when present
 * @returns {{fetchTextWithRetry: Function, fetchJsonWithRetry: Function, inflightStr: Function}}
 */
function createHttpClient({ maxRetries, timeoutMs, defaultUa, logger }) {
  let inflight = 0; // requests currently in flight (for log lines only)
  let reqSeq = 0;   // monotonically increasing request id

  const cookieJar = createCookieJar();

  // Short "inflight=N" string for log messages.
  function inflightStr() {
    return `inflight=${inflight}`;
  }

  /**
   * Fetch `url`, retrying retryable failures with jittered exponential backoff.
   * mode "text" expects an HTML-ish body (bodies under 200 bytes are treated as
   * retryable failures); mode "json" reads text and JSON-parses it (parse errors
   * are retryable). Non-retryable errors (e.g. HTTP 4xx other than 408/429) and
   * exhausted retries rethrow the last error.
   */
  async function fetchWithRetry(
    url,
    tag,
    ua,
    { mode = "text", method = "GET", headers = {}, body = null, cookies = true } = {}
  ) {
    for (let attempt = 0; attempt <= maxRetries; attempt++) {
      const reqId = ++reqSeq;
      const start = Date.now();

      inflight++;
      logger?.dbg?.(
        `REQ#${reqId} START ${tag} attempt=${attempt + 1}/${maxRetries + 1} ${url} (${inflightStr()})`
      );

      try {
        // Per-attempt timeout via AbortController; cleared once fetch settles.
        const ctrl = new AbortController();
        const t = setTimeout(() => ctrl.abort(), timeoutMs);

        // Attach stored cookies unless the caller supplied their own Cookie header.
        const cookieHdr =
          cookies && !Object.prototype.hasOwnProperty.call(headers, "Cookie") && !Object.prototype.hasOwnProperty.call(headers, "cookie")
            ? cookieJar.cookieHeaderFor(url)
            : "";

        const res = await fetch(url, {
          method,
          redirect: "follow",
          headers: {
            "user-agent": ua || defaultUa,
            "accept-language": "en-US,en;q=0.9",
            ...(mode === "text"
              ? { accept: "text/html,application/xhtml+xml", "cache-control": "no-cache" }
              : { accept: "application/json, text/plain, */*" }),
            ...(cookieHdr ? { cookie: cookieHdr } : {}),
            ...headers, // caller-provided headers win
          },
          body,
          signal: ctrl.signal,
        }).finally(() => clearTimeout(t));

        const status = res.status;
        const finalUrl = res.url || url;

        // capture cookies for subsequent requests to same host
        if (cookies) cookieJar.storeFromResponse(url, res);

        logger?.dbg?.(`REQ#${reqId} HTTP ${status} ${tag} finalUrl=${finalUrl}`);

        // 429/408/5xx are transient: retry. Other 4xx are permanent: fail now,
        // with a squashed head of the body for diagnostics.
        if (status === 429 || status === 408 || (status >= 500 && status <= 599)) {
          throw new RetryableError(`HTTP ${status}`);
        }
        if (status >= 400) {
          const bodyTxt = await safeText(res);
          throw new Error(
            `HTTP ${status} bodyHead=${String(bodyTxt).slice(0, 160).replace(/\s+/g, " ")}`
          );
        }

        if (mode === "json") {
          const txt = await res.text();
          const ms = Date.now() - start;
          let json;
          try {
            json = JSON.parse(txt);
          } catch (e) {
            // Truncated/garbled JSON is treated as transient.
            throw new RetryableError(`Bad JSON: ${e?.message || e}`);
          }
          return { json, ms, bytes: txt.length, status, finalUrl };
        }

        const text = await res.text();
        // Very short HTML usually means an error/interstitial page — retry.
        if (!text || text.length < 200) throw new RetryableError(`Short HTML bytes=${text.length}`);

        const ms = Date.now() - start;
        return { text, ms, bytes: text.length, status, finalUrl };
      } catch (e) {
        const retryable = isRetryable(e);
        logger?.dbg?.(
          `REQ#${reqId} ERROR ${tag} retryable=${retryable} err=${e?.message || e} (${inflightStr()})`
        );

        if (!retryable || attempt === maxRetries) throw e;

        const delay = backoffMs(attempt);
        logger?.warn?.(`Request failed, retrying in ${delay}ms (${attempt + 1}/${maxRetries})`);
        await sleep(delay);
      } finally {
        inflight--;
        logger?.dbg?.(`REQ#${reqId} END ${tag} (${inflightStr()})`);
      }
    }
    // Loop always returns or throws before falling through.
    throw new Error("unreachable");
  }

  // Convenience wrapper: fetch expecting an HTML/text body.
  function fetchTextWithRetry(url, tag, ua, opts) {
    return fetchWithRetry(url, tag, ua, { mode: "text", ...(opts || {}) });
  }

  // Convenience wrapper: fetch expecting a JSON body.
  function fetchJsonWithRetry(url, tag, ua, opts) {
    return fetchWithRetry(url, tag, ua, { mode: "json", ...(opts || {}) });
  }

  return { fetchTextWithRetry, fetchJsonWithRetry, inflightStr };
}
|
||||
|
||||
module.exports = { createHttpClient, RetryableError };
|
||||
58
src/core/logger.js
Normal file
58
src/core/logger.js
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
"use strict";
|
||||
|
||||
const { C, color } = require("../utils/ansi");
|
||||
const { ts } = require("../utils/time");
|
||||
|
||||
// Build a console logger with leveled, timestamped, optionally colored
// prefixes. Colors are applied only when requested AND stdout is a TTY.
// info/dbg are no-ops unless `debug` is set.
function createLogger({ debug = false, colorize: wantColor = true } = {}) {
  const isTTY = Boolean(process.stdout && process.stdout.isTTY);
  const enabled = Boolean(wantColor && isTTY);

  // "[LABEL <timestamp>] " prefix, colored only when enabled.
  const prefix = (label, code) => color(`[${label} ${ts()}] `, code, enabled);

  const ok = (msg) => console.log(prefix("OK", C.green) + String(msg));
  const warn = (msg) => console.log(prefix("WARN", C.yellow) + String(msg));
  const err = (msg) => console.error(prefix("ERR", C.red) + String(msg));

  const info = (msg) => {
    if (debug) console.log(prefix("INFO", C.cyan) + String(msg));
  };
  const dbg = (msg) => {
    if (debug) console.log(prefix("DEBUG", C.gray) + String(msg));
  };

  // Bare color helpers (no prefix), honoring the same enable flag.
  const dim = (s) => color(s, C.dim, enabled);
  const bold = (s) => color(s, C.bold, enabled);
  const paint = (s, code) => color(s, code, enabled);

  return {
    debug,
    isTTY,
    colorize: enabled,
    C,
    ok,
    warn,
    err,
    info,
    dbg,
    dim,
    bold,
    color: paint,
  };
}
|
||||
|
||||
module.exports = { createLogger };
|
||||
82
src/main.js
Normal file
82
src/main.js
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
#!/usr/bin/env node
|
||||
"use strict";
|
||||
|
||||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
|
||||
const { parseArgs, clampInt } = require("./utils/args");
|
||||
const { isoTimestampFileSafe } = require("./utils/time");
|
||||
|
||||
const { createLogger } = require("./core/logger");
|
||||
const { createHttpClient } = require("./core/http");
|
||||
|
||||
const { createStores, parseProductsSierra } = require("./stores");
|
||||
const { runAllStores } = require("./tracker/run_all");
|
||||
const { renderFinalReport } = require("./tracker/report");
|
||||
const { ensureDir } = require("./tracker/db");
|
||||
|
||||
const DEFAULT_UA =
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0 Safari/537.36";
|
||||
|
||||
// Resolve a possibly-relative directory setting: blank/empty -> fallback,
// absolute -> as-is, relative -> joined onto the current working directory.
function resolveDir(p, fallback) {
  const trimmed = String(p || "").trim();
  if (!trimmed) return fallback;
  if (path.isAbsolute(trimmed)) return trimmed;
  return path.join(process.cwd(), trimmed);
}
|
||||
|
||||
/**
 * Program entry point: parse CLI args + environment into a config object,
 * run all store scans, print the colored report to stdout, and save a
 * plain-text copy under config.reportDir. Requires Node 18+ (global fetch).
 *
 * @throws {Error} when global fetch() is unavailable (Node < 18)
 */
async function main() {
  if (typeof fetch !== "function") {
    throw new Error("Global fetch() not found. Please use Node.js 18+ (or newer). ");
  }

  const args = parseArgs(process.argv.slice(2));

  const logger = createLogger({ debug: args.debug, colorize: true });

  // CLI args take precedence over environment variables; clampInt bounds
  // each numeric knob to a sane range with a default.
  const config = {
    debug: args.debug,
    maxPages: args.maxPages,
    concurrency: args.concurrency ?? clampInt(process.env.CONCURRENCY, 6, 1, 64),
    staggerMs: args.staggerMs ?? clampInt(process.env.STAGGER_MS, 150, 0, 5000),
    maxRetries: clampInt(process.env.MAX_RETRIES, 6, 0, 20),
    timeoutMs: clampInt(process.env.TIMEOUT_MS, 25000, 1000, 120000),
    discoveryGuess: args.guess ?? clampInt(process.env.DISCOVERY_GUESS, 20, 1, 5000),
    discoveryStep: args.step ?? clampInt(process.env.DISCOVERY_STEP, 5, 1, 500),
    categoryConcurrency: clampInt(process.env.CATEGORY_CONCURRENCY, 5, 1, 64),
    defaultUa: DEFAULT_UA,
    defaultParseProducts: parseProductsSierra,
    dbDir: resolveDir(args.dataDir ?? process.env.DATA_DIR, path.join(process.cwd(), "data", "db")),
    reportDir: resolveDir(args.reportDir ?? process.env.REPORT_DIR, path.join(process.cwd(), "reports")),
  };

  ensureDir(config.dbDir);
  ensureDir(config.reportDir);

  const http = createHttpClient({ maxRetries: config.maxRetries, timeoutMs: config.timeoutMs, defaultUa: config.defaultUa, logger });
  const stores = createStores({ defaultUa: config.defaultUa });

  const report = await runAllStores(stores, { config, logger, http });

  // Colored copy to the terminal...
  const reportTextColor = renderFinalReport(report, { dbDir: config.dbDir, colorize: logger.colorize });
  process.stdout.write(reportTextColor);

  // ...and an uncolored copy persisted to reports/<timestamp>.txt.
  // A failed save is logged but not fatal — the run itself succeeded.
  const reportTextPlain = renderFinalReport(report, { dbDir: config.dbDir, colorize: false });
  const file = path.join(config.reportDir, `${isoTimestampFileSafe(new Date())}.txt`);
  try {
    fs.writeFileSync(file, reportTextPlain, "utf8");
    logger.ok(`Report saved: ${logger.dim(file)}`);
  } catch (e) {
    logger.warn(`Report save failed: ${e?.message || e}`);
  }
}
|
||||
|
||||
module.exports = { main };
|
||||
|
||||
// Allow running `node src/main.js` directly (same behavior as bin/tracker.js).
if (require.main === module) {
  main().catch((e) => {
    const msg = e && e.stack ? e.stack : String(e);
    // no logger here; keep simple
    console.error(msg);
    process.exitCode = 1;
  });
}
|
||||
332
src/stores/bcl.js
Normal file
332
src/stores/bcl.js
Normal file
|
|
@ -0,0 +1,332 @@
|
|||
"use strict";
|
||||
|
||||
const { normalizeCspc } = require("../utils/sku");
|
||||
const { humanBytes } = require("../utils/bytes");
|
||||
const { padLeft, padRight } = require("../utils/string");
|
||||
|
||||
const { mergeDiscoveredIntoDb } = require("../tracker/merge");
|
||||
const { buildDbObject, writeJsonAtomic } = require("../tracker/db");
|
||||
const { addCategoryResultToReport } = require("../tracker/report");
|
||||
|
||||
// Human-readable byte count, right-aligned in a fixed 8-character column.
function kbStr(bytes) {
  const pretty = humanBytes(bytes);
  return pretty.padStart(8, " ");
}
|
||||
|
||||
// Format a millisecond duration as seconds in a fixed 7-char column:
// one decimal place under 10s ("1.2s"), whole seconds otherwise ("15s").
// Non-finite inputs render as 0.
function secStr(ms) {
  const seconds = Number.isFinite(ms) ? ms / 1000 : 0;
  const rounded = Math.round(seconds * 10) / 10;
  const label = rounded < 10 ? `${rounded.toFixed(1)}s` : `${Math.round(seconds)}s`;
  return label.padStart(7, " ");
}
|
||||
|
||||
// "i/total" with i left-padded to the width of total, e.g. " 3/12".
function pageStr(i, total) {
  const width = String(total).length;
  return `${padLeft(i, width)}/${total}`;
}
|
||||
|
||||
// Integer percentage of done/total, left-padded to 3 chars ("  7%").
// A zero/falsy total yields 0% instead of dividing by zero.
function pctStr(done, total) {
  const percent = total ? Math.floor((done / total) * 100) : 0;
  return `${padLeft(percent, 3)}%`;
}
|
||||
|
||||
// Format a numeric value as a CAD price string ("$12.34"); "" when the
// value cannot be coerced to a finite number.
function cad(n) {
  const amount = Number(n);
  return Number.isFinite(amount) ? `$${amount.toFixed(2)}` : "";
}
|
||||
|
||||
// Coerce a loosely-typed price-ish value to a number. Numbers pass through;
// strings are stripped of everything but digits and dots before Number();
// null/undefined/blank yield NaN.
function asNumber(n) {
  if (n == null) return NaN;
  if (typeof n === "number") return n;
  const trimmed = String(n).trim();
  if (!trimmed) return NaN;
  return Number(trimmed.replace(/[^0-9.]/g, ""));
}
|
||||
|
||||
// Total hit count from a BCL search response; handles both the bare-number
// and the Elasticsearch 7+ `{ value: n }` shapes. Unknown shapes yield 0.
function bclTotalHits(json) {
  const total = json?.hits?.total;
  if (typeof total === "number") return total;
  if (total && typeof total.value === "number") return total.value; // ES-style
  return 0;
}
|
||||
|
||||
// Decide stock status from whichever availability field BCL happens to send.
// Explicit "out of stock" / "in stock" text wins (checked per field, in
// order); otherwise a numeric availableUnits decides; otherwise keep the
// item (better than dropping most of a category on unknown data).
function bclIsInStock(src) {
  const fields = [
    src?.availability,
    src?.availabilityText,
    src?.availabilityStatus,
    src?.availability_status,
    src?.stockStatus,
    src?.stock_status,
    src?.status,
    src?.statusText,
  ];

  for (const raw of fields) {
    const text = raw == null ? "" : String(raw);
    if (!text) continue;
    if (/out of stock/i.test(text)) return false;
    if (/\bin stock\b/i.test(text)) return true;
  }

  // Fallback only: unit count, when numeric.
  const units = Number(src?.availableUnits);
  if (Number.isFinite(units)) return units > 0;

  return true;
}
|
||||
|
||||
// Normalize a possibly-relative/protocol-relative URL to an absolute one
// on the BCL domain. Blank input -> ""; unparsable input passes through.
function bclNormalizeAbsUrl(raw) {
  const value = String(raw || "").trim();
  if (!value) return "";
  if (value.startsWith("//")) return `https:${value}`;
  if (/^https?:\/\//i.test(value)) return value;
  try {
    return new URL(value, "https://www.bcliquorstores.com/").toString();
  } catch {
    return value;
  }
}
|
||||
|
||||
// Pick the best image URL from the many field shapes BCL hits use:
// first any non-empty string field, then the first usable entry of any
// array-valued field (string or { src | url } object). "" when none.
function bclPickImage(src) {
  const direct = [
    src?.imageUrl,
    src?.imageURL,
    src?.image,
    src?.thumbnail,
    src?.thumbnailUrl,
    src?.thumbnailURL,
    src?.primaryImage,
    src?.primaryImageUrl,
  ];
  for (const candidate of direct) {
    if (typeof candidate === "string" && candidate.trim()) return bclNormalizeAbsUrl(candidate);
  }

  const listFields = [src?.images, src?.imageUrls, src?.image_urls];
  for (const list of listFields) {
    if (!Array.isArray(list) || !list.length) continue;
    const first = list[0];
    if (typeof first === "string" && first.trim()) return bclNormalizeAbsUrl(first);
    if (first && typeof first === "object") {
      const nested = String(first.src || first.url || "").trim();
      if (nested) return bclNormalizeAbsUrl(nested);
    }
  }

  return "";
}
|
||||
|
||||
/**
 * Convert one search hit into a normalized item { name, price, url, sku, img },
 * or null when the hit is unusable (missing _source, sku, or name) or the
 * product is out of stock.
 */
function bclHitToItem(hit) {
  const src = hit?._source || null;
  if (!src) return null;

  const skuRaw = src.sku != null ? String(src.sku).trim() : "";
  if (!skuRaw) return null;

  // SKU in URL (requested)
  const url = `https://www.bcliquorstores.com/product/${encodeURIComponent(skuRaw)}`;

  const name = String(src.name || "").trim();
  if (!name) return null;

  // Sale support: pick currentPrice when present; otherwise regularPrice.
  const current = asNumber(src.currentPrice);
  const regular = asNumber(src.regularPrice);
  const price = cad(Number.isFinite(current) ? current : regular);

  // Canonical SKU derived from the product URL.
  const sku = normalizeCspc(url);

  const inStock = bclIsInStock(src);
  if (!inStock) return null;

  // ✅ Fix: BCL appears to serve .jpg (not .jpeg) for these imagecache URLs.
  // Also use https.
  const img = `https://www.bcliquorstores.com/sites/default/files/imagecache/height400px/${encodeURIComponent(
    skuRaw
  )}.jpg`;

  return { name, price, url, sku, img };
}
|
||||
|
||||
|
||||
|
||||
/**
 * Fetch one page of BCL's AJAX browse endpoint for this category.
 * `page1` is 1-based; `size` is the page size. Sends Referer/Origin headers
 * mimicking the catalogue page the JSON endpoint expects, and returns the
 * { json, ms, bytes, status, finalUrl } result from fetchJsonWithRetry.
 */
async function bclFetchBrowsePage(ctx, page1, size) {
  const type = ctx.cat.bclType; // e.g. "rum" or "whisky / whiskey"
  const category = "spirits";
  const sort = "featuredProducts:desc";

  const u = new URL("https://www.bcliquorstores.com/ajax/browse");
  u.searchParams.set("category", category);
  u.searchParams.set("type", type);
  u.searchParams.set("sort", sort);
  u.searchParams.set("size", String(size));
  u.searchParams.set("page", String(page1));

  // Referer mirrors the human-facing catalogue URL for the same query.
  const referer =
    `https://www.bcliquorstores.com/product-catalogue?` +
    `category=${encodeURIComponent(category)}` +
    `&type=${encodeURIComponent(type)}` +
    `&sort=${encodeURIComponent(sort)}` +
    `&page=${encodeURIComponent(String(page1))}`;

  return await ctx.http.fetchJsonWithRetry(u.toString(), `bcl:${ctx.cat.key}:p${page1}`, ctx.store.ua, {
    method: "GET",
    headers: {
      Accept: "application/json, text/plain, */*",
      Referer: referer,
      Origin: "https://www.bcliquorstores.com",
    },
  });
}
|
||||
|
||||
/**
 * Scan one BCL category via the JSON ajax/browse endpoint, merge the
 * discovered items into the on-disk DB, and record the outcome on `report`.
 *
 * Fix: the success path previously reported `restoredCount` as
 * `removedItems.length` instead of `restoredItems.length` (the error path
 * and the other stores' scanners use `restoredItems.length`). The report
 * bookkeeping is now shared between both paths so the counts can't drift.
 *
 * @param {object} ctx - scan context (http, logger, config, cat, store, dbFile, catPrefixOut)
 * @param {Map} prevDb - previously persisted items for this category
 * @param {object} report - mutable run report (categories[], totals)
 */
async function scanCategoryBCLAjax(ctx, prevDb, report) {
  const t0 = Date.now();
  const size = 24; // items per browse page (matches the site default)

  // Merge `discovered` into the DB, persist it, and record the category
  // result + totals on the shared report. Used by both the failure path
  // (empty discovery, so removals are still tracked) and the success path.
  const finalize = (discovered, scannedPages, elapsed) => {
    const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered);
    const dbObj = buildDbObject(ctx, merged);
    writeJsonAtomic(ctx.dbFile, dbObj);

    report.categories.push({
      store: ctx.store.name,
      label: ctx.cat.label,
      key: ctx.cat.key,
      dbFile: ctx.dbFile,
      scannedPages,
      discoveredUnique: discovered.size,
      newCount: newItems.length,
      updatedCount: updatedItems.length,
      removedCount: removedItems.length,
      restoredCount: restoredItems.length, // fixed: was removedItems.length
      elapsedMs: elapsed,
    });
    report.totals.newCount += newItems.length;
    report.totals.updatedCount += updatedItems.length;
    report.totals.removedCount += removedItems.length;
    report.totals.restoredCount += restoredItems.length;
    addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems);

    return { merged, newItems, updatedItems, removedItems, restoredItems, dbObj };
  };

  // Probe page 1 first; it also tells us the total hit count.
  let first;
  try {
    first = await bclFetchBrowsePage(ctx, 1, size);
  } catch (e) {
    ctx.logger.warn(`${ctx.catPrefixOut} | BCL browse fetch failed: ${e?.message || e}`);
    finalize(new Map(), 1, Date.now() - t0);
    return;
  }

  const total = bclTotalHits(first?.json);
  const totalPages = Math.max(1, Math.ceil(total / size));
  const scanPages = ctx.config.maxPages === null ? totalPages : Math.min(ctx.config.maxPages, totalPages);

  ctx.logger.ok(`${ctx.catPrefixOut} | Total=${total} Size=${size} Pages: ${scanPages}${scanPages !== totalPages ? ` (cap from ${totalPages})` : ""}`);

  const pageNums = [];
  for (let p = 1; p <= scanPages; p++) pageNums.push(p);

  let donePages = 0;

  // Fetch pages with bounded, staggered concurrency; page 1 reuses the probe.
  const perPageItems = await require("../utils/async").parallelMapStaggered(
    pageNums,
    ctx.config.concurrency,
    ctx.config.staggerMs,
    async (page1, idx) => {
      const r = page1 === 1 ? first : await bclFetchBrowsePage(ctx, page1, size);
      const hits = Array.isArray(r?.json?.hits?.hits) ? r.json.hits.hits : [];

      const items = [];
      for (const h of hits) {
        const it = bclHitToItem(h);
        if (it) items.push(it);
      }

      donePages++;
      ctx.logger.ok(
        `${ctx.catPrefixOut} | Page ${pageStr(idx + 1, pageNums.length)} | ${String(r.status || "").padEnd(3)} | ${pctStr(donePages, pageNums.length)} | items=${padLeft(
          items.length,
          3
        )} | bytes=${kbStr(r.bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}`
      );

      return items;
    }
  );

  // De-duplicate across pages by product URL (featured sort can repeat items).
  const discovered = new Map();
  let dups = 0;
  for (const arr of perPageItems) {
    for (const it of arr) {
      if (discovered.has(it.url)) dups++;
      discovered.set(it.url, it);
    }
  }

  ctx.logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}${dups ? ` (${dups} dups)` : ""}`);

  const elapsed = Date.now() - t0;
  const { merged, newItems, updatedItems, removedItems, restoredItems, dbObj } = finalize(discovered, scanPages, elapsed);

  ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`);
  ctx.logger.ok(
    `${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`
  );
}
|
||||
|
||||
/**
 * Build the store adapter descriptor for BC Liquor Stores
 * (www.bcliquorstores.com). The scanner consumes this shape:
 * key/name/host identify the store, `ua` is the request User-Agent,
 * and `scanCategory` performs the actual category scan.
 *
 * @param {string} defaultUa - User-Agent string to use for requests
 * @returns {object} store descriptor
 */
function createStore(defaultUa) {
  return {
    key: "bcl",
    name: "BCL",
    host: "www.bcliquorstores.com",
    ua: defaultUa,
    scanCategory: scanCategoryBCLAjax, // JSON-driven (async browse)
    categories: [
      {
        key: "whisky",
        label: "Whisky / Whiskey",
        // informational only; scan uses ajax/browse
        startUrl: "https://www.bcliquorstores.com/product-catalogue?category=spirits&type=whisky%20/%20whiskey&sort=featuredProducts:desc&page=1",
        // `bclType` is the value sent as the browse endpoint's `type` param.
        bclType: "whisky / whiskey",
      },
      {
        key: "rum",
        label: "Rum",
        startUrl: "https://www.bcliquorstores.com/product-catalogue?category=spirits&type=rum&sort=featuredProducts:desc&page=1",
        bclType: "rum",
      },
    ],
  };
}
|
||||
|
||||
module.exports = { createStore };
|
||||
387
src/stores/bsw.js
Normal file
387
src/stores/bsw.js
Normal file
|
|
@ -0,0 +1,387 @@
|
|||
"use strict";
|
||||
|
||||
const { cleanText } = require("../utils/html");
|
||||
const { normalizeCspc } = require("../utils/sku");
|
||||
const { padLeft, padRight } = require("../utils/string");
|
||||
const { humanBytes } = require("../utils/bytes");
|
||||
|
||||
const { mergeDiscoveredIntoDb } = require("../tracker/merge");
|
||||
const { buildDbObject, writeJsonAtomic } = require("../tracker/db");
|
||||
const { addCategoryResultToReport } = require("../tracker/report");
|
||||
|
||||
const BSW_ALGOLIA_APP_ID = "25TO6MPUL0";
|
||||
const BSW_ALGOLIA_API_KEY = "1aa0c19fe6a0931340570bd358c2c9d2";
|
||||
const BSW_ALGOLIA_URL = `https://${BSW_ALGOLIA_APP_ID.toLowerCase()}-dsn.algolia.net/1/indexes/*/queries`;
|
||||
|
||||
function usd(n) {
  // Format a finite number as a US-style currency string, e.g. "$1,234.50".
  // Non-finite input (NaN, Infinity, undefined) yields "".
  if (!Number.isFinite(n)) return "";
  const formatted = n.toLocaleString("en-US", { minimumFractionDigits: 2, maximumFractionDigits: 2 });
  return `$${formatted}`;
}
|
||||
|
||||
function bswExtractCollectionIdFromHtml(html) {
  // Scrape a numeric Shopify collection id (6+ digits) out of a BSW
  // collection page, trying the known embedding patterns in order.
  // Returns the id as an integer, or null when none match.
  const text = String(html || "");
  const patterns = [
    /collection_ids%3A(\d{6,})/i,
    /collection_ids\s*:\s*(\d{6,})/i,
    /"collection_ids"\s*:\s*(\d{6,})/i,
    /"collection_id"\s*:\s*(\d{6,})/i,
    /collection_id\s*=\s*(\d{6,})/i,
    /collectionId["']?\s*[:=]\s*["']?(\d{6,})/i,
    /data-collection-id=["'](\d{6,})["']/i,
  ];
  for (const pattern of patterns) {
    const match = pattern.exec(text);
    if (match?.[1]) return Number.parseInt(match[1], 10);
  }
  return null;
}
|
||||
|
||||
function bswFormatPrice(value, hintCents) {
  // Normalize an Algolia price field (string or number) to "$X,XXX.XX".
  // `hintCents` marks a numeric value as cents-denominated.
  if (value == null) return "";

  if (typeof value === "string") {
    const trimmed = value.trim();
    if (!trimmed) return "";
    // Already carries a dollar sign: just collapse whitespace.
    if (trimmed.includes("$")) return trimmed.replace(/\s+/g, "");
    const parsed = Number(trimmed.replace(/[^0-9.]/g, ""));
    return Number.isFinite(parsed) ? usd(parsed) : trimmed;
  }

  if (typeof value === "number") {
    let dollars = value;
    // Explicit cents hint, or a suspiciously large integer (>= $1000 if it
    // were cents), means the value is actually in cents.
    if (hintCents) dollars = dollars / 100;
    else if (Number.isInteger(dollars) && dollars >= 100000) dollars = dollars / 100;
    return usd(dollars);
  }

  return "";
}
|
||||
|
||||
function bswPickPrice(hit) {
  // Pick the best available price field from an Algolia hit.
  // Returns { val, cents } where `cents` flags a cents-denominated value.
  // Preference: cents fields on the hit, dollar fields on the hit, then
  // the first variant's fields.
  const result = (val, cents) => ({ val, cents });

  if (hit?.price_cents != null) return result(hit.price_cents, true);
  if (hit?.compare_at_price_cents != null) return result(hit.compare_at_price_cents, true);

  const dollarFields = ["price", "price_min", "priceMin", "min_price", "variants_min_price"];
  for (const field of dollarFields) {
    if (hit && hit[field] != null) return result(hit[field], false);
  }

  const firstVariant = Array.isArray(hit?.variants) ? hit.variants[0] : null;
  if (firstVariant) {
    if (firstVariant.price_cents != null) return result(firstVariant.price_cents, true);
    if (firstVariant.compare_at_price_cents != null) return result(firstVariant.compare_at_price_cents, true);
    if (firstVariant.price != null) return result(firstVariant.price, false);
  }

  return result(null, false);
}
|
||||
|
||||
|
||||
function bswHitToItem(hit) {
  // Convert an Algolia hit into a tracker item; null when name or URL
  // cannot be determined.
  const name = cleanText(hit && (hit.title || hit.name || hit.product_title || hit.product_name || ""));
  const handle = hit && (hit.handle || hit.product_handle || hit.slug || "");

  // Prefer an explicit URL on the hit; otherwise construct one from the
  // Shopify handle.
  let url = (hit && (hit.url || hit.product_url)) || "";
  if (!url && handle) {
    url = `https://www.bswliquor.com/products/${String(handle).replace(/^\/+/, "")}`;
  }

  if (!name || !url) return null;

  const { val: priceVal, cents: hintCents } = bswPickPrice(hit);

  return {
    name,
    price: bswFormatPrice(priceVal, hintCents),
    url,
    sku: normalizeCspc(hit?.sku || hit?.SKU || hit?.cspc || hit?.CSPC || ""),
    img: bswPickImage(hit),
  };
}
|
||||
|
||||
/**
 * Fetch one page (0-based) of a BSW collection from Algolia's multi-query
 * endpoint. Only in-stock items are requested via the
 * inventory_available:"true" filter.
 *
 * `params` is a hand-built x-www-form-urlencoded string; the facets literal
 * is the percent-encoding of ["price","*"].
 *
 * @param {object} ctx - scan context (http client, category, store)
 * @param {number} collectionId - Shopify collection id to filter on
 * @param {string} ruleContext - optional Algolia ruleContexts value ("" to omit)
 * @param {number} page0 - 0-based page index
 * @param {number} hitsPerPage - page size
 * @returns {Promise<object>} the fetchJsonWithRetry result
 */
async function bswFetchAlgoliaPage(ctx, collectionId, ruleContext, page0, hitsPerPage) {
  const filtersExpr = `collection_ids:${collectionId} AND (inventory_available:"true")`;

  const params =
    `facets=%5B%22price%22%2C%22*%22%5D` +
    `&filters=${encodeURIComponent(filtersExpr)}` +
    `&hitsPerPage=${encodeURIComponent(String(hitsPerPage))}` +
    `&page=${encodeURIComponent(String(page0))}` +
    `&query=` +
    `&clickAnalytics=true` +
    `&maxValuesPerFacet=100` +
    (ruleContext ? `&ruleContexts=${encodeURIComponent(String(ruleContext))}` : "");

  const bodyObj = { requests: [{ indexName: "shopify_products", params }] };

  // POST with browser-like Origin/Referer plus the public Algolia keys.
  return await ctx.http.fetchJsonWithRetry(BSW_ALGOLIA_URL, `algolia:${ctx.cat.key}:p${page0}`, ctx.store.ua, {
    method: "POST",
    headers: {
      Accept: "*/*",
      "content-type": "application/x-www-form-urlencoded",
      Origin: "https://www.bswliquor.com",
      Referer: "https://www.bswliquor.com/",
      "x-algolia-api-key": BSW_ALGOLIA_API_KEY,
      "x-algolia-application-id": BSW_ALGOLIA_APP_ID,
    },
    body: JSON.stringify(bodyObj),
  });
}
|
||||
|
||||
function kbStr(bytes) {
  // Human-readable byte count, right-aligned to 8 columns for log alignment.
  const human = humanBytes(bytes);
  return human.padStart(8, " ");
}
|
||||
|
||||
function secStr(ms) {
  // Duration formatted in seconds, right-aligned to 7 columns:
  // one decimal place under 10s ("1.2s"), whole seconds above ("15s").
  // Non-finite input is treated as 0ms.
  const seconds = Number.isFinite(ms) ? ms / 1000 : 0;
  const tenths = Math.round(seconds * 10) / 10;
  const text = tenths < 10 ? `${tenths.toFixed(1)}s` : `${Math.round(seconds)}s`;
  return text.padStart(7, " ");
}
|
||||
|
||||
function pageStr(i, total) {
  // Render "i/total" with i left-padded to the width of total (e.g. " 3/12").
  const width = String(total).length;
  return `${padLeft(i, width)}/${total}`;
}
|
||||
|
||||
function pctStr(done, total) {
  // Integer percentage (floored), left-padded to 3 chars ("  7%", "100%").
  // A zero/falsy total yields "  0%" rather than dividing by zero.
  const percent = total ? Math.floor((done / total) * 100) : 0;
  return `${padLeft(percent, 3)}%`;
}
|
||||
|
||||
function bswNormalizeAbsUrl(raw) {
  // Resolve a possibly protocol-relative or site-relative URL to an
  // absolute https URL against bswliquor.com. Unparseable input is
  // returned untouched (best effort); empty input yields "".
  const input = String(raw || "").trim();
  if (!input) return "";
  if (input.startsWith("//")) return `https:${input}`;
  if (/^https?:\/\//i.test(input)) return input;
  try {
    return new URL(input, "https://www.bswliquor.com/").toString();
  } catch {
    return input;
  }
}
|
||||
|
||||
function bswNormalizeImg(v) {
  // Coerce an image value — either a plain string or an object carrying one
  // of the common src-like keys — to an absolute URL; "" when nothing usable.
  if (!v) return "";
  if (typeof v === "string") return bswNormalizeAbsUrl(v);
  if (typeof v === "object") {
    const candidates = [
      v.src,
      v.url,
      v.originalSrc,
      v.original_src,
      v.original,
      v.secure_url,
      v.large,
      v.medium,
      v.small,
    ];
    const found = candidates.find((c) => typeof c === "string" && c.trim());
    if (found) return bswNormalizeAbsUrl(found);
  }
  return "";
}
|
||||
|
||||
function bswPickImage(hit) {
  // Find the first usable product image on an Algolia hit: scalar fields
  // first (in preference order), then the `images` and `media` arrays.
  const scalarCandidates = [
    hit?.image,
    hit?.image_url,
    hit?.imageUrl,
    hit?.imageURL,
    hit?.featured_image,
    hit?.featured_image_url,
    hit?.featuredImage,
    hit?.featuredImageUrl,
    hit?.product_image,
    hit?.product_image_url,
    hit?.productImage,
    hit?.productImageUrl,
    hit?.thumbnail,
    hit?.thumbnail_url,
    hit?.thumbnailUrl,
  ];

  for (const candidate of scalarCandidates) {
    const normalized = bswNormalizeImg(candidate);
    if (normalized) return normalized;
  }

  for (const collection of [hit?.images, hit?.media]) {
    if (!Array.isArray(collection)) continue;
    for (const entry of collection) {
      const normalized = bswNormalizeImg(entry);
      if (normalized) return normalized;
    }
  }

  return "";
}
|
||||
|
||||
|
||||
/**
 * Scan one BSW category via Algolia, merge discovered items into the
 * on-disk DB, and record the outcome on `report`.
 *
 * Flow: discover the Shopify collection id (config override, else scraped
 * from the collection page's HTML), fetch page 0 to learn nbPages, then
 * fetch the remaining pages with bounded concurrency.
 *
 * @param {object} ctx - scan context (http, logger, config, cat, store, dbFile, catPrefixOut)
 * @param {Map} prevDb - previously persisted items for this category
 * @param {object} report - mutable run report (categories[], totals)
 */
async function scanCategoryBSWAlgolia(ctx, prevDb, report) {
  const t0 = Date.now();

  // Collection id: explicit config wins; otherwise scrape it from the HTML.
  let collectionId = Number.isFinite(ctx.cat.bswCollectionId) ? ctx.cat.bswCollectionId : null;
  if (!collectionId) {
    try {
      const { text: html } = await ctx.http.fetchTextWithRetry(ctx.cat.startUrl, `bsw:html:${ctx.cat.key}`, ctx.store.ua);
      collectionId = bswExtractCollectionIdFromHtml(html);
      if (collectionId) ctx.logger.ok(`${ctx.catPrefixOut} | BSW discovered collectionId=${collectionId}`);
      else ctx.logger.warn(`${ctx.catPrefixOut} | BSW could not discover collectionId from HTML.`);
    } catch (e) {
      // Discovery failure is non-fatal; the fallback path below records an
      // empty scan for this category.
      ctx.logger.warn(`${ctx.catPrefixOut} | BSW HTML fetch failed for collectionId discovery: ${e?.message || e}`);
    }
  }

  // Without a collection id we cannot query Algolia: persist an empty
  // discovery (so removals are still tracked) and report zero items.
  if (!collectionId) {
    ctx.logger.warn(`${ctx.catPrefixOut} | BSW missing collectionId; defaulting to 1 page with 0 items.`);

    const discovered = new Map();
    const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered);
    const dbObj = buildDbObject(ctx, merged);
    writeJsonAtomic(ctx.dbFile, dbObj);

    ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`);

    const elapsed = Date.now() - t0;
    report.categories.push({
      store: ctx.store.name,
      label: ctx.cat.label,
      key: ctx.cat.key,
      dbFile: ctx.dbFile,
      scannedPages: 1,
      discoveredUnique: 0,
      newCount: newItems.length,
      updatedCount: updatedItems.length,
      removedCount: removedItems.length,
      restoredCount: restoredItems.length,
      elapsedMs: elapsed,
    });
    report.totals.newCount += newItems.length;
    report.totals.updatedCount += updatedItems.length;
    report.totals.removedCount += removedItems.length;
    report.totals.restoredCount += restoredItems.length;
    addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems);
    return;
  }

  const ruleContext = ctx.cat.bswRuleContext || "";
  const hitsPerPage = 50;

  // Page 0 doubles as the page-count probe (Algolia reports nbPages).
  const first = await bswFetchAlgoliaPage(ctx, collectionId, ruleContext, 0, hitsPerPage);
  const result0 = first?.json?.results?.[0] || null;
  const nbPages = result0 && Number.isFinite(result0.nbPages) ? result0.nbPages : 1;

  const totalPages = Math.max(1, nbPages);
  const scanPages = ctx.config.maxPages === null ? totalPages : Math.min(ctx.config.maxPages, totalPages);
  ctx.logger.ok(`${ctx.catPrefixOut} | Pages: ${scanPages}${scanPages !== totalPages ? ` (cap from ${totalPages})` : ""}`);

  const pageIdxs = [];
  for (let p = 0; p < scanPages; p++) pageIdxs.push(p);

  let donePages = 0;

  // Fetch pages with bounded, staggered concurrency; page 0 reuses `first`.
  const perPageItems = await require("../utils/async").parallelMapStaggered(pageIdxs, ctx.config.concurrency, ctx.config.staggerMs, async (page0, idx) => {
    const pnum = idx + 1;
    const r = page0 === 0 ? first : await bswFetchAlgoliaPage(ctx, collectionId, ruleContext, page0, hitsPerPage);

    const res0 = r?.json?.results?.[0] || null;
    const hits = res0 && Array.isArray(res0.hits) ? res0.hits : [];

    const items = [];
    for (const h of hits) {
      const it = bswHitToItem(h);
      if (it) items.push(it);
    }

    donePages++;
    ctx.logger.ok(
      `${ctx.catPrefixOut} | Page ${pageStr(pnum, pageIdxs.length)} | ${String(r.status || "").padEnd(3)} | ${pctStr(donePages, pageIdxs.length)} | items=${padLeft(
        items.length,
        3
      )} | bytes=${kbStr(r.bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}`
    );

    return items;
  });

  // De-duplicate across pages by product URL.
  const discovered = new Map();
  let dups = 0;
  for (const arr of perPageItems) {
    for (const it of arr) {
      if (discovered.has(it.url)) dups++;
      discovered.set(it.url, it);
    }
  }

  ctx.logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}${dups ? ` (${dups} dups)` : ""}`);

  const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered);

  const dbObj = buildDbObject(ctx, merged);
  writeJsonAtomic(ctx.dbFile, dbObj);

  ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`);

  const elapsed = Date.now() - t0;
  ctx.logger.ok(
    `${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`
  );

  // Record per-category result and accumulate run totals.
  report.categories.push({
    store: ctx.store.name,
    label: ctx.cat.label,
    key: ctx.cat.key,
    dbFile: ctx.dbFile,
    scannedPages: scanPages,
    discoveredUnique: discovered.size,
    newCount: newItems.length,
    updatedCount: updatedItems.length,
    removedCount: removedItems.length,
    restoredCount: restoredItems.length,
    elapsedMs: elapsed,
  });
  report.totals.newCount += newItems.length;
  report.totals.updatedCount += updatedItems.length;
  report.totals.removedCount += removedItems.length;
  report.totals.restoredCount += restoredItems.length;

  addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems);
}
|
||||
|
||||
/**
 * Build the store adapter descriptor for BSW Liquor (www.bswliquor.com).
 * Categories are scanned via Algolia (see scanCategoryBSWAlgolia); the
 * collection id is discovered from each category's startUrl at scan time.
 *
 * @param {string} defaultUa - User-Agent string to use for requests
 * @returns {object} store descriptor
 */
function createStore(defaultUa) {
  return {
    key: "bsw",
    name: "BSW",
    host: "www.bswliquor.com",
    ua: defaultUa,
    scanCategory: scanCategoryBSWAlgolia,
    categories: [
      {
        key: "scotch-whisky",
        label: "Scotch Whisky",
        startUrl: "https://www.bswliquor.com/collections/scotch-whisky?page=1",
        // Optional Algolia ruleContexts value forwarded with each query.
        bswRuleContext: "scotch-whisky",
      },
      {
        key: "rum",
        label: "Rum",
        startUrl: "https://www.bswliquor.com/collections/rum?page=1",
        bswRuleContext: "rum",
      },
    ],
  };
}
|
||||
|
||||
module.exports = { createStore };
|
||||
307
src/stores/craftcellars.js
Normal file
307
src/stores/craftcellars.js
Normal file
|
|
@ -0,0 +1,307 @@
|
|||
"use strict";
|
||||
|
||||
const { decodeHtml, stripTags, extractFirstImgUrl } = require("../utils/html");
|
||||
const { sanitizeName } = require("../utils/text");
|
||||
const { normalizeCspc } = require("../utils/sku");
|
||||
const { makePageUrlShopifyQueryPage } = require("../utils/url");
|
||||
|
||||
const { mergeDiscoveredIntoDb } = require("../tracker/merge");
|
||||
const { buildDbObject, writeJsonAtomic } = require("../tracker/db");
|
||||
const { addCategoryResultToReport } = require("../tracker/report");
|
||||
|
||||
function craftCellarsIsEmptyListingPage(html) {
  // True when a Shopify collection page shows its empty-collection state,
  // detected by either the CSS marker class or the visible message.
  const page = String(html || "");
  return /collection--empty\b/i.test(page) || /No products found/i.test(page);
}
|
||||
|
||||
function canonicalizeCraftProductUrl(raw) {
  // Strip the query string and fragment so the same product always keys
  // identically in the discovered map. Non-URL input falls back to the
  // stringified value ("" for null/undefined).
  try {
    const parsed = new URL(String(raw));
    parsed.search = "";
    parsed.hash = "";
    return parsed.toString();
  } catch {
    return String(raw || "");
  }
}
|
||||
|
||||
function extractShopifyCardPrice(block) {
  // Pull the displayed price from a Shopify product-card HTML block.
  // Preference order: the first dollar amount after "Sale price", then
  // after "Regular price", then any dollar amount anywhere in the block.
  const text = String(block || "");

  const firstDollar = (region) => {
    const matches = [...String(region).matchAll(/\$\s*[\d,]+(?:\.\d{2})?/g)];
    return matches.length ? matches[0][0].replace(/\s+/g, "") : "";
  };

  const afterSale = firstDollar(text.split(/sale price/i)[1] || "");
  if (afterSale) return afterSale;

  const afterRegular = firstDollar(text.split(/regular price/i)[1] || "");
  if (afterRegular) return afterRegular;

  return firstDollar(text);
}
|
||||
|
||||
function parseProductsCraftCellars(html, ctx) {
  // Narrow the page to the product grid before parsing, preferring whichever
  // known grid container is larger. Falls back to the whole page when the
  // chosen grid doesn't actually contain product links.
  const page = String(html || "");

  const containerA = page.match(/<div\b[^>]*id=["']ProductGridContainer["'][^>]*>[\s\S]*?<\/div>/i)?.[0] || "";
  const containerB = page.match(/<div\b[^>]*id=["']product-grid["'][^>]*>[\s\S]*?<\/div>/i)?.[0] || "";

  const biggest = containerA.length > containerB.length ? containerA : containerB;
  const scope = /\/products\//i.test(biggest) ? biggest : page;

  return parseProductsCraftCellarsInner(scope, ctx);
}
|
||||
|
||||
/**
 * Parse product cards out of a Craft Cellars listing fragment.
 * Splits the markup into per-product blocks (<li> items, falling back to
 * div.card when too few <li>s match), then extracts the product URL, name,
 * price and image from each block.
 *
 * @param {string} html - listing fragment (grid or full page)
 * @param {object} ctx - scan context; ctx.store.host is the URL base
 * @returns {Array<{name: string, price: string, url: string, img: string}>}
 *   de-duplicated by canonical product URL
 */
function parseProductsCraftCellarsInner(html, ctx) {
  const s = String(html || "");
  const items = [];

  // Primary block shape is <li>...</li>; fewer than 5 matches suggests a
  // different theme layout, so retry with div.card blocks.
  let blocks = [...s.matchAll(/<li\b[^>]*>[\s\S]*?<\/li>/gi)].map((m) => m[0]);
  if (blocks.length < 5) {
    blocks = [...s.matchAll(/<div\b[^>]*class=["'][^"']*\bcard\b[^"']*["'][^>]*>[\s\S]*?<\/div>/gi)].map(
      (m) => m[0]
    );
  }

  const base = `https://${(ctx && ctx.store && ctx.store.host) || "craftcellars.ca"}/`;

  for (const block of blocks) {
    // Product link: prefer an <a href> onto /products/, else any href.
    const href =
      block.match(/<a\b[^>]*href=["']([^"']*\/products\/[^"']+)["']/i)?.[1] ||
      block.match(/href=["']([^"']*\/products\/[^"']+)["']/i)?.[1];
    if (!href) continue;

    let url = "";
    try {
      url = new URL(decodeHtml(href), base).toString();
    } catch {
      continue;
    }
    url = canonicalizeCraftProductUrl(url);

    // Product name: three fallbacks in decreasing specificity — text nested
    // one element inside the product link, a heading-wrapped link, then the
    // raw link contents.
    const nameHtml =
      block.match(
        /<a\b[^>]*href=["'][^"']*\/products\/[^"']+["'][^>]*>\s*<[^>]*>\s*([^<]{2,200}?)\s*</i
      )?.[1] ||
      block.match(
        /<h[23]\b[^>]*>[\s\S]*?<a\b[^>]*\/products\/[^"']+[^>]*>([\s\S]*?)<\/a>[\s\S]*?<\/h[23]>/i
      )?.[1] ||
      block.match(/<a\b[^>]*href=["'][^"']*\/products\/[^"']+["'][^>]*>([\s\S]*?)<\/a>/i)?.[1];

    const name = sanitizeName(stripTags(decodeHtml(nameHtml || "")));
    if (!name) continue;

    const price = extractShopifyCardPrice(block);
    const img = extractFirstImgUrl(block, base);

    items.push({ name, price, url, img });
  }

  // De-duplicate by URL; last occurrence wins.
  const uniq = new Map();
  for (const it of items) uniq.set(it.url, it);
  return [...uniq.values()];
}
|
||||
|
||||
|
||||
/**
 * Normalize a Shopify price string (e.g. "19.99") to "$19.99".
 *
 * Strips any non-numeric characters first. Returns "" for empty or
 * non-numeric input — previously `Number("")` coerced such input to 0,
 * producing a bogus "$0.00". The only caller guards with a truthy
 * `v?.price`, so real prices are unaffected.
 *
 * @param {string} s - raw Shopify price string
 * @returns {string} formatted price, or "" when no number is present
 */
function usdFromShopifyPriceStr(s) {
  const cleaned = String(s || "").replace(/[^0-9.]/g, "");
  if (!cleaned) return "";
  const n = Number(cleaned);
  if (!Number.isFinite(n)) return "";
  return `$${n.toLocaleString("en-US", { minimumFractionDigits: 2, maximumFractionDigits: 2 })}`;
}
|
||||
|
||||
/**
 * Craft Cellars:
 * - HTML listing with ?filter.v.availability=1 is the allowlist (prevents OOS leaking in)
 * - Shopify products.json is used only to enrich SKU (and optionally price) for those allowed URLs
 *
 * @param {object} ctx - scan context (http, logger, config, cat, store, dbFile, catPrefixOut)
 * @param {Map} prevDb - previously persisted items for this category
 * @param {object} report - mutable run report (categories[], totals)
 */
async function scanCategoryCraftCellars(ctx, prevDb, report) {
  const t0 = Date.now();

  // 1) HTML scan: allowlist of in-stock listing URLs
  const htmlMap = new Map(); // url -> {name, price, url, img}

  // Hard cap of 200 pages regardless of config, as a runaway guard.
  const maxPages = ctx.config.maxPages === null ? 200 : Math.min(ctx.config.maxPages, 200);
  let htmlPagesFetched = 0;
  let emptyStreak = 0;

  for (let p = 1; p <= maxPages; p++) {
    const pageUrl = makePageUrlShopifyQueryPage(ctx.cat.startUrl, p);
    const { text: html } = await ctx.http.fetchTextWithRetry(pageUrl, `craft:html:${ctx.cat.key}:p${p}`, ctx.store.ua);
    htmlPagesFetched++;

    // Shopify's explicit empty-collection page means we've run off the end.
    if (craftCellarsIsEmptyListingPage(html)) break;

    const items = parseProductsCraftCellars(html, ctx);
    // Two consecutive pages with no parsed items also ends the scan
    // (a single empty page may just be a parse hiccup).
    if (!items.length) {
      emptyStreak++;
      if (emptyStreak >= 2) break;
      continue;
    }
    emptyStreak = 0;

    for (const it of items) {
      const url = canonicalizeCraftProductUrl(it.url);
      if (!url) continue;
      htmlMap.set(url, { name: it.name || "", price: it.price || "", url, img: it.img || "" });
    }
  }

  // If HTML returns nothing, don't let JSON invent a category
  if (!htmlMap.size) {
    ctx.logger.warn(
      `${ctx.catPrefixOut} | HTML listing returned 0 items; refusing to use products.json as source of truth.`
    );
  }

  // 2) JSON scan: build SKU index (but do NOT add new URLs from JSON)
  const jsonMap = new Map(); // url -> { sku, price, img }

  if (htmlMap.size) {
    const start = new URL(ctx.cat.startUrl);
    const m = start.pathname.match(/^\/collections\/([^/]+)/i);
    if (!m) throw new Error(`CraftCellars: couldn't extract collection handle from ${ctx.cat.startUrl}`);
    const collectionHandle = m[1];

    const limit = 250;
    let jsonPage = 1;
    let jsonPagesFetched = 0;

    while (true) {
      const url = `https://${ctx.store.host}/collections/${collectionHandle}/products.json?limit=${limit}&page=${jsonPage}`;
      const r = await ctx.http.fetchJsonWithRetry(url, `craft:coljson:${ctx.cat.key}:p${jsonPage}`, ctx.store.ua);

      const products = Array.isArray(r?.json?.products) ? r.json.products : [];
      jsonPagesFetched++;

      if (!products.length) break;

      for (const p of products) {
        const handle = String(p?.handle || "");
        if (!handle) continue;

        const prodUrl = canonicalizeCraftProductUrl(`https://${ctx.store.host}/products/${handle}`);

        // Only enrich if it's on the HTML allowlist
        if (!htmlMap.has(prodUrl)) continue;

        // Prefer an available variant; fall back to the first one.
        const variants = Array.isArray(p?.variants) ? p.variants : [];
        const v = variants.find((x) => x && x.available === true) || variants[0] || null;

        const sku = normalizeCspc(v?.sku || "");
        const price = v?.price ? usdFromShopifyPriceStr(v.price) : "";

        // Product image (best effort)
        let img = "";
        const images = Array.isArray(p?.images) ? p.images : [];
        if (images[0]) {
          if (typeof images[0] === "string") img = images[0];
          else img = String(images[0]?.src || images[0]?.url || "");
        }
        if (!img && p?.image) img = String(p.image?.src || p.image?.url || p.image || "");
        img = String(img || "").trim();
        // Normalize protocol-relative and site-relative image URLs.
        if (img.startsWith("//")) img = `https:${img}`;
        if (img && !/^https?:\/\//i.test(img)) {
          try {
            img = new URL(img, `https://${ctx.store.host}/`).toString();
          } catch {
            // keep as-is
          }
        }

        jsonMap.set(prodUrl, { sku, price, img });
      }

      if (products.length < limit) break;
      jsonPage++;
      if (jsonPage > 200) break; // safety
    }

    ctx.logger.ok(`${ctx.catPrefixOut} | HTML pages=${htmlPagesFetched} JSON pages=${jsonPagesFetched}`);
  } else {
    ctx.logger.ok(`${ctx.catPrefixOut} | HTML pages=${htmlPagesFetched} JSON pages=0`);
  }

  // 3) Final discovered: HTML allowlist, enriched by JSON
  const discovered = new Map();
  for (const [url, it] of htmlMap.entries()) {
    const j = jsonMap.get(url);
    discovered.set(url, {
      name: it.name || "",
      // Prefer JSON price (normalized) when present, else keep HTML price (already formatted)
      price: j?.price || it.price || "",
      url,
      sku: j?.sku || "",
      img: j?.img || it.img || "",
    });
  }

  ctx.logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}`);

  const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered);

  const dbObj = buildDbObject(ctx, merged);
  writeJsonAtomic(ctx.dbFile, dbObj);

  ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`);

  const elapsed = Date.now() - t0;

  // Record per-category result and accumulate run totals.
  report.categories.push({
    store: ctx.store.name,
    label: ctx.cat.label,
    key: ctx.cat.key,
    dbFile: ctx.dbFile,
    scannedPages: htmlPagesFetched,
    discoveredUnique: discovered.size,
    newCount: newItems.length,
    updatedCount: updatedItems.length,
    removedCount: removedItems.length,
    restoredCount: restoredItems.length,
    elapsedMs: elapsed,
  });

  report.totals.newCount += newItems.length;
  report.totals.updatedCount += updatedItems.length;
  report.totals.removedCount += removedItems.length;
  report.totals.restoredCount += restoredItems.length;

  addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems);
}
|
||||
|
||||
|
||||
/**
 * Build the store adapter descriptor for Craft Cellars (craftcellars.ca).
 * Uses the custom two-phase scan (HTML allowlist + products.json
 * enrichment); the plain HTML parser is kept exposed for debugging.
 *
 * @param {string} defaultUa - User-Agent string to use for requests
 * @returns {object} store descriptor
 */
function createStore(defaultUa) {
  return {
    key: "craftcellars",
    name: "Craft Cellars",
    host: "craftcellars.ca",
    ua: defaultUa,

    // ✅ Custom scan (HTML allowlist + JSON enrichment)
    scanCategory: scanCategoryCraftCellars,

    // Keep HTML parser for debugging
    parseProducts: parseProductsCraftCellars,
    makePageUrl: makePageUrlShopifyQueryPage,
    isEmptyListingPage: craftCellarsIsEmptyListingPage,

    categories: [
      {
        key: "whisky",
        label: "Whisky",
        // filter.v.availability=1 restricts the listing to in-stock items.
        startUrl: "https://craftcellars.ca/collections/whisky?filter.v.availability=1",
        discoveryStartPage: 10,
      },
      {
        key: "rum",
        label: "Rum",
        startUrl: "https://craftcellars.ca/collections/rum?filter.v.availability=1",
        discoveryStartPage: 5,
      },
    ],
  };
}
|
||||
|
||||
module.exports = { createStore };
|
||||
25
src/stores/index.js
Normal file
25
src/stores/index.js
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
"use strict";
|
||||
|
||||
const { createStore: createSierra, parseProductsSierra } = require("./sierrasprings");
|
||||
const { createStore: createBSW } = require("./bsw");
|
||||
const { createStore: createKWM } = require("./kwm");
|
||||
const { createStore: createKegNCork } = require("./kegncork");
|
||||
const { createStore: createMaltsAndGrains } = require("./maltsandgrains");
|
||||
const { createStore: createCraftCellars } = require("./craftcellars");
|
||||
const { createStore: createBCL } = require("./bcl");
|
||||
const { createStore: createStrath } = require("./strath");
|
||||
|
||||
/**
 * Instantiate every supported store with a shared default user agent.
 * Order matters only for log/report ordering.
 * @param {{defaultUa?: string}} [options]
 * @returns {object[]} One store descriptor per retailer.
 */
function createStores({ defaultUa } = {}) {
  const factories = [
    createSierra,
    createBSW,
    createKWM,
    createKegNCork,
    createMaltsAndGrains,
    createCraftCellars,
    createBCL,
    createStrath,
  ];
  return factories.map((make) => make(defaultUa));
}
|
||||
|
||||
module.exports = { createStores, parseProductsSierra };
|
||||
78
src/stores/kegncork.js
Normal file
78
src/stores/kegncork.js
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
"use strict";
|
||||
|
||||
const { decodeHtml, cleanText, stripTags, extractFirstImgUrl } = require("../utils/html");
|
||||
const { makePageUrlQueryParam } = require("../utils/url");
|
||||
|
||||
/**
 * Build the URL for a given listing page on Keg N Cork.
 * Pagination is a plain `?page=N` query parameter.
 * @param {string} catalogUrl - Category base URL.
 * @param {number} page - 1-based page number.
 * @returns {string} URL for the requested page.
 */
function makePageUrlKegNCork(catalogUrl, page) {
  return makePageUrlQueryParam(catalogUrl, "page", page);
}
|
||||
|
||||
/**
 * Parse product cards out of a Keg N Cork (BigCommerce) listing page.
 * Each card is an `<li class="... product ...">`; the title anchor gives
 * name + URL, and the tax-free price attribute (or the visible price
 * section) gives the price. Results are de-duplicated by URL.
 * @param {string} html - Raw listing page HTML.
 * @param {object} ctx - Scan context ({ store, logger }).
 * @returns {Array<{name:string,price:string,url:string,img:string}>}
 */
function parseProductsKegNCork(html, ctx) {
  const source = String(html || "");
  const host = (ctx && ctx.store && ctx.store.host) || "kegncork.com";
  const base = `https://${host}/`;

  // Split on each product <li> opener; chunk 0 is the pre-list preamble.
  const chunks = source.split(/<li\b[^>]*class=["'][^"']*\bproduct\b[^"']*["'][^>]*>/i);
  ctx.logger?.dbg?.(`parseProductsKegNCork: li.product blocks=${Math.max(0, chunks.length - 1)} bytes=${source.length}`);

  const byUrl = new Map();
  for (const chunk of chunks.slice(1)) {
    const block = "<li" + chunk;

    const mTitle = block.match(
      /<h4\b[^>]*class=["'][^"']*\bcard-title\b[^"']*["'][^>]*>[\s\S]*?<a\b[^>]*href=["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>/i
    );
    if (!mTitle) continue;

    const url = decodeHtml(mTitle[1]).trim();
    const name = cleanText(decodeHtml(mTitle[2]));
    if (!url || !/^https?:\/\//i.test(url) || !name) continue;

    let price = "";
    const mPrice = block.match(/data-product-price-without-tax[^>]*>\s*([^<]+)\s*</i);
    if (mPrice && mPrice[1]) {
      const compact = cleanText(decodeHtml(mPrice[1])).replace(/\s+/g, "");
      if (compact) price = compact.startsWith("$") ? compact : `$${compact}`;
    } else {
      // Fallback: scrape a dollar amount out of the visible price section.
      const priceSection = block.match(/data-test-info-type=["']price["'][\s\S]*?<\/div>\s*<\/div>/i)?.[0] || "";
      const mDollar = cleanText(decodeHtml(stripTags(priceSection))).match(/\$\s*\d+(?:\.\d{2})?/);
      if (mDollar) price = mDollar[0].replace(/\s+/g, "");
    }

    byUrl.set(url, { name, price, url, img: extractFirstImgUrl(block, base) });
  }

  return [...byUrl.values()];
}
|
||||
|
||||
|
||||
/**
 * Build the Keg N Cork store descriptor (generic HTML pagination scan).
 * @param {string} defaultUa - User agent to send with requests.
 * @returns {object} Store config consumed by the tracker.
 */
function createStore(defaultUa) {
  const category = (key, label, startUrl, discoveryStartPage) => ({ key, label, startUrl, discoveryStartPage });
  return {
    key: "kegncork",
    name: "Keg N Cork",
    host: "kegncork.com",
    ua: defaultUa,
    parseProducts: parseProductsKegNCork,
    makePageUrl: makePageUrlKegNCork,
    categories: [
      category("whisky", "Whisky", "https://kegncork.com/whisky/?page=1", 5),
      category("rum", "Rum", "https://kegncork.com/rum/?page=1", 1),
    ],
  };
}
|
||||
|
||||
module.exports = { createStore };
|
||||
189
src/stores/kwm.js
Normal file
189
src/stores/kwm.js
Normal file
|
|
@ -0,0 +1,189 @@
|
|||
"use strict";
|
||||
|
||||
const { decodeHtml, stripTags, cleanText, extractHtmlAttr, escapeRe, extractFirstImgUrl } = require("../utils/html");
|
||||
const { sanitizeName } = require("../utils/text");
|
||||
const { normalizeCspc } = require("../utils/sku");
|
||||
const { normalizeBaseUrl } = require("../utils/url");
|
||||
|
||||
/**
 * Build the URL for a KWM listing page.
 * Page 1 is the bare category URL (any `page` param stripped); later
 * pages get `?page=N`. The fragment is always dropped.
 * @param {string} baseUrl - Category URL (possibly already paginated).
 * @param {number} pageNum - 1-based page number.
 * @returns {string} Canonical URL for that page.
 */
function makePageUrlKWM(baseUrl, pageNum) {
  const target = new URL(normalizeBaseUrl(baseUrl));
  target.hash = "";
  const wantsExplicitPage = pageNum > 1;
  if (wantsExplicitPage) {
    target.searchParams.set("page", String(pageNum));
    target.search = `?${target.searchParams.toString()}`;
  } else {
    target.searchParams.delete("page");
    const rest = target.searchParams.toString();
    target.search = rest ? `?${rest}` : "";
  }
  return target.toString();
}
|
||||
|
||||
// Collect up to `maxBlocks` complete <div> elements whose class attribute
// contains `className` as a whole word. Nesting is resolved by scanning
// forward and balancing "<div" openers against "</div>" closers, so inner
// divs are included in each returned block.
function extractDivBlocksByExactClass(html, className, maxBlocks) {
  const out = [];
  const s = String(html || "");

  // Global regex finds each candidate opening tag; its lastIndex is moved
  // manually past a captured block so nested matches inside it are skipped.
  const re = new RegExp(`<div\\b[^>]*class=["'][^"']*\\b${escapeRe(className)}\\b[^"']*["'][^>]*>`, "gi");

  let m;
  while ((m = re.exec(s))) {
    if (out.length >= maxBlocks) break;

    const startTagEnd = m.index + m[0].length;
    let i = startTagEnd;
    let depth = 1; // already inside the matched opening tag

    while (i < s.length) {
      const nextOpen = s.indexOf("<div", i);
      const nextClose = s.indexOf("</div>", i);
      if (nextClose === -1) break; // unbalanced HTML: abandon this block

      if (nextOpen !== -1 && nextOpen < nextClose) {
        depth++;
        i = nextOpen + 4; // length of "<div"
        continue;
      }
      depth--;
      if (depth === 0) {
        // Capture from the opening tag through its matching "</div>".
        out.push(s.slice(m.index, nextClose + 6));
        re.lastIndex = nextClose + 6; // resume the outer scan past this block
        break;
      }
      i = nextClose + 6; // length of "</div>"
    }
  }
  return out;
}
|
||||
|
||||
/**
 * Extract the product link href from a KWM card.
 * Prefers an empty `a.product-link` overlay anchor (either attribute
 * order), then falls back to any `a.product-link` with an href.
 * @param {string} block - HTML of one product card.
 * @returns {string} The raw href value (trimmed) or "".
 */
function kwmExtractProductLinkHref(block) {
  const patterns = [
    /<a\b[^>]*class=["'][^"']*\bproduct-link\b[^"']*["'][^>]*href=["']([^"']+)["'][^>]*>\s*<\/a>/i,
    /<a\b[^>]*href=["']([^"']+)["'][^>]*class=["'][^"']*\bproduct-link\b[^"']*["'][^>]*>\s*<\/a>/i,
    /<a\b[^>]*class=["'][^"']*\bproduct-link\b[^"']*["'][^>]*href=["']([^"']+)["']/i,
    /<a\b[^>]*href=["']([^"']+)["'][^>]*class=["'][^"']*\bproduct-link\b[^"']*["']/i,
  ];
  for (const re of patterns) {
    const m = block.match(re);
    if (m && m[1]) return m[1].trim();
  }
  return "";
}
|
||||
|
||||
/**
 * Extract a product name from a KWM card.
 * Prefers the `data-item` attribute; falls back to the <h6> heading text.
 * @param {string} block - HTML of one product card.
 * @returns {string} Sanitized name, or "" when none found.
 */
function kwmExtractName(block) {
  const fromAttr = extractHtmlAttr(block, "data-item");
  if (fromAttr) {
    return sanitizeName(fromAttr);
  }
  const heading = block.match(/<h6\b[^>]*>\s*([\s\S]*?)\s*<\/h6>/i)?.[1];
  return heading ? sanitizeName(stripTags(heading)) : "";
}
|
||||
|
||||
// Return the inner HTML of the FIRST <div> whose class contains
// `className` as a whole word (contents only, tags excluded). Nested
// divs are balanced by scanning for "<div" / "</div>" pairs; returns ""
// when no such div exists or the HTML is unbalanced.
function kwmExtractFirstDivByClass(html, className) {
  const re = new RegExp(`<div\\b[^>]*class=["'][^"']*\\b${escapeRe(className)}\\b[^"']*["'][^>]*>`, "i");
  const m = re.exec(html);
  if (!m) return "";
  const start = m.index + m[0].length;

  let i = start;
  let depth = 1; // inside the matched opening tag
  while (i < html.length) {
    const nextOpen = html.indexOf("<div", i);
    const nextClose = html.indexOf("</div>", i);
    if (nextClose === -1) break; // unbalanced HTML

    if (nextOpen !== -1 && nextOpen < nextClose) {
      depth++;
      i = nextOpen + 4; // length of "<div"
      continue;
    }
    depth--;
    if (depth === 0) return html.slice(start, nextClose);
    i = nextClose + 6; // length of "</div>"
  }
  return "";
}
|
||||
|
||||
/**
 * Extract a display price ("$NN.NN") from a KWM product card.
 *
 * Prefers the machine-readable `data-price` attribute; falls back to the
 * visible `.product-price` div (ignoring struck-through old prices).
 *
 * Fix over the original: a malformed `data-price` (e.g. "1.2.3" or "N/A.")
 * used to produce the literal string "$NaN" and skip the fallback; such
 * values are now rejected and the visible price is tried instead.
 *
 * @param {string} block - HTML of one product card.
 * @returns {string} Normalized price like "$12.99", or "" if none found.
 */
function kwmExtractPrice(block) {
  const mAttr = block.match(/\bdata-price=["']([^"']+)["']/i);
  if (mAttr && mAttr[1]) {
    const digits = String(mAttr[1]).trim().replace(/[^0-9.]/g, "");
    if (digits) {
      const n = Number(digits);
      // Guard against "$NaN": only trust a cleanly numeric attribute.
      if (Number.isFinite(n)) return `$${n.toFixed(2)}`;
    }
  }

  const priceDiv = kwmExtractFirstDivByClass(block, "product-price");
  if (!priceDiv) return "";

  // Drop struck-through (old/regular) prices so we pick the live one.
  const cleaned = String(priceDiv).replace(/<span\b[^>]*class=["'][^"']*\bstrike\b[^"']*["'][^>]*>[\s\S]*?<\/span>/gi, " ");

  const txt = cleanText(decodeHtml(stripTags(cleaned)));
  const dollars = [...txt.matchAll(/\$\s*\d+(?:\.\d{2})?/g)];
  if (dollars.length) return dollars[0][0].replace(/\s+/g, "");

  return "";
}
|
||||
|
||||
// Parse product cards from a KWM listing page. Each card is a complete
// `div.product-wrap` block; cards containing "OUT OF STOCK" are skipped.
// The kwm* helpers above supply link, name, and price; the SKU (CSPC) is
// derived from the product URL. Results are de-duplicated by URL.
function parseProductsKWM(html, ctx) {
  const s = String(html || "");
  const base = `https://${(ctx && ctx.store && ctx.store.host) || "kensingtonwinemarket.com"}/`;

  // 5000 is a generous cap on cards per page.
  const blocks = extractDivBlocksByExactClass(s, "product-wrap", 5000);
  ctx.logger?.dbg?.(`parseProductsKWM: productWrapBlocks=${blocks.length} bytes=${s.length}`);

  const items = [];
  for (const block of blocks) {
    if (/OUT OF STOCK/i.test(block)) continue;

    const href = kwmExtractProductLinkHref(block);
    if (!href) continue;

    let url;
    try {
      url = new URL(decodeHtml(href), base).toString();
    } catch {
      continue; // unparsable href: skip the card
    }

    const name = kwmExtractName(block);
    if (!name) continue;

    const price = kwmExtractPrice(block);
    const sku = normalizeCspc(url);

    const img = extractFirstImgUrl(block, base);

    items.push({ name, price, url, sku, img });
  }

  // De-duplicate by URL; the last occurrence wins.
  const uniq = new Map();
  for (const it of items) uniq.set(it.url, it);
  return [...uniq.values()];
}
|
||||
|
||||
|
||||
/**
 * Build the Kensington Wine Market store descriptor (generic HTML
 * pagination scan via ?page=N).
 * @param {string} defaultUa - User agent to send with requests.
 * @returns {object} Store config consumed by the tracker.
 */
function createStore(defaultUa) {
  const category = (key, label, startUrl, discoveryStartPage) => ({ key, label, startUrl, discoveryStartPage });
  return {
    key: "kwm",
    name: "Kensington Wine Market",
    host: "kensingtonwinemarket.com",
    ua: defaultUa,
    parseProducts: parseProductsKWM,
    makePageUrl: makePageUrlKWM,
    categories: [
      category("scotch", "Scotch", "https://kensingtonwinemarket.com/products/scotch/", 200),
      category("rum", "Rum", "https://kensingtonwinemarket.com/products/liqu/rum/", 20),
    ],
  };
}
|
||||
|
||||
module.exports = { createStore };
|
||||
107
src/stores/maltsandgrains.js
Normal file
107
src/stores/maltsandgrains.js
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
"use strict";
|
||||
|
||||
const { decodeHtml, stripTags, cleanText, extractHtmlAttr, extractFirstImgUrl } = require("../utils/html");
|
||||
const { normalizeCspc } = require("../utils/sku");
|
||||
const { extractPriceFromTmbBlock } = require("../utils/woocommerce");
|
||||
|
||||
/**
 * Category filter for Malts & Grains: keep everything except items that
 * are explicitly out of stock or categorized as gin, tequila, or mezcal.
 * @param {{inStock?: boolean, cats?: string[]}} item - Discovered item.
 * @returns {boolean} true when the item should be tracked.
 */
function allowMaltsExcludeGinTequilaMezcal(item) {
  if (item && item.inStock === false) return false;

  const categories = Array.isArray(item?.cats) ? item.cats : [];
  const banned = [/\bgin\b/i, /\btequila\b/i, /\bmezcal\b/i];
  return !categories.some((cat) => banned.some((re) => re.test(String(cat || ""))));
}
|
||||
|
||||
// Parse product cards from a Malts & Grains (WooCommerce/Astra) listing
// page. Out-of-stock cards are skipped; WooCommerce `product_cat-*`
// classes are collected into `cats` so the category allowUrl filter can
// exclude gin/tequila/mezcal later. Results are de-duplicated by URL.
function parseProductsMaltsAndGrains(html, ctx) {
  const s = String(html || "");
  const items = [];

  // Each card is a complete <li class="... product ..."> element.
  const re = /<li\b[^>]*class=["'][^"']*\bproduct\b[^"']*["'][^>]*>[\s\S]*?<\/li>/gi;
  const blocks = [...s.matchAll(re)].map((m) => m[0] || "");
  ctx.logger?.dbg?.(`parseProductsMaltsAndGrains: li.product blocks=${blocks.length} bytes=${s.length}`);

  const base = `https://${(ctx && ctx.store && ctx.store.host) || "maltsandgrains.store"}/`;

  for (const block of blocks) {
    const classAttr = extractHtmlAttr(block, "class");

    // Stock check: WooCommerce class, Astra badge, or visible text.
    const isOut =
      /\boutofstock\b/i.test(classAttr) ||
      /ast-shop-product-out-of-stock/i.test(block) ||
      />\s*out of stock\s*</i.test(block);
    if (isOut) continue;

    // Collect category slugs (e.g. product_cat-whisky) for filtering.
    const cats = [];
    for (const m of String(classAttr || "").matchAll(/\bproduct_cat-([a-z0-9_-]+)\b/gi)) {
      const v = String(m[1] || "").trim().toLowerCase();
      if (v) cats.push(v);
    }

    // Product link: themed loop-link classes (either attribute order),
    // then any anchor pointing at a /product/ URL.
    let href =
      block.match(
        /<a\b[^>]*href=["']([^"']+)["'][^>]*class=["'][^"']*\b(woocommerce-LoopProduct-link|woocommerce-loop-product__link|ast-loop-product__link)\b/i
      )?.[1] ||
      block.match(
        /<a\b[^>]*class=["'][^"']*\b(woocommerce-LoopProduct-link|woocommerce-loop-product__link|ast-loop-product__link)\b[^"']*["'][^>]*href=["']([^"']+)["']/i
      )?.[2] ||
      block.match(/<a\b[^>]*href=["']([^"']*\/product\/[^"']+)["']/i)?.[1];

    if (!href) continue;

    let url = "";
    try {
      url = new URL(decodeHtml(href), base).toString();
    } catch {
      continue;
    }
    if (!/^https?:\/\//i.test(url)) continue;

    const mTitle = block.match(
      /<h2\b[^>]*class=["'][^"']*\bwoocommerce-loop-product__title\b[^"']*["'][^>]*>([\s\S]*?)<\/h2>/i
    );
    const name = mTitle && mTitle[1] ? cleanText(decodeHtml(stripTags(mTitle[1]))) : "";
    if (!name) continue;

    const price = extractPriceFromTmbBlock(block);

    // SKU: data attribute first, then a visible 6-digit "SKU" mention.
    const sku = normalizeCspc(
      block.match(/\bdata-product_sku=["']([^"']+)["']/i)?.[1] ||
      block.match(/\bSKU[:\s]*([0-9]{6})\b/i)?.[1] ||
      ""
    );

    const img = extractFirstImgUrl(block, base);

    items.push({ name, price, url, sku, img, cats, inStock: true });
  }

  // De-duplicate by URL; the last occurrence wins.
  const uniq = new Map();
  for (const it of items) uniq.set(it.url, it);
  return [...uniq.values()];
}
|
||||
|
||||
|
||||
/**
 * Build the Malts & Grains store descriptor. The whole shop is scanned
 * as a single category; gin/tequila/mezcal are excluded via allowUrl.
 * @param {string} defaultUa - User agent to send with requests.
 * @returns {object} Store config consumed by the tracker.
 */
function createStore(defaultUa) {
  const allSpirits = {
    key: "all-minus-gin-tequila-mezcal",
    label: "All Spirits",
    startUrl: "https://maltsandgrains.store/shop/page/1/",
    discoveryStartPage: 15,
    allowUrl: allowMaltsExcludeGinTequilaMezcal,
  };
  return {
    key: "maltsandgrains",
    name: "Malts & Grains",
    host: "maltsandgrains.store",
    ua: defaultUa,
    parseProducts: parseProductsMaltsAndGrains,
    categories: [allSpirits],
  };
}
|
||||
|
||||
module.exports = { createStore };
|
||||
91
src/stores/sierrasprings.js
Normal file
91
src/stores/sierrasprings.js
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
"use strict";
|
||||
|
||||
const { decodeHtml, cleanText, extractFirstImgUrl } = require("../utils/html");
|
||||
const { normalizeCspc } = require("../utils/sku");
|
||||
const { extractPriceFromTmbBlock } = require("../utils/woocommerce");
|
||||
|
||||
/**
 * URL filter for Sierra Springs' broad "spirits-liquor" category: keep
 * only product URLs under /shop/spirits-liquor/ whose slug mentions
 * rum, whisky, or whiskey.
 * @param {{url?: string}} item - Discovered item.
 * @returns {boolean} true when the URL should be tracked.
 */
function allowSierraSpiritsLiquorUrlRumWhisky(item) {
  const url = String((item && item.url) || "").toLowerCase();
  const inCategory = /^https?:\/\/sierraspringsliquor\.ca\/shop\/spirits-liquor\/.+\/$/.test(url);
  if (!inCategory) return false;
  return /\/shop\/spirits-liquor\/.*(rum|whisk(?:e)?y).*/.test(url);
}
|
||||
|
||||
/**
 * Parse product cards from a Sierra Springs (WooCommerce/Uncode "tmb")
 * listing page. Each card starts with `<div class="tmb...`; the
 * t-entry-title anchor supplies name + URL. Results are de-duplicated
 * by URL (last occurrence wins).
 * @param {string} html - Raw listing page HTML.
 * @param {object} ctx - Scan context ({ store, logger }).
 * @returns {Array<{name:string,price:string,url:string,sku:string,img:string}>}
 */
function parseProductsSierra(html, ctx) {
  const source = String(html || "");
  const chunks = source.split(/<div class="tmb\b/i);
  ctx.logger?.dbg?.(
    `parseProductsSierra: tmbBlocks=${Math.max(0, chunks.length - 1)} bytes=${source.length}`
  );

  const host = (ctx && ctx.store && ctx.store.host) || "sierraspringsliquor.ca";
  const base = `https://${host}/`;

  const byUrl = new Map();
  for (const chunk of chunks.slice(1)) {
    const block = '<div class="tmb' + chunk;

    const titleMatch = block.match(
      /<h3\b[^>]*class=["'][^"']*t-entry-title[^"']*["'][^>]*>\s*<a\b[^>]*href=["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>\s*<\/h3>/i
    );
    if (!titleMatch) continue;

    const url = new URL(decodeHtml(titleMatch[1]), base).toString();
    const name = cleanText(decodeHtml(titleMatch[2]));
    if (!name) continue;

    const price = extractPriceFromTmbBlock(block);

    // SKU: data attribute first, then a visible 6-digit "SKU" mention.
    const sku = normalizeCspc(
      block.match(/\bdata-product_sku=["']([^"']+)["']/i)?.[1] ||
        block.match(/\bSKU[:\s]*([0-9]{6})\b/i)?.[1] ||
        ""
    );

    byUrl.set(url, { name, price, url, sku, img: extractFirstImgUrl(block, base) });
  }

  return [...byUrl.values()];
}
|
||||
|
||||
|
||||
/**
 * Build the Sierra Springs store descriptor.
 * The broad "spirits-liquor" category is narrowed to rum/whisky via its
 * allowUrl filter; the other categories are tracked wholesale.
 * @param {string} defaultUa - User agent to send with requests.
 * @returns {object} Store config consumed by the tracker.
 */
function createStore(defaultUa) {
  const cat = (key, label, startUrl, discoveryStartPage, extra = {}) => ({
    key,
    label,
    startUrl,
    discoveryStartPage,
    ...extra,
  });
  return {
    key: "sierrasprings",
    name: "Sierra Springs",
    host: "sierraspringsliquor.ca",
    ua: defaultUa,
    parseProducts: parseProductsSierra,
    categories: [
      cat("whisky", "Whisky", "https://sierraspringsliquor.ca/product-category/whisky-2/", 20),
      cat("fine-rare", "Fine & Rare", "https://sierraspringsliquor.ca/product-category/fine-rare/", 1),
      cat("spirits-liquor", "Spirits / Liquor", "https://sierraspringsliquor.ca/product-category/spirits-liquor/page/2/", 15, {
        allowUrl: allowSierraSpiritsLiquorUrlRumWhisky,
      }),
      cat("spirits", "Spirits", "https://sierraspringsliquor.ca/product-category/spirits/", 1),
    ],
  };
}
|
||||
|
||||
module.exports = { createStore, parseProductsSierra };
|
||||
495
src/stores/strath.js
Normal file
495
src/stores/strath.js
Normal file
|
|
@ -0,0 +1,495 @@
|
|||
"use strict";
|
||||
|
||||
const { decodeHtml, stripTags, cleanText, extractFirstImgUrl } = require("../utils/html");
|
||||
const { normalizeCspc } = require("../utils/sku");
|
||||
const { humanBytes } = require("../utils/bytes");
|
||||
const { padLeft, padRight } = require("../utils/string");
|
||||
|
||||
const { mergeDiscoveredIntoDb } = require("../tracker/merge");
|
||||
const { buildDbObject, writeJsonAtomic } = require("../tracker/db");
|
||||
const { addCategoryResultToReport } = require("../tracker/report");
|
||||
|
||||
/**
 * Format a byte count as a human-readable string, right-aligned to
 * 8 characters for column-aligned log lines.
 * @param {number} bytes
 * @returns {string}
 */
function kbStr(bytes) {
  const human = humanBytes(bytes);
  return human.padStart(8, " ");
}
|
||||
|
||||
/**
 * Format a millisecond duration as seconds, right-aligned to 7 chars:
 * one decimal place under 10s (e.g. "3.2s"), whole seconds otherwise.
 * Non-finite input formats as zero.
 * @param {number} ms
 * @returns {string}
 */
function secStr(ms) {
  const seconds = Number.isFinite(ms) ? ms / 1000 : 0;
  const rounded = Math.round(seconds * 10) / 10;
  const text = rounded < 10 ? `${rounded.toFixed(1)}s` : `${Math.round(seconds)}s`;
  return text.padStart(7, " ");
}
|
||||
|
||||
/**
 * Render a "page/total" progress cell, left-padding the page number to
 * the width of the total so log columns line up (e.g. "  7/120").
 * @param {number} i - Current page (1-based).
 * @param {number} total - Total pages.
 * @returns {string}
 */
function pageStr(i, total) {
  const totalText = String(total);
  const current = padLeft(i, totalText.length);
  return `${current}/${totalText}`;
}
|
||||
|
||||
/**
 * Render an integer percentage cell padded to 4 chars (e.g. " 42%").
 * A zero/undefined total yields 0% rather than dividing by zero.
 * @param {number} done
 * @param {number} total
 * @returns {string}
 */
function pctStr(done, total) {
  let pct = 0;
  if (total) pct = Math.floor((done / total) * 100);
  return `${padLeft(pct, 3)}%`;
}
|
||||
|
||||
/**
 * Split a page into its `<article>` fragments, re-attaching the opening
 * tag text that the split consumed. Returns [] when no articles exist.
 * Note: each fragment runs until the NEXT `<article`, not a closing tag.
 * @param {string} html
 * @returns {string[]}
 */
function extractArticles(html) {
  const pieces = String(html || "").split(/<article\b/i);
  return pieces.slice(1).map((piece) => "<article" + piece);
}
|
||||
|
||||
/**
 * Pull the first dollar amount out of arbitrary text and canonicalize
 * it: whitespace and thousands separators removed
 * (e.g. "$ 1,234.50" -> "$1234.50").
 * @param {string} str
 * @returns {string} "$<number>" or "" when no dollar amount is present.
 */
function normalizePrice(str) {
  const match = String(str || "").match(/\$\s*\d{1,3}(?:,\d{3})*(?:\.\d{2})?|\$\s*\d+(?:\.\d{2})?/);
  if (!match) return "";
  return match[0].replace(/\s+/g, "").replace(/,/g, "");
}
|
||||
|
||||
/**
 * Pick the customer-facing price out of a Strath product card.
 * Member-only ("whiskyfolk") price blocks are stripped first, then we
 * try, in order: the <ins> sale price, the regular-price-card text, the
 * product-price div contents, and finally the whole remaining card.
 * @param {string} articleHtml - HTML of one <article> product card.
 * @returns {string} Normalized price ("$NN.NN") or "".
 */
function pickPriceFromArticle(articleHtml) {
  const card = String(articleHtml || "");

  // Member pricing would otherwise win the regex race; drop it up front.
  const publicOnly = card.replace(
    /<div\b[^>]*class=["'][^"']*\bwhiskyfolk-price\b[^"']*["'][^>]*>[\s\S]*?<\/div>/gi,
    " "
  );

  const salePrice = publicOnly.match(/<ins\b[^>]*>[\s\S]*?(\$[\s\S]{0,32}?)<\/ins>/i)?.[1];
  if (salePrice) return normalizePrice(salePrice);

  const regular = publicOnly.match(/class=["'][^"']*\bregular-price-card\b[^"']*["'][^>]*>\s*([^<]+)/i)?.[1];
  if (regular) return normalizePrice(regular);

  const priceDiv = publicOnly.match(
    /<div\b[^>]*class=["'][^"']*\bproduct-price\b[^"']*["'][^>]*>([\s\S]*?)<\/div>/i
  )?.[1];
  return normalizePrice(priceDiv || publicOnly);
}
|
||||
|
||||
/**
 * Recover the WooCommerce post/product id from a product card, trying
 * the article id attribute, a `post-NNN` class, then data-product_id.
 * @param {string} articleHtml
 * @returns {number} Product id, or 0 when none found.
 */
function extractProductIdFromArticle(articleHtml) {
  const card = String(articleHtml || "");
  const sources = [
    /<article\b[^>]*\bid=["'](\d{1,10})["']/i,
    /\bpost-(\d{1,10})\b/i,
    /\bdata-product_id=["'](\d{1,10})["']/i,
  ];
  for (const re of sources) {
    const m = card.match(re);
    if (m && m[1]) return Number(m[1]);
  }
  return 0;
}
|
||||
|
||||
/**
 * Pull a 6-digit CSPC/SKU out of a product card, preferring the
 * machine-readable data attribute over visible "SKU: NNNNNN" text.
 * @param {string} articleHtml
 * @returns {string} Six-digit SKU or "".
 */
function extractSkuFromArticle(articleHtml) {
  const card = String(articleHtml || "");
  const sources = [/\bdata-product_sku=["'](\d{6})["']/i, /\bSKU\b[^0-9]{0,20}(\d{6})\b/i];
  for (const re of sources) {
    const hit = card.match(re)?.[1];
    if (hit) return hit;
  }
  return "";
}
|
||||
|
||||
/**
 * Heuristically decide whether a Strath product card is purchasable.
 *
 * Explicit out-of-stock markers win; otherwise any known in-stock marker
 * ("instock" class, bottle-count badges) counts. Defaults to false when
 * no marker is present.
 *
 * Fix over the original: the final fallback re-tested /\binstock\b/ even
 * though that pattern had already returned true above, so it could only
 * ever evaluate to false — it is now an explicit `return false`.
 *
 * @param {string} articleHtml - HTML of one <article> product card.
 * @returns {boolean} true when the card looks in stock.
 */
function looksInStock(articleHtml) {
  const a = String(articleHtml || "");

  // Negative markers take precedence over any positive hint.
  if (/\boutofstock\b/i.test(a)) return false;
  if (/Currently\s+Unavailable/i.test(a)) return false;

  // Positive markers: WooCommerce class or visible availability badges.
  if (/\binstock\b/i.test(a)) return true;
  if (/\bBottles\s+(?:Remaining|Available)\b/i.test(a)) return true;
  if (/Only\s+\d+\s+Bottle\s+Left/i.test(a)) return true;
  if (/10\+\s*Bottles\s+Available/i.test(a)) return true;

  // No stock signal at all: treat as not purchasable.
  return false;
}
|
||||
|
||||
// Build a tracked item from one Strath <article> card, or null when the
// card is out of stock or missing a link/name. The name joins the title
// and subtitle headings ("Title - Subtitle"); the SKU prefers a value
// found in the HTML and falls back to a CSPC parsed from the URL.
function parseProductFromArticle(articleHtml) {
  const a = String(articleHtml || "");

  if (!looksInStock(a)) return null;

  const hrefM = a.match(/<a\b[^>]*href=["']([^"']+)["']/i);
  if (!hrefM || !hrefM[1]) return null;

  let url;
  try {
    url = new URL(decodeHtml(hrefM[1]), "https://www.strathliquor.com/").toString();
  } catch {
    return null; // unparsable href: skip the card
  }

  // Title + optional subtitle make up the display name.
  const t2 = a.match(/<h2\b[^>]*class=["'][^"']*\bproduct-title\b[^"']*["'][^>]*>([\s\S]*?)<\/h2>/i);
  const t3 = a.match(/<h3\b[^>]*class=["'][^"']*\bproduct-subtitle\b[^"']*["'][^>]*>([\s\S]*?)<\/h3>/i);
  const title = cleanText(decodeHtml(stripTags((t2 && t2[1]) || "")));
  const sub = cleanText(decodeHtml(stripTags((t3 && t3[1]) || "")));
  const name = cleanText([title, sub].filter(Boolean).join(" - "));
  if (!name) return null;

  const price = pickPriceFromArticle(a);
  const productId = extractProductIdFromArticle(a);

  const skuFromHtml = extractSkuFromArticle(a);
  const fallbackSku = normalizeCspc(url) || "";

  const img = extractFirstImgUrl(a, "https://www.strathliquor.com/");

  return {
    name,
    price,
    url,
    sku: skuFromHtml || fallbackSku,
    productId,
    img,
  };
}
|
||||
|
||||
|
||||
/* ---------------- Store API paging ---------------- */
|
||||
|
||||
/**
 * Translate a Strath category-page URL into its WooCommerce Store API
 * products endpoint, carrying over the stock and price filters that the
 * search-filter plugin encodes in the query string.
 *
 * Fix over the original: `_sfm__regular_price=50+500` arrives from
 * URLSearchParams with the `+` decoded to a space ("50 500"), so the old
 * plus-only pattern never matched and the price filter was silently
 * dropped; the separator may now be either a plus or whitespace.
 *
 * @param {string} startUrl - Category listing URL.
 * @returns {URL} Store API base URL (no paging params yet).
 */
function buildStoreApiBaseUrlFromCategoryUrl(startUrl) {
  const u = new URL(startUrl);
  const api = new URL(`https://${u.hostname}/wp-json/wc/store/v1/products`);

  // Newest-first keeps recently added bottles on the early pages.
  api.searchParams.set("order", "desc");
  api.searchParams.set("orderby", "date");

  const stock = u.searchParams.get("_sfm__stock_status");
  if (stock && /instock/i.test(stock)) api.searchParams.set("stock_status", "instock");

  const pr = u.searchParams.get("_sfm__regular_price");
  if (pr) {
    // "min+max" — note URLSearchParams decodes a literal "+" to a space.
    const m = String(pr).match(/^\s*([0-9]+)\s*(?:\+|\s)\s*([0-9]+)\s*$/);
    if (m) {
      api.searchParams.set("min_price", m[1]);
      api.searchParams.set("max_price", m[2]);
    }
  }

  return api;
}
|
||||
|
||||
/**
 * True when a Store API product carries the wanted category slug.
 * An empty/blank wanted slug matches everything (no filtering).
 * @param {object} p - Store API product (expects p.categories[].slug).
 * @param {string} wanted - Category slug to look for.
 * @returns {boolean}
 */
function hasCategorySlug(p, wanted) {
  const target = String(wanted || "").trim().toLowerCase();
  if (!target) return true;

  const categories = Array.isArray(p?.categories) ? p.categories : [];
  return categories.some((c) => String(c?.slug || "").trim().toLowerCase() === target);
}
|
||||
|
||||
/**
 * Extract an absolute product URL from a Store API product.
 * @param {object} p - Store API product (permalink or link field).
 * @returns {string} Absolute http(s) URL or "".
 */
function normalizeProductUrl(p) {
  const candidate = String(p?.permalink || p?.link || "").trim();
  if (!candidate.startsWith("http")) return "";
  return candidate;
}
|
||||
|
||||
/**
 * Clean a Store API product name: the API may embed HTML entities
 * (e.g. &#8211;) and occasional markup like <em>, so strip tags, decode
 * entities, and collapse whitespace.
 * @param {object} p - Store API product.
 * @returns {string}
 */
function normalizeProductName(p) {
  const rawName = String(p?.name || "");
  const withoutMarkup = stripTags(rawName);
  return cleanText(decodeHtml(withoutMarkup));
}
|
||||
|
||||
/**
 * Pick the first usable image URL from a Store API product.
 * Checks the images[] array (string entries or src/thumbnail/url props)
 * before falling back to a direct image field; protocol-relative URLs
 * ("//cdn/...") are upgraded to https.
 * @param {object} p - Store API product.
 * @returns {string} Image URL or "".
 */
function normalizeProductImage(p) {
  const withProtocol = (s) => (s.startsWith("//") ? `https:${s}` : s);

  const entries = Array.isArray(p?.images) ? p.images : [];
  for (const entry of entries) {
    if (!entry) continue;
    let raw = "";
    if (typeof entry === "string") raw = entry;
    else if (typeof entry?.src === "string" && entry.src) raw = entry.src;
    else if (typeof entry?.thumbnail === "string" && entry.thumbnail) raw = entry.thumbnail;
    else if (typeof entry?.url === "string" && entry.url) raw = entry.url;
    const candidate = String(raw || "").trim();
    if (candidate) return withProtocol(candidate);
  }

  const direct = String(p?.image || p?.image_url || p?.imageUrl || "").trim();
  return direct ? withProtocol(direct) : "";
}
|
||||
|
||||
|
||||
|
||||
/**
 * Convert a minor-unit integer string to a decimal money string using
 * pure integer/string math (no float rounding): "11035", 2 -> "110.35".
 * @param {string} valueStr - Digits-only amount in minor units.
 * @param {number} minorUnit - Number of decimal places (0-6).
 * @returns {string} Decimal string, or "" for invalid input.
 */
function toMoneyStringFromMinorUnits(valueStr, minorUnit) {
  const places = Number(minorUnit);
  if (!Number.isFinite(places) || places < 0 || places > 6) return "";

  const digits = String(valueStr || "").trim();
  if (!/^\d+$/.test(digits)) return "";

  if (places === 0) return digits;

  // Pad so there are always enough digits to split off the fraction.
  const padded = digits.padStart(places, "0");
  const whole = padded.length === places ? "0" : padded.slice(0, padded.length - places);
  const frac = padded.slice(padded.length - places);
  return `${whole}.${frac}`;
}
|
||||
|
||||
/**
 * Derive a display price ("$NN.NN") from a Store API product.
 * The prices object usually carries minor units (e.g. "11035" with
 * currency_minor_unit=2 => 110.35); the sale price wins over regular.
 * Falls back to scraping a dollar figure from p.price / p.price_html.
 * @param {object} p - Store API product.
 * @returns {string} "$NN.NN" or "".
 */
function normalizeProductPrice(p) {
  const prices = p?.prices;

  if (prices && typeof prices === "object") {
    const minorUnit = prices.currency_minor_unit;
    const salePrice = String(prices.sale_price || "").trim();
    const regularPrice = String(prices.regular_price || "").trim();
    const chosen = salePrice || regularPrice;

    if (chosen) {
      let numericText = chosen;

      // All-digit values are minor units; convert with integer math.
      const looksLikeMinorUnits = /^\d+$/.test(chosen) && minorUnit !== undefined && minorUnit !== null;
      if (looksLikeMinorUnits) {
        const converted = toMoneyStringFromMinorUnits(chosen, minorUnit);
        if (converted) numericText = converted;
      }

      const amount = Number(numericText);
      if (Number.isFinite(amount) && amount >= 0) return `$${amount.toFixed(2)}`;
    }
  }

  // Fallback: scrape a dollar figure out of the loose price fields.
  return normalizePrice(String(p?.price || p?.price_html || "").trim());
}
|
||||
|
||||
/**
 * Accept a Store API SKU only when it is a 6-digit CSPC code.
 * @param {object} p - Store API product.
 * @returns {string} Six-digit SKU or "".
 */
function normalizeProductSku(p) {
  const candidate = String(p?.sku || "").trim();
  return /^\d{6}$/.test(candidate) ? candidate : "";
}
|
||||
|
||||
/**
 * Coerce the Store API product id to a number, defaulting to 0.
 * @param {object} p - Store API product.
 * @returns {number}
 */
function normalizeProductId(p) {
  const parsed = Number(p?.id);
  if (!Number.isFinite(parsed)) return 0;
  return parsed;
}
|
||||
|
||||
/**
 * Fetch one page of the Strath WooCommerce Store API product list.
 * @param {object} ctx - Scan context ({ http, store, cat }).
 * @param {URL} apiBaseUrl - Base endpoint from buildStoreApiBaseUrlFromCategoryUrl.
 * @param {number} page - 1-based page number.
 * @param {number} perPage - Page size.
 * @returns {Promise<object>} The fetchJsonWithRetry result object.
 */
async function fetchStoreApiPage(ctx, apiBaseUrl, page, perPage) {
  const pageUrl = new URL(apiBaseUrl.toString());
  pageUrl.searchParams.set("page", String(page));
  pageUrl.searchParams.set("per_page", String(perPage));

  const label = `strath:storeapi:${ctx.cat.key}:p${page}`;
  return await ctx.http.fetchJsonWithRetry(pageUrl.toString(), label, ctx.store.ua, {
    method: "GET",
    headers: {
      Accept: "application/json",
      Referer: ctx.cat.startUrl,
    },
  });
}
|
||||
|
||||
/**
 * Guard against a partial scan wiping the DB: when this run discovered
 * fewer than 60% of the previously known items, back-fill the previous
 * entries into `discovered` (mutating it) instead of letting the merge
 * mark them all as removed.
 * @param {Map} prevDb - Previously persisted items.
 * @param {Map} discovered - Items found this run (mutated on back-fill).
 * @param {object} ctx - Scan context (logger, catPrefixOut).
 * @param {string} reason - Text for the warning log line.
 * @returns {boolean} true when the previous DB was preserved.
 */
function avoidMassRemoval(prevDb, discovered, ctx, reason) {
  const previousCount = typeof prevDb?.size === "number" ? prevDb.size : 0;
  const discoveredCount = typeof discovered?.size === "number" ? discovered.size : 0;
  if (previousCount <= 0 || discoveredCount <= 0) return false;

  const coverage = discoveredCount / Math.max(1, previousCount);
  if (coverage >= 0.6) return false;

  ctx.logger.warn?.(
    `${ctx.catPrefixOut} | Strath partial scan (${discoveredCount}/${previousCount}); preserving DB to avoid removals (${reason}).`
  );

  if (typeof prevDb?.entries !== "function") return false;
  for (const [key, value] of prevDb.entries()) {
    if (!discovered.has(key)) discovered.set(key, value);
  }
  return true;
}
|
||||
|
||||
// Scan one Strath category: seed items from the listing HTML, then page
// through the WooCommerce Store API (100 products/page), filtering to
// in-stock items (and an optional category slug). A partial-scan guard
// prevents wiping the DB before the results are merged, persisted, and
// recorded on the run report.
// NOTE(review): assumes ctx provides http/logger/config/dbFile and that
// prevDb is a Map — confirm against the generic scan driver.
async function scanCategoryStrath(ctx, prevDb, report) {
  const t0 = Date.now();

  // Listing HTML (seed + sanity)
  let html = "";
  let listingFinalUrl = ctx.cat.startUrl;
  let listingStatus = 0;
  let listingBytes = 0;
  let listingMs = 0;

  try {
    const r = await ctx.http.fetchTextWithRetry(ctx.cat.startUrl, `strath:html:${ctx.cat.key}`, ctx.store.ua);
    html = r.text || "";
    listingFinalUrl = r.finalUrl || ctx.cat.startUrl;
    listingStatus = r.status || 0;
    listingBytes = r.bytes || 0;
    listingMs = r.ms || 0;
  } catch (e) {
    // Non-fatal: the Store API pass below can still find everything.
    ctx.logger.warn(`${ctx.catPrefixOut} | Strath listing HTML fetch failed: ${e?.message || e}`);
  }

  const discovered = new Map();

  // Seed with whatever the listing page itself renders.
  const listingArticles = extractArticles(html);
  let listingItems = 0;
  for (const art of listingArticles) {
    const it = parseProductFromArticle(art);
    if (it) {
      discovered.set(it.url, it);
      listingItems++;
    }
  }

  ctx.logger.ok(
    `${ctx.catPrefixOut} | Page ${pageStr(1, 1)} | ${String(listingStatus || "").padEnd(3)} | ${pctStr(1, 1)} | items=${padLeft(
      listingItems,
      3
    )} | bytes=${kbStr(listingBytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(listingMs)}`
  );

  // The final (post-redirect) listing URL carries the filter params.
  const apiBase = buildStoreApiBaseUrlFromCategoryUrl(listingFinalUrl || ctx.cat.startUrl);

  const perPage = 100;
  const maxPagesCap = ctx.config.maxPages === null ? 5000 : ctx.config.maxPages;

  const wantedSlug = String(ctx.cat.apiCategorySlug || "").trim().toLowerCase();

  let donePages = 0;
  let emptyMatchPages = 0;

  for (let page = 1; page <= maxPagesCap; page++) {
    let r;
    try {
      r = await fetchStoreApiPage(ctx, apiBase, page, perPage);
    } catch (e) {
      // Keep what we have so far rather than failing the whole category.
      ctx.logger.warn?.(`${ctx.catPrefixOut} | Strath Store API page ${page} failed: ${e?.message || e}`);
      break;
    }

    const arr = Array.isArray(r?.json) ? r.json : [];
    donePages++;

    if (!arr.length) break;

    let kept = 0;

    for (const p of arr) {
      const stock = String(p?.stock_status || "").toLowerCase();
      if (stock && stock !== "instock") continue;

      if (wantedSlug && !hasCategorySlug(p, wantedSlug)) continue;

      const url = normalizeProductUrl(p);
      if (!url) continue;

      const name = normalizeProductName(p);
      if (!name) continue;

      const price = normalizeProductPrice(p);
      const sku = normalizeProductSku(p);
      const productId = normalizeProductId(p);

      const fallbackSku = sku || normalizeCspc(url) || "";

      // Keep an image seeded from the listing HTML if the API has none.
      const prev = discovered.get(url) || null;
      const img = normalizeProductImage(p) || (prev && prev.img) || "";

      discovered.set(url, {
        name,
        price,
        url,
        sku: sku || fallbackSku,
        productId,
        img,
      });
      kept++;
    }

    ctx.logger.ok(
      `${ctx.catPrefixOut} | API Page ${pageStr(donePages, donePages)} | ${(r?.status || "").toString().padEnd(3)} | kept=${padLeft(
        kept,
        3
      )} | bytes=${kbStr(r.bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(r.ms)}`
    );

    if (wantedSlug) {
      if (kept === 0) emptyMatchPages++;
      else emptyMatchPages = 0;

      // If filter is tight (rum), stop after 2 empty pages in a row.
      if (emptyMatchPages >= 2) break;
    }

    // A short page means the API has no more results.
    if (arr.length < perPage) break;
  }

  // Partial-scan guard: don't let a truncated run mass-remove items.
  if (prevDb && typeof prevDb.size === "number") {
    avoidMassRemoval(prevDb, discovered, ctx, `storeapi pages=${donePages} slug=${wantedSlug || "none"}`);
  }

  ctx.logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}`);

  const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered);

  const dbObj = buildDbObject(ctx, merged);
  writeJsonAtomic(ctx.dbFile, dbObj);

  ctx.logger.ok(`${ctx.catPrefixOut} | DB saved: ${ctx.logger.dim(ctx.dbFile)} (${dbObj.count} items)`);

  const elapsed = Date.now() - t0;
  ctx.logger.ok(
    `${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`
  );

  // Record per-category stats plus run-wide totals on the report.
  report.categories.push({
    store: ctx.store.name,
    label: ctx.cat.label,
    key: ctx.cat.key,
    dbFile: ctx.dbFile,
    scannedPages: 1 + Math.max(0, donePages),
    discoveredUnique: discovered.size,
    newCount: newItems.length,
    updatedCount: updatedItems.length,
    removedCount: removedItems.length,
    restoredCount: restoredItems.length,
    elapsedMs: elapsed,
  });
  report.totals.newCount += newItems.length;
  report.totals.updatedCount += updatedItems.length;
  report.totals.removedCount += removedItems.length;
  report.totals.restoredCount += restoredItems.length;

  addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems);
}
|
||||
|
||||
/**
 * Build the Strath Liquor store descriptor consumed by the generic runner.
 * @param {string} defaultUa - User-Agent string to send with requests
 * @returns {object} store config: key/name/host, custom scan hook, categories
 */
function createStore(defaultUa) {
  return {
    key: "strath",
    name: "Strath Liquor",
    host: "www.strathliquor.com",
    ua: defaultUa,
    // Strath is scanned via its Store API (see scanCategoryStrath above)
    // instead of the default listing-page HTML pagination.
    scanCategory: scanCategoryStrath,
    categories: [
      {
        key: "whisky",
        label: "Whisky",
        // Slug matched against each API product's category slugs.
        apiCategorySlug: "whisky",
        // Pre-filtered listing URL (in-stock, price/ABV bounds, newest first).
        startUrl:
          "https://www.strathliquor.com/whisky/?_sfm__stock_status=instock&_sfm__regular_price=0+6000&_sfm_product_abv=20+75&orderby=date",
      },
      {
        key: "spirits-rum",
        label: "Spirits - Rum",
        apiCategorySlug: "rum",
        startUrl:
          "https://www.strathliquor.com/spirits/?_sfm__stock_status=instock&_sfm__regular_price=0+600&_sfm_product_type=Rum&_sfm_product_abv=10+75&orderby=date",
      },
    ],
  };
}
|
||||
|
||||
module.exports = { createStore };
|
||||
292
src/tracker/category_scan.js
Normal file
292
src/tracker/category_scan.js
Normal file
|
|
@ -0,0 +1,292 @@
|
|||
"use strict";
|
||||
|
||||
const { humanBytes } = require("../utils/bytes");
|
||||
const { padLeft, padRight, padLeftV, padRightV } = require("../utils/string");
|
||||
const { normalizeBaseUrl, makePageUrlForCtx } = require("../utils/url");
|
||||
const { parallelMapStaggered } = require("../utils/async");
|
||||
|
||||
const { ensureDir, dbPathFor, readDb, writeJsonAtomic, buildDbObject } = require("./db");
|
||||
const { mergeDiscoveredIntoDb } = require("./merge");
|
||||
const { addCategoryResultToReport } = require("./report");
|
||||
|
||||
const ACTION_W = 24;
|
||||
const STATUS_W = 4;
|
||||
const PROG_W = 4;
|
||||
|
||||
// Human-readable byte count, right-aligned to 8 chars for column alignment.
function kbStr(bytes) {
  return humanBytes(bytes).padStart(8, " ");
}
|
||||
|
||||
/**
 * Format a millisecond duration as a right-aligned, 7-character seconds label.
 * Durations under 10s keep one decimal ("1.5s"); longer ones round to whole
 * seconds ("12s"). Non-finite input renders as "0.0s".
 * @param {number} ms - elapsed milliseconds
 * @returns {string} padded label, e.g. "   1.5s"
 */
function secStr(ms) {
  const seconds = Number.isFinite(ms) ? ms / 1000 : 0;
  const rounded = Math.round(seconds * 10) / 10;
  const label = rounded < 10 ? `${rounded.toFixed(1)}s` : `${Math.round(seconds)}s`;
  return label.padStart(7, " ");
}
|
||||
|
||||
// Percent-complete cell, e.g. " 42%". A zero/absent total renders as 0%.
function pctStr(done, total) {
  let pct = 0;
  if (total) {
    pct = Math.floor((done / total) * 100);
  }
  return `${padLeft(pct, 3)}%`;
}
|
||||
|
||||
// "i/total" page counter with i left-padded to the width of total ("  3/120").
function pageStr(i, total) {
  const width = `${total}`.length;
  return `${padLeft(i, width)}/${total}`;
}
|
||||
|
||||
// Fixed-width (ACTION_W) left-aligned cell for the action/label column.
function actionCell(s) {
  return padRightV(String(s), ACTION_W);
}
|
||||
|
||||
// HTTP-status cell: fixed width, colored green on success and yellow
// otherwise; an empty status stays as uncolored padding.
function statusCell(logger, statusRaw, okBool) {
  const cell = padRightV(String(statusRaw || ""), STATUS_W);
  if (!statusRaw) return cell;
  return okBool ? logger.color(cell, logger.C.green) : logger.color(cell, logger.C.yellow);
}
|
||||
|
||||
// Progress cell (e.g. " 42%"); null/undefined renders as "----".
function progCell(v) {
  const raw = String(v ?? "----");
  return padLeftV(raw, PROG_W);
}
|
||||
|
||||
// Emit one aligned progress row: prefix | action | status | progress | rest.
function logProgressLine(logger, ctx, action, statusRaw, statusOk, progVal, rest) {
  logger.ok(`${ctx.catPrefixOut} | ${actionCell(action)} | ${statusCell(logger, statusRaw, statusOk)} | ${progCell(progVal)} | ${rest}`);
}
|
||||
|
||||
/**
 * Create "Store | Category" prefix builders padded to the widest store and
 * category names across all stores, so interleaved log lines stay aligned.
 * @returns {{ catPrefixRaw: Function, catPrefixOut: Function, width: number, catW: number }}
 */
function makeCatPrefixers(stores, logger) {
  const storeW = Math.max(...stores.map((s) => String(s.name || "").length), 1);
  const catW = Math.max(...stores.flatMap((s) => (s.categories || []).map((c) => String(c.label || "").length)), 1);

  // Plain (uncolored) prefix text.
  function catPrefixRaw(store, cat) {
    return `${padRight(String(store.name || ""), storeW)} | ${padRight(String(cat.label || ""), catW)}`;
  }

  // Bold-styled prefix for log output.
  function catPrefixOut(store, cat) {
    return logger.bold(catPrefixRaw(store, cat));
  }

  return { catPrefixRaw, catPrefixOut, width: storeW, catW };
}
|
||||
|
||||
/**
 * Assemble the per-category scan context: normalized base URL, the db file
 * path derived from store+category key and that URL, and the padded log prefix.
 */
function buildCategoryContext(store, cat, catPrefixOutFn, config) {
  const baseUrl = normalizeBaseUrl(cat.startUrl);
  const dbFile = dbPathFor(`${store.key}__${cat.key}`, baseUrl, config.dbDir);
  return {
    store,
    cat,
    baseUrl,
    dbFile,
    catPrefixOut: catPrefixOutFn(store, cat),
  };
}
|
||||
|
||||
// Read the category's previous db from disk and log how many items it holds.
// Missing/corrupt files yield an empty db (see readDb).
function loadCategoryDb(logger, ctx) {
  const prevDb = readDb(ctx.dbFile);
  logger.ok(`${ctx.catPrefixOut} | DB loaded: ${padLeft(prevDb.byUrl.size, 5)} | ${logger.dim(ctx.dbFile)}`);
  return prevDb;
}
|
||||
|
||||
/**
 * Apply a category's optional `allowUrl(item, ctx, finalUrl)` filter.
 * Items are tracked by default when no filter function is configured.
 * @returns {boolean} whether the parsed item should be kept
 */
function shouldTrackItem(ctx, finalUrl, item) {
  const filter = ctx?.cat?.allowUrl;
  return typeof filter === "function" ? filter(item, ctx, finalUrl) : true;
}
|
||||
|
||||
/**
 * Probe whether a listing URL currently yields any parseable products.
 * @param {object} ctx - scan context (http client, config, store hooks)
 * @param {string} url - listing page URL to fetch
 * @returns {Promise<{ ok: boolean, items: number }>} ok=true when >= 1 item parsed
 *
 * Any fetch/parse failure is deliberately treated as "no products": page
 * discovery interprets a failure as a MISS instead of aborting the scan.
 *
 * Fix: the original also destructured `logger` from ctx but never used it;
 * the dead binding is removed.
 */
async function pageHasProducts(ctx, url) {
  const { http, config } = ctx;
  try {
    const { text } = await http.fetchTextWithRetry(url, "discover", ctx.store.ua);

    // Let the store short-circuit pages it recognizes as "no results".
    if (typeof ctx.store.isEmptyListingPage === "function") {
      if (ctx.store.isEmptyListingPage(text, ctx, url)) return { ok: false, items: 0 };
    }

    const parser = ctx.store.parseProducts || config.defaultParseProducts;
    const items = parser(text, ctx).length;
    return { ok: items > 0, items };
  } catch {
    return { ok: false, items: 0 };
  }
}
|
||||
|
||||
/**
 * Fetch one candidate listing page and log an aligned OK/MISS line with
 * item count, timing, and discovery progress.
 * @returns {Promise<{ ok: boolean, items: number }>} result from pageHasProducts
 */
async function probePage(ctx, baseUrl, pageNum, state) {
  const url = makePageUrlForCtx(ctx, baseUrl, pageNum);
  const t0 = Date.now();
  const r = await pageHasProducts(ctx, url);
  const ms = Date.now() - t0;

  // Progress reflects how far the binary-search window has narrowed.
  const prog = discoverProg(state);

  logProgressLine(
    ctx.logger,
    ctx,
    `Discover probe page=${padLeftV(pageNum, 4)}`,
    r.ok ? "OK" : "MISS",
    Boolean(r.ok),
    prog,
    `items=${padLeftV(r.items, 3)} | bytes=${padLeftV("", 8)} | ${padRightV(ctx.http.inflightStr(), 11)} | ${secStr(ms)}`
  );

  return r;
}
|
||||
|
||||
// Progress estimate for page-count discovery. Before binary search starts we
// report " 0%"; during binary search, progress is how far the [loOk, hiMiss)
// window has shrunk relative to its initial span.
function discoverProg(state) {
  if (!state || state.phase !== "binary") return " 0%";
  const span = Math.max(1, state.hiMiss - state.loOk);
  const initial = Math.max(1, state.binInitialSpan);
  if (initial <= 1) return "100%";

  // span converges to 1 when the search finishes, hence the -1 offsets.
  const remaining = Math.max(0, span - 1);
  const total = Math.max(1, initial - 1);
  const pct = Math.max(0, Math.min(100, Math.floor(((total - remaining) / total) * 100)));
  return `${padLeft(pct, 3)}%`;
}
|
||||
|
||||
/**
 * Binary-search the last listing page that still has products, given a
 * known-good page (loOk) and a known-empty page (hiMiss). Mutates `state`
 * so the progress cell can report how far the window has narrowed.
 * @returns {Promise<number>} the highest page number that returned products
 */
async function binaryFindLastOk(ctx, baseUrl, loOk, hiMiss, state) {
  state.phase = "binary";
  state.loOk = loOk;
  state.hiMiss = hiMiss;
  state.binInitialSpan = Math.max(1, hiMiss - loOk);

  // Invariant: loOk always has products, hiMiss never does.
  while (hiMiss - loOk > 1) {
    const mid = loOk + Math.floor((hiMiss - loOk) / 2);
    state.loOk = loOk;
    state.hiMiss = hiMiss;

    const pm = await probePage(ctx, baseUrl, mid, state);
    if (pm.ok) loOk = mid;
    else hiMiss = mid;
  }

  state.loOk = loOk;
  state.hiMiss = hiMiss;
  return loOk;
}
|
||||
|
||||
/**
 * Estimate the number of listing pages without fetching them all:
 * 1) verify page 1 looks like a listing, 2) jump to `guess`, 3) if that still
 * has products, stride forward by `step` until a miss, then 4) binary-search
 * the boundary. A hard cap of 5000 pages guards against endless pagination.
 * @returns {Promise<number>} total page count (>= 1)
 */
async function discoverTotalPagesFast(ctx, baseUrl, guess, step) {
  const state = { phase: "pre", loOk: 1, hiMiss: 2, binInitialSpan: 0 };

  const p1 = await probePage(ctx, baseUrl, 1, state);
  if (!p1.ok) {
    ctx.logger.warn(`${ctx.store.name} | ${ctx.cat.label} | Page 1 did not look like a listing. Defaulting to 1.`);
    return 1;
  }

  // Probe the configured guess; a miss means the answer is in [1, guess).
  const g = Math.max(2, guess);
  const pg = await probePage(ctx, baseUrl, g, state);
  if (!pg.ok) return await binaryFindLastOk(ctx, baseUrl, 1, g, state);

  // Stride forward until the first empty page, then binary-search between.
  let lastOk = g;
  while (true) {
    const probe = lastOk + step;
    const pr = await probePage(ctx, baseUrl, probe, state);
    if (!pr.ok) return await binaryFindLastOk(ctx, baseUrl, lastOk, probe, state);
    lastOk = probe;
    if (lastOk > 5000) {
      ctx.logger.warn(`${ctx.store.name} | ${ctx.cat.label} | Discovery hit safety cap at ${lastOk}. Using that as total pages.`);
      return lastOk;
    }
  }
}
|
||||
|
||||
/**
 * Scan one store category end-to-end: discover the page count, fetch all
 * pages in parallel, merge parsed items into the previous db, persist the
 * db, and append results to the run report.
 * Stores may override the whole flow with a custom `scanCategory` hook
 * (e.g. API-based stores), in which case this delegates entirely.
 * @param {object} ctx - category context plus config/logger/http
 * @param {{ byUrl: Map }} prevDb - previous db snapshot from loadCategoryDb
 * @param {object} report - mutable run report (see createReport)
 */
async function discoverAndScanCategory(ctx, prevDb, report) {
  const { logger, config } = ctx;

  // Store-specific scan takes over completely when provided.
  if (typeof ctx.store.scanCategory === "function") {
    await ctx.store.scanCategory(ctx, prevDb, report);
    return;
  }

  const t0 = Date.now();

  const guess = Number.isFinite(ctx.cat.discoveryStartPage) ? ctx.cat.discoveryStartPage : config.discoveryGuess;
  const step = config.discoveryStep;

  const totalPages = await discoverTotalPagesFast(ctx, ctx.baseUrl, guess, step);
  // maxPages === null means "no cap".
  const scanPages = config.maxPages === null ? totalPages : Math.min(config.maxPages, totalPages);

  logger.ok(`${ctx.catPrefixOut} | Pages: ${scanPages}${scanPages !== totalPages ? ` (cap from ${totalPages})` : ""}`);

  const pages = [];
  for (let p = 1; p <= scanPages; p++) pages.push(makePageUrlForCtx(ctx, ctx.baseUrl, p));

  let donePages = 0;

  // Fetch and parse every page with bounded, staggered concurrency.
  const perPageItems = await parallelMapStaggered(pages, config.concurrency, config.staggerMs, async (pageUrl, idx) => {
    const pnum = idx + 1;

    const { text: html, ms, bytes, status, finalUrl } = await ctx.http.fetchTextWithRetry(
      pageUrl,
      `page:${ctx.store.key}:${ctx.cat.key}:${pnum}`,
      ctx.store.ua
    );

    const parser = ctx.store.parseProducts || config.defaultParseProducts;
    const itemsRaw = parser(html, ctx, finalUrl);

    // Apply the category's optional allowUrl filter.
    const items = [];
    for (const it of itemsRaw) {
      if (shouldTrackItem(ctx, finalUrl, it)) items.push(it);
    }

    donePages++;
    logProgressLine(
      logger,
      ctx,
      `Page ${pageStr(pnum, pages.length)}`,
      status ? String(status) : "",
      status >= 200 && status < 400,
      pctStr(donePages, pages.length),
      `items=${padLeft(items.length, 3)} | bytes=${kbStr(bytes)} | ${padRight(ctx.http.inflightStr(), 11)} | ${secStr(ms)}`
    );

    return items;
  });

  // Dedupe across pages by product URL; later pages overwrite earlier ones.
  const discovered = new Map();
  let dups = 0;
  for (const arr of perPageItems) {
    for (const it of arr) {
      if (discovered.has(it.url)) dups++;
      discovered.set(it.url, it);
    }
  }

  logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}${dups ? ` (${dups} dups)` : ""}`);

  const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered);

  const dbObj = buildDbObject(ctx, merged);
  writeJsonAtomic(ctx.dbFile, dbObj);

  logger.ok(`${ctx.catPrefixOut} | DB saved: ${logger.dim(ctx.dbFile)} (${dbObj.count} items)`);

  const elapsed = Date.now() - t0;
  logger.ok(
    `${ctx.catPrefixOut} | Done in ${secStr(elapsed)}. New=${newItems.length} Updated=${updatedItems.length} Removed=${removedItems.length} Restored=${restoredItems.length} Total(DB)=${merged.size}`
  );

  // Per-category summary row plus aggregate counters.
  report.categories.push({
    store: ctx.store.name,
    label: ctx.cat.label,
    key: ctx.cat.key,
    dbFile: ctx.dbFile,
    scannedPages: scanPages,
    discoveredUnique: discovered.size,
    newCount: newItems.length,
    updatedCount: updatedItems.length,
    removedCount: removedItems.length,
    restoredCount: restoredItems.length,
    elapsedMs: elapsed,
  });
  report.totals.newCount += newItems.length;
  report.totals.updatedCount += updatedItems.length;
  report.totals.removedCount += removedItems.length;
  report.totals.restoredCount += restoredItems.length;

  addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems);
}
|
||||
|
||||
module.exports = { makeCatPrefixers, buildCategoryContext, loadCategoryDb, discoverAndScanCategory };
|
||||
128
src/tracker/db.js
Normal file
128
src/tracker/db.js
Normal file
|
|
@ -0,0 +1,128 @@
|
|||
"use strict";
|
||||
|
||||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
const crypto = require("crypto");
|
||||
|
||||
const { normalizeCspc } = require("../utils/sku");
|
||||
const { priceToNumber } = require("../utils/price");
|
||||
|
||||
// mkdir -p: create the directory and any missing parents; idempotent.
function ensureDir(dir) {
  fs.mkdirSync(dir, { recursive: true });
}
|
||||
|
||||
/**
 * Compute the JSON db file path for a store/category, creating dbDir first.
 * The file name combines a sanitized key with the first 8 hex chars of the
 * sha1 of the base URL, so distinct start URLs do not collide on disk.
 */
function dbPathFor(key, baseUrl, dbDir) {
  ensureDir(dbDir);
  const digest = crypto.createHash("sha1").update(String(baseUrl)).digest("hex");
  const urlHash = digest.slice(0, 8);
  const safeKey = String(key).replace(/[^a-zA-Z0-9_-]+/g, "-");
  return path.join(dbDir, `${safeKey}__${urlHash}.json`);
}
|
||||
|
||||
/**
 * Load a category db file into memory.
 * Missing files and parse errors yield an empty map (first run is normal).
 * @param {string} file - path to the JSON db written by writeJsonAtomic
 * @returns {{ byUrl: Map<string, object> }} items keyed by product URL
 */
function readDb(file) {
  const byUrl = new Map();

  let parsed;
  try {
    parsed = JSON.parse(fs.readFileSync(file, "utf8"));
  } catch {
    // Missing or corrupt db file: start from an empty map.
    return { byUrl };
  }

  const items = parsed && Array.isArray(parsed.items) ? parsed.items : [];
  for (const it of items) {
    // Only track entries with a usable absolute URL.
    if (!it || typeof it.url !== "string" || !it.url.startsWith("http")) continue;
    byUrl.set(it.url, {
      name: String(it.name || ""),
      price: String(it.price || ""),
      sku: String(it.sku || ""),
      url: it.url,
      // Legacy dbs may have stored the image under `image` or `thumb`.
      img: String(it.img || it.image || it.thumb || "").trim(),
      removed: Boolean(it.removed),
    });
  }

  return { byUrl };
}
|
||||
|
||||
// Write JSON via a temp file + rename so readers never observe a partially
// written file (rename within the same directory is atomic on typical
// POSIX filesystems).
function writeJsonAtomic(file, obj) {
  ensureDir(path.dirname(file));
  const tmp = `${file}.tmp`;
  fs.writeFileSync(tmp, JSON.stringify(obj, null, 2) + "\n", "utf8");
  fs.renameSync(tmp, file);
}
|
||||
|
||||
/**
 * Shape the merged item map into the JSON document persisted per category.
 * Items are sorted by name so output is deterministic; SKUs are normalized
 * via normalizeCspc before writing.
 * @param {object} ctx - category context (store, cat, baseUrl)
 * @param {Map<string, object>} merged - items keyed by URL
 */
function buildDbObject(ctx, merged) {
  return {
    version: 6,
    store: ctx.store.host,
    storeLabel: ctx.store.name,
    category: ctx.cat.key,
    categoryLabel: ctx.cat.label,
    source: ctx.baseUrl,
    updatedAt: new Date().toISOString(),
    count: merged.size,
    items: [...merged.values()]
      .sort((a, b) => (a.name || "").localeCompare(b.name || ""))
      .map((it) => ({
        name: it.name,
        price: it.price || "",
        sku: normalizeCspc(it.sku) || "",
        url: it.url,
        img: String(it.img || "").trim(),
        removed: Boolean(it.removed),
      })),
  };
}
|
||||
|
||||
/**
 * List full paths of every *.json file directly inside dbDir.
 * Subdirectories are skipped; an unreadable or missing dir yields [].
 */
function listDbFiles(dbDir) {
  let entries;
  try {
    entries = fs.readdirSync(dbDir, { withFileTypes: true });
  } catch {
    return [];
  }
  return entries
    .filter((ent) => ent.isFile() && String(ent.name || "").endsWith(".json"))
    .map((ent) => path.join(dbDir, ent.name));
}
|
||||
|
||||
/**
 * Scan every db file and index, per normalized SKU, the store that currently
 * lists the lowest positive price. Removed items and items without a SKU or
 * a usable price are skipped; unreadable db files are ignored.
 * @param {string} dbDir - directory containing per-category db JSON files
 * @returns {Map<string, { storeLabel: string, priceNum: number }>}
 */
function buildCheapestSkuIndexFromAllDbs(dbDir) {
  const cheapest = new Map(); // sku -> { storeLabel, priceNum }

  for (const file of listDbFiles(dbDir)) {
    try {
      const obj = JSON.parse(fs.readFileSync(file, "utf8"));
      const storeLabel = String(obj?.storeLabel || obj?.store || "");
      const items = Array.isArray(obj?.items) ? obj.items : [];

      for (const it of items) {
        if (it?.removed) continue;

        const sku = normalizeCspc(it?.sku || "");
        if (!sku) continue;

        const p = priceToNumber(it?.price || "");
        if (!Number.isFinite(p) || p <= 0) continue;

        // Keep only the lowest price seen for this SKU across all stores.
        const prev = cheapest.get(sku);
        if (!prev || p < prev.priceNum) cheapest.set(sku, { storeLabel, priceNum: p });
      }
    } catch {
      // ignore parse errors
    }
  }

  return cheapest;
}
|
||||
|
||||
module.exports = {
|
||||
ensureDir,
|
||||
dbPathFor,
|
||||
readDb,
|
||||
writeJsonAtomic,
|
||||
buildDbObject,
|
||||
listDbFiles,
|
||||
buildCheapestSkuIndexFromAllDbs,
|
||||
};
|
||||
100
src/tracker/merge.js
Normal file
100
src/tracker/merge.js
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
"use strict";
|
||||
|
||||
const { normalizeCspc } = require("../utils/sku");
|
||||
const { normPrice } = require("../utils/price");
|
||||
|
||||
/**
 * Normalize an image URL: trim whitespace and reject inline `data:` URIs.
 * Falsy or whitespace-only input yields "".
 */
function normImg(v) {
  const trimmed = String(v || "").trim();
  const isDataUri = /^data:/i.test(trimmed);
  return trimmed && !isDataUri ? trimmed : "";
}
|
||||
|
||||
/**
 * Reconcile freshly-discovered items against the previous db snapshot.
 * Classification per URL:
 *  - not in prevDb            -> new
 *  - in prevDb but removed    -> restored
 *  - price differs            -> updated (name/sku/img changes are persisted
 *                                silently, without a report entry)
 *  - in prevDb, not seen now  -> removed (tombstoned but kept in the db)
 * prevDb is not mutated; a new merged Map is returned.
 * @returns {{ merged: Map, newItems: Array, updatedItems: Array, removedItems: Array, restoredItems: Array }}
 */
function mergeDiscoveredIntoDb(prevDb, discovered) {
  const merged = new Map(prevDb.byUrl);

  const newItems = [];
  const updatedItems = [];
  const removedItems = [];
  const restoredItems = [];

  for (const [url, nowRaw] of discovered.entries()) {
    const prev = prevDb.byUrl.get(url);

    if (!prev) {
      // First sighting: normalize and record as new.
      const now = {
        ...nowRaw,
        sku: normalizeCspc(nowRaw.sku),
        img: normImg(nowRaw.img),
        removed: false,
      };
      newItems.push(now);
      merged.set(url, now);
      continue;
    }

    if (prev.removed) {
      // Back in stock: clear the tombstone, keeping old sku/img as fallback.
      const now = {
        ...nowRaw,
        sku: normalizeCspc(nowRaw.sku) || normalizeCspc(prev.sku),
        img: normImg(nowRaw.img) || normImg(prev.img),
        removed: false,
      };
      restoredItems.push({
        url,
        name: now.name || prev.name || "",
        price: now.price || prev.price || "",
        sku: now.sku || "",
      });
      merged.set(url, now);
      continue;
    }

    // Still listed: compare normalized fields to decide what changed.
    const prevPrice = normPrice(prev.price);
    const nowPrice = normPrice(nowRaw.price);

    const prevSku = normalizeCspc(prev.sku);
    const nowSku = normalizeCspc(nowRaw.sku) || prevSku;

    const prevImg = normImg(prev.img);
    let nowImg = normImg(nowRaw.img);
    if (!nowImg) nowImg = prevImg;

    const nameChanged = String(prev.name || "") !== String(nowRaw.name || "");
    const priceChanged = prevPrice !== nowPrice;
    const skuChanged = prevSku !== nowSku;
    const imgChanged = prevImg !== nowImg;

    if (nameChanged || priceChanged || skuChanged || imgChanged) {
      merged.set(url, { ...nowRaw, sku: nowSku, img: nowImg, removed: false });
    }

    // Only price changes surface in the report.
    if (priceChanged) {
      updatedItems.push({
        url,
        name: nowRaw.name || prev.name || "",
        sku: nowSku || "",
        oldPrice: prev.price || "",
        newPrice: nowRaw.price || "",
      });
    }
  }

  // Anything previously tracked but not rediscovered is tombstoned once.
  for (const [url, prev] of prevDb.byUrl.entries()) {
    if (discovered.has(url)) continue;
    if (!prev.removed) {
      const removed = { ...prev, removed: true };
      merged.set(url, removed);
      removedItems.push({
        url,
        name: prev.name || "",
        price: prev.price || "",
        sku: normalizeCspc(prev.sku) || "",
      });
    }
  }

  return { merged, newItems, updatedItems, removedItems, restoredItems };
}
|
||||
|
||||
module.exports = { mergeDiscoveredIntoDb };
|
||||
240
src/tracker/report.js
Normal file
240
src/tracker/report.js
Normal file
|
|
@ -0,0 +1,240 @@
|
|||
"use strict";
|
||||
|
||||
const { C, color } = require("../utils/ansi");
|
||||
const { padLeft, padRight } = require("../utils/string");
|
||||
const { normalizeCspc } = require("../utils/sku");
|
||||
const { priceToNumber, salePctOff } = require("../utils/price");
|
||||
const { buildCheapestSkuIndexFromAllDbs } = require("./db");
|
||||
|
||||
/**
 * Render a millisecond duration as a 7-char right-aligned seconds string:
 * one decimal below 10s ("9.9s"), whole seconds otherwise ("42s").
 * Non-finite values fall back to "0.0s".
 */
function secStr(ms) {
  const sec = Number.isFinite(ms) ? ms / 1000 : 0;
  const tenth = Math.round(sec * 10) / 10;
  let out;
  if (tenth < 10) {
    out = `${tenth.toFixed(1)}s`;
  } else {
    out = `${Math.round(sec)}s`;
  }
  return out.padStart(7, " ");
}
|
||||
|
||||
/**
 * Build an empty run report: per-category rows, aggregate counters, and the
 * four flat item lists (new / price-updated / removed / restored) that the
 * final report renderer consumes.
 */
function createReport() {
  const totals = { newCount: 0, updatedCount: 0, removedCount: 0, restoredCount: 0 };
  return {
    startedAt: new Date(),
    categories: [],
    totals,
    newItems: [],
    updatedItems: [],
    removedItems: [],
    restoredItems: [],
  };
}
|
||||
|
||||
/**
 * Append one category's scan results onto the flat per-item report lists,
 * tagging each entry with a combined "Store | Category" label.
 */
function addCategoryResultToReport(report, storeName, catLabel, newItems, updatedItems, removedItems, restoredItems) {
  const label = `${storeName} | ${catLabel}`;

  // new/restored/removed rows share the same shape.
  const toRow = (it) => ({ catLabel: label, name: it.name, price: it.price || "", sku: it.sku || "", url: it.url });

  for (const it of newItems) report.newItems.push(toRow(it));
  for (const it of restoredItems) report.restoredItems.push(toRow(it));
  for (const it of removedItems) report.removedItems.push(toRow(it));

  // Price changes carry the old/new pair instead of a single price.
  for (const u of updatedItems) {
    report.updatedItems.push({
      catLabel: label,
      name: u.name,
      sku: u.sku || "",
      oldPrice: u.oldPrice,
      newPrice: u.newPrice,
      url: u.url,
    });
  }
}
|
||||
|
||||
function renderFinalReport(report, { dbDir, colorize = Boolean(process.stdout && process.stdout.isTTY) } = {}) {
|
||||
const paint = (s, code) => color(s, code, colorize);
|
||||
const cheapestSku = buildCheapestSkuIndexFromAllDbs(dbDir);
|
||||
|
||||
const endedAt = new Date();
|
||||
const durMs = endedAt - report.startedAt;
|
||||
|
||||
const storesSet = new Set(report.categories.map((c) => c.store));
|
||||
const totalUnique = report.categories.reduce((acc, c) => acc + (Number.isFinite(c.discoveredUnique) ? c.discoveredUnique : 0), 0);
|
||||
|
||||
let out = "";
|
||||
const ln = (s = "") => {
|
||||
out += String(s) + "\n";
|
||||
};
|
||||
|
||||
ln("");
|
||||
ln(paint("========== REPORT ==========", C.bold));
|
||||
ln(
|
||||
paint("[OK] ", C.green) +
|
||||
`Totals | Stores=${storesSet.size} | Categories=${report.categories.length} | Unique=${totalUnique} | New=${report.totals.newCount} | Restored=${report.totals.restoredCount} | Removed=${report.totals.removedCount} | PriceChanges=${report.totals.updatedCount} | Runtime=${secStr(
|
||||
durMs
|
||||
)}`
|
||||
);
|
||||
ln("");
|
||||
|
||||
ln(paint("Per-category summary:", C.bold));
|
||||
const rows = report.categories.map((c) => ({
|
||||
cat: `${c.store} | ${c.label}`,
|
||||
pages: c.scannedPages,
|
||||
uniq: c.discoveredUnique,
|
||||
newC: c.newCount,
|
||||
resC: c.restoredCount,
|
||||
remC: c.removedCount,
|
||||
updC: c.updatedCount,
|
||||
ms: c.elapsedMs,
|
||||
}));
|
||||
|
||||
const catW = Math.min(48, Math.max(...rows.map((r) => r.cat.length), 8));
|
||||
ln(`${padRight("Store | Category", catW)} ${padLeft("Pages", 5)} ${padLeft("Unique", 6)} ${padLeft("New", 4)} ${padLeft("Res", 4)} ${padLeft("Rem", 4)} ${padLeft("Upd", 4)} ${padLeft("Sec", 7)}`);
|
||||
ln(`${"-".repeat(catW)} ----- ------ ---- ---- ---- ---- -------`);
|
||||
for (const r of rows) {
|
||||
ln(
|
||||
`${padRight(r.cat, catW)} ${padLeft(r.pages, 5)} ${padLeft(r.uniq, 6)} ${padLeft(r.newC, 4)} ${padLeft(r.resC, 4)} ${padLeft(r.remC, 4)} ${padLeft(r.updC, 4)} ${secStr(r.ms)}`
|
||||
);
|
||||
}
|
||||
ln("");
|
||||
|
||||
const reportLabelW = Math.max(
|
||||
16,
|
||||
...report.newItems.map((x) => x.catLabel.length),
|
||||
...report.restoredItems.map((x) => x.catLabel.length),
|
||||
...report.updatedItems.map((x) => x.catLabel.length),
|
||||
...report.removedItems.map((x) => x.catLabel.length)
|
||||
);
|
||||
|
||||
function storeFromCatLabel(catLabel) {
|
||||
return String(catLabel || "").split(" | ")[0] || "";
|
||||
}
|
||||
|
||||
function skuInline(sku) {
|
||||
const s = normalizeCspc(sku);
|
||||
return s ? paint(` ${s}`, C.gray) : "";
|
||||
}
|
||||
|
||||
function cheaperAtInline(catLabel, sku, currentPriceStr) {
|
||||
const s = normalizeCspc(sku);
|
||||
if (!s) return "";
|
||||
const best = cheapestSku.get(s);
|
||||
if (!best || !best.storeLabel) return "";
|
||||
const curStore = storeFromCatLabel(catLabel);
|
||||
if (!curStore || best.storeLabel === curStore) return "";
|
||||
const curP = priceToNumber(currentPriceStr);
|
||||
if (!Number.isFinite(curP)) return "";
|
||||
if (best.priceNum >= curP) return "";
|
||||
return paint(` (Cheaper at ${best.storeLabel})`, C.gray);
|
||||
}
|
||||
|
||||
function availableAtInline(catLabel, sku) {
|
||||
const s = normalizeCspc(sku);
|
||||
if (!s) return "";
|
||||
const best = cheapestSku.get(s);
|
||||
if (!best || !best.storeLabel) return "";
|
||||
const curStore = storeFromCatLabel(catLabel);
|
||||
if (curStore && best.storeLabel === curStore) return "";
|
||||
return paint(` (Available at ${best.storeLabel})`, C.gray);
|
||||
}
|
||||
|
||||
if (report.newItems.length) {
|
||||
ln(paint(`NEW LISTINGS (${report.newItems.length})`, C.bold + C.green));
|
||||
for (const it of report.newItems.sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name))) {
|
||||
const price = it.price ? paint(it.price, C.cyan) : paint("(no price)", C.gray);
|
||||
const sku = normalizeCspc(it.sku || "");
|
||||
const cheapTag = cheaperAtInline(it.catLabel, sku, it.price || "");
|
||||
ln(
|
||||
`${paint("+", C.green)} ${padRight(it.catLabel, reportLabelW)} | ${paint(it.name, C.bold)}${skuInline(sku)} ${price}${cheapTag}`
|
||||
);
|
||||
ln(` ${paint(it.url, C.dim)}`);
|
||||
}
|
||||
ln("");
|
||||
} else {
|
||||
ln(paint("NEW LISTINGS (0)", C.bold));
|
||||
ln("");
|
||||
}
|
||||
|
||||
if (report.restoredItems.length) {
|
||||
ln(paint(`RESTORED (${report.restoredItems.length})`, C.bold + C.green));
|
||||
for (const it of report.restoredItems.sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name))) {
|
||||
const price = it.price ? paint(it.price, C.cyan) : paint("(no price)", C.gray);
|
||||
const sku = normalizeCspc(it.sku || "");
|
||||
const cheapTag = cheaperAtInline(it.catLabel, sku, it.price || "");
|
||||
ln(
|
||||
`${paint("R", C.green)} ${padRight(it.catLabel, reportLabelW)} | ${paint(it.name, C.bold)}${skuInline(sku)} ${price}${cheapTag}`
|
||||
);
|
||||
ln(` ${paint(it.url, C.dim)}`);
|
||||
}
|
||||
ln("");
|
||||
} else {
|
||||
ln(paint("RESTORED (0)", C.bold));
|
||||
ln("");
|
||||
}
|
||||
|
||||
if (report.removedItems.length) {
|
||||
ln(paint(`REMOVED (${report.removedItems.length})`, C.bold + C.yellow));
|
||||
for (const it of report.removedItems.sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name))) {
|
||||
const price = it.price ? paint(it.price, C.cyan) : paint("(no price)", C.gray);
|
||||
const sku = normalizeCspc(it.sku || "");
|
||||
const availTag = availableAtInline(it.catLabel, sku);
|
||||
ln(
|
||||
`${paint("-", C.yellow)} ${padRight(it.catLabel, reportLabelW)} | ${paint(it.name, C.bold)}${skuInline(sku)} ${price}${availTag}`
|
||||
);
|
||||
ln(` ${paint(it.url, C.dim)}`);
|
||||
}
|
||||
ln("");
|
||||
} else {
|
||||
ln(paint("REMOVED (0)", C.bold));
|
||||
ln("");
|
||||
}
|
||||
|
||||
if (report.updatedItems.length) {
|
||||
ln(paint(`PRICE CHANGES (${report.updatedItems.length})`, C.bold + C.cyan));
|
||||
|
||||
for (const u of report.updatedItems.sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name))) {
|
||||
const oldRaw = u.oldPrice || "";
|
||||
const newRaw = u.newPrice || "";
|
||||
|
||||
const oldN = priceToNumber(oldRaw);
|
||||
const newN = priceToNumber(newRaw);
|
||||
|
||||
const oldP = oldRaw ? paint(oldRaw, C.yellow) : paint("(no price)", C.gray);
|
||||
|
||||
let newP = newRaw ? newRaw : "(no price)";
|
||||
let offTag = "";
|
||||
|
||||
if (Number.isFinite(oldN) && Number.isFinite(newN)) {
|
||||
if (newN > oldN) {
|
||||
newP = paint(newP, C.red); // increase
|
||||
} else if (newN < oldN) {
|
||||
newP = paint(newP, C.green); // decrease
|
||||
const pct = salePctOff(oldRaw, newRaw);
|
||||
if (pct !== null) offTag = " " + paint(`[${pct}% Off]`, C.green);
|
||||
} else {
|
||||
newP = paint(newP, C.cyan);
|
||||
}
|
||||
} else {
|
||||
newP = paint(newP, C.cyan);
|
||||
}
|
||||
|
||||
const sku = normalizeCspc(u.sku || "");
|
||||
const cheapTag = cheaperAtInline(u.catLabel, sku, newRaw || "");
|
||||
|
||||
ln(
|
||||
`${paint("~", C.cyan)} ${padRight(u.catLabel, reportLabelW)} | ${paint(u.name, C.bold)}${skuInline(sku)} ${oldP} ${paint("->", C.gray)} ${newP}${offTag}${cheapTag}`
|
||||
);
|
||||
ln(` ${paint(u.url, C.dim)}`);
|
||||
}
|
||||
|
||||
ln("");
|
||||
} else {
|
||||
ln(paint("PRICE CHANGES (0)", C.bold));
|
||||
ln("");
|
||||
}
|
||||
|
||||
ln(paint("======== END REPORT ========", C.bold));
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
module.exports = { createReport, addCategoryResultToReport, renderFinalReport };
|
||||
72
src/tracker/run_all.js
Normal file
72
src/tracker/run_all.js
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
"use strict";
|
||||
|
||||
const { createReport } = require("./report");
|
||||
const { parallelMapStaggered } = require("../utils/async");
|
||||
|
||||
const {
|
||||
makeCatPrefixers,
|
||||
buildCategoryContext,
|
||||
loadCategoryDb,
|
||||
discoverAndScanCategory,
|
||||
} = require("./category_scan");
|
||||
|
||||
// Some sites will intermittently 403/429. We don't want a single category/store
|
||||
// to abort the entire run. Log and continue.
|
||||
/**
 * Render any thrown value as a log-friendly string: prefer the stack trace,
 * pass strings through, and map falsy values to "Unknown error".
 */
function formatErr(e) {
  if (!e) {
    return "Unknown error";
  }
  if (typeof e === "string") {
    return e;
  }
  return e.stack ? e.stack : String(e);
}
|
||||
|
||||
/**
 * Scan every category of every store with bounded category-level concurrency,
 * collecting results into a single report. A failing category is logged and
 * skipped so one bad store cannot abort the whole run.
 * @param {Array<object>} stores - store descriptors (see createStore)
 * @param {{ config: object, logger: object, http: object }} deps
 * @returns {Promise<object>} the populated report (see createReport)
 */
async function runAllStores(stores, { config, logger, http }) {
  const report = createReport();
  const { catPrefixOut } = makeCatPrefixers(stores, logger);

  // Echo the effective runtime settings up front for debuggability.
  logger.info(`Debug=on`);
  logger.info(
    `Concurrency=${config.concurrency} StaggerMs=${config.staggerMs} Retries=${config.maxRetries} TimeoutMs=${config.timeoutMs}`
  );
  logger.info(
    `DiscoveryGuess=${config.discoveryGuess} DiscoveryStep=${config.discoveryStep}`
  );
  logger.info(`MaxPages=${config.maxPages === null ? "none" : config.maxPages}`);
  logger.info(`CategoryConcurrency=${config.categoryConcurrency}`);

  // Flatten (store, category) pairs into independent work items; previous
  // dbs are loaded up front so each item is self-contained.
  const workItems = [];
  for (const store of stores) {
    for (const cat of store.categories) {
      const baseCtx = buildCategoryContext(store, cat, catPrefixOut, config);
      const ctx = { ...baseCtx, config, logger, http };
      const prevDb = loadCategoryDb(logger, ctx);
      workItems.push({ ctx, prevDb });
    }
  }

  await parallelMapStaggered(
    workItems,
    Math.min(config.categoryConcurrency, workItems.length),
    0,
    async (w) => {
      try {
        await discoverAndScanCategory(w.ctx, w.prevDb, report);
      } catch (e) {
        const storeName = w?.ctx?.store?.name || w?.ctx?.store?.host || "unknown-store";
        const catLabel = w?.ctx?.cat?.label || w?.ctx?.cat?.key || "unknown-category";

        // Keep it loud in logs, but do not fail the entire run.
        logger.warn(
          `Category failed (continuing): ${storeName} | ${catLabel}\n${formatErr(e)}`
        );

        // If you want failures surfaced in the final report later, you could also
        // push a "failed category" record onto report.categories here.
      }
      return null;
    }
  );

  return report;
}
|
||||
|
||||
module.exports = { runAllStores };
|
||||
19
src/utils/ansi.js
Normal file
19
src/utils/ansi.js
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
"use strict";
|
||||
|
||||
// ANSI SGR escape sequences used for terminal styling.
const C = {
  reset: "\x1b[0m",
  dim: "\x1b[2m",
  bold: "\x1b[1m",
  red: "\x1b[31m",
  green: "\x1b[32m",
  yellow: "\x1b[33m",
  cyan: "\x1b[36m",
  gray: "\x1b[90m",
};

// Wrap `s` in the given escape code plus a trailing reset when `enabled`.
// With colors disabled the value is still stringified, never styled.
function color(s, code, enabled) {
  const text = String(s);
  if (!enabled) {
    return text;
  }
  return `${String(code || "")}${text}${C.reset}`;
}
|
||||
|
||||
module.exports = { C, color };
|
||||
86
src/utils/args.js
Normal file
86
src/utils/args.js
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
"use strict";
|
||||
|
||||
// Parse `v` as a base-10 integer clamped to [min, max].
// Unparseable input yields `def`; when `def` is null, a missing value
// (null/undefined) also yields null so "unset" survives the round trip.
function clampInt(v, def, min, max) {
  if (def === null && v == null) return null;
  const parsed = Number.parseInt(v ?? "", 10);
  if (!Number.isFinite(parsed)) return def;
  if (parsed < min) return min;
  if (parsed > max) return max;
  return parsed;
}
|
||||
|
||||
/**
 * Parse CLI arguments for the tracker.
 *
 * Numeric flags are clamped to sane ranges via clampInt and stay null when
 * absent (so config defaults apply later). A bare positional integer is
 * accepted as a shorthand for --max-pages. Flag values are only consumed
 * when the following token does not start with "-".
 *
 * @param {string[]} argv - argument vector (without node/script prefix).
 * @returns {{maxPages: ?number, debug: boolean, concurrency: ?number,
 *           staggerMs: ?number, guess: ?number, step: ?number,
 *           dataDir: ?string, reportDir: ?string}}
 */
function parseArgs(argv) {
  let debug = false;
  let maxPages = null;
  let concurrency = null;
  let staggerMs = null;
  let guess = null;
  let step = null;
  let dataDir = null;
  let reportDir = null;

  // Tokens that are not flags and not consumed as flag values.
  const positional = [];

  for (let i = 0; i < argv.length; i++) {
    const a = argv[i];

    if (a === "--debug" || a === "-d") {
      debug = true;
      continue;
    }

    if (a === "--max-pages" && argv[i + 1] && !argv[i + 1].startsWith("-")) {
      maxPages = clampInt(argv[i + 1], null, 1, 5000);
      i++;
      continue;
    }

    if (a === "--concurrency" && argv[i + 1] && !argv[i + 1].startsWith("-")) {
      concurrency = clampInt(argv[i + 1], null, 1, 64);
      i++;
      continue;
    }

    // Both kebab-case and camelCase spellings are accepted.
    if ((a === "--stagger-ms" || a === "--staggerMs") && argv[i + 1] && !argv[i + 1].startsWith("-")) {
      staggerMs = clampInt(argv[i + 1], null, 0, 5000);
      i++;
      continue;
    }

    if (a === "--guess" && argv[i + 1] && !argv[i + 1].startsWith("-")) {
      guess = clampInt(argv[i + 1], null, 1, 5000);
      i++;
      continue;
    }

    if (a === "--step" && argv[i + 1] && !argv[i + 1].startsWith("-")) {
      step = clampInt(argv[i + 1], null, 1, 500);
      i++;
      continue;
    }

    if ((a === "--data-dir" || a === "--dataDir") && argv[i + 1] && !argv[i + 1].startsWith("-")) {
      dataDir = String(argv[i + 1]);
      i++;
      continue;
    }

    if ((a === "--report-dir" || a === "--reportDir") && argv[i + 1] && !argv[i + 1].startsWith("-")) {
      reportDir = String(argv[i + 1]);
      i++;
      continue;
    }

    if (!String(a).startsWith("-")) positional.push(a);
  }

  // Legacy shorthand: a bare positive integer positional means --max-pages.
  if (maxPages === null) {
    const cand = positional.find((x) => /^\d+$/.test(String(x)));
    if (cand) {
      const n = Number.parseInt(cand, 10);
      if (Number.isFinite(n) && n > 0) maxPages = Math.min(n, 5000);
    }
  }

  return { maxPages, debug, concurrency, staggerMs, guess, step, dataDir, reportDir };
}
|
||||
|
||||
module.exports = { clampInt, parseArgs };
|
||||
26
src/utils/async.js
Normal file
26
src/utils/async.js
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
"use strict";
|
||||
|
||||
const { setTimeout: sleep } = require("timers/promises");
|
||||
|
||||
/**
 * Map `fn` over `arr` with at most `concurrency` in-flight calls, preserving
 * input order in the result. With staggerMs > 0, worker start times are
 * offset and each non-first item waits staggerMs before running, so
 * requests fan out gradually instead of bursting.
 *
 * Rejects (like Promise.all) if any call of `fn` throws.
 */
async function parallelMapStaggered(arr, concurrency, staggerMs, fn) {
  const results = new Array(arr.length);
  let cursor = 0;

  const runWorker = async (workerId) => {
    // Offset each worker after the first so starts are spread out.
    if (staggerMs > 0 && workerId > 1) await sleep(staggerMs * (workerId - 1));
    for (;;) {
      const idx = cursor++;
      if (idx >= arr.length) break;
      if (staggerMs > 0 && idx > 0) await sleep(staggerMs);
      results[idx] = await fn(arr[idx], idx);
    }
  };

  const workerCount = Math.min(concurrency, arr.length);
  const pending = [];
  for (let id = 1; id <= workerCount; id++) pending.push(runWorker(id));
  await Promise.all(pending);
  return results;
}
|
||||
|
||||
module.exports = { parallelMapStaggered };
|
||||
12
src/utils/bytes.js
Normal file
12
src/utils/bytes.js
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
"use strict";
|
||||
|
||||
// Format a byte count for humans: "512B", "1.5KB", "3.0MB".
// Non-finite or non-positive values render as "0B".
function humanBytes(n) {
  if (!Number.isFinite(n) || n <= 0) return "0B";
  if (n < 1024) return `${n}B`;
  const kib = n / 1024;
  return kib < 1024 ? `${kib.toFixed(1)}KB` : `${(kib / 1024).toFixed(1)}MB`;
}
|
||||
|
||||
module.exports = { humanBytes };
|
||||
111
src/utils/html.js
Normal file
111
src/utils/html.js
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
"use strict";
|
||||
|
||||
// Drop every HTML tag, keeping only the text between tags.
function stripTags(s) {
  const text = String(s);
  return text.replace(/<[^>]*>/g, "");
}
|
||||
|
||||
// Replace tags with spaces, then collapse whitespace runs and trim,
// so "<p>a</p><p>b</p>" becomes "a b".
function cleanText(s) {
  const noTags = String(s).replace(/<[^>]+>/g, " ");
  return noTags.replace(/\s+/g, " ").trim();
}
|
||||
|
||||
// Decode the small set of HTML entities that show up in scraped markup.
// Numeric references use String.fromCodePoint so astral characters
// (emoji, &#128512;-style references) decode correctly instead of being
// mangled by fromCharCode's 16-bit truncation.
// NOTE(review): the named-entity list was reconstructed from source whose
// entity patterns had themselves been HTML-decoded; the two apostrophe
// entries are assumed to be &apos; and &#039; — confirm against history.
function decodeHtml(s) {
  return String(s)
    .replace(/&#(\d+);/g, (_, n) => String.fromCodePoint(parseInt(n, 10)))
    .replace(/&#x([0-9a-fA-F]+);/g, (_, n) => String.fromCodePoint(parseInt(n, 16)))
    .replace(/&amp;/g, "&")
    .replace(/&quot;/g, '"')
    .replace(/&apos;/g, "'")
    .replace(/&#039;/g, "'")
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    .replace(/&nbsp;/g, " ")
    .replace(/&laquo;/g, "\u00ab")
    .replace(/&raquo;/g, "\u00bb");
}
|
||||
|
||||
// Escape regex metacharacters so `s` can be embedded literally in a pattern.
function escapeRe(s) {
  const META = /[.*+?^${}()|[\]\\]/g;
  return String(s).replace(META, "\\$&");
}
|
||||
|
||||
/**
 * Pull the value of `attrName` out of a raw HTML tag string.
 * Handles double-quoted, single-quoted, and bare attribute values.
 *
 * Fixed: the previous pattern anchored the name with `\b`, which let a
 * lookup for "src" match the tail of "data-src" (hyphen is a word
 * boundary). The lookbehind now requires a real attribute-name boundary
 * (no preceding word character or hyphen).
 *
 * @param {string} html - markup to search (typically a single tag).
 * @param {string} attrName - attribute name, matched case-insensitively.
 * @returns {string} the attribute value, or "" when absent.
 */
function extractHtmlAttr(html, attrName) {
  const re = new RegExp(
    `(?<![\\w-])${escapeRe(attrName)}\\s*=\\s*(?:"([^"]*)"|'([^']*)'|([^\\s>]+))`,
    "i"
  );
  const m = re.exec(html);
  if (!m) return "";
  return m[1] ?? m[2] ?? m[3] ?? "";
}
|
||||
|
||||
// Take the first candidate URL out of a srcset attribute value:
// "a.jpg 1x, b.jpg 2x" -> "a.jpg". Surrounding quotes are stripped.
function pickFirstUrlFromSrcset(srcset) {
  const value = String(srcset || "").trim();
  if (!value) return "";
  const [firstCandidate = ""] = value.split(",");
  const [url = ""] = firstCandidate.trim().split(/\s+/);
  return url.trim().replace(/^["']|["']$/g, "");
}
|
||||
|
||||
// Resolve a possibly scheme-relative ("//cdn/...") or path-relative URL to
// an absolute one against baseUrl. Falls back to the (trimmed, possibly
// https-prefixed) input when parsing fails.
function normalizeMaybeRelativeUrl(raw, baseUrl) {
  let candidate = String(raw || "").trim();
  if (!candidate) return "";
  if (candidate.startsWith("//")) candidate = `https:${candidate}`;
  try {
    const resolved = baseUrl ? new URL(candidate, baseUrl) : new URL(candidate);
    return resolved.toString();
  } catch {
    return candidate;
  }
}
|
||||
|
||||
/**
 * Best-effort thumbnail extractor for listing HTML blocks.
 * Looks at the first <img> tag only, preferring lazy-load attributes over
 * the plain src (placeholder data: URIs are skipped).
 * Returns an absolute URL when baseUrl is provided, "" when nothing usable
 * is found.
 */
function extractFirstImgUrl(html, baseUrl) {
  const markup = String(html || "");
  const imgMatch = markup.match(/<img\b[^>]*>/i);
  if (!imgMatch) return "";

  const tag = imgMatch[0];

  // Lazy-loading attributes first, plain src last.
  const candidateAttrs = [
    "data-src",
    "data-lazy-src",
    "data-original",
    "data-srcset",
    "srcset",
    "src",
  ];

  for (const attr of candidateAttrs) {
    const rawValue = extractHtmlAttr(tag, attr);
    if (!rawValue) continue;

    let value = decodeHtml(String(rawValue)).trim();
    if (value && attr.toLowerCase().includes("srcset")) {
      value = pickFirstUrlFromSrcset(value);
    }
    value = String(value || "").trim();
    if (!value) continue;

    // Inline data: URIs are placeholders, not real thumbnails.
    if (/^data:/i.test(value)) continue;

    const absolute = normalizeMaybeRelativeUrl(value, baseUrl);
    if (absolute) return absolute;
  }

  return "";
}
|
||||
|
||||
module.exports = {
|
||||
stripTags,
|
||||
cleanText,
|
||||
decodeHtml,
|
||||
escapeRe,
|
||||
extractHtmlAttr,
|
||||
extractFirstImgUrl,
|
||||
};
|
||||
21
src/utils/price.js
Normal file
21
src/utils/price.js
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
"use strict";
|
||||
|
||||
// Canonicalize a price string by removing all whitespace
// (" $ 5.00 " -> "$5.00"); null/undefined become "".
function normPrice(p) {
  const raw = String(p || "");
  return raw.trim().replace(/\s+/g, "");
}
|
||||
|
||||
/**
 * Parse a display price like "$1,234.56" into a number.
 *
 * Fixed: inputs with no digits at all (empty string, "call for price")
 * previously coerced to 0 via Number(""); they now return NaN as the
 * original `Number.isFinite(n) ? n : NaN` contract implies.
 *
 * @param {*} p - price string; currency symbols/commas are ignored.
 * @returns {number} numeric value, or NaN when unparseable.
 */
function priceToNumber(p) {
  const digits = String(p || "").replace(/[^0-9.]/g, "");
  if (digits === "") return NaN;
  const n = Number(digits);
  return Number.isFinite(n) ? n : NaN;
}
|
||||
|
||||
// Percentage discount (rounded) implied by old -> new price strings.
// Returns null when either price is unparseable, the old price is not
// positive, or there is no actual reduction.
function salePctOff(oldPriceStr, newPriceStr) {
  const before = priceToNumber(oldPriceStr);
  const after = priceToNumber(newPriceStr);
  const comparable = Number.isFinite(before) && Number.isFinite(after) && before > 0;
  if (!comparable || after >= before) return null;
  return Math.round(((before - after) / before) * 100);
}
|
||||
|
||||
module.exports = { normPrice, priceToNumber, salePctOff };
|
||||
9
src/utils/sku.js
Normal file
9
src/utils/sku.js
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
"use strict";
|
||||
|
||||
// Alberta CSPC / product code is 6 digits. Some stores label it "SKU".
// Extract the first standalone 6-digit run, or "" when none exists.
function normalizeCspc(v) {
  const match = /\b(\d{6})\b/.exec(String(v ?? ""));
  return match ? match[1] : "";
}
|
||||
|
||||
module.exports = { normalizeCspc };
|
||||
29
src/utils/string.js
Normal file
29
src/utils/string.js
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
"use strict";
|
||||
|
||||
// Pad `s` with trailing spaces to width `n`. Uses the idiomatic
// String#padEnd instead of a hand-rolled repeat; strings already at
// least `n` characters long are returned unchanged.
function padRight(s, n) {
  return String(s).padEnd(n);
}
|
||||
|
||||
// Pad `s` with leading spaces to width `n`. Uses the idiomatic
// String#padStart instead of a hand-rolled repeat; strings already at
// least `n` characters long are returned unchanged.
function padLeft(s, n) {
  return String(s).padStart(n);
}
|
||||
|
||||
// Remove ANSI SGR color/style sequences (ESC [ ... m) from a string.
function stripAnsi(s) {
  const SGR = /\x1b\[[0-9;]*m/g;
  return String(s).replace(SGR, "");
}
|
||||
|
||||
// padRight that measures *visible* width (ANSI codes stripped) so
// colorized strings still line up in columns.
function padRightV(s, n) {
  const text = String(s);
  const visible = stripAnsi(text).length;
  return visible >= n ? text : text + " ".repeat(n - visible);
}
|
||||
|
||||
// padLeft that measures *visible* width (ANSI codes stripped) so
// colorized strings still line up in columns.
function padLeftV(s, n) {
  const text = String(s);
  const visible = stripAnsi(text).length;
  return visible >= n ? text : " ".repeat(n - visible) + text;
}
|
||||
|
||||
module.exports = { padRight, padLeft, stripAnsi, padRightV, padLeftV };
|
||||
13
src/utils/text.js
Normal file
13
src/utils/text.js
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
"use strict";
|
||||
|
||||
const { cleanText, decodeHtml } = require("./html");
|
||||
|
||||
// Normalize a scraped product name: decode entities, drop markup, strip
// quote characters and anything outside letters/digits/basic punctuation,
// then collapse whitespace.
function sanitizeName(s) {
  const plain = cleanText(decodeHtml(String(s || "")));
  return plain
    .replace(/['"’“”`´]/g, "")
    .replace(/[^\p{L}\p{N}\s\-&().,/]/gu, "")
    .replace(/\s+/g, " ")
    .trim();
}
|
||||
|
||||
module.exports = { sanitizeName };
|
||||
16
src/utils/time.js
Normal file
16
src/utils/time.js
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
"use strict";
|
||||
|
||||
// Local wall-clock timestamp "HH:MM:SS.mmm" for log lines.
function ts(d = new Date()) {
  const two = (v) => String(v).padStart(2, "0");
  const hours = two(d.getHours());
  const minutes = two(d.getMinutes());
  const seconds = two(d.getSeconds());
  const millis = String(d.getMilliseconds()).padStart(3, "0");
  return `${hours}:${minutes}:${seconds}.${millis}`;
}
|
||||
|
||||
// File-name-safe UTC timestamp, e.g. "2026-01-16T21-27-01Z"
// (colons replaced with dashes, milliseconds dropped).
function isoTimestampFileSafe(d = new Date()) {
  const iso = d.toISOString();
  return iso.replace(/:/g, "-").replace(/\.\d{3}Z$/, "Z");
}
|
||||
|
||||
module.exports = { ts, isoTimestampFileSafe };
|
||||
50
src/utils/url.js
Normal file
50
src/utils/url.js
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
"use strict";
|
||||
|
||||
// Canonicalize a category URL: drop the fragment, remove any `page`
// query parameter, ensure a trailing slash, and strip a trailing
// /page/N/ path segment. Unparseable input is returned untouched.
function normalizeBaseUrl(startUrl) {
  try {
    const u = new URL(startUrl);
    u.hash = "";
    if (u.searchParams && u.searchParams.has("page")) u.searchParams.delete("page");
    const query = u.searchParams.toString();
    u.search = query ? `?${query}` : "";

    if (!u.pathname.endsWith("/")) u.pathname = `${u.pathname}/`;
    u.pathname = u.pathname.replace(/\/page\/\d+\/?$/, "/");
    return u.toString();
  } catch {
    return startUrl;
  }
}
|
||||
|
||||
// Build the WordPress-style paged URL `<base>/page/N/`; page 1 is just the
// normalized base URL. Any existing /page/N/ suffix is replaced.
function makePageUrl(baseUrl, pageNum) {
  if (pageNum <= 1) return normalizeBaseUrl(baseUrl);
  const u = new URL(baseUrl);
  if (!u.pathname.endsWith("/")) u.pathname = `${u.pathname}/`;
  u.pathname = `${u.pathname.replace(/\/page\/\d+\/?$/, "/")}page/${pageNum}/`;
  u.hash = "";
  return u.toString();
}
|
||||
|
||||
// Let a store definition override pagination URL construction; fall back
// to the default WordPress-style makePageUrl otherwise.
function makePageUrlForCtx(ctx, baseUrl, pageNum) {
  const custom = ctx?.store?.makePageUrl;
  if (typeof custom === "function") {
    return custom(baseUrl, pageNum);
  }
  return makePageUrl(baseUrl, pageNum);
}
|
||||
|
||||
// Pagination via an arbitrary query parameter (e.g. ?p=3); page numbers
// below 1 are written explicitly as "1".
function makePageUrlQueryParam(baseUrl, paramName, pageNum) {
  const u = new URL(normalizeBaseUrl(baseUrl));
  u.hash = "";
  const page = pageNum <= 1 ? "1" : String(pageNum);
  u.searchParams.set(paramName, page);
  u.search = `?${u.searchParams.toString()}`;
  return u.toString();
}
|
||||
|
||||
// Shopify-style pagination: always ?page=N with N clamped to at least 1.
function makePageUrlShopifyQueryPage(baseUrl, pageNum) {
  const u = new URL(normalizeBaseUrl(baseUrl));
  u.hash = "";
  u.searchParams.set("page", String(Math.max(1, pageNum)));
  u.search = `?${u.searchParams.toString()}`;
  return u.toString();
}
|
||||
|
||||
module.exports = { normalizeBaseUrl, makePageUrl, makePageUrlForCtx, makePageUrlQueryParam, makePageUrlShopifyQueryPage };
|
||||
57
src/utils/woocommerce.js
Normal file
57
src/utils/woocommerce.js
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
"use strict";
|
||||
|
||||
const { decodeHtml, stripTags, cleanText } = require("./html");
|
||||
|
||||
/**
 * Extracts the *effective* price from Woo price blocks.
 * - If sale <ins> exists, uses the last <ins> (sale price)
 * - Else uses the normal price bdi/span content.
 *
 * Three strategies are tried in order: the last <bdi> in scope, the
 * WooCommerce currency-symbol span plus a bare number, and finally a
 * plain "$NN.NN" match anywhere in the text.
 *
 * @param {string} block - HTML of a product listing block.
 * @returns {string} price like "$12.99", or "" when nothing matched.
 */
function extractPriceFromTmbBlock(block) {
  const span = matchFirstPriceSpan(block);
  if (!span) return "";

  // A sale renders <del>old</del><ins>new</ins>; the last <ins> holds the
  // price actually charged.
  const insMatches = [...span.matchAll(/<ins\b[^>]*>([\s\S]*?)<\/ins>/gi)];
  const scope = insMatches.length ? insMatches[insMatches.length - 1][1] : span;

  // Preferred: WooCommerce wraps the formatted amount in <bdi>.
  const bdis = [...scope.matchAll(/<bdi\b[^>]*>([\s\S]*?)<\/bdi>/gi)];
  if (bdis.length) {
    const raw = cleanText(decodeHtml(stripTags(bdis[bdis.length - 1][1]))).replace(/\s+/g, "");
    if (raw) return raw.startsWith("$") ? raw : `$${raw}`;
  }

  // Fallback: currency-symbol span + first number in the text.
  const sym = scope.match(/woocommerce-Price-currencySymbol[^>]*>\s*([^<\s]+)/i);
  const text = cleanText(decodeHtml(stripTags(scope)));
  const num = text.match(/(\d+(?:\.\d{2})?)/);
  if (sym && num) return `${sym[1].trim()}${num[1]}`;

  // Last resort: a literal dollar amount anywhere in the stripped text.
  const m = cleanText(decodeHtml(stripTags(scope))).match(/\$\s*\d+(?:\.\d{2})?/);
  return m ? m[0].replace(/\s+/g, "") : "";
}
|
||||
|
||||
// Return the inner HTML of the first <span class="...price..."> element,
// tracking nested <span> depth so the *matching* close tag is found.
// Returns "" when no price span exists or the markup is unbalanced.
function matchFirstPriceSpan(html) {
  const openRe = /<span\b[^>]*class=["'][^"']*\bprice\b[^"']*["'][^>]*>/i;
  const opened = openRe.exec(html);
  if (!opened) return "";
  const contentStart = opened.index + opened[0].length;

  let pos = contentStart;
  let depth = 1;
  while (pos < html.length) {
    const open = html.indexOf("<span", pos);
    const close = html.indexOf("</span>", pos);
    if (close === -1) break;

    if (open !== -1 && open < close) {
      depth += 1;
      pos = open + 5;
    } else {
      depth -= 1;
      if (depth === 0) return html.slice(contentStart, close);
      pos = close + 7;
    }
  }
  return "";
}
|
||||
|
||||
module.exports = { extractPriceFromTmbBlock };
|
||||
93
tools/build_viz_commits.js
Executable file
93
tools/build_viz_commits.js
Executable file
|
|
@ -0,0 +1,93 @@
|
|||
#!/usr/bin/env node
|
||||
"use strict";
|
||||
|
||||
const { execFileSync } = require("child_process");
|
||||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
|
||||
// Run git with the given args and return trimmed stdout (throws on failure).
function runGit(args) {
  const stdout = execFileSync("git", args, { encoding: "utf8" });
  return stdout.trimEnd();
}
|
||||
|
||||
// Absolute paths of all *.json files directly inside dbDir.
// A missing or unreadable directory yields [] rather than throwing.
function listDbFiles(dbDir) {
  try {
    const entries = fs.readdirSync(dbDir, { withFileTypes: true });
    return entries
      .filter((e) => e.isFile() && e.name.endsWith(".json"))
      .map((e) => path.join(dbDir, e.name));
  } catch {
    return [];
  }
}
|
||||
|
||||
// Leading "YYYY-MM-DD" of an ISO timestamp, or "" when absent.
function dateOnly(iso) {
  const match = /^(\d{4}-\d{2}-\d{2})/.exec(String(iso ?? ""));
  return match ? match[1] : "";
}
|
||||
|
||||
/**
 * Build viz/data/db_commits.json: for every data/db/*.json file, the list
 * of daily commit points (one per calendar day, the newest commit of that
 * day), oldest -> newest, capped to MAX_DAYS_PER_FILE entries.
 * Runs git against the current working directory's repository.
 */
function main() {
  const repoRoot = process.cwd();
  const dbDir = path.join(repoRoot, "data", "db");
  const outDir = path.join(repoRoot, "viz", "data");
  const outFile = path.join(outDir, "db_commits.json");

  fs.mkdirSync(outDir, { recursive: true });

  // Repo-relative, forward-slash paths so git log path filters work everywhere.
  const files = listDbFiles(dbDir).map((abs) => path.posix.join("data/db", path.basename(abs)));

  const payload = {
    generatedAt: new Date().toISOString(),
    branch: "data",
    files: {},
  };

  // We want the viz to show ONE point per day (the most recent run that day).
  // So we collapse multiple commits per day down to the newest commit for that date.
  //
  // With multiple runs/day, we also want to keep a long-ish daily history.
  // Raw commits per day could be ~4, so grab a larger raw window and then collapse.
  const MAX_RAW_PER_FILE = 2400; // ~600 days @ 4 runs/day
  const MAX_DAYS_PER_FILE = 600; // daily points kept after collapsing

  for (const rel of files.sort()) {
    let txt = "";
    try {
      // %H = sha, %cI = committer date strict ISO 8601 (includes time + tz)
      txt = runGit(["log", "--format=%H %cI", `-${MAX_RAW_PER_FILE}`, "--", rel]);
    } catch {
      // File not tracked (or git failed) — skip it entirely.
      continue;
    }

    const lines = txt.split(/\r?\n/).map((s) => s.trim()).filter(Boolean);

    // git log is newest -> oldest.
    // Keep the FIRST commit we see for each date (that is the most recent commit for that date).
    const byDate = new Map(); // date -> { sha, date, ts }
    for (const line of lines) {
      const m = line.match(/^([0-9a-f]{7,40})\s+(.+)$/i);
      if (!m) continue;

      const sha = m[1];
      const ts = m[2];
      const d = dateOnly(ts);
      if (!d) continue;

      if (!byDate.has(d)) byDate.set(d, { sha, date: d, ts });
    }

    // Convert to oldest -> newest
    let arr = [...byDate.values()].reverse();

    // Keep only the newest MAX_DAYS_PER_FILE (still oldest -> newest)
    if (arr.length > MAX_DAYS_PER_FILE) {
      arr = arr.slice(arr.length - MAX_DAYS_PER_FILE);
    }

    payload.files[rel] = arr;
  }

  fs.writeFileSync(outFile, JSON.stringify(payload, null, 2) + "\n", "utf8");
  process.stdout.write(`Wrote ${outFile} (${Object.keys(payload.files).length} files)\n`);
}
|
||||
|
||||
main();
|
||||
105
tools/build_viz_index.js
Executable file
105
tools/build_viz_index.js
Executable file
|
|
@ -0,0 +1,105 @@
|
|||
#!/usr/bin/env node
|
||||
"use strict";
|
||||
|
||||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
|
||||
// mkdir -p equivalent; succeeds silently when the directory already exists.
function ensureDir(dir) {
  fs.mkdirSync(dir, { recursive: true });
}
|
||||
|
||||
// Full paths of *.json files directly inside `dir`; [] on any read error
// (e.g. the directory does not exist on a fresh checkout).
function listJsonFiles(dir) {
  try {
    return fs
      .readdirSync(dir, { withFileTypes: true })
      .filter((ent) => ent.isFile() && String(ent.name || "").endsWith(".json"))
      .map((ent) => path.join(dir, ent.name));
  } catch {
    return [];
  }
}
|
||||
|
||||
// Parse a JSON file, returning null on read or parse failure.
function readJson(file) {
  try {
    const text = fs.readFileSync(file, "utf8");
    return JSON.parse(text);
  } catch {
    return null;
  }
}
|
||||
|
||||
/**
 * Build viz/data/index.json: a flat, sorted list of every live (non-removed)
 * item across all data/db/*.json category databases, annotated with its
 * store/category labels and the repo-relative db file it came from.
 * Purely filesystem-based; no git involved.
 */
function main() {
  const repoRoot = path.resolve(__dirname, "..");
  const dbDir = path.join(repoRoot, "data", "db");
  const outDir = path.join(repoRoot, "viz", "data");
  const outFile = path.join(outDir, "index.json");

  ensureDir(outDir);

  const items = [];

  for (const file of listJsonFiles(dbDir)) {
    const obj = readJson(file);
    if (!obj) continue;

    // Category-level metadata, defaulted to "" so the output shape is stable.
    const store = String(obj.store || "");
    const storeLabel = String(obj.storeLabel || store || "");
    const category = String(obj.category || "");
    const categoryLabel = String(obj.categoryLabel || "");
    const source = String(obj.source || "");
    const updatedAt = String(obj.updatedAt || "");

    const dbFile = path
      .relative(repoRoot, file)
      .replace(/\\/g, "/"); // for GitHub raw paths on Windows too

    const arr = Array.isArray(obj.items) ? obj.items : [];
    for (const it of arr) {
      // Only live items make it into the index.
      if (!it || it.removed) continue;

      const sku = String(it.sku || "").trim();
      const name = String(it.name || "").trim();
      const price = String(it.price || "").trim();
      const url = String(it.url || "").trim();
      // Thumbnail field name varies by scraper; take the first available.
      const img = String(it.img || it.image || it.thumb || "").trim();

      items.push({
        sku,
        name,
        price,
        url,
        img,
        store,
        storeLabel,
        category,
        categoryLabel,
        source,
        updatedAt,
        dbFile,
      });
    }
  }

  // Stable ordering keeps the generated file diff-friendly across runs.
  items.sort((a, b) => {
    const ak = `${a.sku}|${a.storeLabel}|${a.name}|${a.url}`;
    const bk = `${b.sku}|${b.storeLabel}|${b.name}|${b.url}`;
    return ak.localeCompare(bk);
  });

  const outObj = {
    generatedAt: new Date().toISOString(),
    count: items.length,
    items,
  };

  fs.writeFileSync(outFile, JSON.stringify(outObj, null, 2) + "\n", "utf8");
  process.stdout.write(`Wrote ${path.relative(repoRoot, outFile)} (${items.length} rows)\n`);
}
|
||||
|
||||
module.exports = { main };
|
||||
|
||||
if (require.main === module) {
|
||||
main();
|
||||
}
|
||||
393
tools/build_viz_recent.js
Executable file
393
tools/build_viz_recent.js
Executable file
|
|
@ -0,0 +1,393 @@
|
|||
#!/usr/bin/env node
|
||||
"use strict";
|
||||
|
||||
const { execFileSync } = require("child_process");
|
||||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
|
||||
// Invoke git with `args`; returns stdout with trailing whitespace removed.
// Throws when git exits non-zero.
function runGit(args) {
  const output = execFileSync("git", args, { encoding: "utf8" });
  return output.trimEnd();
}
|
||||
|
||||
// Read `filePath` as JSON from a given commit via `git show sha:path`.
// Returns null when the object is missing at that commit or the content
// is not valid JSON.
function gitShowJson(sha, filePath) {
  try {
    const text = execFileSync("git", ["show", `${sha}:${filePath}`], {
      encoding: "utf8",
      stdio: ["ignore", "pipe", "pipe"], // silence git fatal spam
    });
    return JSON.parse(text);
  } catch {
    return null;
  }
}
|
||||
|
||||
|
||||
// All tracked file paths under dirRel at the given commit; [] on failure.
function gitListTreeFiles(sha, dirRel) {
  try {
    const listing = runGit(["ls-tree", "-r", "--name-only", sha, dirRel]);
    return listing.split(/\r?\n/).map((line) => line.trim()).filter(Boolean);
  } catch {
    return [];
  }
}
|
||||
|
||||
// Parse a JSON file from disk; null on read or parse failure.
function readJsonFileOrNull(filePath) {
  try {
    const text = fs.readFileSync(filePath, "utf8");
    return JSON.parse(text);
  } catch {
    return null;
  }
}
|
||||
|
||||
// Extract a standalone 6-digit Alberta CSPC / SKU code, or "" when absent.
// (Duplicated from src/utils/sku.js so this tool stays self-contained.)
function normalizeCspc(v) {
  const hit = String(v ?? "").match(/\b(\d{6})\b/);
  return hit === null ? "" : hit[1];
}
|
||||
|
||||
// Trim a price string for comparison; null/undefined become "".
function normPriceStr(p) {
  const raw = String(p ?? "");
  return raw.trim();
}
|
||||
|
||||
/**
 * Parse a display price into a number for diffing.
 *
 * Fixed: digit-free inputs ("", "n/a") previously became 0 because
 * Number("") is 0, which made an empty old/new price look like a real
 * price movement; they now return null so diffDb falls back to the
 * generic "price_change" kind.
 *
 * @param {*} v - price string; non-digit characters are stripped.
 * @returns {number|null} numeric value, or null when unparseable.
 */
function priceToNumber(v) {
  const digits = String(v ?? "").replace(/[^0-9.]/g, "");
  if (digits === "") return null;
  const n = Number(digits);
  return Number.isFinite(n) ? n : null;
}
|
||||
|
||||
// Leading "YYYY-MM-DD" of an ISO timestamp, or "" when absent.
function dateOnly(iso) {
  const hit = /^(\d{4}-\d{2}-\d{2})/.exec(String(iso ?? ""));
  return hit === null ? "" : hit[1];
}
|
||||
|
||||
// Index a db object's items by normalized 6-digit SKU.
// Entries without a valid SKU are dropped; removed items are skipped
// unless includeRemoved is set. Later duplicates overwrite earlier ones.
function mapBySku(obj, { includeRemoved } = { includeRemoved: false }) {
  const bySku = new Map();
  const items = Array.isArray(obj?.items) ? obj.items : [];
  for (const it of items) {
    if (!it) continue;
    const sku = normalizeCspc(it.sku);
    if (!sku) continue;
    const removed = Boolean(it.removed);
    if (removed && !includeRemoved) continue;
    bySku.set(sku, {
      sku,
      name: String(it.name || ""),
      price: String(it.price || ""),
      url: String(it.url || ""),
      removed,
    });
  }
  return bySku;
}
|
||||
|
||||
/**
 * Diff two category db snapshots by SKU.
 *
 * Classification:
 * - new:      live in next, never seen (even as removed) in prev
 * - restored: live in next, present in prev but flagged removed
 * - removed:  live in prev, absent or flagged removed in next
 * - price:    live in both with a different (trimmed) price string;
 *             kind is price_down/price_up when both parse numerically,
 *             generic price_change otherwise
 *
 * @param {?object} prevObj - previous db object ({ items: [...] }) or null.
 * @param {?object} nextObj - next db object or null.
 * @returns {{newItems: Array, restoredItems: Array, removedItems: Array,
 *           priceChanges: Array}}
 */
function diffDb(prevObj, nextObj) {
  // "All" maps include removed entries so we can distinguish new vs restored.
  const prevAll = mapBySku(prevObj, { includeRemoved: true });
  const nextAll = mapBySku(nextObj, { includeRemoved: true });

  const prevLive = mapBySku(prevObj, { includeRemoved: false });
  const nextLive = mapBySku(nextObj, { includeRemoved: false });

  const newItems = [];
  const restoredItems = [];
  const removedItems = [];
  const priceChanges = [];

  // NEW + RESTORED
  for (const [sku, now] of nextLive.entries()) {
    const had = prevAll.get(sku);
    if (!had) {
      newItems.push({ ...now });
      continue;
    }
    if (had.removed) {
      restoredItems.push({ ...now });
      continue;
    }
  }

  // REMOVED
  for (const [sku, was] of prevLive.entries()) {
    const nxt = nextAll.get(sku);
    if (!nxt || nxt.removed) {
      removedItems.push({ ...was });
    }
  }

  // PRICE CHANGES
  for (const [sku, now] of nextLive.entries()) {
    const was = prevLive.get(sku);
    if (!was) continue;

    const a = normPriceStr(was.price);
    const b = normPriceStr(now.price);
    if (a === b) continue;

    const aN = priceToNumber(a);
    const bN = priceToNumber(b);

    // Direction only when both sides parse; otherwise a generic change.
    let kind = "price_change";
    if (aN !== null && bN !== null) {
      if (bN < aN) kind = "price_down";
      else if (bN > aN) kind = "price_up";
      else kind = "price_change";
    }

    priceChanges.push({
      kind,
      sku,
      name: now.name || was.name || "",
      oldPrice: a,
      newPrice: b,
      url: now.url || was.url || "",
    });
  }

  return { newItems, restoredItems, removedItems, priceChanges };
}
|
||||
|
||||
// Current HEAD commit sha, or "" when not in a repo / no commits yet.
function getHeadShaOrEmpty() {
  let sha = "";
  try {
    sha = runGit(["rev-parse", "--verify", "HEAD"]);
  } catch {
    // Not a repository, or an unborn branch — callers handle "".
  }
  return sha;
}
|
||||
|
||||
// First parent of `sha` (via `rev-list --parents -n 1`), or "" for root
// commits and on any git failure.
function firstParentSha(sha) {
  try {
    const line = runGit(["rev-list", "--parents", "-n", "1", sha]);
    // line is "<sha> <parent1> [<parent2> ...]"
    const [, parent = ""] = line.split(/\s+/).filter(Boolean);
    return parent;
  } catch {
    return "";
  }
}
|
||||
|
||||
// Names of data/db files that differ between two points in history.
// `toSha` may be the literal "WORKTREE" to compare against the working
// tree. An empty `fromSha` means "everything present at toSha".
function listChangedDbFiles(fromSha, toSha) {
  const splitLines = (txt) => txt.split(/\r?\n/).map((s) => s.trim()).filter(Boolean);

  // No baseline, concrete target commit: everything tracked there counts.
  if (!fromSha && toSha && toSha !== "WORKTREE") {
    return gitListTreeFiles(toSha, "data/db");
  }

  // No baseline, working tree target: just list what's on disk.
  if (!fromSha && toSha === "WORKTREE") {
    try {
      return fs
        .readdirSync(path.join(process.cwd(), "data", "db"), { withFileTypes: true })
        .filter((e) => e.isFile() && e.name.endsWith(".json"))
        .map((e) => path.posix.join("data/db", e.name));
    } catch {
      return [];
    }
  }

  try {
    const diffArgs =
      toSha === "WORKTREE"
        ? ["diff", "--name-only", fromSha, "--", "data/db"]
        : ["diff", "--name-only", fromSha, toSha, "--", "data/db"];
    return splitLines(runGit(diffArgs));
  } catch {
    return [];
  }
}
|
||||
|
||||
// Commits since `sinceIso` touching data/db, returned oldest -> newest as
// { sha, ts, date } records. [] when git fails.
function logDbCommitsSince(sinceIso) {
  try {
    const raw = runGit(["log", `--since=${sinceIso}`, "--format=%H %cI", "--", "data/db"]);
    const records = [];
    for (const line of raw.split(/\r?\n/)) {
      const m = /^([0-9a-f]{7,40})\s+(.+)$/i.exec(line.trim());
      if (!m) continue;
      records.push({ sha: m[1], ts: m[2], date: dateOnly(m[2]) });
    }
    // git log emits newest first; callers want chronological order.
    records.reverse();
    return records;
  } catch {
    return [];
  }
}
|
||||
|
||||
function main() {
|
||||
const repoRoot = process.cwd();
|
||||
const outDir = path.join(repoRoot, "viz", "data");
|
||||
const outFile = path.join(outDir, "recent.json");
|
||||
fs.mkdirSync(outDir, { recursive: true });
|
||||
|
||||
const windowDays = Math.max(1, Number(process.env.RECENT_DAYS || 3));
|
||||
const maxItems = Math.max(1, Number(process.env.RECENT_MAX_ITEMS || 500));
|
||||
|
||||
const now = new Date();
|
||||
const since = new Date(now.getTime() - windowDays * 24 * 3600 * 1000);
|
||||
const sinceIso = since.toISOString();
|
||||
|
||||
const headSha = getHeadShaOrEmpty();
|
||||
const items = [];
|
||||
|
||||
// Collect committed runs in the last N days (touching data/db)
|
||||
const commits = headSha ? logDbCommitsSince(sinceIso) : [];
|
||||
|
||||
// Build diff pairs:
|
||||
// parent(of first in window) -> first
|
||||
// then each consecutive commit -> next
|
||||
// then HEAD -> WORKTREE (so this run shows up before the commit exists)
|
||||
const pairs = [];
|
||||
|
||||
if (commits.length) {
|
||||
const first = commits[0];
|
||||
const parent = firstParentSha(first.sha);
|
||||
pairs.push({
|
||||
fromSha: parent || "",
|
||||
toSha: first.sha,
|
||||
ts: first.ts,
|
||||
date: first.date,
|
||||
});
|
||||
|
||||
for (let i = 1; i < commits.length; i++) {
|
||||
pairs.push({
|
||||
fromSha: commits[i - 1].sha,
|
||||
toSha: commits[i].sha,
|
||||
ts: commits[i].ts,
|
||||
date: commits[i].date,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (headSha) {
|
||||
pairs.push({
|
||||
fromSha: headSha,
|
||||
toSha: "WORKTREE",
|
||||
ts: now.toISOString(),
|
||||
date: dateOnly(now.toISOString()),
|
||||
});
|
||||
}
|
||||
|
||||
for (const p of pairs) {
|
||||
const fromSha = p.fromSha;
|
||||
const toSha = p.toSha;
|
||||
const ts = p.ts;
|
||||
const d = p.date;
|
||||
|
||||
const files = listChangedDbFiles(fromSha, toSha);
|
||||
if (!files.length) continue;
|
||||
|
||||
for (const file of files) {
|
||||
let prevObj = null;
|
||||
let nextObj = null;
|
||||
|
||||
if (toSha === "WORKTREE") {
|
||||
prevObj = fromSha ? gitShowJson(fromSha, file) : null;
|
||||
nextObj = readJsonFileOrNull(path.join(repoRoot, file));
|
||||
} else {
|
||||
prevObj = fromSha ? gitShowJson(fromSha, file) : null;
|
||||
nextObj = gitShowJson(toSha, file);
|
||||
}
|
||||
|
||||
if (!prevObj && !nextObj) continue;
|
||||
|
||||
const storeLabel = String(
|
||||
nextObj?.storeLabel || nextObj?.store || prevObj?.storeLabel || prevObj?.store || ""
|
||||
);
|
||||
const categoryLabel = String(
|
||||
nextObj?.categoryLabel || nextObj?.category || prevObj?.categoryLabel || prevObj?.category || ""
|
||||
);
|
||||
|
||||
const { newItems, restoredItems, removedItems, priceChanges } = diffDb(prevObj, nextObj);
|
||||
|
||||
for (const it of newItems) {
|
||||
items.push({
|
||||
ts,
|
||||
date: d,
|
||||
fromSha: fromSha || "",
|
||||
toSha,
|
||||
kind: "new",
|
||||
sku: it.sku,
|
||||
name: it.name,
|
||||
storeLabel,
|
||||
categoryLabel,
|
||||
price: normPriceStr(it.price),
|
||||
url: it.url,
|
||||
dbFile: file,
|
||||
});
|
||||
}
|
||||
|
||||
for (const it of restoredItems) {
|
||||
items.push({
|
||||
ts,
|
||||
date: d,
|
||||
fromSha: fromSha || "",
|
||||
toSha,
|
||||
kind: "restored",
|
||||
sku: it.sku,
|
||||
name: it.name,
|
||||
storeLabel,
|
||||
categoryLabel,
|
||||
price: normPriceStr(it.price),
|
||||
url: it.url,
|
||||
dbFile: file,
|
||||
});
|
||||
}
|
||||
|
||||
for (const it of removedItems) {
|
||||
items.push({
|
||||
ts,
|
||||
date: d,
|
||||
fromSha: fromSha || "",
|
||||
toSha,
|
||||
kind: "removed",
|
||||
sku: it.sku,
|
||||
name: it.name,
|
||||
storeLabel,
|
||||
categoryLabel,
|
||||
price: normPriceStr(it.price),
|
||||
url: it.url,
|
||||
dbFile: file,
|
||||
});
|
||||
}
|
||||
|
||||
for (const u of priceChanges) {
|
||||
items.push({
|
||||
ts,
|
||||
date: d,
|
||||
fromSha: fromSha || "",
|
||||
toSha,
|
||||
kind: u.kind,
|
||||
sku: u.sku,
|
||||
name: u.name,
|
||||
storeLabel,
|
||||
categoryLabel,
|
||||
oldPrice: normPriceStr(u.oldPrice),
|
||||
newPrice: normPriceStr(u.newPrice),
|
||||
url: u.url,
|
||||
dbFile: file,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Newest first
|
||||
items.sort((a, b) => String(b.ts).localeCompare(String(a.ts)));
|
||||
|
||||
// Keep file size under control (but still allows multiple runs/day over the window)
|
||||
const trimmed = items.slice(0, maxItems);
|
||||
|
||||
const payload = {
|
||||
generatedAt: now.toISOString(),
|
||||
windowDays,
|
||||
since: sinceIso,
|
||||
headSha,
|
||||
count: trimmed.length,
|
||||
items: trimmed,
|
||||
};
|
||||
|
||||
fs.writeFileSync(outFile, JSON.stringify(payload, null, 2) + "\n", "utf8");
|
||||
process.stdout.write(`Wrote ${outFile} (${trimmed.length} items)\n`);
|
||||
}
|
||||
|
||||
main();
|
||||
318
tools/diff_report.js
Executable file
318
tools/diff_report.js
Executable file
|
|
@ -0,0 +1,318 @@
|
|||
#!/usr/bin/env node
|
||||
"use strict";
|
||||
|
||||
const { execFileSync } = require("child_process");
|
||||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
|
||||
const { C, color } = require("../src/utils/ansi");
|
||||
const { padLeft, padRight } = require("../src/utils/string");
|
||||
const { normalizeCspc } = require("../src/utils/sku");
|
||||
const { priceToNumber, salePctOff, normPrice } = require("../src/utils/price");
|
||||
const { isoTimestampFileSafe } = require("../src/utils/time");
|
||||
|
||||
// Run a git subcommand and return its stdout with trailing whitespace removed.
function runGit(args) {
  const stdout = execFileSync("git", args, { encoding: "utf8" });
  return stdout.trimEnd();
}
|
||||
|
||||
// Return the contents of `filePath` at revision `sha`, or null when the
// path does not exist at that revision (or git itself fails).
function gitShowText(sha, filePath) {
  const spec = `${sha}:${filePath}`;
  let text = null;
  try {
    text = execFileSync("git", ["show", spec], { encoding: "utf8" });
  } catch {
    // Missing path at that revision is an expected outcome, not an error.
  }
  return text;
}
|
||||
|
||||
// List the files under `dbDirRel` at revision `sha` as a Set of repo-relative paths.
function gitListDbFiles(sha, dbDirRel) {
  const listing = runGit(["ls-tree", "-r", "--name-only", sha, dbDirRel]);
  const files = listing
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter(Boolean);
  return new Set(files);
}
|
||||
|
||||
// Parse a JSON string; null/undefined input or malformed JSON yields null.
function parseJsonOrNull(txt) {
  if (txt == null) {
    return null;
  }
  let parsed = null;
  try {
    parsed = JSON.parse(txt);
  } catch {
    // Malformed JSON -> treat as absent.
  }
  return parsed;
}
|
||||
|
||||
// Build a Map keyed by item URL from a db file object. Entries without a
// string http(s) URL are skipped; later duplicates overwrite earlier ones.
function mapItemsByUrl(obj) {
  const source = Array.isArray(obj?.items) ? obj.items : [];
  const byUrl = new Map();
  for (const entry of source) {
    const url = entry?.url;
    if (typeof url !== "string" || !url.startsWith("http")) continue;
    byUrl.set(url, {
      name: String(entry.name || ""),
      price: String(entry.price || ""),
      sku: String(entry.sku || ""),
      url,
      removed: Boolean(entry.removed),
    });
  }
  return byUrl;
}
|
||||
|
||||
// Diff two db file objects (previous vs next snapshot), keyed by listing URL.
// Returns { newItems, restoredItems, removedItems, updatedItems } where an
// "update" is strictly a price change between two live listings.
function buildDiffForDb(prevObj, nextObj) {
  const before = mapItemsByUrl(prevObj);
  const after = mapItemsByUrl(nextObj);

  const newItems = [];
  const restoredItems = [];
  const removedItems = [];
  const updatedItems = [];

  for (const url of new Set([...before.keys(), ...after.keys()])) {
    const a = before.get(url);
    const b = after.get(url);
    const liveBefore = Boolean(a) && !a.removed;
    const liveAfter = Boolean(b) && !b.removed;

    if (!a && liveAfter) {
      // Never seen before -> brand new listing.
      newItems.push({ ...b });
    } else if (a && a.removed && liveAfter) {
      // Previously delisted, now live again.
      restoredItems.push({ ...b });
    } else if (liveBefore && !liveAfter) {
      // Was live; now gone or flagged removed.
      removedItems.push({ ...a });
    } else if (liveBefore && liveAfter) {
      // Live on both sides: report price movement only.
      if (normPrice(a.price) !== normPrice(b.price)) {
        updatedItems.push({
          name: b.name || a.name || "",
          sku: normalizeCspc(b.sku || a.sku || ""),
          oldPrice: a.price || "",
          newPrice: b.price || "",
          url,
        });
      }
    }
  }

  return { newItems, restoredItems, removedItems, updatedItems };
}
|
||||
|
||||
// Parse CLI arguments: two positional SHAs, value options --db-dir/--out,
// known flags --color/--no-color; anything else lands in `flags` verbatim.
function parseArgs(argv) {
  const flags = new Set();
  const kv = new Map();
  const positional = [];

  let i = 0;
  while (i < argv.length) {
    const arg = argv[i];
    i += 1;
    if (!arg.startsWith("-")) {
      positional.push(arg);
    } else if (arg === "--no-color") {
      flags.add("no-color");
    } else if (arg === "--color") {
      flags.add("color");
    } else if ((arg === "--db-dir" || arg === "--out") && argv[i] && !argv[i].startsWith("-")) {
      // Value option: consume the following token as its value.
      kv.set(arg, argv[i]);
      i += 1;
    } else {
      flags.add(arg);
    }
  }

  return {
    fromSha: positional[0] || "",
    toSha: positional[1] || "",
    dbDir: kv.get("--db-dir") || "data/db",
    outFile: kv.get("--out") || "",
    flags,
  };
}
|
||||
|
||||
// Render the aggregated diff as a human-readable text report.
// `colorize` toggles ANSI escapes (via utils/ansi `color`); pass false when
// writing to a file. NOTE: the per-section `.sort(...)` calls below sort the
// diffReport arrays in place.
function renderDiffReport(diffReport, { fromSha, toSha, colorize }) {
  // Conditionally apply an ANSI color code to a string.
  const paint = (s, code) => color(s, code, colorize);

  let out = "";
  // Append one line (with trailing newline) to the report buffer.
  const ln = (s = "") => {
    out += String(s) + "\n";
  };

  ln(paint("========== DIFF REPORT ==========", C.bold));
  ln(`${paint("From", C.bold)} ${fromSha} ${paint("to", C.bold)} ${toSha}`);
  ln(
    `${paint("Totals", C.bold)} | Categories=${diffReport.categories.length} | New=${diffReport.totals.newCount} | Restored=${diffReport.totals.restoredCount} | Removed=${diffReport.totals.removedCount} | PriceChanges=${diffReport.totals.updatedCount}`
  );
  ln("");

  const rows = diffReport.categories;
  // Category-column width: longest label, clamped to the range [12, 56].
  const catW = Math.min(56, Math.max(...rows.map((r) => r.catLabel.length), 12));

  ln(paint("Per-category summary:", C.bold));
  ln(`${padRight("Store | Category", catW)} ${padLeft("New", 4)} ${padLeft("Res", 4)} ${padLeft("Rem", 4)} ${padLeft("Upd", 4)}`);
  ln(`${"-".repeat(catW)} ---- ---- ---- ----`);
  for (const r of rows) {
    ln(`${padRight(r.catLabel, catW)} ${padLeft(r.newCount, 4)} ${padLeft(r.restoredCount, 4)} ${padLeft(r.removedCount, 4)} ${padLeft(r.updatedCount, 4)}`);
  }
  ln("");

  // Shared label width across all four item sections so their rows line up.
  const labelW = Math.max(16, ...diffReport.newItems.map((x) => x.catLabel.length), ...diffReport.restoredItems.map((x) => x.catLabel.length), ...diffReport.removedItems.map((x) => x.catLabel.length), ...diffReport.updatedItems.map((x) => x.catLabel.length));

  // Gray inline " <sku>" suffix, or empty when normalization yields nothing.
  const skuInline = (sku) => {
    const s = normalizeCspc(sku);
    return s ? paint(` ${s}`, C.gray) : "";
  };

  if (diffReport.newItems.length) {
    ln(paint(`NEW (${diffReport.newItems.length})`, C.bold + C.green));
    for (const it of diffReport.newItems.sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name))) {
      const price = it.price ? paint(it.price, C.cyan) : paint("(no price)", C.gray);
      ln(`${paint("+", C.green)} ${padRight(it.catLabel, labelW)} | ${paint(it.name, C.bold)}${skuInline(it.sku)} ${price}`);
      ln(` ${paint(it.url, C.dim)}`);
    }
    ln("");
  }

  if (diffReport.restoredItems.length) {
    ln(paint(`RESTORED (${diffReport.restoredItems.length})`, C.bold + C.green));
    for (const it of diffReport.restoredItems.sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name))) {
      const price = it.price ? paint(it.price, C.cyan) : paint("(no price)", C.gray);
      ln(`${paint("R", C.green)} ${padRight(it.catLabel, labelW)} | ${paint(it.name, C.bold)}${skuInline(it.sku)} ${price}`);
      ln(` ${paint(it.url, C.dim)}`);
    }
    ln("");
  }

  if (diffReport.removedItems.length) {
    ln(paint(`REMOVED (${diffReport.removedItems.length})`, C.bold + C.yellow));
    for (const it of diffReport.removedItems.sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name))) {
      const price = it.price ? paint(it.price, C.cyan) : paint("(no price)", C.gray);
      ln(`${paint("-", C.yellow)} ${padRight(it.catLabel, labelW)} | ${paint(it.name, C.bold)}${skuInline(it.sku)} ${price}`);
      ln(` ${paint(it.url, C.dim)}`);
    }
    ln("");
  }

  if (diffReport.updatedItems.length) {
    ln(paint(`PRICE CHANGES (${diffReport.updatedItems.length})`, C.bold + C.cyan));

    for (const u of diffReport.updatedItems.sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name))) {
      const oldRaw = u.oldPrice || "";
      const newRaw = u.newPrice || "";

      const oldN = priceToNumber(oldRaw);
      const newN = priceToNumber(newRaw);

      const oldP = oldRaw ? paint(oldRaw, C.yellow) : paint("(no price)", C.gray);

      let newP = newRaw ? newRaw : "(no price)";
      let offTag = "";

      // Color the new price by direction: red = increase, green = decrease
      // (with a "[N% Off]" tag when salePctOff can compute one), cyan otherwise.
      if (Number.isFinite(oldN) && Number.isFinite(newN)) {
        if (newN > oldN) newP = paint(newP, C.red);
        else if (newN < oldN) {
          newP = paint(newP, C.green);
          const pct = salePctOff(oldRaw, newRaw);
          if (pct !== null) offTag = " " + paint(`[${pct}% Off]`, C.green);
        } else newP = paint(newP, C.cyan);
      } else newP = paint(newP, C.cyan);

      ln(
        `${paint("~", C.cyan)} ${padRight(u.catLabel, labelW)} | ${paint(u.name, C.bold)}${skuInline(u.sku)} ${oldP} ${paint("->", C.gray)} ${newP}${offTag}`
      );
      ln(` ${paint(u.url, C.dim)}`);
    }

    ln("");
  }

  ln(paint("======== END DIFF REPORT ========", C.bold));

  return out;
}
|
||||
|
||||
// CLI entry point: diff the db directory between two git revisions, print a
// (possibly colorized) report to stdout, and optionally write a plain-text
// copy to --out. Exits with code 2 on usage errors.
async function main() {
  const { fromSha, toSha, dbDir, outFile, flags } = parseArgs(process.argv.slice(2));

  if (!fromSha || !toSha) {
    console.error(`Usage: ${path.basename(process.argv[1])} <fromSha> <toSha> [--db-dir data/db] [--out reports/<file>.txt] [--no-color]`);
    process.exitCode = 2;
    return;
  }

  // If user provides short SHAs, git accepts them.
  // Color only when attached to a TTY, unless explicitly disabled.
  const colorize = flags.has("no-color") ? false : Boolean(process.stdout && process.stdout.isTTY);

  // Union of db files present at either revision, so adds/removals are seen.
  const filesA = gitListDbFiles(fromSha, dbDir);
  const filesB = gitListDbFiles(toSha, dbDir);
  const files = new Set([...filesA, ...filesB]);

  const diffReport = {
    categories: [],
    totals: { newCount: 0, updatedCount: 0, removedCount: 0, restoredCount: 0 },
    newItems: [],
    restoredItems: [],
    removedItems: [],
    updatedItems: [],
  };

  for (const file of [...files].sort()) {
    const prevObj = parseJsonOrNull(gitShowText(fromSha, file));
    const nextObj = parseJsonOrNull(gitShowText(toSha, file));

    // Prefer the newer snapshot's labels, then older, then fall back to
    // "?" / the file name.
    const storeLabel = String(nextObj?.storeLabel || prevObj?.storeLabel || nextObj?.store || prevObj?.store || "?");
    const catLabel = String(nextObj?.categoryLabel || prevObj?.categoryLabel || nextObj?.category || prevObj?.category || path.basename(file));
    const catLabelFull = `${storeLabel} | ${catLabel}`;

    const { newItems, restoredItems, removedItems, updatedItems } = buildDiffForDb(prevObj, nextObj);

    diffReport.categories.push({
      catLabel: catLabelFull,
      newCount: newItems.length,
      restoredCount: restoredItems.length,
      removedCount: removedItems.length,
      updatedCount: updatedItems.length,
    });

    diffReport.totals.newCount += newItems.length;
    diffReport.totals.restoredCount += restoredItems.length;
    diffReport.totals.removedCount += removedItems.length;
    diffReport.totals.updatedCount += updatedItems.length;

    // Flatten per-file results into the report, tagging each with its category.
    for (const it of newItems) diffReport.newItems.push({ catLabel: catLabelFull, ...it });
    for (const it of restoredItems) diffReport.restoredItems.push({ catLabel: catLabelFull, ...it });
    for (const it of removedItems) diffReport.removedItems.push({ catLabel: catLabelFull, ...it });
    for (const u of updatedItems) diffReport.updatedItems.push({ catLabel: catLabelFull, ...u });
  }

  const reportText = renderDiffReport(diffReport, { fromSha, toSha, colorize });
  process.stdout.write(reportText);

  const outPath = outFile
    ? (path.isAbsolute(outFile) ? outFile : path.join(process.cwd(), outFile))
    : "";

  if (outPath) {
    fs.mkdirSync(path.dirname(outPath), { recursive: true });
    // The file copy is always rendered without ANSI color codes.
    fs.writeFileSync(outPath, renderDiffReport(diffReport, { fromSha, toSha, colorize: false }), "utf8");
  }
}
|
||||
|
||||
// Run the CLI; report failures on stderr and signal via the exit code.
// Setting process.exitCode (rather than calling process.exit) lets pending
// stdout writes flush before the process ends.
main().catch((e) => {
  const msg = e && e.stack ? e.stack : String(e);
  console.error(msg);
  process.exitCode = 1;
});
|
||||
10
tracker.js
Executable file
10
tracker.js
Executable file
|
|
@ -0,0 +1,10 @@
|
|||
#!/usr/bin/env node
"use strict";

// Thin CLI entry point: delegates to src/main and converts any rejection
// into a non-zero exit code without masking the stack trace.
const { main } = require("./src/main");

main().catch((e) => {
  // Prefer the stack when available; fall back to a string rendering.
  const msg = e && e.stack ? e.stack : String(e);
  console.error(msg);
  process.exitCode = 1;
});
|
||||
841
viz/app.js
Normal file
841
viz/app.js
Normal file
|
|
@ -0,0 +1,841 @@
|
|||
"use strict";
|
||||
|
||||
/**
|
||||
* Hash routes:
|
||||
* #/ search
|
||||
* #/item/<sku> detail
|
||||
*/
|
||||
|
||||
const $app = document.getElementById("app");
|
||||
|
||||
// HTML-escape a value for safe interpolation into innerHTML templates.
// FIX: the entity map previously mapped each character to itself (the
// entities had been decoded at some point), making esc() a no-op and leaving
// every template open to markup injection. Restored the proper HTML entities.
function esc(s) {
  return String(s ?? "").replace(
    /[&<>"']/g,
    (c) =>
      ({
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        '"': "&quot;",
        "'": "&#39;",
      }[c])
  );
}
|
||||
|
||||
// Extract a numeric price from a string like "$12.34"; null when the
// digits-and-dots residue does not parse as a finite number.
function parsePriceToNumber(v) {
  const digitsOnly = String(v ?? "").replace(/[^0-9.]/g, "");
  const num = Number(digitsOnly);
  if (!Number.isFinite(num)) {
    return null;
  }
  return num;
}
|
||||
|
||||
// Return the leading YYYY-MM-DD portion of an ISO timestamp, or "".
function dateOnly(iso) {
  const match = /^(\d{4}-\d{2}-\d{2})/.exec(String(iso ?? ""));
  if (!match) return "";
  return match[1];
}
|
||||
|
||||
// Make an ISO timestamp friendlier by replacing the first "T" with a space.
function prettyTs(iso) {
  const text = String(iso || "");
  return text ? text.replace("T", " ") : "";
}
|
||||
|
||||
// Build a placeholder SKU ("unknown:<store-slug>:<url-hash>") for rows that
// have no real SKU. The URL part is a truncated, padding-stripped base64 of
// the UTF-8 URL bytes, or "no-url" when the row has no URL.
function makeUnknownSku(r) {
  const storeSlug = String(r?.storeLabel || r?.store || "store")
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-");
  const url = String(r?.url || "");
  let tail = "no-url";
  if (url) {
    tail = btoa(unescape(encodeURIComponent(url))).replace(/=+$/g, "").slice(0, 16);
  }
  return `unknown:${storeSlug}:${tail}`;
}
|
||||
// 32-bit FNV-1a hash over UTF-16 code units, rendered as 8 lowercase hex chars.
function fnv1a32(str) {
  let hash = 0x811c9dc5; // FNV-1a offset basis
  let i = 0;
  while (i < str.length) {
    // xor the next code unit in, then multiply by the FNV prime (mod 2^32).
    hash = Math.imul(hash ^ str.charCodeAt(i), 0x01000193);
    i += 1;
  }
  // Force unsigned before hex rendering.
  return (hash >>> 0).toString(16).padStart(8, "0");
}
|
||||
|
||||
// Deterministic synthetic SKU for rows without a real one: "u:" plus the
// FNV-1a hash of "store|url", so the same listing always maps to one key.
function makeSyntheticSku(r) {
  const storeName = String(r?.storeLabel || r?.store || "store");
  const url = String(r?.url || "");
  return `u:${fnv1a32(`${storeName}|${url}`)}`; // stable per store+url
}
|
||||
|
||||
// Aggregation key for a listing row: the real SKU when present (trimmed),
// otherwise a synthetic store+url-derived key.
function keySkuForRow(r) {
  const trimmed = String(r?.sku || "").trim();
  if (trimmed) {
    return trimmed;
  }
  return makeSyntheticSku(r);
}
|
||||
|
||||
// User-facing SKU text: synthetic keys ("u:...") display as "unknown".
function displaySku(key) {
  const s = String(key || "");
  if (s.startsWith("u:")) {
    return "unknown";
  }
  return s;
}
|
||||
|
||||
|
||||
|
||||
// Normalize for search: lowercase, punctuation -> space, collapse spaces
|
||||
// Normalize text for search: lowercase, non-alphanumerics become single
// spaces, whitespace collapsed, ends trimmed.
function normSearchText(s) {
  const lowered = String(s ?? "").toLowerCase();
  return lowered
    .replace(/[^a-z0-9]+/g, " ")
    .replace(/\s+/g, " ")
    .trim();
}
|
||||
|
||||
// Split a raw query into normalized word tokens; [] for an empty query.
function tokenizeQuery(q) {
  const normalized = normSearchText(q);
  if (!normalized) {
    return [];
  }
  return normalized.split(" ").filter(Boolean);
}
|
||||
|
||||
// Infer { owner, repo } from a *.github.io hostname plus the first path
// segment; on any other host, fall back to a hard-coded pair.
// NOTE(review): HEAD says this mirrors samsonjs/spirit-tracker — confirm the
// fallback owner "brennanwilkes" is intentional.
function inferGithubOwnerRepo() {
  const host = location.hostname || "";
  const ghPages = host.match(/^([a-z0-9-]+)\.github\.io$/i);
  if (!ghPages) {
    return { owner: "brennanwilkes", repo: "spirit-tracker" };
  }
  const owner = ghPages[1];
  const segments = (location.pathname || "/").split("/").filter(Boolean);
  const repo = segments.length >= 1 ? segments[0] : `${owner}.github.io`;
  return { owner, repo };
}
|
||||
|
||||
// GET a URL (bypassing the HTTP cache) and parse the body as JSON.
// Throws on any non-2xx status.
async function fetchJson(url) {
  const res = await fetch(url, { cache: "no-store" });
  if (!res.ok) {
    throw new Error(`HTTP ${res.status} for ${url}`);
  }
  return res.json();
}
|
||||
|
||||
// GET a URL (bypassing the HTTP cache) and return the body as text.
// Throws on any non-2xx status.
async function fetchText(url) {
  const res = await fetch(url, { cache: "no-store" });
  if (!res.ok) {
    throw new Error(`HTTP ${res.status} for ${url}`);
  }
  return res.text();
}
|
||||
|
||||
// Hash router: "#/item/<sku>" -> detail view, everything else -> search.
function route() {
  const hash = location.hash || "#/";
  const segments = hash.replace(/^#\/?/, "").split("/").filter(Boolean);
  if (segments[0] === "item" && segments[1]) {
    return renderItem(segments[1]);
  }
  return renderSearch();
}
|
||||
|
||||
/* ---------------- Search ---------------- */
|
||||
|
||||
let INDEX = null;
|
||||
let RECENT = null;
|
||||
|
||||
// persist search box value across navigation
const Q_LS_KEY = "stviz:v1:search:q";

// Read the saved search query; "" when storage is empty or unavailable.
function loadSavedQuery() {
  let saved = "";
  try {
    saved = localStorage.getItem(Q_LS_KEY) || "";
  } catch {
    // localStorage may be disabled/unavailable; treat as no saved query.
  }
  return saved;
}
|
||||
// Persist the search query; failures (quota, disabled storage) are ignored.
function saveQuery(v) {
  const value = String(v ?? "");
  try {
    localStorage.setItem(Q_LS_KEY, value);
  } catch {
    // Best-effort persistence only.
  }
}
|
||||
|
||||
// Fetch ./data/index.json once and memoize it in the module-level INDEX.
async function loadIndex() {
  if (!INDEX) {
    INDEX = await fetchJson("./data/index.json");
  }
  return INDEX;
}
|
||||
|
||||
// Fetch ./data/recent.json once (memoized in RECENT); on any failure the
// feed degrades to an empty payload instead of propagating the error.
async function loadRecent() {
  if (RECENT) {
    return RECENT;
  }
  try {
    RECENT = await fetchJson("./data/recent.json");
  } catch {
    RECENT = { count: 0, items: [] };
  }
  return RECENT;
}
|
||||
|
||||
// Normalize an image URL: trim it, and reject empty or data: URIs (returns "").
function normImg(s) {
  const trimmed = String(s || "").trim();
  if (!trimmed) return "";
  if (/^data:/i.test(trimmed)) return "";
  return trimmed;
}
|
||||
|
||||
// Build one row per SKU + combined searchable text across all listings of that SKU
|
||||
// Collapse raw listings into one row per SKU key, tracking:
//  - the first non-empty name (and an image matching that name when possible),
//  - the cheapest price seen and which store offered it,
//  - the set of store labels and a sample URL,
//  - a normalized searchText blob built from every field worth matching.
// Rows are sorted by name+sku before returning.
function aggregateBySku(listings) {
  const bySku = new Map();

  for (const r of listings) {

    const sku = keySkuForRow(r);

    const name = String(r?.name || "");
    const url = String(r?.url || "");
    const storeLabel = String(r?.storeLabel || r?.store || "");

    // First available of img/image/thumb, with data: URIs filtered out.
    const img = normImg(r?.img || r?.image || r?.thumb || "");

    const pNum = parsePriceToNumber(r?.price);
    const pStr = String(r?.price || "");

    let agg = bySku.get(sku);
    if (!agg) {
      // First listing for this SKU: seed the aggregate from this row.
      agg = {
        sku,
        name: name || "",
        img: "",
        cheapestPriceStr: pStr || "",
        cheapestPriceNum: pNum,
        cheapestStoreLabel: storeLabel || "",
        stores: new Set(),
        sampleUrl: url || "",
        _searchParts: [],
        searchText: "", // normalized blob

        _imgByName: new Map(), // name -> img
        _imgAny: "",
      };
      bySku.set(sku, agg);
    }

    if (storeLabel) agg.stores.add(storeLabel);
    if (!agg.sampleUrl && url) agg.sampleUrl = url;

    // Keep the first non-empty name (existing behavior), but make sure img matches that chosen name
    if (!agg.name && name) {
      agg.name = name;
      if (img) agg.img = img;
    } else if (agg.name && name === agg.name && img && !agg.img) {
      agg.img = img;
    }

    // Remember images for the later fallback pass (per-name, plus any-at-all).
    if (img) {
      if (!agg._imgAny) agg._imgAny = img;
      if (name) agg._imgByName.set(name, img);
    }

    // cheapest
    if (pNum !== null) {
      if (agg.cheapestPriceNum === null || pNum < agg.cheapestPriceNum) {
        agg.cheapestPriceNum = pNum;
        agg.cheapestPriceStr = pStr || "";
        agg.cheapestStoreLabel = storeLabel || agg.cheapestStoreLabel;
      }
    }

    // search parts (include everything we might want to match)
    agg._searchParts.push(sku);
    if (name) agg._searchParts.push(name);
    if (url) agg._searchParts.push(url);
    if (storeLabel) agg._searchParts.push(storeLabel);
  }

  const out = [...bySku.values()];

  // Finalization pass: resolve images, build searchText, drop scratch fields.
  for (const it of out) {
    // Ensure thumbnail matches chosen name when possible
    if (!it.img) {
      const m = it._imgByName;
      if (it.name && m && m.has(it.name)) it.img = m.get(it.name) || "";
      else it.img = it._imgAny || "";
    }

    delete it._imgByName;
    delete it._imgAny;

    // Ensure at least these are in the blob even if index rows are already aggregated
    it._searchParts.push(it.sku);
    it._searchParts.push(it.name || "");
    it._searchParts.push(it.sampleUrl || "");
    it._searchParts.push(it.cheapestStoreLabel || "");

    it.searchText = normSearchText(it._searchParts.join(" | "));
    delete it._searchParts;
  }

  out.sort((a, b) => (String(a.name) + a.sku).localeCompare(String(b.name) + b.sku));
  return out;
}
|
||||
|
||||
// AND-match: true when every token is a substring of the normalized haystack
// (vacuously true for an empty token list).
function matchesAllTokens(hayNorm, tokens) {
  return tokens.every((token) => hayNorm.includes(token));
}
|
||||
|
||||
function renderThumbHtml(imgUrl, cls = "thumb") {
|
||||
const img = normImg(imgUrl);
|
||||
if (!img) return `<div class="thumbPlaceholder"></div>`;
|
||||
return `<img class="${esc(cls)}" src="${esc(img)}" alt="" loading="lazy" onerror="this.style.display='none'" />`;
|
||||
}
|
||||
|
||||
/**
 * Render the search page.
 *
 * Behavior:
 *  - restores the saved query from localStorage,
 *  - with a non-empty query: token-AND search over the per-SKU aggregates,
 *  - with an empty query: shows the "recently changed" feed,
 *  - clicking any result card navigates to the item detail route.
 *
 * FIX: the recent-items template referenced `it.sku`, but the map callback
 * variable there is `r` and no `it` is in scope, so rendering the recent
 * feed threw a ReferenceError. It now uses the already-computed `sku`.
 * Also extracted the duplicated click-wiring loop into wireItemClicks().
 */
function renderSearch() {
  $app.innerHTML = `
    <div class="container">
      <div class="header">
        <h1 class="h1">Spirit Tracker Viz</h1>
        <div class="small">Search name / url / sku (word AND)</div>
      </div>

      <div class="card">
        <input id="q" class="input" placeholder="e.g. bowmore sherry, 303821, sierrasprings..." autocomplete="off" />
        <div id="results" class="list"></div>
      </div>
    </div>
  `;

  const $q = document.getElementById("q");
  const $results = document.getElementById("results");

  // Restore the last query so navigating back keeps context.
  $q.value = loadSavedQuery();

  // sku -> aggregate row (used to look up thumbnails for recent items).
  let aggBySku = new Map();

  // Wire click -> detail navigation on every rendered ".item" card.
  function wireItemClicks() {
    for (const el of Array.from($results.querySelectorAll(".item"))) {
      el.addEventListener("click", () => {
        const sku = el.getAttribute("data-sku") || "";
        if (!sku) return;
        saveQuery($q.value);
        location.hash = `#/item/${encodeURIComponent(sku)}`;
      });
    }
  }

  // Render search matches (one aggregated row per SKU), capped at 80 rows.
  function renderAggregates(items) {
    if (!items.length) {
      $results.innerHTML = `<div class="small">No matches.</div>`;
      return;
    }

    const limited = items.slice(0, 80);
    $results.innerHTML = limited
      .map((it) => {
        const storeCount = it.stores.size || 0;
        const plus = storeCount > 1 ? ` +${storeCount - 1}` : "";
        const price = it.cheapestPriceStr ? it.cheapestPriceStr : "(no price)";
        const store = it.cheapestStoreLabel || ([...it.stores][0] || "Store");

        return `
          <div class="item" data-sku="${esc(it.sku)}">
            <div class="itemRow">
              <div class="thumbBox">
                ${renderThumbHtml(it.img)}
              </div>
              <div class="itemBody">
                <div class="itemTop">
                  <div class="itemName">${esc(it.name || "(no name)")}</div>
                  <span class="badge mono">${esc(displaySku(it.sku))}</span>
                </div>
                <div class="meta">
                  <span class="mono">${esc(price)}</span>
                  <span class="badge">${esc(store)}${esc(plus)}</span>
                </div>
                <div class="meta">
                  <span class="mono">${esc(it.sampleUrl || "")}</span>
                </div>
              </div>
            </div>
          </div>
        `;
      })
      .join("");

    wireItemClicks();
  }

  // Render the "recently changed" feed (shown while the query is empty).
  function renderRecent(recent) {
    const items = Array.isArray(recent?.items) ? recent.items : [];
    if (!items.length) {
      $results.innerHTML = `<div class="small">Type to search…</div>`;
      return;
    }

    const days = Number.isFinite(Number(recent?.windowDays)) ? Number(recent.windowDays) : 3;
    const limited = items.slice(0, 140);

    $results.innerHTML =
      `<div class="small">Recently changed (last ${esc(days)} day(s)):</div>` +
      limited
        .map((r) => {
          // Human-readable change label for each diff kind.
          const kind =
            r.kind === "new"
              ? "NEW"
              : r.kind === "restored"
              ? "RESTORED"
              : r.kind === "removed"
              ? "REMOVED"
              : r.kind === "price_down"
              ? "PRICE ↓"
              : r.kind === "price_up"
              ? "PRICE ↑"
              : r.kind === "price_change"
              ? "PRICE"
              : "CHANGE";

          // Single price for add/remove events, old -> new for price changes.
          const priceLine =
            r.kind === "new" || r.kind === "restored" || r.kind === "removed"
              ? `${esc(r.price || "")}`
              : `${esc(r.oldPrice || "")} → ${esc(r.newPrice || "")}`;

          const when = r.ts ? prettyTs(r.ts) : r.date || "";

          const sku = String(r.sku || "");
          const img = aggBySku.get(sku)?.img || "";

          return `
            <div class="item" data-sku="${esc(sku)}">
              <div class="itemRow">
                <div class="thumbBox">
                  ${renderThumbHtml(img)}
                </div>
                <div class="itemBody">
                  <div class="itemTop">
                    <div class="itemName">${esc(r.name || "(no name)")}</div>
                    <span class="badge mono">${esc(displaySku(sku))}</span>
                  </div>
                  <div class="meta">
                    <span class="badge">${esc(kind)}</span>
                    <span class="badge">${esc(r.storeLabel || "")}</span>
                    <span class="mono">${esc(priceLine)}</span>
                  </div>
                  <div class="meta">
                    <span class="mono">${esc(when)}</span>
                  </div>
                  <div class="meta">
                    <span class="mono">${esc(r.url || "")}</span>
                  </div>
                </div>
              </div>
            </div>
          `;
        })
        .join("");

    wireItemClicks();
  }

  let allAgg = [];
  let indexReady = false;

  // Re-run the search for the current query (no-op until the index loads).
  function applySearch() {
    if (!indexReady) return;

    const tokens = tokenizeQuery($q.value);
    if (!tokens.length) {
      loadRecent()
        .then(renderRecent)
        .catch(() => {
          $results.innerHTML = `<div class="small">Type to search…</div>`;
        });
      return;
    }

    const matches = allAgg.filter((it) => matchesAllTokens(it.searchText, tokens));
    renderAggregates(matches);
  }

  $results.innerHTML = `<div class="small">Loading index…</div>`;

  loadIndex()
    .then((idx) => {
      const listings = Array.isArray(idx.items) ? idx.items : [];
      allAgg = aggregateBySku(listings);
      aggBySku = new Map(allAgg.map((x) => [String(x.sku || ""), x]));
      indexReady = true;
      $q.focus();
      applySearch();
      return loadRecent();
    })
    .then((recent) => {
      // Only show the recent feed if the user hasn't started typing yet.
      if (!tokenizeQuery($q.value).length) renderRecent(recent);
    })
    .catch((e) => {
      $results.innerHTML = `<div class="small">Failed to load: ${esc(e.message)}</div>`;
    });

  // Debounce input -> search by 50ms, persisting the query as it changes.
  let t = null;
  $q.addEventListener("input", () => {
    saveQuery($q.value);

    if (t) clearTimeout(t);
    t = setTimeout(applySearch, 50);
  });
}
|
||||
|
||||
/* ---------------- Detail (chart) ---------------- */
|
||||
|
||||
// Currently-mounted Chart instance for the detail view (null when none).
let CHART = null;

// Tear down the active chart, if any, so a new one can be mounted.
function destroyChart() {
  if (!CHART) return;
  CHART.destroy();
  CHART = null;
}
|
||||
|
||||
// List up to 200 commits touching `path` on `branch` via the GitHub API
// (two pages of 100; a second page is only fetched when the first is full).
async function githubListCommits({ owner, repo, branch, path }) {
  const base = `https://api.github.com/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}/commits`;
  const pageUrl = (page) =>
    `${base}?sha=${encodeURIComponent(branch)}&path=${encodeURIComponent(path)}&per_page=100&page=${page}`;

  const page1 = await fetchJson(pageUrl(1));
  if (!Array.isArray(page1)) return [];
  if (page1.length !== 100) return page1;

  const page2 = await fetchJson(pageUrl(2));
  return [...page1, ...(Array.isArray(page2) ? page2 : [])];
}
|
||||
|
||||
// Fetch and JSON-parse a file at a specific commit via raw.githubusercontent.com.
// Note: `path` is deliberately not URL-encoded (it may contain "/" segments).
async function githubFetchFileAtSha({ owner, repo, sha, path }) {
  const raw = `https://raw.githubusercontent.com/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}/${encodeURIComponent(
    sha
  )}/${path}`;
  const body = await fetchText(raw);
  return JSON.parse(body);
}
|
||||
|
||||
// Find the first non-removed item in a db object whose SKU matches exactly;
// null when there is no such item.
function findItemBySkuInDb(obj, sku) {
  const items = Array.isArray(obj?.items) ? obj.items : [];
  const hit = items.find((it) => it && !it.removed && String(it.sku || "") === sku);
  return hit ?? null;
}
|
||||
|
||||
// Suggest y-axis bounds for a price chart: 8% padding around the finite
// range (min clamped at 0), ±5% around a single flat value, and undefined
// bounds when no finite values exist.
function computeSuggestedY(values) {
  const nums = values.filter((v) => Number.isFinite(v));
  if (nums.length === 0) {
    return { suggestedMin: undefined, suggestedMax: undefined };
  }
  const min = Math.min(...nums);
  const max = Math.max(...nums);
  if (min === max) {
    return { suggestedMin: min * 0.95, suggestedMax: max * 1.05 };
  }
  const pad = (max - min) * 0.08;
  return { suggestedMin: Math.max(0, min - pad), suggestedMax: max + pad };
}
|
||||
|
||||
// Collapse commit list down to 1 commit per day (keep the most recent commit for that day)
|
||||
// Collapse an oldest->newest commit list to one entry per calendar day;
// later entries overwrite earlier ones, so each day keeps its newest commit.
// Entries missing a sha or date are dropped.
function collapseCommitsToDaily(commits) {
  const byDate = new Map();
  for (const commit of commits) {
    const date = String(commit?.date || "");
    const sha = String(commit?.sha || "");
    if (!date || !sha) continue;
    byDate.set(date, { sha, date, ts: String(commit?.ts || "") });
  }
  return [...byDate.values()];
}
|
||||
|
||||
// localStorage key for a cached price series; `cacheBust` invalidates old data.
function cacheKeySeries(sku, dbFile, cacheBust) {
  return ["stviz:v2:series", cacheBust, sku, dbFile].join(":");
}
|
||||
|
||||
// Load a cached price series from localStorage. Returns null when storage is
// unavailable, the entry is missing/corrupt, it lacks a points array, or it
// is older than one week.
function loadSeriesCache(sku, dbFile, cacheBust) {
  const ONE_WEEK_MS = 7 * 24 * 3600 * 1000;
  try {
    const raw = localStorage.getItem(cacheKeySeries(sku, dbFile, cacheBust));
    if (!raw) return null;
    const entry = JSON.parse(raw);
    if (!entry || !Array.isArray(entry.points)) return null;
    const savedAt = Number(entry.savedAt || 0);
    if (!Number.isFinite(savedAt)) return null;
    if (Date.now() - savedAt > ONE_WEEK_MS) return null;
    return entry;
  } catch {
    return null;
  }
}
|
||||
|
||||
/**
 * Persist a compact price series to localStorage, stamped with the save
 * time for the TTL check in loadSeriesCache. Failures (quota exceeded,
 * storage unavailable, unserializable points) are deliberately swallowed:
 * the cache is a best-effort optimization only.
 */
function saveSeriesCache(sku, dbFile, cacheBust, points) {
  try {
    const payload = JSON.stringify({ savedAt: Date.now(), points });
    localStorage.setItem(cacheKeySeries(sku, dbFile, cacheBust), payload);
  } catch {
    // best-effort cache: ignore storage errors
  }
}
|
||||
|
||||
// Memoized per-file commit manifest (null until the first successful load).
let DB_COMMITS = null;

/**
 * Fetch ./data/db_commits.json once and cache it for subsequent calls.
 * @returns {Promise<object|null>} The manifest, or null when it cannot
 *   be fetched or parsed.
 */
async function loadDbCommitsManifest() {
  if (DB_COMMITS) return DB_COMMITS;
  try {
    DB_COMMITS = await fetchJson("./data/db_commits.json");
  } catch {
    DB_COMMITS = null;
  }
  return DB_COMMITS;
}
|
||||
|
||||
/**
 * Render the item-detail view: header (name, thumbnail, store links) plus
 * a Chart.js price-history line chart with one dataset per store file.
 *
 * History is assembled per store file from, in order of preference:
 *   1. a localStorage series cache (keyed by index generation time),
 *   2. the prebuilt ./data/db_commits.json manifest,
 *   3. the GitHub commits API as a fallback,
 * collapsed to one point per day, capped at 260 points, and always topped
 * up with "today" from the current index.
 *
 * @param {string} sku - SKU (or synthesized unknown-SKU) from the route.
 */
async function renderItem(sku) {
  destroyChart();

  // Fix: the badge previously interpolated `displaySku(it.sku)`, but `it`
  // is not in scope here (it is only bound later, inside the per-commit
  // loop), which threw a ReferenceError. The route parameter `sku` is the
  // intended value.
  $app.innerHTML = `
    <div class="container">
      <div class="topbar">
        <button id="back" class="btn">← Back</button>
        <span class="badge mono">${esc(displaySku(sku))}</span>
      </div>

      <div class="card detailCard">
        <div class="detailHeader">
          <div id="thumbBox" class="detailThumbBox"></div>
          <div class="detailHeaderText">
            <div id="title" class="h1">Loading…</div>
            <div id="links" class="links"></div>
            <div class="small" id="status"></div>
          </div>
        </div>

        <div class="chartBox">
          <canvas id="chart"></canvas>
        </div>
      </div>
    </div>
  `;

  document.getElementById("back").addEventListener("click", () => {
    location.hash = "#/";
  });

  const $title = document.getElementById("title");
  const $links = document.getElementById("links");
  const $status = document.getElementById("status");
  const $canvas = document.getElementById("chart");
  const $thumbBox = document.getElementById("thumbBox");

  // Rows in the current index matching this SKU (one row per store).
  const idx = await loadIndex();
  const all = Array.isArray(idx.items) ? idx.items : [];
  const cur = all.filter((x) => (String(x.sku || "").trim() || makeUnknownSku(x)) === String(sku || ""));
  if (!cur.length) {
    $title.textContent = "Item not found in current index";
    $status.textContent = "Tip: index.json only includes current (non-removed) items.";
    if ($thumbBox) $thumbBox.innerHTML = `<div class="thumbPlaceholder"></div>`;
    return;
  }

  // Title: the most common name among the matching rows.
  const nameCounts = new Map();
  for (const r of cur) {
    const n = String(r.name || "");
    if (!n) continue;
    nameCounts.set(n, (nameCounts.get(n) || 0) + 1);
  }
  let bestName = cur[0].name || `(SKU ${sku})`;
  let bestCount = -1;
  for (const [n, c] of nameCounts.entries()) {
    if (c > bestCount) {
      bestName = n;
      bestCount = c;
    }
  }
  $title.textContent = bestName;

  // Pick image that matches the picked name (fallback: any)
  let bestImg = "";
  for (const r of cur) {
    if (String(r?.name || "") === String(bestName || "") && normImg(r?.img)) {
      bestImg = normImg(r.img);
      break;
    }
  }
  if (!bestImg) {
    for (const r of cur) {
      if (normImg(r?.img)) {
        bestImg = normImg(r.img);
        break;
      }
    }
  }
  if ($thumbBox) {
    $thumbBox.innerHTML = bestImg ? renderThumbHtml(bestImg, "detailThumb") : `<div class="thumbPlaceholder"></div>`;
  }

  // Store links, sorted by label.
  $links.innerHTML = cur
    .slice()
    .sort((a, b) => String(a.storeLabel || "").localeCompare(String(b.storeLabel || "")))
    .map(
      (r) =>
        `<a href="${esc(r.url)}" target="_blank" rel="noopener noreferrer">${esc(r.storeLabel || r.store || "Store")}</a>`
    )
    .join("");

  const gh = inferGithubOwnerRepo();
  const owner = gh.owner;
  const repo = gh.repo;
  const branch = "data";

  // One representative index row per db file (store).
  const byDbFile = new Map();
  for (const r of cur) {
    if (!r.dbFile) continue;
    if (!byDbFile.has(r.dbFile)) byDbFile.set(r.dbFile, r);
  }
  const dbFiles = [...byDbFile.keys()].sort();

  $status.textContent = `Loading history for ${dbFiles.length} store file(s)…`;

  const manifest = await loadDbCommitsManifest();

  const allDatesSet = new Set();
  const series = [];

  // sha|dbFile -> parsed file JSON, shared across commits within a run.
  const fileJsonCache = new Map();

  // Cache-busting key tied to the current index generation.
  const cacheBust = String(idx.generatedAt || new Date().toISOString());
  const today = dateOnly(idx.generatedAt || new Date().toISOString());

  for (const dbFile of dbFiles) {
    const row = byDbFile.get(dbFile);
    const storeLabel = String(row.storeLabel || row.store || dbFile);

    // 1) Series cache hit: reuse without fetching any history.
    const cached = loadSeriesCache(sku, dbFile, cacheBust);
    if (cached && Array.isArray(cached.points) && cached.points.length) {
      const points = new Map();
      const values = [];
      for (const p of cached.points) {
        const d = String(p.date || "");
        const v = p.price === null ? null : Number(p.price);
        if (!d) continue;
        points.set(d, Number.isFinite(v) ? v : null);
        if (Number.isFinite(v)) values.push(v);
        allDatesSet.add(d);
      }
      series.push({ label: storeLabel, points, values });
      continue;
    }

    // 2) Commit list: prebuilt manifest preferred, GitHub API fallback.
    let commits = [];
    if (manifest && manifest.files && Array.isArray(manifest.files[dbFile])) {
      commits = manifest.files[dbFile];
    } else {
      try {
        let apiCommits = await githubListCommits({ owner, repo, branch, path: dbFile });
        apiCommits = apiCommits.slice().reverse(); // oldest -> newest
        commits = apiCommits
          .map((c) => {
            const sha = String(c?.sha || "");
            const dIso = c?.commit?.committer?.date || c?.commit?.author?.date || "";
            const d = dateOnly(dIso);
            return sha && d ? { sha, date: d, ts: String(dIso || "") } : null;
          })
          .filter(Boolean);
      } catch {
        commits = []; // history unavailable; "today" point still added below
      }
    }

    commits = collapseCommitsToDaily(commits);

    const points = new Map();
    const values = [];
    const compactPoints = [];

    const MAX_POINTS = 260; // daily points (~8-9 months)
    if (commits.length > MAX_POINTS) commits = commits.slice(commits.length - MAX_POINTS);

    // 3) Fetch the db file at each daily commit and extract this SKU's price.
    for (const c of commits) {
      const sha = String(c.sha || "");
      const d = String(c.date || "");
      if (!sha || !d) continue;

      const ck = `${sha}|${dbFile}`;
      let obj = fileJsonCache.get(ck) || null;
      if (!obj) {
        try {
          obj = await githubFetchFileAtSha({ owner, repo, sha, path: dbFile });
          fileJsonCache.set(ck, obj);
        } catch {
          continue; // skip unfetchable commits rather than aborting the series
        }
      }

      const it = findItemBySkuInDb(obj, sku);
      const pNum = it ? parsePriceToNumber(it.price) : null;

      points.set(d, pNum);
      if (pNum !== null) values.push(pNum);
      allDatesSet.add(d);

      compactPoints.push({ date: d, price: pNum });
    }

    // Always add "today" from the current index
    const curP = parsePriceToNumber(row.price);
    if (curP !== null) {
      points.set(today, curP);
      values.push(curP);
      allDatesSet.add(today);
      compactPoints.push({ date: today, price: curP });
    }

    saveSeriesCache(sku, dbFile, cacheBust, compactPoints);
    series.push({ label: storeLabel, points, values });
  }

  // Shared, sorted x-axis of every date seen across all stores.
  const labels = [...allDatesSet].sort();
  if (!labels.length) {
    $status.textContent = "No historical points found.";
    return;
  }

  const allVals = [];
  for (const s of series) for (const v of s.values) allVals.push(v);
  const ySug = computeSuggestedY(allVals);

  // Dates a store has no point for are null (spanGaps: false leaves gaps).
  const datasets = series.map((s) => ({
    label: s.label,
    data: labels.map((d) => (s.points.has(d) ? s.points.get(d) : null)),
    spanGaps: false,
    tension: 0.15,
  }));

  const ctx = $canvas.getContext("2d");
  CHART = new Chart(ctx, {
    type: "line",
    data: { labels, datasets },
    options: {
      responsive: true,
      maintainAspectRatio: false,
      interaction: { mode: "nearest", intersect: false },
      plugins: {
        legend: { display: true },
        tooltip: {
          callbacks: {
            label: (ctx) => {
              const v = ctx.parsed?.y;
              if (!Number.isFinite(v)) return `${ctx.dataset.label}: (no data)`;
              return `${ctx.dataset.label}: $${v.toFixed(2)}`;
            },
          },
        },
      },
      scales: {
        x: {
          ticks: { maxRotation: 0, autoSkip: true, maxTicksLimit: 12 },
          grid: { display: false },
        },
        y: {
          ...ySug,
          ticks: { callback: (v) => `$${Number(v).toFixed(0)}` },
        },
      },
    },
  });

  $status.textContent = manifest
    ? `History loaded from prebuilt manifest (1 point/day) + current run. Points=${labels.length}.`
    : `History loaded (GitHub API fallback; 1 point/day) + current run. Points=${labels.length}.`;
}
|
||||
|
||||
/* ---------------- boot ---------------- */

// Re-render on every hash navigation, and once at startup.
window.addEventListener("hashchange", route);
route();
|
||||
--- viz/index.html — new file (16 lines) ---
<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8" />
  <meta name="viewport" content="width=device-width,initial-scale=1" />
  <title>Spirit Tracker Viz</title>
  <link rel="stylesheet" href="./style.css" />
</head>
<body>
  <!-- Single-page app root; app.js renders all views into this node. -->
  <div id="app"></div>

  <!-- Chart.js (no build step) -->
  <script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.1/dist/chart.umd.min.js"></script>
  <script src="./app.js"></script>
</body>
</html>
|
||||
--- viz/serve.js — new executable file (57 lines) ---
#!/usr/bin/env node
"use strict";

// Tiny zero-dependency static file server for the viz/ directory.
// Usage: node serve.js   (PORT env var overrides the default, see below)

const http = require("http");
const fs = require("fs");
const path = require("path");

// Files are served relative to this script's own directory.
const root = path.resolve(__dirname);

// Content-Type by file extension; anything unlisted is served as
// application/octet-stream (see the request handler below).
const MIME = {
  ".html": "text/html; charset=utf-8",
  ".js": "application/javascript; charset=utf-8",
  ".css": "text/css; charset=utf-8",
  ".json": "application/json; charset=utf-8",
  ".png": "image/png",
  ".jpg": "image/jpeg",
  ".jpeg": "image/jpeg",
  ".svg": "image/svg+xml",
};
|
||||
|
||||
/**
 * Resolve a request URL path to an absolute file path under `root`.
 *
 * Strips the query string, decodes percent-escapes, normalizes
 * backslashes to forward slashes, and rejects any path that escapes the
 * served directory.
 *
 * @param {string} urlPath - Raw request path (may include a query string).
 * @returns {string|null} Absolute path inside root, or null when the path
 *   is malformed or resolves outside root.
 */
function safePath(urlPath) {
  let decoded;
  try {
    decoded = decodeURIComponent(urlPath.split("?")[0]);
  } catch {
    return null; // malformed percent-encoding would otherwise throw
  }
  const p = decoded.replace(/\\/g, "/");
  const norm = path.normalize(path.join(root, p));
  // The resolved path must be root itself or strictly inside it. A bare
  // startsWith(root) check also accepted sibling paths such as
  // "<root>-other" or "<root>x" reachable via "..", a path-traversal hole.
  if (norm !== root && !norm.startsWith(root + path.sep)) return null;
  return norm;
}
|
||||
|
||||
// Request handler: map the URL to a file under root and send it back.
// "/" serves index.html, directory paths serve their index.html, and
// unknown extensions fall back to application/octet-stream.
const server = http.createServer((req, res) => {
  const urlPath = req.url || "/";
  let file = safePath(urlPath === "/" ? "/index.html" : urlPath);

  if (!file) {
    res.writeHead(400);
    res.end("Bad path");
    return;
  }

  // Directory request -> serve its index.html instead.
  if (fs.existsSync(file) && fs.statSync(file).isDirectory()) {
    file = path.join(file, "index.html");
  }

  fs.readFile(file, (err, buf) => {
    if (err) {
      res.writeHead(404);
      res.end("Not found");
      return;
    }
    const contentType = MIME[path.extname(file)] || "application/octet-stream";
    res.writeHead(200, { "Content-Type": contentType });
    res.end(buf);
  });
});
|
||||
|
||||
// Bind to loopback only (local dev server); PORT env var overrides 8080.
const port = Number(process.env.PORT || 8080);
server.listen(port, "127.0.0.1", () => {
  process.stdout.write(`Serving ${root} on http://127.0.0.1:${port}\n`);
});
|
||||
--- viz/style.css — new file (240 lines) ---
/* Dark theme palette shared across the app. */
:root {
  --bg: #0b0d10;
  --panel: #12161b;
  --text: #e7edf3;
  --muted: #9aa6b2;
  --border: #242c35;
  --accent: #7dd3fc;
}

/* ---- Base / reset ---- */
* { box-sizing: border-box; }
body {
  margin: 0;
  font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, "Apple Color Emoji", "Segoe UI Emoji";
  background: var(--bg);
  color: var(--text);
}

a { color: var(--accent); text-decoration: none; }
a:hover { text-decoration: underline; }

/* ---- Layout ---- */
.container {
  max-width: 980px;
  margin: 0 auto;
  padding: 18px;
}

.header {
  display: flex;
  gap: 12px;
  align-items: center;
  justify-content: space-between;
  margin-bottom: 14px;
}

.h1 {
  font-size: 18px;
  font-weight: 700;
  margin: 0;
}

.card {
  background: var(--panel);
  border: 1px solid var(--border);
  border-radius: 12px;
  padding: 14px;
}

/* ---- Search input ---- */
.input {
  width: 100%;
  padding: 12px 12px;
  border-radius: 10px;
  border: 1px solid var(--border);
  background: #0f1318;
  color: var(--text);
  outline: none;
}

.input:focus { border-color: #37566b; }

/* ---- List view ---- */
.list {
  margin-top: 12px;
  display: flex;
  flex-direction: column;
  gap: 10px;
}

.item {
  border: 1px solid var(--border);
  border-radius: 12px;
  padding: 12px;
  background: #0f1318;
  cursor: pointer;
}

.item:hover { border-color: #2f3a46; }

.itemRow {
  display: flex;
  gap: 12px;
  align-items: flex-start;
}

.thumbBox {
  width: 64px;
  height: 64px;
  border-radius: 12px;
  overflow: hidden;
  border: 1px solid var(--border);
  background: #0b0d10;
  flex: 0 0 64px;
  display: flex;
  align-items: center;
  justify-content: center;
}

.thumb {
  width: 100%;
  height: 100%;
  object-fit: cover;
  display: block;
}

.thumbPlaceholder {
  width: 100%;
  height: 100%;
  background: #0b0d10;
}

.itemBody {
  flex: 1;
  min-width: 0;
}

.itemTop {
  display: flex;
  justify-content: space-between;
  gap: 10px;
  align-items: baseline;
}

.itemName {
  font-weight: 700;
  font-size: 14px;
}

.badge {
  font-size: 12px;
  color: var(--muted);
  border: 1px solid var(--border);
  padding: 2px 8px;
  border-radius: 999px;
  white-space: nowrap;
}

.meta {
  margin-top: 6px;
  display: flex;
  gap: 10px;
  flex-wrap: wrap;
  color: var(--muted);
  font-size: 12px;
}

.mono { font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; }

.topbar {
  display: flex;
  align-items: center;
  gap: 10px;
  margin-bottom: 12px;
}

.btn {
  border: 1px solid var(--border);
  background: #0f1318;
  color: var(--text);
  border-radius: 10px;
  padding: 10px 10px;
  cursor: pointer;
}

.btn:hover { border-color: #2f3a46; }

.links {
  display: flex;
  gap: 10px;
  flex-wrap: wrap;
  margin: 8px 0 14px;
}

.small {
  color: var(--muted);
  font-size: 12px;
}

/* Detail view sizing */
.detailCard {
  display: flex;
  flex-direction: column;
  gap: 10px;
}

.detailHeader {
  display: flex;
  gap: 12px;
  align-items: flex-start;
}

.detailThumbBox {
  width: 96px;
  height: 96px;
  border-radius: 14px;
  overflow: hidden;
  border: 1px solid var(--border);
  background: #0b0d10;
  flex: 0 0 96px;
  display: flex;
  align-items: center;
  justify-content: center;
}

.detailThumb {
  width: 100%;
  height: 100%;
  object-fit: cover;
  display: block;
}

.detailHeaderText {
  flex: 1;
  min-width: 0;
}

/* Chart fills most of viewport, but stays usable on mobile */
.chartBox {
  width: 100%;
  height: min(72vh, 720px);
  min-height: 320px;
  border: 1px solid var(--border);
  border-radius: 12px;
  background: #0f1318;
  padding: 10px;
}

@media (max-width: 640px) {
  .container { padding: 14px; }
  .thumbBox { width: 56px; height: 56px; flex: 0 0 56px; }
  .detailThumbBox { width: 84px; height: 84px; flex: 0 0 84px; }

  .chartBox {
    height: 58vh;
    min-height: 260px;
    padding: 8px;
  }
}

/* Override Chart.js's inline sizing so the canvas fills .chartBox. */
.chartBox canvas {
  width: 100% !important;
  height: 100% !important;
}
|
||||
Loading…
Reference in a new issue