spirit-tracker/tools/build_common_listings.js
Brennan Wilkes (Text Groove) 35e5956c12 feat: New reports
2026-02-02 19:13:49 -08:00

326 lines
9.2 KiB
JavaScript

#!/usr/bin/env node
"use strict";
/*
Build a report of canonical SKUs and how many STORES carry each one.
- Store = storeLabel (union across categories).
- Canonicalizes via sku_map.
- Debug output while scanning.
- Writes: reports/common_listings_<group>_top<N>.json (or --out)
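Flags:
--top N                  maximum number of rows written to the report (default 50)
--min-stores N           keep SKUs carried by at least N stores of the group (default 2)
--require-all            keep only SKUs carried by every store of the group (takes precedence over --min-stores)
--group all|bc|ab        which stores are counted (default all)
--out path/to/file.json  explicit output path, resolved against the current working directory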
*/
const fs = require("fs");
const path = require("path");
/* ---------------- helpers ---------------- */
/**
 * Create `dir` if it does not exist yet, including any missing parents.
 *
 * @param {string} dir - Directory path to create.
 * @returns {void}
 */
function ensureDir(dir) {
  const mkdirOptions = { recursive: true };
  fs.mkdirSync(dir, mkdirOptions);
}
/**
 * Read a UTF-8 file and parse it as JSON.
 *
 * Best-effort: any failure (missing file, unreadable file, invalid JSON)
 * yields `null` instead of throwing.
 *
 * @param {string} p - Path of the file to read.
 * @returns {*} The parsed JSON value, or `null` on any error.
 */
function readJson(p) {
  let parsed = null;
  try {
    const text = fs.readFileSync(p, "utf8");
    parsed = JSON.parse(text);
  } catch {
    parsed = null;
  }
  return parsed;
}
/**
 * List the JSON files directly inside `<cwd>/data/db`.
 *
 * Only regular files whose name ends with ".json" are returned; the lookup
 * is not recursive. If the directory cannot be read, an empty list is returned.
 *
 * @returns {string[]} Absolute paths of the DB files (in directory order).
 */
function listDbFiles() {
  const dbDir = path.join(process.cwd(), "data", "db");
  try {
    const found = [];
    for (const entry of fs.readdirSync(dbDir, { withFileTypes: true })) {
      if (!entry.isFile()) continue;
      if (!entry.name.endsWith(".json")) continue;
      found.push(path.join(dbDir, entry.name));
    }
    return found;
  } catch {
    return [];
  }
}
/**
 * Convert a raw price value (e.g. "$1,299.00") into a number.
 *
 * Every character other than digits and "." is stripped before parsing.
 *
 * @param {*} v - Raw price; `null`/`undefined` are treated as an empty string.
 * @returns {number|null} The parsed price, or `null` when nothing numeric is
 *   left after stripping or the remainder is not a finite number (e.g. "1.2.3").
 */
function priceToNumber(v) {
  const digits = String(v ?? "").replace(/[^0-9.]/g, "");
  // Number("") is 0, which would turn a missing price into a "free" listing
  // and make it win every cheapest-price comparison.
  if (digits === "") return null;
  const n = Number(digits);
  return Number.isFinite(n) ? n : null;
}
/**
 * Tell whether a raw SKU value contains a standalone run of exactly six digits.
 *
 * @param {*} s - Raw SKU value; falsy values are treated as an empty string.
 * @returns {boolean} `true` when a word-bounded six-digit number is present.
 */
function hasRealSku6(s) {
  const text = s ? String(s) : "";
  return text.search(/\b\d{6}\b/) !== -1;
}
/**
 * Tell whether a SKU key carries the "u:" prefix.
 *
 * @param {*} k - SKU key; falsy values are treated as an empty string.
 * @returns {boolean} `true` when the key starts with "u:".
 */
function isSyntheticSkuKey(k) {
  const key = k ? String(k) : "";
  return key.slice(0, 2) === "u:";
}
/* ---------------- sku helpers ---------------- */
/**
 * Load the project's SKU map from `<cwd>/src/utils/sku_map`, pointing it at
 * `<cwd>/data/db`.
 *
 * Best-effort: if the module cannot be required or `loadSkuMap` throws,
 * `null` is returned.
 *
 * @returns {*} Whatever `loadSkuMap` returns, or `null` on any failure.
 */
function loadSkuMapOrNull() {
  let skuMap = null;
  try {
    const modulePath = path.join(process.cwd(), "src/utils/sku_map");
    // eslint-disable-next-line node/no-missing-require
    const { loadSkuMap } = require(modulePath);
    const dbDir = path.join(process.cwd(), "data/db");
    skuMap = loadSkuMap({ dbDir });
  } catch {
    skuMap = null;
  }
  return skuMap;
}
/**
 * Normalize a raw SKU into a SKU key.
 *
 * Delegates to `normalizeSkuKey` from `<cwd>/src/utils/sku`. If that module
 * cannot be loaded or the call throws, a local fallback is used: the first
 * standalone six-digit number in `skuRaw`, otherwise `u:<storeLabel>:<url>`
 * when a URL is available, otherwise an empty string.
 *
 * @param {object} params
 * @param {*} params.skuRaw - Raw SKU value of the listing.
 * @param {string} params.storeLabel - Store the listing belongs to.
 * @param {string} params.url - Listing URL.
 * @returns {string} The SKU key, or "" when none can be derived.
 */
function normalizeSkuKeyOrEmpty({ skuRaw, storeLabel, url }) {
  try {
    const skuModulePath = path.join(process.cwd(), "src/utils/sku");
    // eslint-disable-next-line node/no-missing-require
    const { normalizeSkuKey } = require(skuModulePath);
    const key = normalizeSkuKey(skuRaw, { storeLabel, url });
    if (!key) return "";
    return String(key);
  } catch {
    const sixDigits = /\b(\d{6})\b/.exec(String(skuRaw ?? ""));
    if (sixDigits) return sixDigits[1];
    return url ? `u:${storeLabel}:${url}` : "";
  }
}
/**
 * Map a SKU key to its canonical SKU.
 *
 * @param {*} k - SKU key; a falsy key yields "".
 * @param {*} skuMap - Optional object exposing `canonicalSku(key)`.
 * @returns {*} "" for a falsy key; the stringified result of
 *   `skuMap.canonicalSku(k)` (or of `k` when that result is falsy) when the map
 *   provides that function; otherwise `k` unchanged.
 */
function canonicalize(k, skuMap) {
  if (!k) return "";
  const canMap = Boolean(skuMap) && typeof skuMap.canonicalSku === "function";
  if (!canMap) return k;
  const mapped = skuMap.canonicalSku(k);
  return String(mapped || k);
}
/* ---------------- args ---------------- */
/**
 * Parse the command-line flags of the report builder.
 *
 * Recognized flags: `--top N`, `--min-stores N`, `--require-all`,
 * `--group <name>` and `--out <path>`. Unknown tokens are ignored, and a
 * value-taking flag without a following value is ignored as well.
 *
 * `--top` and `--min-stores` must be positive integers; anything else
 * (non-numeric, zero, negative, fractional) falls back to the default.
 * Negative or fractional values would otherwise reach `Array.prototype.slice`
 * and the output file name unchecked.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{top: number, minStores: number, requireAll: boolean, group: string, out: string}}
 *   Parsed options with defaults applied.
 */
function parseArgs(argv) {
  const DEFAULT_TOP = 50;
  const DEFAULT_MIN_STORES = 2;

  // Accept only positive integers; everything else yields the fallback.
  const toPositiveInt = (raw, fallback) => {
    const n = Number(raw);
    return Number.isInteger(n) && n > 0 ? n : fallback;
  };

  const out = {
    top: DEFAULT_TOP,
    minStores: DEFAULT_MIN_STORES,
    requireAll: false,
    group: "all", // all|bc|ab
    out: "", // optional explicit output path
  };

  for (let i = 0; i < argv.length; i++) {
    const flag = argv[i];
    if (flag === "--require-all") {
      out.requireAll = true;
      continue;
    }

    // Every remaining flag needs a value; without one the flag is skipped
    // and the next token is examined on its own.
    const value = argv[i + 1];
    if (!value) continue;

    switch (flag) {
      case "--top":
        out.top = toPositiveInt(value, DEFAULT_TOP);
        i++;
        break;
      case "--min-stores":
        out.minStores = toPositiveInt(value, DEFAULT_MIN_STORES);
        i++;
        break;
      case "--group":
        out.group = String(value);
        i++;
        break;
      case "--out":
        out.out = String(value);
        i++;
        break;
      default:
        break;
    }
  }
  return out;
}
/**
 * Restrict the store list to the requested group.
 *
 * "bc" keeps the stores in the built-in BC list, "ab" keeps every store that
 * is not in that list, and any other value returns the input array itself.
 *
 * @param {string} group - Group name ("all", "bc" or "ab").
 * @param {string[]} allStoresSorted - All store labels, already sorted.
 * @returns {string[]} Store labels belonging to the group, in input order.
 */
function groupStores(group, allStoresSorted) {
  const bcStores = new Set(["gull", "strath", "bcl", "legacy", "tudor"]);
  const isBc = (label) => bcStores.has(label);
  switch (group) {
    case "bc":
      return allStoresSorted.filter((label) => isBc(label));
    case "ab":
      return allStoresSorted.filter((label) => !isBc(label));
    default:
      return allStoresSorted; // "all"
  }
}
/* ---------------- main ---------------- */
/**
 * Build the "common listings" report and write it to disk.
 *
 * Steps:
 *  1. Parse CLI flags and locate the DB files under `<cwd>/data/db`.
 *  2. Scan every non-removed item, derive its canonical SKU and aggregate, per
 *     canonical SKU, the set of stores carrying it, all listings, the overall
 *     cheapest listing and the best numeric price per store.
 *  3. Restrict to the requested store group, build one row per canonical SKU,
 *     sort by store coverage (descending) then canonical SKU, filter by
 *     `--require-all` / `--min-stores` and keep the first `--top` rows.
 *  4. Write the JSON payload to `--out` or to
 *     `reports/common_listings_<group>_top<N>.json`.
 *
 * When no DB files are found, an error is printed, `process.exitCode` is set
 * to 2 and nothing is written.
 *
 * @returns {void}
 */
function main() {
  const args = parseArgs(process.argv.slice(2));
  const repoRoot = process.cwd();
  const reportsDir = path.join(repoRoot, "reports");
  ensureDir(reportsDir);

  const dbFiles = listDbFiles();
  if (!dbFiles.length) {
    console.error("No DB files found");
    process.exitCode = 2;
    return;
  }

  const skuMap = loadSkuMapOrNull();
  console.log(`[debug] skuMap: ${skuMap ? "loaded" : "missing"}`);
  console.log(`[debug] scanning ${dbFiles.length} db files`);

  const storeToCanon = new Map(); // storeLabel -> Set(canonSku)
  const canonAgg = new Map(); // canonSku -> { stores:Set, listings:[], cheapest, perStore:Map(storeLabel -> {priceNum, item}) }
  let liveRows = 0;
  let removedRows = 0;

  // Sorted so that the scan order (and the debug output) is deterministic.
  for (const abs of dbFiles.sort()) {
    const obj = readJson(abs);
    if (!obj) continue; // unreadable or invalid file: skip

    const storeLabel = String(obj.storeLabel || obj.store || "").trim();
    if (!storeLabel) continue;

    // Register the store even if none of its items yields a SKU, so it still
    // counts towards the store list of its group.
    if (!storeToCanon.has(storeLabel)) {
      storeToCanon.set(storeLabel, new Set());
    }

    const rel = path.relative(repoRoot, abs).replace(/\\/g, "/");
    const items = Array.isArray(obj.items) ? obj.items : [];
    console.log(`[debug] ${rel} store="${storeLabel}" items=${items.length}`);

    for (const it of items) {
      if (!it) continue;
      if (it.removed) {
        removedRows++;
        continue;
      }
      liveRows++;

      const skuKey = normalizeSkuKeyOrEmpty({
        skuRaw: it.sku,
        storeLabel,
        url: it.url,
      });
      if (!skuKey) continue;

      const canonSku = canonicalize(skuKey, skuMap);
      if (!canonSku) continue;

      storeToCanon.get(storeLabel).add(canonSku);

      let agg = canonAgg.get(canonSku);
      if (!agg) {
        agg = { stores: new Set(), listings: [], cheapest: null, perStore: new Map() };
        canonAgg.set(canonSku, agg);
      }
      agg.stores.add(storeLabel);

      const priceNum = priceToNumber(it.price);
      const listing = {
        canonSku,
        skuKey,
        skuRaw: String(it.sku || ""),
        name: String(it.name || ""),
        price: String(it.price || ""),
        priceNum,
        url: String(it.url || ""),
        storeLabel,
        categoryLabel: String(obj.categoryLabel || obj.category || ""),
        dbFile: rel,
        hasRealSku6: hasRealSku6(it.sku) && !isSyntheticSkuKey(skuKey),
      };
      agg.listings.push(listing);

      // Overall cheapest listing across all stores (numeric prices only).
      if (priceNum !== null) {
        if (!agg.cheapest || priceNum < agg.cheapest.priceNum) {
          agg.cheapest = { priceNum, item: listing };
        }
      }

      // per-store numeric price (best/lowest numeric; otherwise first seen)
      const prev = agg.perStore.get(storeLabel);
      if (priceNum !== null) {
        if (!prev || prev.priceNum === null || priceNum < prev.priceNum) {
          agg.perStore.set(storeLabel, { priceNum, item: listing });
        }
      } else {
        if (!prev) agg.perStore.set(storeLabel, { priceNum: null, item: listing });
      }
    }
  }

  // Normalized once and reused for store selection, payload and file name.
  const safeGroup = String(args.group || "all").toLowerCase();
  const allStores = [...storeToCanon.keys()].sort();
  const stores = groupStores(safeGroup, allStores);
  const storeCount = stores.length;
  console.log(`[debug] stores(all) (${allStores.length}): ${allStores.join(", ")}`);
  console.log(`[debug] group="${args.group}" stores(${storeCount}): ${stores.join(", ")}`);
  console.log(`[debug] liveRows=${liveRows} removedRows=${removedRows} canonSkus=${canonAgg.size}`);

  // Representative listing: the cheapest listing that has a real six-digit
  // SKU, else the overall cheapest listing, else the first listing seen.
  function pickRepresentative(agg) {
    const preferred = agg.listings
      .filter((l) => l.hasRealSku6)
      .sort((a, b) => (a.priceNum ?? Infinity) - (b.priceNum ?? Infinity));
    if (preferred.length) return preferred[0];
    if (agg.cheapest) return agg.cheapest.item;
    return agg.listings[0] || null;
  }

  const rows = [];
  for (const [canonSku, agg] of canonAgg.entries()) {
    const groupStoresPresent = stores.filter((s) => agg.stores.has(s));
    if (groupStoresPresent.length === 0) continue; // not carried by this group

    const rep = pickRepresentative(agg);
    const missingStores = stores.filter((s) => !agg.stores.has(s));

    // One entry per store of the group; null when the store has no numeric price.
    const storePrices = {};
    for (const s of stores) {
      const ps = agg.perStore.get(s);
      storePrices[s] = ps ? ps.priceNum : null;
    }

    rows.push({
      canonSku,
      storeCount: groupStoresPresent.length,
      stores: groupStoresPresent.sort(),
      missingStores,
      storePrices,
      representative: rep
        ? {
            name: rep.name,
            price: rep.price,
            priceNum: rep.priceNum,
            storeLabel: rep.storeLabel,
            skuRaw: rep.skuRaw,
            skuKey: rep.skuKey,
            url: rep.url,
            categoryLabel: rep.categoryLabel,
            dbFile: rep.dbFile,
          }
        : null,
      cheapest: agg.cheapest
        ? {
            price: agg.cheapest.item.price,
            priceNum: agg.cheapest.priceNum,
            storeLabel: agg.cheapest.item.storeLabel,
            url: agg.cheapest.item.url,
          }
        : null,
    });
  }

  // stable-ish ordering: primary by store coverage, tie-break by canonSku
  rows.sort((a, b) => {
    if (b.storeCount !== a.storeCount) return b.storeCount - a.storeCount;
    return String(a.canonSku).localeCompare(String(b.canonSku));
  });

  const filtered = args.requireAll
    ? rows.filter((r) => r.storeCount === storeCount)
    : rows.filter((r) => r.storeCount >= args.minStores);
  const top = filtered.slice(0, args.top);

  const payload = {
    generatedAt: new Date().toISOString(),
    args: { ...args, group: safeGroup },
    storeCount,
    stores,
    totals: {
      liveRows,
      removedRows,
      canonSkus: canonAgg.size,
      outputCount: top.length,
    },
    rows: top,
  };

  const defaultName = `common_listings_${safeGroup}_top${args.top}.json`;
  const outPath = args.out ? path.resolve(repoRoot, args.out) : path.join(reportsDir, defaultName);
  // An explicit --out may point outside reports/; make sure its directory
  // exists so the write does not fail with ENOENT.
  ensureDir(path.dirname(outPath));
  fs.writeFileSync(outPath, JSON.stringify(payload, null, 2) + "\n", "utf8");
  console.log(`Wrote ${path.relative(repoRoot, outPath)} (${top.length} rows)`);
}
// Entry point: run the report as soon as this file is loaded.
main();