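feat: Simplify build_common_listings (drop --prefer-real-sku, excluded-store handling, skippedNoSku/missingStores fields, and sample debug output)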

This commit is contained in:
Brennan Wilkes (Text Groove) 2026-01-30 15:39:33 -08:00
parent 3f05142cef
commit 1952d02617

View file

@@ -3,17 +3,15 @@
/* /*
Build a report of canonical SKUs and how many STORES carry each one. Build a report of canonical SKUs and how many STORES carry each one.
- "Store" means storeLabel, NOT category. We union across categories per store. - Store = storeLabel (union across categories).
- Uses sku_map canonicalization (same as alert tool). - Canonicalizes via sku_map.
- Debug output while scanning.
- Writes: reports/common_listings.json - Writes: reports/common_listings.json
- Prints debug while scanning.
Usage: Flags:
node tools/build_common_listings.js [--top 50] [--min-stores 3] [--prefer-real-sku] [--require-all] --top N
--min-stores N
Notes: --require-all
- If --require-all is set, output includes only SKUs present in ALL stores (often empty).
- Otherwise, outputs top N by store coverage.
*/ */
const fs = require("fs"); const fs = require("fs");
@@ -25,15 +23,15 @@ function ensureDir(dir) {
fs.mkdirSync(dir, { recursive: true }); fs.mkdirSync(dir, { recursive: true });
} }
function readJson(filePath) { function readJson(p) {
try { try {
return JSON.parse(fs.readFileSync(filePath, "utf8")); return JSON.parse(fs.readFileSync(p, "utf8"));
} catch { } catch {
return null; return null;
} }
} }
function listDbFilesOnDisk() { function listDbFiles() {
const dir = path.join(process.cwd(), "data", "db"); const dir = path.join(process.cwd(), "data", "db");
try { try {
return fs return fs
@@ -59,13 +57,13 @@ function isSyntheticSkuKey(k) {
return String(k || "").startsWith("u:"); return String(k || "").startsWith("u:");
} }
/* ---------------- sku map + normalization ---------------- */ /* ---------------- sku helpers ---------------- */
function loadSkuMapOrNull() { function loadSkuMapOrNull() {
try { try {
// eslint-disable-next-line node/no-missing-require // eslint-disable-next-line node/no-missing-require
const { loadSkuMap } = require(path.join(process.cwd(), "src", "utils", "sku_map")); const { loadSkuMap } = require(path.join(process.cwd(), "src/utils/sku_map"));
return loadSkuMap({ dbDir: path.join(process.cwd(), "data", "db") }); return loadSkuMap({ dbDir: path.join(process.cwd(), "data/db") });
} catch { } catch {
return null; return null;
} }
@@ -74,40 +72,33 @@ function loadSkuMapOrNull() {
function normalizeSkuKeyOrEmpty({ skuRaw, storeLabel, url }) { function normalizeSkuKeyOrEmpty({ skuRaw, storeLabel, url }) {
try { try {
// eslint-disable-next-line node/no-missing-require // eslint-disable-next-line node/no-missing-require
const { normalizeSkuKey } = require(path.join(process.cwd(), "src", "utils", "sku")); const { normalizeSkuKey } = require(path.join(process.cwd(), "src/utils/sku"));
const k = normalizeSkuKey(skuRaw, { storeLabel, url }); const k = normalizeSkuKey(skuRaw, { storeLabel, url });
return k ? String(k) : ""; return k ? String(k) : "";
} catch { } catch {
// fallback: a 6-digit SKU if present, else synthetic from URL
const m = String(skuRaw ?? "").match(/\b(\d{6})\b/); const m = String(skuRaw ?? "").match(/\b(\d{6})\b/);
if (m) return m[1]; if (m) return m[1];
if (url) return `u:${String(storeLabel || "").toLowerCase()}:${String(url || "").toLowerCase()}`; if (url) return `u:${storeLabel}:${url}`;
return ""; return "";
} }
} }
function canonicalize(skuKey, skuMap) { function canonicalize(k, skuMap) {
if (!skuKey) return ""; if (!k) return "";
if (skuMap && typeof skuMap.canonicalSku === "function") return String(skuMap.canonicalSku(skuKey) || skuKey); if (skuMap && typeof skuMap.canonicalSku === "function") {
return skuKey; return String(skuMap.canonicalSku(k) || k);
}
return k;
} }
/* ---------------- args ---------------- */ /* ---------------- args ---------------- */
function parseArgs(argv) { function parseArgs(argv) {
const out = { const out = { top: 50, minStores: 2, requireAll: false };
top: 50,
minStores: 2,
preferRealSku: true,
requireAll: false,
};
for (let i = 0; i < argv.length; i++) { for (let i = 0; i < argv.length; i++) {
const a = argv[i]; const a = argv[i];
if (a === "--top" && argv[i + 1]) out.top = Math.max(1, Number(argv[++i]) || 50); if (a === "--top" && argv[i + 1]) out.top = Number(argv[++i]) || 50;
else if (a === "--min-stores" && argv[i + 1]) out.minStores = Math.max(1, Number(argv[++i]) || 2); else if (a === "--min-stores" && argv[i + 1]) out.minStores = Number(argv[++i]) || 2;
else if (a === "--prefer-real-sku") out.preferRealSku = true;
else if (a === "--no-prefer-real-sku") out.preferRealSku = false;
else if (a === "--require-all") out.requireAll = true; else if (a === "--require-all") out.requireAll = true;
} }
return out; return out;
@@ -121,70 +112,56 @@ function main() {
const reportsDir = path.join(repoRoot, "reports"); const reportsDir = path.join(repoRoot, "reports");
ensureDir(reportsDir); ensureDir(reportsDir);
const dbFiles = listDbFilesOnDisk(); const dbFiles = listDbFiles();
if (!dbFiles.length) { if (!dbFiles.length) {
console.error("No DB files found under data/db"); console.error("No DB files found");
process.exitCode = 2; process.exitCode = 2;
return; return;
} }
const skuMap = loadSkuMapOrNull(); const skuMap = loadSkuMapOrNull();
console.log(`[debug] skuMap: ${skuMap ? "loaded" : "NOT loaded (will use raw sku keys)"}`); console.log(`[debug] skuMap: ${skuMap ? "loaded" : "missing"}`);
console.log(`[debug] scanning ${dbFiles.length} db files...`); console.log(`[debug] scanning ${dbFiles.length} db files`);
// storeLabel -> Set(canonSku) (union across categories) const storeToCanon = new Map(); // storeLabel -> Set(canonSku)
const storeToCanon = new Map(); const canonAgg = new Map(); // canonSku -> { stores:Set, listings:[], cheapest }
// canonSku -> aggregate
const canonAgg = new Map(); // canonSku -> { stores:Set, listings:[], cheapest:{priceNum,item}|null }
const EXCLUDED_STORE_LABELS = new Set(["gull", "legacy", "strath", "vessel", "tudor"]);
let liveRows = 0; let liveRows = 0;
let removedRows = 0; let removedRows = 0;
let skippedNoSku = 0;
for (const abs of dbFiles.sort()) { for (const abs of dbFiles.sort()) {
const obj = readJson(abs); const obj = readJson(abs);
if (!obj) { if (!obj) continue;
console.log(`[debug] skip unreadable: ${path.relative(repoRoot, abs)}`);
continue;
}
const storeLabel = String(obj.storeLabel || obj.store || "").trim(); const storeLabel = String(obj.storeLabel || obj.store || "").trim();
const categoryLabel = String(obj.categoryLabel || obj.category || "").trim(); if (!storeLabel) continue;
const rel = path.relative(repoRoot, abs).replace(/\\/g, "/");
if (!storeLabel) { if (!storeToCanon.has(storeLabel)) {
console.log(`[debug] skip no-storeLabel: ${rel}`); storeToCanon.set(storeLabel, new Set());
continue;
} }
if (!storeToCanon.has(storeLabel)) storeToCanon.set(storeLabel, new Set()); const rel = path.relative(repoRoot, abs).replace(/\\/g, "/");
const items = Array.isArray(obj.items) ? obj.items : []; const items = Array.isArray(obj.items) ? obj.items : [];
console.log(`[debug] file ${rel} | store="${storeLabel}" cat="${categoryLabel}" items=${items.length}`);
console.log(`[debug] ${rel} store="${storeLabel}" items=${items.length}`);
for (const it of items) { for (const it of items) {
if (!it) continue; if (!it) continue;
if (Boolean(it.removed)) { if (it.removed) {
removedRows++; removedRows++;
continue; continue;
} }
liveRows++; liveRows++;
const skuRaw = String(it.sku || ""); const skuKey = normalizeSkuKeyOrEmpty({
const url = String(it.url || ""); skuRaw: it.sku,
const skuKey = normalizeSkuKeyOrEmpty({ skuRaw, storeLabel, url }); storeLabel,
if (!skuKey) { url: it.url,
skippedNoSku++; });
continue; if (!skuKey) continue;
}
const canonSku = canonicalize(skuKey, skuMap); const canonSku = canonicalize(skuKey, skuMap);
if (!canonSku) { if (!canonSku) continue;
skippedNoSku++;
continue;
}
storeToCanon.get(storeLabel).add(canonSku); storeToCanon.get(storeLabel).add(canonSku);
@@ -193,25 +170,22 @@ function main() {
agg = { stores: new Set(), listings: [], cheapest: null }; agg = { stores: new Set(), listings: [], cheapest: null };
canonAgg.set(canonSku, agg); canonAgg.set(canonSku, agg);
} }
agg.stores.add(storeLabel); agg.stores.add(storeLabel);
const priceStr = String(it.price || ""); const priceNum = priceToNumber(it.price);
const priceNum = priceToNumber(priceStr);
const listing = { const listing = {
canonSku, canonSku,
skuKey, skuKey,
skuRaw, skuRaw: String(it.sku || ""),
name: String(it.name || ""), name: String(it.name || ""),
price: priceStr, price: String(it.price || ""),
priceNum, priceNum,
url, url: String(it.url || ""),
img: String(it.img || it.image || it.thumb || ""),
storeLabel, storeLabel,
categoryLabel, categoryLabel: String(obj.categoryLabel || obj.category || ""),
dbFile: rel, dbFile: rel,
hasRealSku6: hasRealSku6(skuRaw) && !isSyntheticSkuKey(skuKey), hasRealSku6: hasRealSku6(it.sku) && !isSyntheticSkuKey(skuKey),
excludedStore: EXCLUDED_STORE_LABELS.has(String(storeLabel || "").toLowerCase()),
}; };
agg.listings.push(listing); agg.listings.push(listing);
@@ -224,52 +198,33 @@ function main() {
} }
} }
const stores = [...storeToCanon.keys()].sort((a, b) => a.localeCompare(b)); const stores = [...storeToCanon.keys()].sort();
const storeCount = stores.length; const storeCount = stores.length;
console.log(`[debug] stores=${storeCount} (${stores.join(", ")})`); console.log(`[debug] stores (${storeCount}): ${stores.join(", ")}`);
console.log( console.log(`[debug] liveRows=${liveRows} removedRows=${removedRows} canonSkus=${canonAgg.size}`);
`[debug] liveRows=${liveRows} removedRows=${removedRows} skippedNoSku=${skippedNoSku} canonSkus=${canonAgg.size}`
);
function pickRepresentative(agg) { function pickRepresentative(agg) {
// prefer: real 6-digit + non-excluded store + cheapest among those const preferred = agg.listings
const candidates = agg.listings.slice(); .filter((l) => l.hasRealSku6)
.sort((a, b) => (a.priceNum ?? Infinity) - (b.priceNum ?? Infinity));
const byPrice = (a, b) => { if (preferred.length) return preferred[0];
const ap = a.priceNum; if (agg.cheapest) return agg.cheapest.item;
const bp = b.priceNum; return agg.listings[0] || null;
if (ap === null && bp === null) return 0;
if (ap === null) return 1;
if (bp === null) return -1;
return ap - bp;
};
const preferred = candidates
.filter((x) => x.hasRealSku6 && !x.excludedStore)
.sort(byPrice);
if (args.preferRealSku && preferred.length) return preferred[0];
// else: cheapest overall if available
if (agg.cheapest && agg.cheapest.item) return agg.cheapest.item;
// else: deterministic fallback
candidates.sort((a, b) => {
const ak = `${a.storeLabel}|${a.name}|${a.url}`;
const bk = `${b.storeLabel}|${b.name}|${b.url}`;
return ak.localeCompare(bk);
});
return candidates[0] || null;
} }
const rows = []; const rows = [];
for (const [canonSku, agg] of canonAgg.entries()) { for (const [canonSku, agg] of canonAgg.entries()) {
const rep = pickRepresentative(agg); const rep = pickRepresentative(agg);
const missingStores = stores.filter((s) => !agg.stores.has(s));
rows.push({ rows.push({
canonSku, canonSku,
storeCount: agg.stores.size, storeCount: agg.stores.size,
stores: [...agg.stores].sort((a, b) => a.localeCompare(b)), stores: [...agg.stores].sort(),
missingStores,
representative: rep representative: rep
? { ? {
name: rep.name, name: rep.name,
@@ -279,8 +234,8 @@ function main() {
skuRaw: rep.skuRaw, skuRaw: rep.skuRaw,
skuKey: rep.skuKey, skuKey: rep.skuKey,
url: rep.url, url: rep.url,
dbFile: rep.dbFile,
categoryLabel: rep.categoryLabel, categoryLabel: rep.categoryLabel,
dbFile: rep.dbFile,
} }
: null, : null,
cheapest: agg.cheapest cheapest: agg.cheapest
@@ -294,34 +249,14 @@ function main() {
}); });
} }
// Sort by coverage desc, then cheapest asc, then canonSku rows.sort((a, b) => b.storeCount - a.storeCount);
rows.sort((a, b) => {
if (b.storeCount !== a.storeCount) return b.storeCount - a.storeCount; const filtered = args.requireAll
const ap = a.cheapest ? a.cheapest.priceNum : null; ? rows.filter((r) => r.storeCount === storeCount)
const bp = b.cheapest ? b.cheapest.priceNum : null; : rows.filter((r) => r.storeCount >= args.minStores);
if (ap !== null && bp !== null && ap !== bp) return ap - bp;
if (ap !== null && bp === null) return -1;
if (ap === null && bp !== null) return 1;
return String(a.canonSku).localeCompare(String(b.canonSku));
});
const allStoresRows = rows.filter((r) => r.storeCount === storeCount);
const filtered = args.requireAll ? allStoresRows : rows.filter((r) => r.storeCount >= args.minStores);
const top = filtered.slice(0, args.top); const top = filtered.slice(0, args.top);
console.log(
`[debug] all-stores=${allStoresRows.length} minStores>=${args.minStores} filtered=${filtered.length} top=${top.length}`
);
if (top.length) {
console.log("[debug] sample:");
for (const r of top.slice(0, Math.min(10, top.length))) {
const rep = r.representative;
console.log(
` - stores=${r.storeCount}/${storeCount} canon=${r.canonSku} | rep="${rep?.name || "?"}" ${rep?.price || ""} @ ${rep?.storeLabel || "?"}`
);
}
}
const payload = { const payload = {
generatedAt: new Date().toISOString(), generatedAt: new Date().toISOString(),
args, args,
@@ -330,9 +265,7 @@ function main() {
totals: { totals: {
liveRows, liveRows,
removedRows, removedRows,
skippedNoSku,
canonSkus: canonAgg.size, canonSkus: canonAgg.size,
allStores: allStoresRows.length,
outputCount: top.length, outputCount: top.length,
}, },
rows: top, rows: top,