fix: Common listings

This commit is contained in:
Brennan Wilkes (Text Groove) 2026-02-02 19:40:06 -08:00
parent 01ca440585
commit 85e444d7ef
3 changed files with 162 additions and 128 deletions

View file

@ -101,24 +101,21 @@ if [[ $rc -ne 0 ]]; then
exit $rc
fi
# Build common listings reports FIRST (so commits manifest can see them)
for group in all bc ab; do
for top in 50 250 1000; do
"$NODE_BIN" tools/build_common_listings.js \
--group "$group" \
--top "$top" \
--out "reports/common_listings_${group}_top${top}.json"
done
done
# Build viz artifacts on the data branch
"$NODE_BIN" tools/build_viz_index.js
"$NODE_BIN" tools/build_viz_commits.js
"$NODE_BIN" tools/build_viz_recent.js
# Build common listings artifacts (9 files)
"$NODE_BIN" tools/build_common_listings.js --group all --top 50 --out "reports/common_listings_all_top50.json"
"$NODE_BIN" tools/build_common_listings.js --group all --top 250 --out "reports/common_listings_all_top250.json"
"$NODE_BIN" tools/build_common_listings.js --group all --top 1000 --out "reports/common_listings_all_top1000.json"
"$NODE_BIN" tools/build_common_listings.js --group bc --top 50 --out "reports/common_listings_bc_top50.json"
"$NODE_BIN" tools/build_common_listings.js --group bc --top 250 --out "reports/common_listings_bc_top250.json"
"$NODE_BIN" tools/build_common_listings.js --group bc --top 1000 --out "reports/common_listings_bc_top1000.json"
"$NODE_BIN" tools/build_common_listings.js --group ab --top 50 --out "reports/common_listings_ab_top50.json"
"$NODE_BIN" tools/build_common_listings.js --group ab --top 250 --out "reports/common_listings_ab_top250.json"
"$NODE_BIN" tools/build_common_listings.js --group ab --top 1000 --out "reports/common_listings_ab_top1000.json"
# Stage only data/report/viz outputs
git add -A data/db reports viz/data

View file

@ -3,17 +3,17 @@
/*
Build a report of canonical SKUs and how many STORES carry each one.
- Store = storeLabel (union across categories).
- Store = storeKey (stable id derived from db filename).
- Canonicalizes via sku_map.
- Debug output while scanning.
- Writes: reports/common_listings_<group>_top<N>.json (or --out)
- Includes per-store numeric price (min live price per store for that SKU).
- Writes one output file (see --out).
Flags:
--top N
--min-stores N
--require-all
--group all|bc|ab
--out path/to/file.json
--out path
*/
const fs = require("fs");
@ -59,6 +59,13 @@ function isSyntheticSkuKey(k) {
return String(k || "").startsWith("u:");
}
function storeKeyFromDbPath(abs) {
  // Derive a stable lowercase store key from a db file path whose basename
  // follows the "<store>__<rest>.json" convention; otherwise fall back to
  // the basename with its ".json" suffix stripped.
  const fileName = path.basename(abs);
  const matched = /^([^_]+)__.+\.json$/i.exec(fileName);
  const key = matched !== null ? matched[1] : fileName.replace(/\.json$/i, "");
  return String(key || "").toLowerCase();
}
/* ---------------- sku helpers ---------------- */
function loadSkuMapOrNull() {
@ -93,34 +100,40 @@ function canonicalize(k, skuMap) {
return k;
}
/* ---------------- grouping ---------------- */
const BC_STORE_KEYS = new Set([
  "gull",
  "strath",
  "bcl",
  "legacy",
  "legacyliquor",
  "tudor",
]);

function groupAllowsStore(group, storeKey) {
  // Decide whether a store key belongs to the requested region group.
  const key = String(storeKey || "").toLowerCase();
  switch (group) {
    case "bc":
      return BC_STORE_KEYS.has(key);
    case "ab":
      return !BC_STORE_KEYS.has(key);
    default:
      return true; // "all" (and any unknown group) admits every store
  }
}
/* ---------------- args ---------------- */
// Parse CLI flags into { top, minStores, requireAll, group, out }.
// NOTE(review): this span looks like a rendered diff with +/- markers lost —
// both the removed multi-line `out` initializer and the added one-liner are
// present (duplicate `const out`), as are two `--group` branches; only one of
// each can exist in the real file. Confirm against the repository.
function parseArgs(argv) {
const out = {
top: 50,
minStores: 2,
requireAll: false,
group: "all", // all|bc|ab
out: "", // optional explicit output path
};
// (diff: added line) compact form of the defaults above
const out = { top: 50, minStores: 2, requireAll: false, group: "all", out: "" };
for (let i = 0; i < argv.length; i++) {
const a = argv[i];
// Each value-taking flag consumes the next argv entry via argv[++i];
// non-numeric values fall back to the defaults (50 / 2).
if (a === "--top" && argv[i + 1]) out.top = Number(argv[++i]) || 50;
else if (a === "--min-stores" && argv[i + 1]) out.minStores = Number(argv[++i]) || 2;
else if (a === "--require-all") out.requireAll = true;
else if (a === "--group" && argv[i + 1]) out.group = String(argv[++i] || "all");
// (diff: added line) same branch, now normalizing the group to lowercase
else if (a === "--group" && argv[i + 1]) out.group = String(argv[++i] || "all").toLowerCase();
else if (a === "--out" && argv[i + 1]) out.out = String(argv[++i] || "");
}
// Unknown group values fall back to "all".
if (out.group !== "all" && out.group !== "bc" && out.group !== "ab") out.group = "all";
return out;
}
function groupStores(group, allStoresSorted) {
  // Filter the sorted store list down to the requested region group.
  const bcStores = new Set(["gull", "strath", "bcl", "legacy", "tudor"]);
  switch (group) {
    case "bc":
      return allStoresSorted.filter((store) => bcStores.has(store));
    case "ab":
      return allStoresSorted.filter((store) => !bcStores.has(store));
    default:
      return allStoresSorted; // "all"
  }
}
/* ---------------- main ---------------- */
function main() {
@ -129,6 +142,9 @@ function main() {
const reportsDir = path.join(repoRoot, "reports");
ensureDir(reportsDir);
const outPath = args.out ? path.join(repoRoot, args.out) : path.join(reportsDir, "common_listings.json");
ensureDir(path.dirname(outPath));
const dbFiles = listDbFiles();
if (!dbFiles.length) {
console.error("No DB files found");
@ -140,8 +156,8 @@ function main() {
console.log(`[debug] skuMap: ${skuMap ? "loaded" : "missing"}`);
console.log(`[debug] scanning ${dbFiles.length} db files`);
const storeToCanon = new Map(); // storeLabel -> Set(canonSku)
const canonAgg = new Map(); // canonSku -> { stores:Set, listings:[], cheapest, perStore:Map(storeLabel -> {priceNum, item}) }
const storeToCanon = new Map(); // storeKey -> Set(canonSku)
const canonAgg = new Map(); // canonSku -> { stores:Set, listings:[], cheapest, storeMin:Map }
let liveRows = 0;
let removedRows = 0;
@ -153,14 +169,17 @@ function main() {
const storeLabel = String(obj.storeLabel || obj.store || "").trim();
if (!storeLabel) continue;
if (!storeToCanon.has(storeLabel)) {
storeToCanon.set(storeLabel, new Set());
const storeKey = storeKeyFromDbPath(abs);
if (!groupAllowsStore(args.group, storeKey)) continue;
if (!storeToCanon.has(storeKey)) {
storeToCanon.set(storeKey, new Set());
}
const rel = path.relative(repoRoot, abs).replace(/\\/g, "/");
const items = Array.isArray(obj.items) ? obj.items : [];
console.log(`[debug] ${rel} store="${storeLabel}" items=${items.length}`);
console.log(`[debug] ${rel} storeKey="${storeKey}" storeLabel="${storeLabel}" items=${items.length}`);
for (const it of items) {
if (!it) continue;
@ -180,17 +199,22 @@ function main() {
const canonSku = canonicalize(skuKey, skuMap);
if (!canonSku) continue;
storeToCanon.get(storeLabel).add(canonSku);
storeToCanon.get(storeKey).add(canonSku);
let agg = canonAgg.get(canonSku);
if (!agg) {
agg = { stores: new Set(), listings: [], cheapest: null, perStore: new Map() };
agg = { stores: new Set(), listings: [], cheapest: null, storeMin: new Map() };
canonAgg.set(canonSku, agg);
}
agg.stores.add(storeLabel);
agg.stores.add(storeKey);
const priceNum = priceToNumber(it.price);
if (priceNum !== null) {
const prev = agg.storeMin.get(storeKey);
if (prev === undefined || priceNum < prev) agg.storeMin.set(storeKey, priceNum);
}
const listing = {
canonSku,
skuKey,
@ -199,6 +223,7 @@ function main() {
price: String(it.price || ""),
priceNum,
url: String(it.url || ""),
storeKey,
storeLabel,
categoryLabel: String(obj.categoryLabel || obj.category || ""),
dbFile: rel,
@ -212,24 +237,12 @@ function main() {
agg.cheapest = { priceNum, item: listing };
}
}
// per-store numeric price (best/lowest numeric; otherwise first seen)
const prev = agg.perStore.get(storeLabel);
if (priceNum !== null) {
if (!prev || prev.priceNum === null || priceNum < prev.priceNum) {
agg.perStore.set(storeLabel, { priceNum, item: listing });
}
} else {
if (!prev) agg.perStore.set(storeLabel, { priceNum: null, item: listing });
}
}
}
const allStores = [...storeToCanon.keys()].sort();
const stores = groupStores(String(args.group || "all").toLowerCase(), allStores);
const stores = [...storeToCanon.keys()].sort();
const storeCount = stores.length;
console.log(`[debug] stores(all) (${allStores.length}): ${allStores.join(", ")}`);
console.log(`[debug] group="${args.group}" stores(${storeCount}): ${stores.join(", ")}`);
console.log(`[debug] liveRows=${liveRows} removedRows=${removedRows} canonSkus=${canonAgg.size}`);
@ -246,29 +259,27 @@ function main() {
const rows = [];
for (const [canonSku, agg] of canonAgg.entries()) {
const groupStoresPresent = stores.filter((s) => agg.stores.has(s));
if (groupStoresPresent.length === 0) continue;
const rep = pickRepresentative(agg);
const missingStores = stores.filter((s) => !agg.stores.has(s));
const storePrices = {};
for (const s of stores) {
const ps = agg.perStore.get(s);
storePrices[s] = ps ? ps.priceNum : null;
const p = agg.storeMin.get(s);
if (Number.isFinite(p)) storePrices[s] = p;
}
rows.push({
canonSku,
storeCount: groupStoresPresent.length,
stores: groupStoresPresent.sort(),
storeCount: agg.stores.size,
stores: [...agg.stores].sort(),
missingStores,
storePrices,
storePrices, // { [storeKey]: number } min live price per store
representative: rep
? {
name: rep.name,
price: rep.price,
priceNum: rep.priceNum,
storeKey: rep.storeKey,
storeLabel: rep.storeLabel,
skuRaw: rep.skuRaw,
skuKey: rep.skuKey,
@ -281,14 +292,14 @@ function main() {
? {
price: agg.cheapest.item.price,
priceNum: agg.cheapest.priceNum,
storeLabel: agg.cheapest.item.storeLabel,
storeKey: agg.cheapest.item.storeKey,
url: agg.cheapest.item.url,
}
: null,
});
}
// stable-ish ordering: primary by store coverage, tie-break by canonSku
// Stable-ish sort: storeCount desc, then canonSku asc (stable diffs over time)
rows.sort((a, b) => {
if (b.storeCount !== a.storeCount) return b.storeCount - a.storeCount;
return String(a.canonSku).localeCompare(String(b.canonSku));
@ -300,11 +311,15 @@ function main() {
const top = filtered.slice(0, args.top);
const safeGroup = String(args.group || "all").toLowerCase();
const payload = {
generatedAt: new Date().toISOString(),
args: { ...args, group: safeGroup },
args: {
top: args.top,
minStores: args.minStores,
requireAll: args.requireAll,
group: args.group,
out: path.relative(repoRoot, outPath).replace(/\\/g, "/"),
},
storeCount,
stores,
totals: {
@ -316,9 +331,6 @@ function main() {
rows: top,
};
const defaultName = `common_listings_${safeGroup}_top${args.top}.json`;
const outPath = args.out ? path.resolve(repoRoot, args.out) : path.join(reportsDir, defaultName);
fs.writeFileSync(outPath, JSON.stringify(payload, null, 2) + "\n", "utf8");
console.log(`Wrote ${path.relative(repoRoot, outPath)} (${top.length} rows)`);
}

View file

@ -1,5 +1,5 @@
import { esc } from "./dom.js";
import { fetchJson, inferGithubOwnerRepo, githubFetchFileAtSha } from "./api.js";
import { fetchJson, inferGithubOwnerRepo, githubFetchFileAtSha, githubListCommits } from "./api.js";
let _chart = null;
@ -24,7 +24,12 @@ function ensureChartJs() {
});
}
/* ---------------- small helpers ---------------- */
/* ---------------- helpers ---------------- */
function dateOnly(iso) {
  // Extract the leading "YYYY-MM-DD" portion of an ISO timestamp; "" if absent.
  const matched = /^(\d{4}-\d{2}-\d{2})/.exec(String(iso ?? ""));
  return matched === null ? "" : matched[1];
}
function medianOfSorted(nums) {
const n = nums.length;
@ -88,34 +93,17 @@ function saveStatsCache(group, size, latestSha, payload) {
} catch {}
}
/* ---------------- data loading ---------------- */
let COMMON_COMMITS = null; // memoized manifest payload (null until loaded / on failure)
async function loadCommonCommitsManifest() {
// Fetch ./data/common_listings_commits.json once and cache it; subsequent
// calls return the cached value. Resolves null (and stays uncached, so the
// next call retries) when the fetch throws.
if (COMMON_COMMITS) return COMMON_COMMITS;
try {
COMMON_COMMITS = await fetchJson("./data/common_listings_commits.json");
return COMMON_COMMITS;
} catch {
COMMON_COMMITS = null;
return null;
}
}
function relReportPath(group, size) {
  // Repo-relative path of the common-listings report for a group/size pair.
  return ["reports/common_listings_", group, "_top", size, ".json"].join("");
}
// Computes per-store daily metric:
// avg over SKUs that store has a price for: ((storePrice - medianPrice) / medianPrice) * 100
function computeDailyStoreSeriesFromReport(report) {
const stores = Array.isArray(report?.stores) ? report.stores.map(String) : [];
const rows = Array.isArray(report?.rows) ? report.rows : [];
const sum = new Map(); // store -> sumPct
const cnt = new Map(); // store -> count
const sum = new Map();
const cnt = new Map();
for (const s of stores) {
sum.set(s, 0);
cnt.set(s, 0);
@ -152,15 +140,59 @@ function computeDailyStoreSeriesFromReport(report) {
return { stores, valuesByStore: out };
}
async function buildStatsSeries({ group, size, onStatus }) {
const manifest = await loadCommonCommitsManifest();
if (!manifest?.files) throw new Error("Missing common_listings_commits.json (viz/data)");
/* ---------------- commits manifest ---------------- */
let COMMON_COMMITS = null;

async function loadCommonCommitsManifest() {
  // Memoized load of the commits manifest; resolves null when unavailable
  // (a failed load leaves the cache null, so a later call retries).
  if (COMMON_COMMITS) return COMMON_COMMITS;
  try {
    COMMON_COMMITS = await fetchJson("./data/common_listings_commits.json");
  } catch {
    COMMON_COMMITS = null;
  }
  return COMMON_COMMITS;
}
// Fallback: GitHub API commits for a path, collapsed to one commit per day
// (the newest commit that day), returned oldest -> newest in the same shape
// as manifest entries: { sha, date, ts }.
async function loadCommitsFallback({ owner, repo, branch, relPath }) {
  let apiCommits = await githubListCommits({ owner, repo, branch, path: relPath });
  apiCommits = Array.isArray(apiCommits) ? apiCommits : [];
  // The API lists commits newest -> oldest, so the first commit seen for a
  // given date is the newest commit of that day.
  const byDate = new Map();
  for (const c of apiCommits) {
    const sha = String(c?.sha || "");
    const ts = String(c?.commit?.committer?.date || c?.commit?.author?.date || "");
    const d = dateOnly(ts);
    if (!sha || !d) continue;
    if (!byDate.has(d)) byDate.set(d, { sha, date: d, ts });
  }
  // Sort explicitly by date instead of a blind reverse(): reverse() is only
  // correct if the API response happened to be strictly date-descending.
  return [...byDate.values()].sort((a, b) => a.date.localeCompare(b.date));
}
async function buildStatsSeries({ group, size, onStatus }) {
const rel = relReportPath(group, size);
const commits = Array.isArray(manifest.files[rel]) ? manifest.files[rel] : null;
const gh = inferGithubOwnerRepo();
const owner = gh.owner;
const repo = gh.repo;
const branch = "data";
const manifest = await loadCommonCommitsManifest();
let commits = Array.isArray(manifest?.files?.[rel]) ? manifest.files[rel] : null;
// Fallback if manifest missing/empty
if (!commits || !commits.length) {
if (typeof onStatus === "function") onStatus(`Commits manifest missing for ${rel}; using GitHub API fallback…`);
commits = await loadCommitsFallback({ owner, repo, branch, relPath: rel });
}
if (!commits || !commits.length) throw new Error(`No commits tracked for ${rel}`);
// commits are oldest -> newest in the manifest
const latest = commits[commits.length - 1];
const latestSha = String(latest?.sha || "");
if (!latestSha) throw new Error(`Invalid latest sha for ${rel}`);
@ -168,18 +200,11 @@ async function buildStatsSeries({ group, size, onStatus }) {
const cached = loadStatsCache(group, size, latestSha);
if (cached) return { latestSha, labels: cached.labels, stores: cached.stores, seriesByStore: cached.seriesByStore };
const gh = inferGithubOwnerRepo();
const owner = gh.owner;
const repo = gh.repo;
const NET_CONCURRENCY = 10;
const limitNet = makeLimiter(NET_CONCURRENCY);
// Fetch newest report once to get the store list (authoritative for the selected file)
if (typeof onStatus === "function") onStatus(`Loading stores…`);
const newestReport = await limitNet(() =>
githubFetchFileAtSha({ owner, repo, sha: latestSha, path: rel })
);
const newestReport = await limitNet(() => githubFetchFileAtSha({ owner, repo, sha: latestSha, path: rel }));
const stores = Array.isArray(newestReport?.stores) ? newestReport.stores.map(String) : [];
if (!stores.length) throw new Error(`No stores found in ${rel} at ${latestSha.slice(0, 7)}`);
@ -189,12 +214,10 @@ async function buildStatsSeries({ group, size, onStatus }) {
const seriesByStore = {};
for (const s of stores) seriesByStore[s] = new Array(labels.length).fill(null);
// Load each day's report and compute that day's per-store average % vs median
if (typeof onStatus === "function") onStatus(`Loading ${labels.length} day(s)…`);
// De-dupe by sha (just in case)
const shaByIdx = commits.map((c) => String(c.sha || ""));
const fileJsonCache = new Map(); // sha -> report json
const fileJsonCache = new Map();
async function loadReportAtSha(sha) {
if (fileJsonCache.has(sha)) return fileJsonCache.get(sha);
@ -203,7 +226,6 @@ async function buildStatsSeries({ group, size, onStatus }) {
return obj;
}
// Batch fetch + compute with limited concurrency
let done = 0;
await Promise.all(
shaByIdx.map((sha, idx) =>
@ -217,7 +239,7 @@ async function buildStatsSeries({ group, size, onStatus }) {
seriesByStore[s][idx] = Number.isFinite(v) ? v : null;
}
} catch {
// leave nulls for this day
// leave nulls
} finally {
done++;
if (typeof onStatus === "function" && (done % 10 === 0 || done === shaByIdx.length)) {
@ -266,10 +288,15 @@ export async function renderStats($app) {
<div class="header">
<div class="headerRow1">
<div class="headerLeft">
<button id="back" class="btn"> Back</button>
<h1 class="h1">Store Price Index</h1>
<div class="small" id="statsStatus">Loading</div>
</div>
<div class="headerRight headerButtons">
<button id="back" class="btn"> Back</button>
</div>
</div>
<div class="headerRow2">
<div style="display:flex; gap:10px; flex-wrap:wrap; align-items:center;">
<label class="small" style="display:flex; gap:8px; align-items:center;">
Stores
@ -291,7 +318,6 @@ export async function renderStats($app) {
</div>
</div>
</div>
</div>
<div class="card">
<div style="height:420px;">
@ -337,7 +363,6 @@ export async function renderStats($app) {
onStatus,
});
// Build datasets: one per store
const datasets = stores.map((s) => ({
label: s,
data: Array.isArray(seriesByStore[s]) ? seriesByStore[s] : labels.map(() => null),