diff --git a/scripts/run_daily.sh b/scripts/run_daily.sh index cfe160d..a517ece 100755 --- a/scripts/run_daily.sh +++ b/scripts/run_daily.sh @@ -101,24 +101,21 @@ if [[ $rc -ne 0 ]]; then exit $rc fi +# Build common listings reports FIRST (so commits manifest can see them) +for group in all bc ab; do + for top in 50 250 1000; do + "$NODE_BIN" tools/build_common_listings.js \ + --group "$group" \ + --top "$top" \ + --out "reports/common_listings_${group}_top${top}.json" + done +done + # Build viz artifacts on the data branch "$NODE_BIN" tools/build_viz_index.js "$NODE_BIN" tools/build_viz_commits.js "$NODE_BIN" tools/build_viz_recent.js -# Build common listings artifacts (9 files) -"$NODE_BIN" tools/build_common_listings.js --group all --top 50 --out "reports/common_listings_all_top50.json" -"$NODE_BIN" tools/build_common_listings.js --group all --top 250 --out "reports/common_listings_all_top250.json" -"$NODE_BIN" tools/build_common_listings.js --group all --top 1000 --out "reports/common_listings_all_top1000.json" - -"$NODE_BIN" tools/build_common_listings.js --group bc --top 50 --out "reports/common_listings_bc_top50.json" -"$NODE_BIN" tools/build_common_listings.js --group bc --top 250 --out "reports/common_listings_bc_top250.json" -"$NODE_BIN" tools/build_common_listings.js --group bc --top 1000 --out "reports/common_listings_bc_top1000.json" - -"$NODE_BIN" tools/build_common_listings.js --group ab --top 50 --out "reports/common_listings_ab_top50.json" -"$NODE_BIN" tools/build_common_listings.js --group ab --top 250 --out "reports/common_listings_ab_top250.json" -"$NODE_BIN" tools/build_common_listings.js --group ab --top 1000 --out "reports/common_listings_ab_top1000.json" - # Stage only data/report/viz outputs git add -A data/db reports viz/data diff --git a/tools/build_common_listings.js b/tools/build_common_listings.js index 7dc3fcf..e7abcb8 100644 --- a/tools/build_common_listings.js +++ b/tools/build_common_listings.js @@ -3,17 +3,17 @@ /* Build a report of canonical SKUs and how many STORES carry each one. - - Store = storeLabel (union across categories). + - Store = storeKey (stable id derived from db filename). - Canonicalizes via sku_map. - - Debug output while scanning. - - Writes: reports/common_listings__top.json (or --out) + - Includes per-store numeric price (min live price per store for that SKU). + - Writes one output file (see --out). Flags: --top N --min-stores N --require-all --group all|bc|ab - --out path/to/file.json + --out path */ const fs = require("fs"); @@ -59,6 +59,13 @@ function isSyntheticSkuKey(k) { return String(k || "").startsWith("u:"); } +function storeKeyFromDbPath(abs) { + const base = path.basename(abs); + const m = base.match(/^([^_]+)__.+\.json$/i); + const k = m ? m[1] : base.replace(/\.json$/i, ""); + return String(k || "").toLowerCase(); +} + /* ---------------- sku helpers ---------------- */ function loadSkuMapOrNull() { @@ -93,34 +100,40 @@ function canonicalize(k, skuMap) { return k; } +/* ---------------- grouping ---------------- */ + +const BC_STORE_KEYS = new Set([ + "gull", + "strath", + "bcl", + "legacy", + "legacyliquor", + "tudor", +]); + +function groupAllowsStore(group, storeKey) { + const k = String(storeKey || "").toLowerCase(); + if (group === "bc") return BC_STORE_KEYS.has(k); + if (group === "ab") return !BC_STORE_KEYS.has(k); + return true; // all +} + /* ---------------- args ---------------- */ function parseArgs(argv) { - const out = { - top: 50, - minStores: 2, - requireAll: false, - group: "all", // all|bc|ab - out: "", // optional explicit output path - }; + const out = { top: 50, minStores: 2, requireAll: false, group: "all", out: "" }; for (let i = 0; i < argv.length; i++) { const a = argv[i]; if (a === "--top" && argv[i + 1]) out.top = Number(argv[++i]) || 50; else if (a === "--min-stores" && argv[i + 1]) out.minStores = Number(argv[++i]) || 2; else if (a === "--require-all") out.requireAll = true; - else if (a === "--group" && argv[i + 1]) out.group = String(argv[++i] || "all"); + else if (a === "--group" && argv[i + 1]) out.group = String(argv[++i] || "all").toLowerCase(); else if (a === "--out" && argv[i + 1]) out.out = String(argv[++i] || ""); } + if (out.group !== "all" && out.group !== "bc" && out.group !== "ab") out.group = "all"; return out; } -function groupStores(group, allStoresSorted) { - const bc = new Set(["gull", "strath", "bcl", "legacy", "tudor"]); - if (group === "bc") return allStoresSorted.filter((s) => bc.has(s)); - if (group === "ab") return allStoresSorted.filter((s) => !bc.has(s)); - return allStoresSorted; // "all" -} - /* ---------------- main ---------------- */ function main() { @@ -129,6 +142,9 @@ function main() { const reportsDir = path.join(repoRoot, "reports"); ensureDir(reportsDir); + const outPath = args.out ? path.join(repoRoot, args.out) : path.join(reportsDir, "common_listings.json"); + ensureDir(path.dirname(outPath)); + const dbFiles = listDbFiles(); if (!dbFiles.length) { console.error("No DB files found"); @@ -140,8 +156,8 @@ function main() { console.log(`[debug] skuMap: ${skuMap ? "loaded" : "missing"}`); console.log(`[debug] scanning ${dbFiles.length} db files`); - const storeToCanon = new Map(); // storeLabel -> Set(canonSku) - const canonAgg = new Map(); // canonSku -> { stores:Set, listings:[], cheapest, perStore:Map(storeLabel -> {priceNum, item}) } + const storeToCanon = new Map(); // storeKey -> Set(canonSku) + const canonAgg = new Map(); // canonSku -> { stores:Set, listings:[], cheapest, storeMin:Map } let liveRows = 0; let removedRows = 0; @@ -153,14 +169,17 @@ function main() { const storeLabel = String(obj.storeLabel || obj.store || "").trim(); if (!storeLabel) continue; - if (!storeToCanon.has(storeLabel)) { - storeToCanon.set(storeLabel, new Set()); + const storeKey = storeKeyFromDbPath(abs); + if (!groupAllowsStore(args.group, storeKey)) continue; + + if (!storeToCanon.has(storeKey)) { + storeToCanon.set(storeKey, new Set()); } const rel = path.relative(repoRoot, abs).replace(/\\/g, "/"); const items = Array.isArray(obj.items) ? obj.items : []; - console.log(`[debug] ${rel} store="${storeLabel}" items=${items.length}`); + console.log(`[debug] ${rel} storeKey="${storeKey}" storeLabel="${storeLabel}" items=${items.length}`); for (const it of items) { if (!it) continue; @@ -180,17 +199,22 @@ function main() { const canonSku = canonicalize(skuKey, skuMap); if (!canonSku) continue; - storeToCanon.get(storeLabel).add(canonSku); + storeToCanon.get(storeKey).add(canonSku); let agg = canonAgg.get(canonSku); if (!agg) { - agg = { stores: new Set(), listings: [], cheapest: null, perStore: new Map() }; + agg = { stores: new Set(), listings: [], cheapest: null, storeMin: new Map() }; canonAgg.set(canonSku, agg); } - agg.stores.add(storeLabel); + agg.stores.add(storeKey); const priceNum = priceToNumber(it.price); + if (priceNum !== null) { + const prev = agg.storeMin.get(storeKey); + if (prev === undefined || priceNum < prev) agg.storeMin.set(storeKey, priceNum); + } + const listing = { canonSku, skuKey, @@ -199,6 +223,7 @@ function main() { price: String(it.price || ""), priceNum, url: String(it.url || ""), + storeKey, storeLabel, categoryLabel: String(obj.categoryLabel || obj.category || ""), dbFile: rel, @@ -212,24 +237,12 @@ function main() { agg.cheapest = { priceNum, item: listing }; } } - - // per-store numeric price (best/lowest numeric; otherwise first seen) - const prev = agg.perStore.get(storeLabel); - if (priceNum !== null) { - if (!prev || prev.priceNum === null || priceNum < prev.priceNum) { - agg.perStore.set(storeLabel, { priceNum, item: listing }); - } - } else { - if (!prev) agg.perStore.set(storeLabel, { priceNum: null, item: listing }); - } } } - const allStores = [...storeToCanon.keys()].sort(); - const stores = groupStores(String(args.group || "all").toLowerCase(), allStores); + const stores = [...storeToCanon.keys()].sort(); const storeCount = stores.length; - console.log(`[debug] stores(all) (${allStores.length}): ${allStores.join(", ")}`); console.log(`[debug] group="${args.group}" stores(${storeCount}): ${stores.join(", ")}`); console.log(`[debug] liveRows=${liveRows} removedRows=${removedRows} canonSkus=${canonAgg.size}`); @@ -246,29 +259,27 @@ function main() { const rows = []; for (const [canonSku, agg] of canonAgg.entries()) { - const groupStoresPresent = stores.filter((s) => agg.stores.has(s)); - if (groupStoresPresent.length === 0) continue; - const rep = pickRepresentative(agg); const missingStores = stores.filter((s) => !agg.stores.has(s)); const storePrices = {}; for (const s of stores) { - const ps = agg.perStore.get(s); - storePrices[s] = ps ? ps.priceNum : null; + const p = agg.storeMin.get(s); + if (Number.isFinite(p)) storePrices[s] = p; } rows.push({ canonSku, - storeCount: groupStoresPresent.length, - stores: groupStoresPresent.sort(), + storeCount: agg.stores.size, + stores: [...agg.stores].sort(), missingStores, - storePrices, + storePrices, // { [storeKey]: number } min live price per store representative: rep ? { name: rep.name, price: rep.price, priceNum: rep.priceNum, + storeKey: rep.storeKey, storeLabel: rep.storeLabel, skuRaw: rep.skuRaw, skuKey: rep.skuKey, @@ -281,14 +292,14 @@ function main() { ? { price: agg.cheapest.item.price, priceNum: agg.cheapest.priceNum, - storeLabel: agg.cheapest.item.storeLabel, + storeKey: agg.cheapest.item.storeKey, url: agg.cheapest.item.url, } : null, }); } - // stable-ish ordering: primary by store coverage, tie-break by canonSku + // Stable-ish sort: storeCount desc, then canonSku asc (stable diffs over time) rows.sort((a, b) => { if (b.storeCount !== a.storeCount) return b.storeCount - a.storeCount; return String(a.canonSku).localeCompare(String(b.canonSku)); @@ -300,11 +311,15 @@ function main() { const top = filtered.slice(0, args.top); - const safeGroup = String(args.group || "all").toLowerCase(); - const payload = { generatedAt: new Date().toISOString(), - args: { ...args, group: safeGroup }, + args: { + top: args.top, + minStores: args.minStores, + requireAll: args.requireAll, + group: args.group, + out: path.relative(repoRoot, outPath).replace(/\\/g, "/"), + }, storeCount, stores, totals: { @@ -316,9 +331,6 @@ function main() { rows: top, }; - const defaultName = `common_listings_${safeGroup}_top${args.top}.json`; - const outPath = args.out ? path.resolve(repoRoot, args.out) : path.join(reportsDir, defaultName); - fs.writeFileSync(outPath, JSON.stringify(payload, null, 2) + "\n", "utf8"); console.log(`Wrote ${path.relative(repoRoot, outPath)} (${top.length} rows)`); } diff --git a/viz/app/stats_page.js b/viz/app/stats_page.js index b3db6f2..f413b74 100644 --- a/viz/app/stats_page.js +++ b/viz/app/stats_page.js @@ -1,5 +1,5 @@ import { esc } from "./dom.js"; -import { fetchJson, inferGithubOwnerRepo, githubFetchFileAtSha } from "./api.js"; +import { fetchJson, inferGithubOwnerRepo, githubFetchFileAtSha, githubListCommits } from "./api.js"; let _chart = null; @@ -24,7 +24,12 @@ function ensureChartJs() { }); } -/* ---------------- small helpers ---------------- */ +/* ---------------- helpers ---------------- */ + +function dateOnly(iso) { + const m = String(iso ?? "").match(/^(\d{4}-\d{2}-\d{2})/); + return m ? m[1] : ""; +} function medianOfSorted(nums) { const n = nums.length; @@ -88,34 +93,17 @@ function saveStatsCache(group, size, latestSha, payload) { } catch {} } -/* ---------------- data loading ---------------- */ - -let COMMON_COMMITS = null; - -async function loadCommonCommitsManifest() { - if (COMMON_COMMITS) return COMMON_COMMITS; - try { - COMMON_COMMITS = await fetchJson("./data/common_listings_commits.json"); - return COMMON_COMMITS; - } catch { - COMMON_COMMITS = null; - return null; - } -} - function relReportPath(group, size) { return `reports/common_listings_${group}_top${size}.json`; } -// Computes per-store daily metric: // avg over SKUs that store has a price for: ((storePrice - medianPrice) / medianPrice) * 100 function computeDailyStoreSeriesFromReport(report) { const stores = Array.isArray(report?.stores) ? report.stores.map(String) : []; const rows = Array.isArray(report?.rows) ? report.rows : []; - const sum = new Map(); // store -> sumPct - const cnt = new Map(); // store -> count - + const sum = new Map(); + const cnt = new Map(); for (const s of stores) { sum.set(s, 0); cnt.set(s, 0); @@ -152,15 +140,59 @@ function computeDailyStoreSeriesFromReport(report) { return { stores, valuesByStore: out }; } -async function buildStatsSeries({ group, size, onStatus }) { - const manifest = await loadCommonCommitsManifest(); - if (!manifest?.files) throw new Error("Missing common_listings_commits.json (viz/data)"); +/* ---------------- commits manifest ---------------- */ +let COMMON_COMMITS = null; + +async function loadCommonCommitsManifest() { + if (COMMON_COMMITS) return COMMON_COMMITS; + try { + COMMON_COMMITS = await fetchJson("./data/common_listings_commits.json"); + return COMMON_COMMITS; + } catch { + COMMON_COMMITS = null; + return null; + } +} + +// Fallback: GitHub API commits for a path, collapsed to one commit per day (newest that day), +// returned oldest -> newest, same shape as manifest entries. +async function loadCommitsFallback({ owner, repo, branch, relPath }) { + let apiCommits = await githubListCommits({ owner, repo, branch, path: relPath }); + apiCommits = Array.isArray(apiCommits) ? apiCommits : []; + + // newest -> oldest from API; we want newest-per-day then oldest -> newest + const byDate = new Map(); + for (const c of apiCommits) { + const sha = String(c?.sha || ""); + const ts = String(c?.commit?.committer?.date || c?.commit?.author?.date || ""); + const d = dateOnly(ts); + if (!sha || !d) continue; + if (!byDate.has(d)) byDate.set(d, { sha, date: d, ts }); + } + + return [...byDate.values()].reverse(); +} + +async function buildStatsSeries({ group, size, onStatus }) { const rel = relReportPath(group, size); - const commits = Array.isArray(manifest.files[rel]) ? manifest.files[rel] : null; + const gh = inferGithubOwnerRepo(); + const owner = gh.owner; + const repo = gh.repo; + const branch = "data"; + + const manifest = await loadCommonCommitsManifest(); + + let commits = Array.isArray(manifest?.files?.[rel]) ? manifest.files[rel] : null; + + // Fallback if manifest missing/empty + if (!commits || !commits.length) { + if (typeof onStatus === "function") onStatus(`Commits manifest missing for ${rel}; using GitHub API fallback…`); + commits = await loadCommitsFallback({ owner, repo, branch, relPath: rel }); + } + if (!commits || !commits.length) throw new Error(`No commits tracked for ${rel}`); - // commits are oldest -> newest in the manifest const latest = commits[commits.length - 1]; const latestSha = String(latest?.sha || ""); if (!latestSha) throw new Error(`Invalid latest sha for ${rel}`); @@ -168,18 +200,11 @@ async function buildStatsSeries({ group, size, onStatus }) { const cached = loadStatsCache(group, size, latestSha); if (cached) return { latestSha, labels: cached.labels, stores: cached.stores, seriesByStore: cached.seriesByStore }; - const gh = inferGithubOwnerRepo(); - const owner = gh.owner; - const repo = gh.repo; - const NET_CONCURRENCY = 10; const limitNet = makeLimiter(NET_CONCURRENCY); - // Fetch newest report once to get the store list (authoritative for the selected file) if (typeof onStatus === "function") onStatus(`Loading stores…`); - const newestReport = await limitNet(() => - githubFetchFileAtSha({ owner, repo, sha: latestSha, path: rel }) - ); + const newestReport = await limitNet(() => githubFetchFileAtSha({ owner, repo, sha: latestSha, path: rel })); const stores = Array.isArray(newestReport?.stores) ? newestReport.stores.map(String) : []; if (!stores.length) throw new Error(`No stores found in ${rel} at ${latestSha.slice(0, 7)}`); @@ -189,12 +214,10 @@ async function buildStatsSeries({ group, size, onStatus }) { const seriesByStore = {}; for (const s of stores) seriesByStore[s] = new Array(labels.length).fill(null); - // Load each day's report and compute that day’s per-store average % vs median if (typeof onStatus === "function") onStatus(`Loading ${labels.length} day(s)…`); - // De-dupe by sha (just in case) const shaByIdx = commits.map((c) => String(c.sha || "")); - const fileJsonCache = new Map(); // sha -> report json + const fileJsonCache = new Map(); async function loadReportAtSha(sha) { if (fileJsonCache.has(sha)) return fileJsonCache.get(sha); @@ -203,7 +226,6 @@ async function buildStatsSeries({ group, size, onStatus }) { return obj; } - // Batch fetch + compute with limited concurrency let done = 0; await Promise.all( shaByIdx.map((sha, idx) => @@ -217,7 +239,7 @@ async function buildStatsSeries({ group, size, onStatus }) { seriesByStore[s][idx] = Number.isFinite(v) ? v : null; } } catch { - // leave nulls for this day + // leave nulls } finally { done++; if (typeof onStatus === "function" && (done % 10 === 0 || done === shaByIdx.length)) { @@ -266,29 +288,33 @@ export async function renderStats($app) {
-

Store Price Index

Loading…
-
- +
+ +
+
- -
+
+
+ + +
@@ -337,7 +363,6 @@ export async function renderStats($app) { onStatus, }); - // Build datasets: one per store const datasets = stores.map((s) => ({ label: s, data: Array.isArray(seriesByStore[s]) ? seriesByStore[s] : labels.map(() => null),