diff --git a/tools/build_email_alert.js b/tools/build_email_alert.js index f16208b..827d58f 100755 --- a/tools/build_email_alert.js +++ b/tools/build_email_alert.js @@ -11,6 +11,10 @@ B) this store is currently the cheapest for that canonical SKU (ties allowed) - If nothing matches, do not send email. + NEW CHANGE (2026-01): + - If a store/category DB file is completely new in this commit (file did not exist in previous commit), + then ALL of its "new" rows are ignored for the email alert (but still appear in report text elsewhere). + Outputs: reports/alert.html reports/alert_subject.txt @@ -41,6 +45,18 @@ function gitShowJson(sha, filePath) { } } +function gitFileExistsAtSha(sha, filePath) { + if (!sha) return false; + try { + execFileSync("git", ["cat-file", "-e", `${sha}:${filePath}`], { + stdio: ["ignore", "ignore", "ignore"], + }); + return true; + } catch { + return false; + } +} + function readJson(filePath) { try { return JSON.parse(fs.readFileSync(filePath, "utf8")); @@ -121,7 +137,6 @@ function listDbFilesOnDisk() { // We reuse your existing canonical SKU mapping logic. function loadSkuMapOrNull() { try { - // exists on data branch because you merge main -> data before committing runs // eslint-disable-next-line node/no-missing-require const { loadSkuMap } = require(path.join(process.cwd(), "src", "utils", "sku_map")); return loadSkuMap({ dbDir: path.join(process.cwd(), "data", "db") }); @@ -137,7 +152,6 @@ function normalizeSkuKeyOrEmpty({ skuRaw, storeLabel, url }) { const k = normalizeSkuKey(skuRaw, { storeLabel, url }); return k ? String(k) : ""; } catch { - // fallback: use 6-digit SKU if present; else url hash-ish (still stable enough for 1 run) const m = String(skuRaw ?? "").match(/\b(\d{6})\b/); if (m) return m[1]; if (url) return `u:${normToken(storeLabel)}:${normToken(url)}`; @@ -196,12 +210,12 @@ function diffDb(prevObj, nextObj, skuMap) { newItems.push(now); continue; } - // restored not used for now (you didn’t request it) } for (const [canon, now] of nextLive.entries()) { const was = prevLive.get(canon); if (!was) continue; + const a = String(was.price || ""); const b = String(now.price || ""); if (a === b) continue; @@ -238,14 +252,11 @@ function buildCurrentIndexes(skuMap) { if (!byStoreCanon.has(storeLabel)) byStoreCanon.set(storeLabel, new Map()); for (const it of live.values()) { - // availability if (!availability.has(it.canonSku)) availability.set(it.canonSku, new Set()); availability.get(it.canonSku).add(storeLabel); - // per-store lookup byStoreCanon.get(storeLabel).set(it.canonSku, it); - // cheapest const p = priceToNumber(it.price); if (p === null) continue; @@ -282,7 +293,9 @@ function renderHtml({ title, subtitle, uniqueNews, bigSales, commitUrl, pagesUrl } function card(it, extraHtml) { - const img = it.img ? `` : ""; + const img = it.img + ? `` + : ""; const name = htmlEscape(it.name || ""); const store = htmlEscape(it.storeLabel || ""); const cat = htmlEscape(it.categoryLabel || ""); @@ -355,9 +368,7 @@ function writeGithubOutput(kv) { const outPath = process.env.GITHUB_OUTPUT; if (!outPath) return; const lines = []; - for (const [k, v] of Object.entries(kv)) { - lines.push(`${k}=${String(v)}`); - } + for (const [k, v] of Object.entries(kv)) lines.push(`${k}=${String(v)}`); fs.appendFileSync(outPath, lines.join("\n") + "\n", "utf8"); } @@ -383,34 +394,36 @@ function main() { return; } - // Current-state indexes (across ALL stores) from disk const { availability, cheapest, byStoreCanon } = buildCurrentIndexes(skuMap); const uniqueNews = []; const bigSales = []; for (const file of changed) { + const existedBefore = gitFileExistsAtSha(parentSha, file); + const existsNow = gitFileExistsAtSha(headSha, file); + + // NEW FEATURE: if this DB file is brand new, ignore its "new items" for alert. + if (!existedBefore && existsNow) { + continue; + } + const prevObj = gitShowJson(parentSha, file); const nextObj = gitShowJson(headSha, file); if (!prevObj && !nextObj) continue; const { newItems, priceDown } = diffDb(prevObj, nextObj, skuMap); - // New unique listings (canon sku available at exactly 1 store) for (const it of newItems) { const stores = availability.get(it.canonSku); const storeCount = stores ? stores.size : 0; if (storeCount !== 1) continue; - - // ensure the only store is this one if (!stores.has(it.storeLabel)) continue; - // refresh with current item to get img if present now const cur = (byStoreCanon.get(it.storeLabel) || new Map()).get(it.canonSku) || it; uniqueNews.push(cur); } - // Sales: >=20% and cheapest store currently (ties allowed) for (const it of priceDown) { const pct = it.pct; if (!Number.isFinite(pct) || pct < 20) continue; @@ -421,11 +434,9 @@ function main() { const newN = priceToNumber(it.newPrice); if (newN === null) continue; - // must be at cheapest price, and this store among cheapest stores if (best.priceNum !== newN) continue; if (!best.stores.has(it.storeLabel)) continue; - // refresh with current item for img/name/category if needed const cur = (byStoreCanon.get(it.storeLabel) || new Map()).get(it.canonSku) || it; bigSales.push({ @@ -437,7 +448,6 @@ function main() { } } - // de-dupe by (canonSku, storeLabel) function dedupe(arr) { const out = []; const seen = new Set(); diff --git a/tools/build_viz_index.js b/tools/build_viz_index.js index cb96fb2..5c01454 100755 --- a/tools/build_viz_index.js +++ b/tools/build_viz_index.js @@ -39,6 +39,7 @@ function main() { ensureDir(outDir); const items = []; + let liveCount = 0; for (const file of listJsonFiles(dbDir)) { const obj = readJson(file); @@ -53,11 +54,14 @@ function main() { const dbFile = path .relative(repoRoot, file) - .replace(/\\/g, "/"); // for GitHub raw paths on Windows too + .replace(/\\/g, "/"); const arr = Array.isArray(obj.items) ? obj.items : []; for (const it of arr) { - if (!it || it.removed) continue; + if (!it) continue; + + const removed = Boolean(it.removed); + if (!removed) liveCount++; const sku = String(it.sku || "").trim(); const name = String(it.name || "").trim(); @@ -71,6 +75,7 @@ function main() { price, url, img, + removed, // NEW (additive): allows viz to show history / removed-only items store, storeLabel, category, @@ -83,14 +88,17 @@ function main() { } items.sort((a, b) => { - const ak = `${a.sku}|${a.storeLabel}|${a.name}|${a.url}`; - const bk = `${b.sku}|${b.storeLabel}|${b.name}|${b.url}`; + const ak = `${a.sku}|${a.storeLabel}|${a.removed ? 1 : 0}|${a.name}|${a.url}`; + const bk = `${b.sku}|${b.storeLabel}|${b.removed ? 1 : 0}|${b.name}|${b.url}`; return ak.localeCompare(bk); }); const outObj = { generatedAt: new Date().toISOString(), + // Additive metadata. Old readers can ignore. + includesRemoved: true, count: items.length, + countLive: liveCount, items, }; diff --git a/tools/build_viz_recent.js b/tools/build_viz_recent.js index ea4965d..141b53f 100755 --- a/tools/build_viz_recent.js +++ b/tools/build_viz_recent.js @@ -21,6 +21,17 @@ function gitShowJson(sha, filePath) { } } +function gitFileExistsAtSha(sha, filePath) { + if (!sha) return false; + try { + execFileSync("git", ["cat-file", "-e", `${sha}:${filePath}`], { + stdio: ["ignore", "ignore", "ignore"], + }); + return true; + } catch { + return false; + } +} function gitListTreeFiles(sha, dirRel) { try { @@ -84,14 +95,13 @@ function keySkuForItem(it, storeLabel) { function mapBySku(obj, { includeRemoved } = { includeRemoved: false }) { const m = new Map(); const items = Array.isArray(obj?.items) ? obj.items : []; - const storeLabel = String(obj?.storeLabel || obj?.store || ""); for (const it of items) { if (!it) continue; const sku = keySkuForItem(it, storeLabel); - if (!sku) continue; // still skip truly keyless rows (no sku + no url) + if (!sku) continue; const removed = Boolean(it.removed); if (!includeRemoved && removed) continue; @@ -119,7 +129,6 @@ function diffDb(prevObj, nextObj) { const removedItems = []; const priceChanges = []; - // NEW + RESTORED for (const [sku, now] of nextLive.entries()) { const had = prevAll.get(sku); if (!had) { @@ -132,7 +141,6 @@ function diffDb(prevObj, nextObj) { } } - // REMOVED for (const [sku, was] of prevLive.entries()) { const nxt = nextAll.get(sku); if (!nxt || nxt.removed) { @@ -140,7 +148,6 @@ function diffDb(prevObj, nextObj) { } } - // PRICE CHANGES for (const [sku, now] of nextLive.entries()) { const was = prevLive.get(sku); if (!was) continue; @@ -184,7 +191,6 @@ function firstParentSha(sha) { try { const out = runGit(["rev-list", "--parents", "-n", "1", sha]); const parts = out.split(/\s+/).filter(Boolean); - // parts[0] is sha, parts[1] is first parent (if any) return parts.length >= 2 ? parts[1] : ""; } catch { return ""; @@ -192,13 +198,11 @@ function firstParentSha(sha) { } function listChangedDbFiles(fromSha, toSha) { - // toSha can be "WORKTREE" if (!fromSha && toSha && toSha !== "WORKTREE") { return gitListTreeFiles(toSha, "data/db"); } if (!fromSha && toSha === "WORKTREE") { - // Fall back: list files on disk try { return fs .readdirSync(path.join(process.cwd(), "data", "db"), { withFileTypes: true }) @@ -234,7 +238,6 @@ function logDbCommitsSince(sinceIso) { const d = dateOnly(ts); arr.push({ sha, ts, date: d }); } - // newest -> oldest from git; convert to oldest -> newest arr.reverse(); return arr; } catch { @@ -258,13 +261,8 @@ function main() { const headSha = getHeadShaOrEmpty(); const items = []; - // Collect committed runs in the last N days (touching data/db) const commits = headSha ? logDbCommitsSince(sinceIso) : []; - // Build diff pairs: - // parent(of first in window) -> first - // then each consecutive commit -> next - // then HEAD -> WORKTREE (so this run shows up before the commit exists) const pairs = []; if (commits.length) { @@ -319,14 +317,24 @@ function main() { if (!prevObj && !nextObj) continue; - const storeLabel = String( - nextObj?.storeLabel || nextObj?.store || prevObj?.storeLabel || prevObj?.store || "" - ); - const categoryLabel = String( - nextObj?.categoryLabel || nextObj?.category || prevObj?.categoryLabel || prevObj?.category || "" - ); + const storeLabel = String(nextObj?.storeLabel || nextObj?.store || prevObj?.storeLabel || prevObj?.store || ""); + const categoryLabel = String(nextObj?.categoryLabel || nextObj?.category || prevObj?.categoryLabel || prevObj?.category || ""); - const { newItems, restoredItems, removedItems, priceChanges } = diffDb(prevObj, nextObj); + // NEW FEATURE: + // If this DB file did not exist at fromSha, then treat it as a "new store/category file" + // and DO NOT emit its "new"/"restored" items into recent.json (frontpage). + // (Report text is unaffected elsewhere.) + const isNewStoreFile = + Boolean(fromSha) && + !gitFileExistsAtSha(fromSha, file) && + (toSha === "WORKTREE" ? fs.existsSync(path.join(repoRoot, file)) : gitFileExistsAtSha(toSha, file)); + + let { newItems, restoredItems, removedItems, priceChanges } = diffDb(prevObj, nextObj); + + if (isNewStoreFile) { + newItems = []; + restoredItems = []; + } for (const it of newItems) { items.push({ @@ -399,10 +407,8 @@ function main() { } } - // Newest first items.sort((a, b) => String(b.ts).localeCompare(String(a.ts))); - // Keep file size under control (but still allows multiple runs/day over the window) const trimmed = items.slice(0, maxItems); const payload = { diff --git a/viz/app/catalog.js b/viz/app/catalog.js index 859529e..e9b23b3 100644 --- a/viz/app/catalog.js +++ b/viz/app/catalog.js @@ -14,6 +14,7 @@ export function aggregateBySku(listings, canonicalizeSkuFn) { const name = String(r?.name || ""); const url = String(r?.url || ""); const storeLabel = String(r?.storeLabel || r?.store || ""); + const removed = Boolean(r?.removed); const img = normImg(r?.img || r?.image || r?.thumb || ""); @@ -29,18 +30,22 @@ export function aggregateBySku(listings, canonicalizeSkuFn) { cheapestPriceStr: pStr || "", cheapestPriceNum: pNum, cheapestStoreLabel: storeLabel || "", - stores: new Set(), + stores: new Set(), // LIVE stores only + storesEver: new Set(), // live + removed presence (history) sampleUrl: url || "", _searchParts: [], searchText: "", - _imgByName: new Map(), // name -> img + _imgByName: new Map(), _imgAny: "", }; bySku.set(sku, agg); } - if (storeLabel) agg.stores.add(storeLabel); + if (storeLabel) { + agg.storesEver.add(storeLabel); + if (!removed) agg.stores.add(storeLabel); + } if (!agg.sampleUrl && url) agg.sampleUrl = url; // Keep first non-empty name, but keep thumbnail aligned to chosen name @@ -56,8 +61,8 @@ export function aggregateBySku(listings, canonicalizeSkuFn) { if (name) agg._imgByName.set(name, img); } - // cheapest (across all merged rows) - if (pNum !== null) { + // cheapest across LIVE rows only (so removed history doesn't "win") + if (!removed && pNum !== null) { if (agg.cheapestPriceNum === null || pNum < agg.cheapestPriceNum) { agg.cheapestPriceNum = pNum; agg.cheapestPriceStr = pStr || ""; @@ -71,6 +76,7 @@ export function aggregateBySku(listings, canonicalizeSkuFn) { if (name) agg._searchParts.push(name); if (url) agg._searchParts.push(url); if (storeLabel) agg._searchParts.push(storeLabel); + if (removed) agg._searchParts.push("removed"); } const out = [...bySku.values()]; @@ -85,11 +91,14 @@ export function aggregateBySku(listings, canonicalizeSkuFn) { delete it._imgByName; delete it._imgAny; + it.storeCount = it.stores.size; + it.storeCountEver = it.storesEver.size; + it.removedEverywhere = it.storeCount === 0; + it._searchParts.push(it.sku); it._searchParts.push(it.name || ""); it._searchParts.push(it.sampleUrl || ""); it._searchParts.push(it.cheapestStoreLabel || ""); - it.searchText = normSearchText(it._searchParts.join(" | ")); delete it._searchParts; } diff --git a/viz/app/item_page.js b/viz/app/item_page.js index 7abb1ef..b46ac7a 100644 --- a/viz/app/item_page.js +++ b/viz/app/item_page.js @@ -169,24 +169,31 @@ export async function renderItem($app, skuInput) { // include toSku + all fromSkus mapped to it const skuGroup = rules.groupForCanonical(sku); - const cur = all.filter((x) => skuGroup.has(String(keySkuForRow(x) || ""))); + // IMPORTANT CHANGE: + // index.json now includes removed rows too. Split live vs all. + const allRows = all.filter((x) => skuGroup.has(String(keySkuForRow(x) || ""))); + const liveRows = allRows.filter((x) => !Boolean(x?.removed)); - if (!cur.length) { - $title.textContent = "Item not found in current index"; - $status.textContent = "Tip: index.json only includes current (non-removed) items."; + if (!allRows.length) { + $title.textContent = "Item not found"; + $status.textContent = "No matching SKU in index."; if ($thumbBox) $thumbBox.innerHTML = `
`; return; } - // pick bestName by most common across merged rows + const isRemovedEverywhere = liveRows.length === 0; + + // pick bestName by most common across LIVE rows (fallback to allRows) + const basisForName = liveRows.length ? liveRows : allRows; + const nameCounts = new Map(); - for (const r of cur) { + for (const r of basisForName) { const n = String(r.name || ""); if (!n) continue; nameCounts.set(n, (nameCounts.get(n) || 0) + 1); } - let bestName = cur[0].name || `(SKU ${sku})`; + let bestName = basisForName[0].name || `(SKU ${sku})`; let bestCount = -1; for (const [n, c] of nameCounts.entries()) { if (c > bestCount) { @@ -196,11 +203,13 @@ export async function renderItem($app, skuInput) { } $title.textContent = bestName; - // choose thumbnail from cheapest listing across merged rows (fallback: first that matches name) + // choose thumbnail from cheapest LIVE listing (fallback: any matching name; fallback: any) let bestImg = ""; let bestPrice = null; - for (const r of cur) { + const basisForThumb = liveRows.length ? liveRows : allRows; + + for (const r of basisForThumb) { const p = parsePriceToNumber(r.price); const img = String(r?.img || "").trim(); if (p !== null && img) { @@ -211,7 +220,7 @@ export async function renderItem($app, skuInput) { } } if (!bestImg) { - for (const r of cur) { + for (const r of basisForThumb) { if (String(r?.name || "") === String(bestName || "") && String(r?.img || "").trim()) { bestImg = String(r.img).trim(); break; @@ -219,7 +228,7 @@ export async function renderItem($app, skuInput) { } } if (!bestImg) { - for (const r of cur) { + for (const r of basisForThumb) { if (String(r?.img || "").trim()) { bestImg = String(r.img).trim(); break; @@ -229,28 +238,35 @@ export async function renderItem($app, skuInput) { $thumbBox.innerHTML = bestImg ? renderThumbHtml(bestImg, "detailThumb") : `
`; - // show store links from merged rows (may include multiple per store; OK) - // show store links from merged rows (may include multiple per store; OK) - // If two identical links exist, only render one. + // Render store links: + // - LIVE stores first (normal) + // - then removed-history stores with a "(removed)" suffix const seenLinks = new Set(); - $links.innerHTML = cur + const linkRows = allRows .slice() - .sort((a, b) => String(a.storeLabel || "").localeCompare(String(b.storeLabel || ""))) + .sort((a, b) => { + const ar = Boolean(a?.removed) ? 1 : 0; + const br = Boolean(b?.removed) ? 1 : 0; + if (ar !== br) return ar - br; // live first + return String(a.storeLabel || "").localeCompare(String(b.storeLabel || "")); + }) .filter((r) => { const href = String(r?.url || "").trim(); const text = String(r?.storeLabel || r?.store || "Store").trim(); if (!href) return false; - - // "identical" = same href + same rendered text - const key = `${href}|${text}`; + const suffix = Boolean(r?.removed) ? " (removed)" : ""; + const key = `${href}|${text}${suffix}`; if (seenLinks.has(key)) return false; seenLinks.add(key); return true; - }) + }); + + $links.innerHTML = linkRows .map((r) => { const href = String(r.url || "").trim(); const text = String(r.storeLabel || r.store || "Store").trim(); - return `${esc(text)}`; + const suffix = Boolean(r?.removed) ? " (removed)" : ""; + return `${esc(text + suffix)}`; }) .join(""); @@ -259,17 +275,19 @@ export async function renderItem($app, skuInput) { const repo = gh.repo; const branch = "data"; - // dbFile -> rows (because merged skus can exist in same dbFile) - const byDbFile = new Map(); - for (const r of cur) { + // Group DB files by historical presence (LIVE or REMOVED rows). + const byDbFileAll = new Map(); + for (const r of allRows) { if (!r.dbFile) continue; const k = String(r.dbFile); - if (!byDbFile.has(k)) byDbFile.set(k, []); - byDbFile.get(k).push(r); + if (!byDbFileAll.has(k)) byDbFileAll.set(k, []); + byDbFileAll.get(k).push(r); } - const dbFiles = [...byDbFile.keys()].sort(); + const dbFiles = [...byDbFileAll.keys()].sort(); - $status.textContent = `Loading history for ${dbFiles.length} store file(s)…`; + $status.textContent = isRemovedEverywhere + ? `Item is removed everywhere (showing historical chart across ${dbFiles.length} store file(s))…` + : `Loading history for ${dbFiles.length} store file(s)…`; const manifest = await loadDbCommitsManifest(); const allDatesSet = new Set(); @@ -282,8 +300,13 @@ export async function renderItem($app, skuInput) { const skuKeys = [...skuGroup]; for (const dbFile of dbFiles) { - const rows = byDbFile.get(dbFile) || []; - const storeLabel = String(rows[0]?.storeLabel || rows[0]?.store || dbFile); + const rowsAll = byDbFileAll.get(dbFile) || []; + + // Determine current LIVE rows for this dbFile: + // (we don't want to add a "today" point if the listing is removed in this store now) + const rowsLive = rowsAll.filter((r) => !Boolean(r?.removed)); + + const storeLabel = String(rowsAll[0]?.storeLabel || rowsAll[0]?.store || dbFile); const cached = loadSeriesCache(sku, dbFile, cacheBust); if (cached && Array.isArray(cached.points) && cached.points.length) { @@ -346,6 +369,7 @@ export async function renderItem($app, skuInput) { } } + // findMinPriceForSkuGroupInDb already ignores removed rows inside each DB snapshot. const pNum = findMinPriceForSkuGroupInDb(obj, skuKeys, storeLabel); points.set(d, pNum); @@ -354,17 +378,19 @@ export async function renderItem($app, skuInput) { compactPoints.push({ date: d, price: pNum }); } - // Always add "today" from current index (min across merged rows in this store/dbFile) - let curMin = null; - for (const r of rows) { - const p = parsePriceToNumber(r.price); - if (p !== null) curMin = curMin === null ? p : Math.min(curMin, p); - } - if (curMin !== null) { - points.set(today, curMin); - values.push(curMin); - allDatesSet.add(today); - compactPoints.push({ date: today, price: curMin }); + // Add "today" point ONLY if listing currently exists in this store/dbFile (live rows present) + if (rowsLive.length) { + let curMin = null; + for (const r of rowsLive) { + const p = parsePriceToNumber(r.price); + if (p !== null) curMin = curMin === null ? p : Math.min(curMin, p); + } + if (curMin !== null) { + points.set(today, curMin); + values.push(curMin); + allDatesSet.add(today); + compactPoints.push({ date: today, price: curMin }); + } } saveSeriesCache(sku, dbFile, cacheBust, compactPoints); @@ -416,6 +442,10 @@ export async function renderItem($app, skuInput) { }); $status.textContent = manifest - ? `History loaded from prebuilt manifest (1 point/day) + current run. Points=${labels.length}.` - : `History loaded (GitHub API fallback; 1 point/day) + current run. Points=${labels.length}.`; + ? (isRemovedEverywhere + ? `History loaded (removed everywhere). Source=prebuilt manifest. Points=${labels.length}.` + : `History loaded from prebuilt manifest (1 point/day) + current run. Points=${labels.length}.`) + : (isRemovedEverywhere + ? `History loaded (removed everywhere). Source=GitHub API fallback. Points=${labels.length}.` + : `History loaded (GitHub API fallback; 1 point/day) + current run. Points=${labels.length}.`); }