diff --git a/tools/build_email_alert.js b/tools/build_email_alert.js
index f16208b..827d58f 100755
--- a/tools/build_email_alert.js
+++ b/tools/build_email_alert.js
@@ -11,6 +11,10 @@
B) this store is currently the cheapest for that canonical SKU (ties allowed)
- If nothing matches, do not send email.
+ NEW CHANGE (2026-01):
+ - If a store/category DB file is completely new in this commit (file did not exist in previous commit),
+ then ALL of its "new" rows are ignored for the email alert (but still appear in report text elsewhere).
+
Outputs:
reports/alert.html
reports/alert_subject.txt
@@ -41,6 +45,18 @@ function gitShowJson(sha, filePath) {
}
}
+function gitFileExistsAtSha(sha, filePath) { // Reports whether the object "<sha>:<filePath>" exists in git.
+ if (!sha) return false; // a falsy sha (empty/undefined) means there is no commit to look in
+ try {
+ execFileSync("git", ["cat-file", "-e", `${sha}:${filePath}`], { // -e: existence check via exit status only
+ stdio: ["ignore", "ignore", "ignore"], // stdin/stdout/stderr are all discarded
+ });
+ return true; // execFileSync returned without throwing
+ } catch {
+ return false; // every failure of the git call is treated as "file does not exist"
+ }
+}
+
function readJson(filePath) {
try {
return JSON.parse(fs.readFileSync(filePath, "utf8"));
@@ -121,7 +137,6 @@ function listDbFilesOnDisk() {
// We reuse your existing canonical SKU mapping logic.
function loadSkuMapOrNull() {
try {
- // exists on data branch because you merge main -> data before committing runs
// eslint-disable-next-line node/no-missing-require
const { loadSkuMap } = require(path.join(process.cwd(), "src", "utils", "sku_map"));
return loadSkuMap({ dbDir: path.join(process.cwd(), "data", "db") });
@@ -137,7 +152,6 @@ function normalizeSkuKeyOrEmpty({ skuRaw, storeLabel, url }) {
const k = normalizeSkuKey(skuRaw, { storeLabel, url });
return k ? String(k) : "";
} catch {
- // fallback: use 6-digit SKU if present; else url hash-ish (still stable enough for 1 run)
const m = String(skuRaw ?? "").match(/\b(\d{6})\b/);
if (m) return m[1];
if (url) return `u:${normToken(storeLabel)}:${normToken(url)}`;
@@ -196,12 +210,12 @@ function diffDb(prevObj, nextObj, skuMap) {
newItems.push(now);
continue;
}
- // restored not used for now (you didn’t request it)
}
for (const [canon, now] of nextLive.entries()) {
const was = prevLive.get(canon);
if (!was) continue;
+
const a = String(was.price || "");
const b = String(now.price || "");
if (a === b) continue;
@@ -238,14 +252,11 @@ function buildCurrentIndexes(skuMap) {
if (!byStoreCanon.has(storeLabel)) byStoreCanon.set(storeLabel, new Map());
for (const it of live.values()) {
- // availability
if (!availability.has(it.canonSku)) availability.set(it.canonSku, new Set());
availability.get(it.canonSku).add(storeLabel);
- // per-store lookup
byStoreCanon.get(storeLabel).set(it.canonSku, it);
- // cheapest
const p = priceToNumber(it.price);
if (p === null) continue;
@@ -282,7 +293,9 @@ function renderHtml({ title, subtitle, uniqueNews, bigSales, commitUrl, pagesUrl
}
function card(it, extraHtml) {
- const img = it.img ? `` : "";
+ const img = it.img
+ ? `
`
+ : "";
const name = htmlEscape(it.name || "");
const store = htmlEscape(it.storeLabel || "");
const cat = htmlEscape(it.categoryLabel || "");
@@ -355,9 +368,7 @@ function writeGithubOutput(kv) {
const outPath = process.env.GITHUB_OUTPUT;
if (!outPath) return;
const lines = [];
- for (const [k, v] of Object.entries(kv)) {
- lines.push(`${k}=${String(v)}`);
- }
+ for (const [k, v] of Object.entries(kv)) lines.push(`${k}=${String(v)}`);
fs.appendFileSync(outPath, lines.join("\n") + "\n", "utf8");
}
@@ -383,34 +394,36 @@ function main() {
return;
}
- // Current-state indexes (across ALL stores) from disk
const { availability, cheapest, byStoreCanon } = buildCurrentIndexes(skuMap);
const uniqueNews = [];
const bigSales = [];
for (const file of changed) {
+ const existedBefore = gitFileExistsAtSha(parentSha, file);
+ const existsNow = gitFileExistsAtSha(headSha, file);
+
+ // NEW FEATURE: a DB file that did not exist in the parent commit is skipped entirely, so none of its rows reach the alert.
+ if (!existedBefore && existsNow) {
+ continue;
+ }
+
const prevObj = gitShowJson(parentSha, file);
const nextObj = gitShowJson(headSha, file);
if (!prevObj && !nextObj) continue;
const { newItems, priceDown } = diffDb(prevObj, nextObj, skuMap);
- // New unique listings (canon sku available at exactly 1 store)
for (const it of newItems) {
const stores = availability.get(it.canonSku);
const storeCount = stores ? stores.size : 0;
if (storeCount !== 1) continue;
-
- // ensure the only store is this one
if (!stores.has(it.storeLabel)) continue;
- // refresh with current item to get img if present now
const cur = (byStoreCanon.get(it.storeLabel) || new Map()).get(it.canonSku) || it;
uniqueNews.push(cur);
}
- // Sales: >=20% and cheapest store currently (ties allowed)
for (const it of priceDown) {
const pct = it.pct;
if (!Number.isFinite(pct) || pct < 20) continue;
@@ -421,11 +434,9 @@ function main() {
const newN = priceToNumber(it.newPrice);
if (newN === null) continue;
- // must be at cheapest price, and this store among cheapest stores
if (best.priceNum !== newN) continue;
if (!best.stores.has(it.storeLabel)) continue;
- // refresh with current item for img/name/category if needed
const cur = (byStoreCanon.get(it.storeLabel) || new Map()).get(it.canonSku) || it;
bigSales.push({
@@ -437,7 +448,6 @@ function main() {
}
}
- // de-dupe by (canonSku, storeLabel)
function dedupe(arr) {
const out = [];
const seen = new Set();
diff --git a/tools/build_viz_index.js b/tools/build_viz_index.js
index cb96fb2..5c01454 100755
--- a/tools/build_viz_index.js
+++ b/tools/build_viz_index.js
@@ -39,6 +39,7 @@ function main() {
ensureDir(outDir);
const items = [];
+ let liveCount = 0;
for (const file of listJsonFiles(dbDir)) {
const obj = readJson(file);
@@ -53,11 +54,14 @@ function main() {
const dbFile = path
.relative(repoRoot, file)
- .replace(/\\/g, "/"); // for GitHub raw paths on Windows too
+ .replace(/\\/g, "/");
const arr = Array.isArray(obj.items) ? obj.items : [];
for (const it of arr) {
- if (!it || it.removed) continue;
+ if (!it) continue;
+
+ const removed = Boolean(it.removed);
+ if (!removed) liveCount++;
const sku = String(it.sku || "").trim();
const name = String(it.name || "").trim();
@@ -71,6 +75,7 @@ function main() {
price,
url,
img,
+ removed, // NEW (additive): allows viz to show history / removed-only items
store,
storeLabel,
category,
@@ -83,14 +88,17 @@ function main() {
}
items.sort((a, b) => {
- const ak = `${a.sku}|${a.storeLabel}|${a.name}|${a.url}`;
- const bk = `${b.sku}|${b.storeLabel}|${b.name}|${b.url}`;
+ const ak = `${a.sku}|${a.storeLabel}|${a.removed ? 1 : 0}|${a.name}|${a.url}`;
+ const bk = `${b.sku}|${b.storeLabel}|${b.removed ? 1 : 0}|${b.name}|${b.url}`;
return ak.localeCompare(bk);
});
const outObj = {
generatedAt: new Date().toISOString(),
+ // Additive metadata. Old readers can ignore.
+ includesRemoved: true,
count: items.length,
+ countLive: liveCount,
items,
};
diff --git a/tools/build_viz_recent.js b/tools/build_viz_recent.js
index ea4965d..141b53f 100755
--- a/tools/build_viz_recent.js
+++ b/tools/build_viz_recent.js
@@ -21,6 +21,17 @@ function gitShowJson(sha, filePath) {
}
}
+function gitFileExistsAtSha(sha, filePath) { // Reports whether the object "<sha>:<filePath>" exists in git.
+ if (!sha) return false; // a falsy sha (empty/undefined) means there is no commit to look in
+ try {
+ execFileSync("git", ["cat-file", "-e", `${sha}:${filePath}`], { // -e: existence check via exit status only
+ stdio: ["ignore", "ignore", "ignore"], // stdin/stdout/stderr are all discarded
+ });
+ return true; // execFileSync returned without throwing
+ } catch {
+ return false; // every failure of the git call is treated as "file does not exist"
+ }
+}
function gitListTreeFiles(sha, dirRel) {
try {
@@ -84,14 +95,13 @@ function keySkuForItem(it, storeLabel) {
function mapBySku(obj, { includeRemoved } = { includeRemoved: false }) {
const m = new Map();
const items = Array.isArray(obj?.items) ? obj.items : [];
-
const storeLabel = String(obj?.storeLabel || obj?.store || "");
for (const it of items) {
if (!it) continue;
const sku = keySkuForItem(it, storeLabel);
- if (!sku) continue; // still skip truly keyless rows (no sku + no url)
+ if (!sku) continue;
const removed = Boolean(it.removed);
if (!includeRemoved && removed) continue;
@@ -119,7 +129,6 @@ function diffDb(prevObj, nextObj) {
const removedItems = [];
const priceChanges = [];
- // NEW + RESTORED
for (const [sku, now] of nextLive.entries()) {
const had = prevAll.get(sku);
if (!had) {
@@ -132,7 +141,6 @@ function diffDb(prevObj, nextObj) {
}
}
- // REMOVED
for (const [sku, was] of prevLive.entries()) {
const nxt = nextAll.get(sku);
if (!nxt || nxt.removed) {
@@ -140,7 +148,6 @@ function diffDb(prevObj, nextObj) {
}
}
- // PRICE CHANGES
for (const [sku, now] of nextLive.entries()) {
const was = prevLive.get(sku);
if (!was) continue;
@@ -184,7 +191,6 @@ function firstParentSha(sha) {
try {
const out = runGit(["rev-list", "--parents", "-n", "1", sha]);
const parts = out.split(/\s+/).filter(Boolean);
- // parts[0] is sha, parts[1] is first parent (if any)
return parts.length >= 2 ? parts[1] : "";
} catch {
return "";
@@ -192,13 +198,11 @@ function firstParentSha(sha) {
}
function listChangedDbFiles(fromSha, toSha) {
- // toSha can be "WORKTREE"
if (!fromSha && toSha && toSha !== "WORKTREE") {
return gitListTreeFiles(toSha, "data/db");
}
if (!fromSha && toSha === "WORKTREE") {
- // Fall back: list files on disk
try {
return fs
.readdirSync(path.join(process.cwd(), "data", "db"), { withFileTypes: true })
@@ -234,7 +238,6 @@ function logDbCommitsSince(sinceIso) {
const d = dateOnly(ts);
arr.push({ sha, ts, date: d });
}
- // newest -> oldest from git; convert to oldest -> newest
arr.reverse();
return arr;
} catch {
@@ -258,13 +261,8 @@ function main() {
const headSha = getHeadShaOrEmpty();
const items = [];
- // Collect committed runs in the last N days (touching data/db)
const commits = headSha ? logDbCommitsSince(sinceIso) : [];
- // Build diff pairs:
- // parent(of first in window) -> first
- // then each consecutive commit -> next
- // then HEAD -> WORKTREE (so this run shows up before the commit exists)
const pairs = [];
if (commits.length) {
@@ -319,14 +317,24 @@ function main() {
if (!prevObj && !nextObj) continue;
- const storeLabel = String(
- nextObj?.storeLabel || nextObj?.store || prevObj?.storeLabel || prevObj?.store || ""
- );
- const categoryLabel = String(
- nextObj?.categoryLabel || nextObj?.category || prevObj?.categoryLabel || prevObj?.category || ""
- );
+ const storeLabel = String(nextObj?.storeLabel || nextObj?.store || prevObj?.storeLabel || prevObj?.store || "");
+ const categoryLabel = String(nextObj?.categoryLabel || nextObj?.category || prevObj?.categoryLabel || prevObj?.category || "");
- const { newItems, restoredItems, removedItems, priceChanges } = diffDb(prevObj, nextObj);
+ // NEW FEATURE:
+ // If this DB file did not exist at fromSha, then treat it as a "new store/category file"
+ // and DO NOT emit its "new"/"restored" items into recent.json (frontpage).
+ // (Report text is unaffected elsewhere.)
+ const isNewStoreFile =
+ Boolean(fromSha) &&
+ !gitFileExistsAtSha(fromSha, file) &&
+ (toSha === "WORKTREE" ? fs.existsSync(path.join(repoRoot, file)) : gitFileExistsAtSha(toSha, file));
+
+ let { newItems, restoredItems, removedItems, priceChanges } = diffDb(prevObj, nextObj);
+
+ if (isNewStoreFile) {
+ newItems = [];
+ restoredItems = [];
+ }
for (const it of newItems) {
items.push({
@@ -399,10 +407,8 @@ function main() {
}
}
- // Newest first
items.sort((a, b) => String(b.ts).localeCompare(String(a.ts)));
- // Keep file size under control (but still allows multiple runs/day over the window)
const trimmed = items.slice(0, maxItems);
const payload = {
diff --git a/viz/app/catalog.js b/viz/app/catalog.js
index 859529e..e9b23b3 100644
--- a/viz/app/catalog.js
+++ b/viz/app/catalog.js
@@ -14,6 +14,7 @@ export function aggregateBySku(listings, canonicalizeSkuFn) {
const name = String(r?.name || "");
const url = String(r?.url || "");
const storeLabel = String(r?.storeLabel || r?.store || "");
+ const removed = Boolean(r?.removed);
const img = normImg(r?.img || r?.image || r?.thumb || "");
@@ -29,18 +30,22 @@ export function aggregateBySku(listings, canonicalizeSkuFn) {
cheapestPriceStr: pStr || "",
cheapestPriceNum: pNum,
cheapestStoreLabel: storeLabel || "",
- stores: new Set(),
+ stores: new Set(), // LIVE stores only
+ storesEver: new Set(), // live + removed presence (history)
sampleUrl: url || "",
_searchParts: [],
searchText: "",
- _imgByName: new Map(), // name -> img
+ _imgByName: new Map(),
_imgAny: "",
};
bySku.set(sku, agg);
}
- if (storeLabel) agg.stores.add(storeLabel);
+ if (storeLabel) {
+ agg.storesEver.add(storeLabel);
+ if (!removed) agg.stores.add(storeLabel);
+ }
if (!agg.sampleUrl && url) agg.sampleUrl = url;
// Keep first non-empty name, but keep thumbnail aligned to chosen name
@@ -56,8 +61,8 @@ export function aggregateBySku(listings, canonicalizeSkuFn) {
if (name) agg._imgByName.set(name, img);
}
- // cheapest (across all merged rows)
- if (pNum !== null) {
+ // cheapest across LIVE rows only (so removed history doesn't "win")
+ if (!removed && pNum !== null) {
if (agg.cheapestPriceNum === null || pNum < agg.cheapestPriceNum) {
agg.cheapestPriceNum = pNum;
agg.cheapestPriceStr = pStr || "";
@@ -71,6 +76,7 @@ export function aggregateBySku(listings, canonicalizeSkuFn) {
if (name) agg._searchParts.push(name);
if (url) agg._searchParts.push(url);
if (storeLabel) agg._searchParts.push(storeLabel);
+ if (removed) agg._searchParts.push("removed");
}
const out = [...bySku.values()];
@@ -85,11 +91,14 @@ export function aggregateBySku(listings, canonicalizeSkuFn) {
delete it._imgByName;
delete it._imgAny;
+ it.storeCount = it.stores.size;
+ it.storeCountEver = it.storesEver.size;
+ it.removedEverywhere = it.storeCount === 0;
+
it._searchParts.push(it.sku);
it._searchParts.push(it.name || "");
it._searchParts.push(it.sampleUrl || "");
it._searchParts.push(it.cheapestStoreLabel || "");
-
it.searchText = normSearchText(it._searchParts.join(" | "));
delete it._searchParts;
}
diff --git a/viz/app/item_page.js b/viz/app/item_page.js
index 7abb1ef..b46ac7a 100644
--- a/viz/app/item_page.js
+++ b/viz/app/item_page.js
@@ -169,24 +169,31 @@ export async function renderItem($app, skuInput) {
// include toSku + all fromSkus mapped to it
const skuGroup = rules.groupForCanonical(sku);
- const cur = all.filter((x) => skuGroup.has(String(keySkuForRow(x) || "")));
+ // IMPORTANT CHANGE:
+ // index.json now includes removed rows too. Split live vs all.
+ const allRows = all.filter((x) => skuGroup.has(String(keySkuForRow(x) || "")));
+ const liveRows = allRows.filter((x) => !Boolean(x?.removed));
- if (!cur.length) {
- $title.textContent = "Item not found in current index";
- $status.textContent = "Tip: index.json only includes current (non-removed) items.";
+ if (!allRows.length) {
+ $title.textContent = "Item not found";
+ $status.textContent = "No matching SKU in index.";
if ($thumbBox) $thumbBox.innerHTML = `