Feat: Refactor for removed items and new stores

This commit is contained in:
Brennan Wilkes (Text Groove) 2026-01-24 11:48:58 -08:00
parent 13c82a74ba
commit 0659775fb0
5 changed files with 158 additions and 95 deletions

View file

@ -11,6 +11,10 @@
B) this store is currently the cheapest for that canonical SKU (ties allowed)
- If nothing matches, do not send email.
NEW CHANGE (2026-01):
- If a store/category DB file is completely new in this commit (file did not exist in previous commit),
then ALL of its "new" rows are ignored for the email alert (but still appear in report text elsewhere).
Outputs:
reports/alert.html
reports/alert_subject.txt
@ -41,6 +45,18 @@ function gitShowJson(sha, filePath) {
}
}
/**
 * Report whether `filePath` is present at the revision `sha`.
 *
 * Runs `git cat-file -e <sha>:<filePath>` with stdin/stdout/stderr all
 * ignored. Any thrown error (non-zero exit, failure to spawn git, ...) is
 * treated as "does not exist".
 *
 * @param {string} sha - Revision to inspect; a falsy value returns `false` without invoking git.
 * @param {string} filePath - Path appended to `sha` as `<sha>:<filePath>`.
 * @returns {boolean} `true` when the git command completes without throwing, otherwise `false`.
 */
function gitFileExistsAtSha(sha, filePath) {
  if (!sha) {
    return false;
  }

  // Discard all child output: only success vs. failure matters here.
  const quietStdio = { stdio: ["ignore", "ignore", "ignore"] };

  try {
    const objectSpec = `${sha}:${filePath}`;
    execFileSync("git", ["cat-file", "-e", objectSpec], quietStdio);
  } catch {
    return false;
  }
  return true;
}
function readJson(filePath) {
try {
return JSON.parse(fs.readFileSync(filePath, "utf8"));
@ -121,7 +137,6 @@ function listDbFilesOnDisk() {
// We reuse your existing canonical SKU mapping logic.
function loadSkuMapOrNull() {
try {
// exists on data branch because you merge main -> data before committing runs
// eslint-disable-next-line node/no-missing-require
const { loadSkuMap } = require(path.join(process.cwd(), "src", "utils", "sku_map"));
return loadSkuMap({ dbDir: path.join(process.cwd(), "data", "db") });
@ -137,7 +152,6 @@ function normalizeSkuKeyOrEmpty({ skuRaw, storeLabel, url }) {
const k = normalizeSkuKey(skuRaw, { storeLabel, url });
return k ? String(k) : "";
} catch {
// fallback: use 6-digit SKU if present; else url hash-ish (still stable enough for 1 run)
const m = String(skuRaw ?? "").match(/\b(\d{6})\b/);
if (m) return m[1];
if (url) return `u:${normToken(storeLabel)}:${normToken(url)}`;
@ -196,12 +210,12 @@ function diffDb(prevObj, nextObj, skuMap) {
newItems.push(now);
continue;
}
// restored not used for now (you didnt request it)
}
for (const [canon, now] of nextLive.entries()) {
const was = prevLive.get(canon);
if (!was) continue;
const a = String(was.price || "");
const b = String(now.price || "");
if (a === b) continue;
@ -238,14 +252,11 @@ function buildCurrentIndexes(skuMap) {
if (!byStoreCanon.has(storeLabel)) byStoreCanon.set(storeLabel, new Map());
for (const it of live.values()) {
// availability
if (!availability.has(it.canonSku)) availability.set(it.canonSku, new Set());
availability.get(it.canonSku).add(storeLabel);
// per-store lookup
byStoreCanon.get(storeLabel).set(it.canonSku, it);
// cheapest
const p = priceToNumber(it.price);
if (p === null) continue;
@ -282,7 +293,9 @@ function renderHtml({ title, subtitle, uniqueNews, bigSales, commitUrl, pagesUrl
}
function card(it, extraHtml) {
const img = it.img ? `<img src="${htmlEscape(it.img)}" width="84" height="84" style="object-fit:contain;border-radius:8px;border:1px solid #eee;background:#fff" />` : "";
const img = it.img
? `<img src="${htmlEscape(it.img)}" width="84" height="84" style="object-fit:contain;border-radius:8px;border:1px solid #eee;background:#fff" />`
: "";
const name = htmlEscape(it.name || "");
const store = htmlEscape(it.storeLabel || "");
const cat = htmlEscape(it.categoryLabel || "");
@ -355,9 +368,7 @@ function writeGithubOutput(kv) {
const outPath = process.env.GITHUB_OUTPUT;
if (!outPath) return;
const lines = [];
for (const [k, v] of Object.entries(kv)) {
lines.push(`${k}=${String(v)}`);
}
for (const [k, v] of Object.entries(kv)) lines.push(`${k}=${String(v)}`);
fs.appendFileSync(outPath, lines.join("\n") + "\n", "utf8");
}
@ -383,34 +394,36 @@ function main() {
return;
}
// Current-state indexes (across ALL stores) from disk
const { availability, cheapest, byStoreCanon } = buildCurrentIndexes(skuMap);
const uniqueNews = [];
const bigSales = [];
for (const file of changed) {
const existedBefore = gitFileExistsAtSha(parentSha, file);
const existsNow = gitFileExistsAtSha(headSha, file);
// NEW FEATURE: if this DB file is brand new, ignore its "new items" for alert.
if (!existedBefore && existsNow) {
continue;
}
const prevObj = gitShowJson(parentSha, file);
const nextObj = gitShowJson(headSha, file);
if (!prevObj && !nextObj) continue;
const { newItems, priceDown } = diffDb(prevObj, nextObj, skuMap);
// New unique listings (canon sku available at exactly 1 store)
for (const it of newItems) {
const stores = availability.get(it.canonSku);
const storeCount = stores ? stores.size : 0;
if (storeCount !== 1) continue;
// ensure the only store is this one
if (!stores.has(it.storeLabel)) continue;
// refresh with current item to get img if present now
const cur = (byStoreCanon.get(it.storeLabel) || new Map()).get(it.canonSku) || it;
uniqueNews.push(cur);
}
// Sales: >=20% and cheapest store currently (ties allowed)
for (const it of priceDown) {
const pct = it.pct;
if (!Number.isFinite(pct) || pct < 20) continue;
@ -421,11 +434,9 @@ function main() {
const newN = priceToNumber(it.newPrice);
if (newN === null) continue;
// must be at cheapest price, and this store among cheapest stores
if (best.priceNum !== newN) continue;
if (!best.stores.has(it.storeLabel)) continue;
// refresh with current item for img/name/category if needed
const cur = (byStoreCanon.get(it.storeLabel) || new Map()).get(it.canonSku) || it;
bigSales.push({
@ -437,7 +448,6 @@ function main() {
}
}
// de-dupe by (canonSku, storeLabel)
function dedupe(arr) {
const out = [];
const seen = new Set();

View file

@ -39,6 +39,7 @@ function main() {
ensureDir(outDir);
const items = [];
let liveCount = 0;
for (const file of listJsonFiles(dbDir)) {
const obj = readJson(file);
@ -53,11 +54,14 @@ function main() {
const dbFile = path
.relative(repoRoot, file)
.replace(/\\/g, "/"); // for GitHub raw paths on Windows too
.replace(/\\/g, "/");
const arr = Array.isArray(obj.items) ? obj.items : [];
for (const it of arr) {
if (!it || it.removed) continue;
if (!it) continue;
const removed = Boolean(it.removed);
if (!removed) liveCount++;
const sku = String(it.sku || "").trim();
const name = String(it.name || "").trim();
@ -71,6 +75,7 @@ function main() {
price,
url,
img,
removed, // NEW (additive): allows viz to show history / removed-only items
store,
storeLabel,
category,
@ -83,14 +88,17 @@ function main() {
}
items.sort((a, b) => {
const ak = `${a.sku}|${a.storeLabel}|${a.name}|${a.url}`;
const bk = `${b.sku}|${b.storeLabel}|${b.name}|${b.url}`;
const ak = `${a.sku}|${a.storeLabel}|${a.removed ? 1 : 0}|${a.name}|${a.url}`;
const bk = `${b.sku}|${b.storeLabel}|${b.removed ? 1 : 0}|${b.name}|${b.url}`;
return ak.localeCompare(bk);
});
const outObj = {
generatedAt: new Date().toISOString(),
// Additive metadata. Old readers can ignore.
includesRemoved: true,
count: items.length,
countLive: liveCount,
items,
};

View file

@ -21,6 +21,17 @@ function gitShowJson(sha, filePath) {
}
}
/**
 * Report whether `filePath` is present at the revision `sha`.
 *
 * Runs `git cat-file -e <sha>:<filePath>` with stdin/stdout/stderr all
 * ignored. Any thrown error (non-zero exit, failure to spawn git, ...) is
 * treated as "does not exist".
 *
 * @param {string} sha - Revision to inspect; a falsy value returns `false` without invoking git.
 * @param {string} filePath - Path appended to `sha` as `<sha>:<filePath>`.
 * @returns {boolean} `true` when the git command completes without throwing, otherwise `false`.
 */
function gitFileExistsAtSha(sha, filePath) {
  if (!sha) {
    return false;
  }

  // Discard all child output: only success vs. failure matters here.
  const quietStdio = { stdio: ["ignore", "ignore", "ignore"] };

  try {
    const objectSpec = `${sha}:${filePath}`;
    execFileSync("git", ["cat-file", "-e", objectSpec], quietStdio);
  } catch {
    return false;
  }
  return true;
}
function gitListTreeFiles(sha, dirRel) {
try {
@ -84,14 +95,13 @@ function keySkuForItem(it, storeLabel) {
function mapBySku(obj, { includeRemoved } = { includeRemoved: false }) {
const m = new Map();
const items = Array.isArray(obj?.items) ? obj.items : [];
const storeLabel = String(obj?.storeLabel || obj?.store || "");
for (const it of items) {
if (!it) continue;
const sku = keySkuForItem(it, storeLabel);
if (!sku) continue; // still skip truly keyless rows (no sku + no url)
if (!sku) continue;
const removed = Boolean(it.removed);
if (!includeRemoved && removed) continue;
@ -119,7 +129,6 @@ function diffDb(prevObj, nextObj) {
const removedItems = [];
const priceChanges = [];
// NEW + RESTORED
for (const [sku, now] of nextLive.entries()) {
const had = prevAll.get(sku);
if (!had) {
@ -132,7 +141,6 @@ function diffDb(prevObj, nextObj) {
}
}
// REMOVED
for (const [sku, was] of prevLive.entries()) {
const nxt = nextAll.get(sku);
if (!nxt || nxt.removed) {
@ -140,7 +148,6 @@ function diffDb(prevObj, nextObj) {
}
}
// PRICE CHANGES
for (const [sku, now] of nextLive.entries()) {
const was = prevLive.get(sku);
if (!was) continue;
@ -184,7 +191,6 @@ function firstParentSha(sha) {
try {
const out = runGit(["rev-list", "--parents", "-n", "1", sha]);
const parts = out.split(/\s+/).filter(Boolean);
// parts[0] is sha, parts[1] is first parent (if any)
return parts.length >= 2 ? parts[1] : "";
} catch {
return "";
@ -192,13 +198,11 @@ function firstParentSha(sha) {
}
function listChangedDbFiles(fromSha, toSha) {
// toSha can be "WORKTREE"
if (!fromSha && toSha && toSha !== "WORKTREE") {
return gitListTreeFiles(toSha, "data/db");
}
if (!fromSha && toSha === "WORKTREE") {
// Fall back: list files on disk
try {
return fs
.readdirSync(path.join(process.cwd(), "data", "db"), { withFileTypes: true })
@ -234,7 +238,6 @@ function logDbCommitsSince(sinceIso) {
const d = dateOnly(ts);
arr.push({ sha, ts, date: d });
}
// newest -> oldest from git; convert to oldest -> newest
arr.reverse();
return arr;
} catch {
@ -258,13 +261,8 @@ function main() {
const headSha = getHeadShaOrEmpty();
const items = [];
// Collect committed runs in the last N days (touching data/db)
const commits = headSha ? logDbCommitsSince(sinceIso) : [];
// Build diff pairs:
// parent(of first in window) -> first
// then each consecutive commit -> next
// then HEAD -> WORKTREE (so this run shows up before the commit exists)
const pairs = [];
if (commits.length) {
@ -319,14 +317,24 @@ function main() {
if (!prevObj && !nextObj) continue;
const storeLabel = String(
nextObj?.storeLabel || nextObj?.store || prevObj?.storeLabel || prevObj?.store || ""
);
const categoryLabel = String(
nextObj?.categoryLabel || nextObj?.category || prevObj?.categoryLabel || prevObj?.category || ""
);
const storeLabel = String(nextObj?.storeLabel || nextObj?.store || prevObj?.storeLabel || prevObj?.store || "");
const categoryLabel = String(nextObj?.categoryLabel || nextObj?.category || prevObj?.categoryLabel || prevObj?.category || "");
const { newItems, restoredItems, removedItems, priceChanges } = diffDb(prevObj, nextObj);
// NEW FEATURE:
// If this DB file did not exist at fromSha, then treat it as a "new store/category file"
// and DO NOT emit its "new"/"restored" items into recent.json (frontpage).
// (Report text is unaffected elsewhere.)
const isNewStoreFile =
Boolean(fromSha) &&
!gitFileExistsAtSha(fromSha, file) &&
(toSha === "WORKTREE" ? fs.existsSync(path.join(repoRoot, file)) : gitFileExistsAtSha(toSha, file));
let { newItems, restoredItems, removedItems, priceChanges } = diffDb(prevObj, nextObj);
if (isNewStoreFile) {
newItems = [];
restoredItems = [];
}
for (const it of newItems) {
items.push({
@ -399,10 +407,8 @@ function main() {
}
}
// Newest first
items.sort((a, b) => String(b.ts).localeCompare(String(a.ts)));
// Keep file size under control (but still allows multiple runs/day over the window)
const trimmed = items.slice(0, maxItems);
const payload = {

View file

@ -14,6 +14,7 @@ export function aggregateBySku(listings, canonicalizeSkuFn) {
const name = String(r?.name || "");
const url = String(r?.url || "");
const storeLabel = String(r?.storeLabel || r?.store || "");
const removed = Boolean(r?.removed);
const img = normImg(r?.img || r?.image || r?.thumb || "");
@ -29,18 +30,22 @@ export function aggregateBySku(listings, canonicalizeSkuFn) {
cheapestPriceStr: pStr || "",
cheapestPriceNum: pNum,
cheapestStoreLabel: storeLabel || "",
stores: new Set(),
stores: new Set(), // LIVE stores only
storesEver: new Set(), // live + removed presence (history)
sampleUrl: url || "",
_searchParts: [],
searchText: "",
_imgByName: new Map(), // name -> img
_imgByName: new Map(),
_imgAny: "",
};
bySku.set(sku, agg);
}
if (storeLabel) agg.stores.add(storeLabel);
if (storeLabel) {
agg.storesEver.add(storeLabel);
if (!removed) agg.stores.add(storeLabel);
}
if (!agg.sampleUrl && url) agg.sampleUrl = url;
// Keep first non-empty name, but keep thumbnail aligned to chosen name
@ -56,8 +61,8 @@ export function aggregateBySku(listings, canonicalizeSkuFn) {
if (name) agg._imgByName.set(name, img);
}
// cheapest (across all merged rows)
if (pNum !== null) {
// cheapest across LIVE rows only (so removed history doesn't "win")
if (!removed && pNum !== null) {
if (agg.cheapestPriceNum === null || pNum < agg.cheapestPriceNum) {
agg.cheapestPriceNum = pNum;
agg.cheapestPriceStr = pStr || "";
@ -71,6 +76,7 @@ export function aggregateBySku(listings, canonicalizeSkuFn) {
if (name) agg._searchParts.push(name);
if (url) agg._searchParts.push(url);
if (storeLabel) agg._searchParts.push(storeLabel);
if (removed) agg._searchParts.push("removed");
}
const out = [...bySku.values()];
@ -85,11 +91,14 @@ export function aggregateBySku(listings, canonicalizeSkuFn) {
delete it._imgByName;
delete it._imgAny;
it.storeCount = it.stores.size;
it.storeCountEver = it.storesEver.size;
it.removedEverywhere = it.storeCount === 0;
it._searchParts.push(it.sku);
it._searchParts.push(it.name || "");
it._searchParts.push(it.sampleUrl || "");
it._searchParts.push(it.cheapestStoreLabel || "");
it.searchText = normSearchText(it._searchParts.join(" | "));
delete it._searchParts;
}

View file

@ -169,24 +169,31 @@ export async function renderItem($app, skuInput) {
// include toSku + all fromSkus mapped to it
const skuGroup = rules.groupForCanonical(sku);
const cur = all.filter((x) => skuGroup.has(String(keySkuForRow(x) || "")));
// IMPORTANT CHANGE:
// index.json now includes removed rows too. Split live vs all.
const allRows = all.filter((x) => skuGroup.has(String(keySkuForRow(x) || "")));
const liveRows = allRows.filter((x) => !Boolean(x?.removed));
if (!cur.length) {
$title.textContent = "Item not found in current index";
$status.textContent = "Tip: index.json only includes current (non-removed) items.";
if (!allRows.length) {
$title.textContent = "Item not found";
$status.textContent = "No matching SKU in index.";
if ($thumbBox) $thumbBox.innerHTML = `<div class="thumbPlaceholder"></div>`;
return;
}
// pick bestName by most common across merged rows
const isRemovedEverywhere = liveRows.length === 0;
// pick bestName by most common across LIVE rows (fallback to allRows)
const basisForName = liveRows.length ? liveRows : allRows;
const nameCounts = new Map();
for (const r of cur) {
for (const r of basisForName) {
const n = String(r.name || "");
if (!n) continue;
nameCounts.set(n, (nameCounts.get(n) || 0) + 1);
}
let bestName = cur[0].name || `(SKU ${sku})`;
let bestName = basisForName[0].name || `(SKU ${sku})`;
let bestCount = -1;
for (const [n, c] of nameCounts.entries()) {
if (c > bestCount) {
@ -196,11 +203,13 @@ export async function renderItem($app, skuInput) {
}
$title.textContent = bestName;
// choose thumbnail from cheapest listing across merged rows (fallback: first that matches name)
// choose thumbnail from cheapest LIVE listing (fallback: any matching name; fallback: any)
let bestImg = "";
let bestPrice = null;
for (const r of cur) {
const basisForThumb = liveRows.length ? liveRows : allRows;
for (const r of basisForThumb) {
const p = parsePriceToNumber(r.price);
const img = String(r?.img || "").trim();
if (p !== null && img) {
@ -211,7 +220,7 @@ export async function renderItem($app, skuInput) {
}
}
if (!bestImg) {
for (const r of cur) {
for (const r of basisForThumb) {
if (String(r?.name || "") === String(bestName || "") && String(r?.img || "").trim()) {
bestImg = String(r.img).trim();
break;
@ -219,7 +228,7 @@ export async function renderItem($app, skuInput) {
}
}
if (!bestImg) {
for (const r of cur) {
for (const r of basisForThumb) {
if (String(r?.img || "").trim()) {
bestImg = String(r.img).trim();
break;
@ -229,28 +238,35 @@ export async function renderItem($app, skuInput) {
$thumbBox.innerHTML = bestImg ? renderThumbHtml(bestImg, "detailThumb") : `<div class="thumbPlaceholder"></div>`;
// show store links from merged rows (may include multiple per store; OK)
// show store links from merged rows (may include multiple per store; OK)
// If two identical links exist, only render one.
// Render store links:
// - LIVE stores first (normal)
// - then removed-history stores with a "(removed)" suffix
const seenLinks = new Set();
$links.innerHTML = cur
const linkRows = allRows
.slice()
.sort((a, b) => String(a.storeLabel || "").localeCompare(String(b.storeLabel || "")))
.sort((a, b) => {
const ar = Boolean(a?.removed) ? 1 : 0;
const br = Boolean(b?.removed) ? 1 : 0;
if (ar !== br) return ar - br; // live first
return String(a.storeLabel || "").localeCompare(String(b.storeLabel || ""));
})
.filter((r) => {
const href = String(r?.url || "").trim();
const text = String(r?.storeLabel || r?.store || "Store").trim();
if (!href) return false;
// "identical" = same href + same rendered text
const key = `${href}|${text}`;
const suffix = Boolean(r?.removed) ? " (removed)" : "";
const key = `${href}|${text}${suffix}`;
if (seenLinks.has(key)) return false;
seenLinks.add(key);
return true;
})
});
$links.innerHTML = linkRows
.map((r) => {
const href = String(r.url || "").trim();
const text = String(r.storeLabel || r.store || "Store").trim();
return `<a href="${esc(href)}" target="_blank" rel="noopener noreferrer">${esc(text)}</a>`;
const suffix = Boolean(r?.removed) ? " (removed)" : "";
return `<a href="${esc(href)}" target="_blank" rel="noopener noreferrer">${esc(text + suffix)}</a>`;
})
.join("");
@ -259,17 +275,19 @@ export async function renderItem($app, skuInput) {
const repo = gh.repo;
const branch = "data";
// dbFile -> rows (because merged skus can exist in same dbFile)
const byDbFile = new Map();
for (const r of cur) {
// Group DB files by historical presence (LIVE or REMOVED rows).
const byDbFileAll = new Map();
for (const r of allRows) {
if (!r.dbFile) continue;
const k = String(r.dbFile);
if (!byDbFile.has(k)) byDbFile.set(k, []);
byDbFile.get(k).push(r);
if (!byDbFileAll.has(k)) byDbFileAll.set(k, []);
byDbFileAll.get(k).push(r);
}
const dbFiles = [...byDbFile.keys()].sort();
const dbFiles = [...byDbFileAll.keys()].sort();
$status.textContent = `Loading history for ${dbFiles.length} store file(s)…`;
$status.textContent = isRemovedEverywhere
? `Item is removed everywhere (showing historical chart across ${dbFiles.length} store file(s))…`
: `Loading history for ${dbFiles.length} store file(s)…`;
const manifest = await loadDbCommitsManifest();
const allDatesSet = new Set();
@ -282,8 +300,13 @@ export async function renderItem($app, skuInput) {
const skuKeys = [...skuGroup];
for (const dbFile of dbFiles) {
const rows = byDbFile.get(dbFile) || [];
const storeLabel = String(rows[0]?.storeLabel || rows[0]?.store || dbFile);
const rowsAll = byDbFileAll.get(dbFile) || [];
// Determine current LIVE rows for this dbFile:
// (we don't want to add a "today" point if the listing is removed in this store now)
const rowsLive = rowsAll.filter((r) => !Boolean(r?.removed));
const storeLabel = String(rowsAll[0]?.storeLabel || rowsAll[0]?.store || dbFile);
const cached = loadSeriesCache(sku, dbFile, cacheBust);
if (cached && Array.isArray(cached.points) && cached.points.length) {
@ -346,6 +369,7 @@ export async function renderItem($app, skuInput) {
}
}
// findMinPriceForSkuGroupInDb already ignores removed rows inside each DB snapshot.
const pNum = findMinPriceForSkuGroupInDb(obj, skuKeys, storeLabel);
points.set(d, pNum);
@ -354,17 +378,19 @@ export async function renderItem($app, skuInput) {
compactPoints.push({ date: d, price: pNum });
}
// Always add "today" from current index (min across merged rows in this store/dbFile)
let curMin = null;
for (const r of rows) {
const p = parsePriceToNumber(r.price);
if (p !== null) curMin = curMin === null ? p : Math.min(curMin, p);
}
if (curMin !== null) {
points.set(today, curMin);
values.push(curMin);
allDatesSet.add(today);
compactPoints.push({ date: today, price: curMin });
// Add "today" point ONLY if listing currently exists in this store/dbFile (live rows present)
if (rowsLive.length) {
let curMin = null;
for (const r of rowsLive) {
const p = parsePriceToNumber(r.price);
if (p !== null) curMin = curMin === null ? p : Math.min(curMin, p);
}
if (curMin !== null) {
points.set(today, curMin);
values.push(curMin);
allDatesSet.add(today);
compactPoints.push({ date: today, price: curMin });
}
}
saveSeriesCache(sku, dbFile, cacheBust, compactPoints);
@ -416,6 +442,10 @@ export async function renderItem($app, skuInput) {
});
$status.textContent = manifest
? `History loaded from prebuilt manifest (1 point/day) + current run. Points=${labels.length}.`
: `History loaded (GitHub API fallback; 1 point/day) + current run. Points=${labels.length}.`;
? (isRemovedEverywhere
? `History loaded (removed everywhere). Source=prebuilt manifest. Points=${labels.length}.`
: `History loaded from prebuilt manifest (1 point/day) + current run. Points=${labels.length}.`)
: (isRemovedEverywhere
? `History loaded (removed everywhere). Source=GitHub API fallback. Points=${labels.length}.`
: `History loaded (GitHub API fallback; 1 point/day) + current run. Points=${labels.length}.`);
}