feat: Stores will upgrade SKUs where possible

This commit is contained in:
Brennan Wilkes (Text Groove) 2026-01-31 14:27:42 -08:00
parent 7df4e48b9f
commit 8b17d94516

View file

@ -1,7 +1,7 @@
// src/tracker/merge.js // src/tracker/merge.js
"use strict"; "use strict";
const { normalizeSkuKey, normalizeCspc } = require("../utils/sku"); const { normalizeSkuKey, normalizeCspc, pickBetterSku } = require("../utils/sku");
const { normPrice } = require("../utils/price"); const { normPrice } = require("../utils/price");
function normImg(v) { function normImg(v) {
@ -14,13 +14,7 @@ function normImg(v) {
function dbStoreLabel(prevDb) { function dbStoreLabel(prevDb) {
return String(prevDb?.storeLabel || prevDb?.store || "").trim(); return String(prevDb?.storeLabel || prevDb?.store || "").trim();
} }
function isRealSku(v) {
return Boolean(normalizeCspc(v));
}
function mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel } = {}) { function mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel } = {}) {
const effectiveStoreLabel = String(storeLabel || dbStoreLabel(prevDb)).trim(); const effectiveStoreLabel = String(storeLabel || dbStoreLabel(prevDb)).trim();
if (!effectiveStoreLabel) { if (!effectiveStoreLabel) {
@ -28,6 +22,7 @@ function mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel } = {}) {
"mergeDiscoveredIntoDb: missing storeLabel; refusing to generate synthetic SKUs with fallback 'store'" "mergeDiscoveredIntoDb: missing storeLabel; refusing to generate synthetic SKUs with fallback 'store'"
); );
} }
function normalizeSkuForDb(raw, url) { function normalizeSkuForDb(raw, url) {
return normalizeSkuKey(raw, { storeLabel: effectiveStoreLabel, url }); return normalizeSkuKey(raw, { storeLabel: effectiveStoreLabel, url });
} }
@ -58,49 +53,51 @@ function mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel } = {}) {
return String(urlA || "") <= String(urlB || "") ? { url: urlA, item: a } : { url: urlB, item: b }; return String(urlA || "") <= String(urlB || "") ? { url: urlA, item: a } : { url: urlB, item: b };
} }
// Index active items by real SKU; also track *all* urls per SKU to cleanup dupes. // Index active items by non-synthetic skuKey (CSPC / id:* / upc:* / etc).
const prevByRealSku = new Map(); // sku6 -> { url, item } (best) // Also track *all* urls per skuKey to cleanup dupes.
const prevUrlsByRealSku = new Map(); // sku6 -> Set(urls) const prevBySkuKey = new Map(); // skuKey -> { url, item } (best)
const prevUrlsBySkuKey = new Map(); // skuKey -> Set(urls)
for (const [url, it] of prevDb.byUrl.entries()) { for (const [url, it] of prevDb.byUrl.entries()) {
if (!it || it.removed) continue; if (!it || it.removed) continue;
const sku6 = normalizeCspc(it.sku);
if (!sku6) continue;
let set = prevUrlsByRealSku.get(sku6); const skuKey = normalizeSkuForDb(it.sku, url);
if (!set) prevUrlsByRealSku.set(sku6, (set = new Set())); if (!skuKey || /^u:/i.test(skuKey)) continue;
let set = prevUrlsBySkuKey.get(skuKey);
if (!set) prevUrlsBySkuKey.set(skuKey, (set = new Set()));
set.add(url); set.add(url);
const cur = prevByRealSku.get(sku6); const cur = prevBySkuKey.get(skuKey);
const next = { url, item: it }; const next = { url, item: it };
if (!cur) prevByRealSku.set(sku6, next); if (!cur) prevBySkuKey.set(skuKey, next);
else prevByRealSku.set(sku6, pickBetter(cur, next)); else prevBySkuKey.set(skuKey, pickBetter(cur, next));
} }
const matchedPrevUrls = new Set(); // old URLs we "found" via SKU even if URL changed const matchedPrevUrls = new Set(); // old URLs we "found" via skuKey even if URL changed
for (const [url, nowRaw] of discovered.entries()) { for (const [url, nowRaw] of discovered.entries()) {
let prev = prevDb.byUrl.get(url); let prev = prevDb.byUrl.get(url);
let prevUrlForThisItem = url; let prevUrlForThisItem = url;
// URL not found in previous DB: try to match by *real* SKU. // URL not found in previous DB: try to match by non-synthetic skuKey.
if (!prev) { if (!prev) {
const nowSku6 = normalizeCspc(nowRaw.sku); const nowSkuKey = normalizeSkuForDb(nowRaw.sku, url);
if (nowSku6) { if (nowSkuKey && !/^u:/i.test(nowSkuKey)) {
const hit = prevByRealSku.get(nowSku6); const hit = prevBySkuKey.get(nowSkuKey);
if (hit && hit.url && hit.url !== url) { if (hit && hit.url && hit.url !== url) {
prev = hit.item; prev = hit.item;
prevUrlForThisItem = hit.url; prevUrlForThisItem = hit.url;
// Mark ALL prior URLs for this SKU as matched, so we don't later "remove" them. // Mark ALL prior URLs for this skuKey as matched, so we don't later "remove" them.
const allOld = prevUrlsByRealSku.get(nowSku6); const allOld = prevUrlsBySkuKey.get(nowSkuKey);
if (allOld) { if (allOld) {
for (const u of allOld) matchedPrevUrls.add(u); for (const u of allOld) matchedPrevUrls.add(u);
} else { } else {
matchedPrevUrls.add(hit.url); matchedPrevUrls.add(hit.url);
} }
// Cleanup: remove any existing active duplicates for this SKU from the merged map. // Cleanup: remove any existing active duplicates for this skuKey from the merged map.
// We'll re-add the chosen record at the new URL below. // We'll re-add the chosen record at the new URL below.
if (allOld) { if (allOld) {
for (const u of allOld) { for (const u of allOld) {
@ -113,11 +110,12 @@ function mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel } = {}) {
} }
} }
// Truly new (no URL match, no real-SKU match) // Truly new (no URL match, no skuKey match)
if (!prev) { if (!prev) {
const nowSku = normalizeSkuForDb(nowRaw.sku, url);
const now = { const now = {
...nowRaw, ...nowRaw,
sku: normalizeSkuForDb(nowRaw.sku, url), sku: nowSku,
img: normImg(nowRaw.img), img: normImg(nowRaw.img),
removed: false, removed: false,
}; };
@ -128,31 +126,36 @@ function mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel } = {}) {
// If the previous record was removed and we found it by the SAME URL, keep current behavior (restored). // If the previous record was removed and we found it by the SAME URL, keep current behavior (restored).
if (prevUrlForThisItem === url && prev.removed) { if (prevUrlForThisItem === url && prev.removed) {
const prevSku = normalizeSkuForDb(prev.sku, prev.url);
const rawNowSku = normalizeSkuForDb(nowRaw.sku, url);
const nowSku = pickBetterSku(rawNowSku, prevSku);
const now = { const now = {
...nowRaw, ...nowRaw,
sku: sku: nowSku,
normalizeSkuForDb(nowRaw.sku, url) ||
normalizeSkuForDb(prev.sku, prev.url),
img: normImg(nowRaw.img) || normImg(prev.img), img: normImg(nowRaw.img) || normImg(prev.img),
removed: false, removed: false,
}; };
restoredItems.push({ restoredItems.push({
url, url,
name: now.name || prev.name || "", name: now.name || prev.name || "",
price: now.price || prev.price || "", price: now.price || prev.price || "",
sku: now.sku || "", sku: now.sku || "",
}); });
merged.set(url, now); merged.set(url, now);
continue; continue;
} }
// Update-in-place (or URL-move-with-real-SKU): update DB, report price changes normally. // Update-in-place (or URL-move-with-skuKey): update DB, report price changes normally.
const prevPrice = normPrice(prev.price); const prevPrice = normPrice(prev.price);
const nowPrice = normPrice(nowRaw.price); const nowPrice = normPrice(nowRaw.price);
const prevSku = normalizeSkuForDb(prev.sku, prev.url); const prevSku = normalizeSkuForDb(prev.sku, prev.url);
const nowSku = normalizeSkuForDb(nowRaw.sku, url) || prevSku; const rawNowSku = normalizeSkuForDb(nowRaw.sku, url);
const nowSku = pickBetterSku(rawNowSku, prevSku);
const prevImg = normImg(prev.img); const prevImg = normImg(prev.img);
let nowImg = normImg(nowRaw.img); let nowImg = normImg(nowRaw.img);
if (!nowImg) nowImg = prevImg; if (!nowImg) nowImg = prevImg;
@ -179,7 +182,7 @@ function mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel } = {}) {
for (const [url, prev] of prevDb.byUrl.entries()) { for (const [url, prev] of prevDb.byUrl.entries()) {
if (discovered.has(url)) continue; if (discovered.has(url)) continue;
if (matchedPrevUrls.has(url)) continue; // de-dupe URL changes for real-SKU items (and cleanup dupes) if (matchedPrevUrls.has(url)) continue; // de-dupe URL changes for skuKey items (and cleanup dupes)
if (!prev.removed) { if (!prev.removed) {
const removed = { ...prev, removed: true }; const removed = { ...prev, removed: true };
merged.set(url, removed); merged.set(url, removed);