feat: URL and img changes

This commit is contained in:
Brennan Wilkes (Text Groove) 2026-01-24 11:02:56 -08:00
parent 30bf9ed983
commit 140b73af37

View file

@ -1,3 +1,4 @@
// src/tracker/merge.js
"use strict";
const { normalizeCspc } = require("../utils/sku");
@ -10,6 +11,10 @@ function normImg(v) {
return s;
}
function isRealSku(v) {
return Boolean(normalizeCspc(v));
}
function mergeDiscoveredIntoDb(prevDb, discovered) {
const merged = new Map(prevDb.byUrl);
@ -18,9 +23,40 @@ function mergeDiscoveredIntoDb(prevDb, discovered) {
const removedItems = [];
const restoredItems = [];
for (const [url, nowRaw] of discovered.entries()) {
const prev = prevDb.byUrl.get(url);
// If a product's URL changes but it has a *real* SKU, treat it as the same product:
// update DB entry (and URL key) but do NOT count it as New/Removed.
const prevByRealSku = new Map(); // sku6 -> { url, item }
for (const [url, it] of prevDb.byUrl.entries()) {
if (!it || it.removed) continue;
const sku6 = normalizeCspc(it.sku);
if (!sku6) continue;
// If dup SKUs exist, keep the first one we saw (stable enough).
if (!prevByRealSku.has(sku6)) prevByRealSku.set(sku6, { url, item: it });
}
const matchedPrevUrls = new Set(); // old URLs we "found" via SKU even if URL changed
for (const [url, nowRaw] of discovered.entries()) {
let prev = prevDb.byUrl.get(url);
let prevUrlForThisItem = url;
// URL not found in previous DB: try to match by *real* SKU.
if (!prev) {
const nowSku6 = normalizeCspc(nowRaw.sku);
if (nowSku6) {
const hit = prevByRealSku.get(nowSku6);
if (hit && hit.url && hit.url !== url) {
prev = hit.item;
prevUrlForThisItem = hit.url;
matchedPrevUrls.add(hit.url);
// Move record key from old URL -> new URL in DB map (no New/Removed noise)
if (merged.has(hit.url)) merged.delete(hit.url);
}
}
}
// Truly new (no URL match, no real-SKU match)
if (!prev) {
const now = {
...nowRaw,
@ -33,7 +69,9 @@ function mergeDiscoveredIntoDb(prevDb, discovered) {
continue;
}
if (prev.removed) {
// If the previous record was removed and we found it by the SAME URL, keep current behavior (restored).
// Note: if it "came back" under a different URL, we only de-dupe New/Removed for URL changes on active items.
if (prevUrlForThisItem === url && prev.removed) {
const now = {
...nowRaw,
sku: normalizeCspc(nowRaw.sku) || normalizeCspc(prev.sku),
@ -50,6 +88,7 @@ function mergeDiscoveredIntoDb(prevDb, discovered) {
continue;
}
// Update-in-place (or URL-move-with-real-SKU): update DB, report price changes normally.
const prevPrice = normPrice(prev.price);
const nowPrice = normPrice(nowRaw.price);
@ -65,7 +104,7 @@ function mergeDiscoveredIntoDb(prevDb, discovered) {
const skuChanged = prevSku !== nowSku;
const imgChanged = prevImg !== nowImg;
if (nameChanged || priceChanged || skuChanged || imgChanged) {
if (nameChanged || priceChanged || skuChanged || imgChanged || prevUrlForThisItem !== url) {
merged.set(url, { ...nowRaw, sku: nowSku, img: nowImg, removed: false });
}
@ -82,6 +121,7 @@ function mergeDiscoveredIntoDb(prevDb, discovered) {
for (const [url, prev] of prevDb.byUrl.entries()) {
if (discovered.has(url)) continue;
if (matchedPrevUrls.has(url)) continue; // de-dupe URL changes for real-SKU items
if (!prev.removed) {
const removed = { ...prev, removed: true };
merged.set(url, removed);