mirror of
https://github.com/samsonjs/spirit-tracker.git
synced 2026-04-27 15:07:43 +00:00
feat: Fixes to SKU merging
This commit is contained in:
parent
05c97be12b
commit
d47f2fe9ee
2 changed files with 73 additions and 12 deletions
|
|
@ -23,15 +23,42 @@ function mergeDiscoveredIntoDb(prevDb, discovered) {
|
||||||
const removedItems = [];
|
const removedItems = [];
|
||||||
const restoredItems = [];
|
const restoredItems = [];
|
||||||
|
|
||||||
// If a product's URL changes but it has a *real* SKU, treat it as the same product:
|
// Choose a deterministic "best" record among dup active SKU rows.
|
||||||
// update DB entry (and URL key) but do NOT count it as New/Removed.
|
// Prefer: more complete fields, then lexicographically smallest URL.
|
||||||
const prevByRealSku = new Map(); // sku6 -> { url, item }
|
/**
 * Score how complete a product record is, for choosing between duplicate
 * rows that share the same SKU.
 *
 * One point is awarded for each of `name`, `price`, `url` and `img` that is
 * non-blank after string coercion and trimming. Falsy field values
 * (including the number 0) count as blank because of the `|| ""` fallback.
 *
 * @param {object|null|undefined} it - Record to score.
 * @returns {number} Integer from 0 to 4; 0 when `it` is falsy.
 */
function scoreItem(it) {
  if (!it) return 0;

  let score = 0;
  for (const field of ["name", "price", "url", "img"]) {
    if (String(it[field] || "").trim()) score += 1;
  }
  return score;
}
|
||||||
|
|
||||||
|
/**
 * Pick the preferred of two `{ url, item }` candidates for the same SKU.
 *
 * The candidate whose item has the higher `scoreItem` value wins. On equal
 * scores the candidate with the smaller URL (string `<=` comparison after
 * coercion, missing URL treated as "") wins, so the result does not depend
 * on which argument held which candidate unless both URLs are equal, in
 * which case the first argument is kept.
 *
 * @param {{url: string, item: object}} first - Candidate A.
 * @param {{url: string, item: object}} second - Candidate B.
 * @returns {{url: string, item: object}} A fresh object for the winner.
 */
function pickBetter({ url: urlA, item: a }, { url: urlB, item: b }) {
  const candidateA = { url: urlA, item: a };
  const candidateB = { url: urlB, item: b };

  const sa = scoreItem(a);
  const sb = scoreItem(b);
  if (sa !== sb) return sa > sb ? candidateA : candidateB;

  // Tie-breaker: stable and deterministic.
  return String(urlA || "") <= String(urlB || "") ? candidateA : candidateB;
}
|
||||||
|
|
||||||
|
// Index active items by real SKU; also track *all* urls per SKU to cleanup dupes.
|
||||||
|
const prevByRealSku = new Map(); // sku6 -> { url, item } (best)
|
||||||
|
const prevUrlsByRealSku = new Map(); // sku6 -> Set(urls)
|
||||||
|
|
||||||
for (const [url, it] of prevDb.byUrl.entries()) {
|
for (const [url, it] of prevDb.byUrl.entries()) {
|
||||||
if (!it || it.removed) continue;
|
if (!it || it.removed) continue;
|
||||||
const sku6 = normalizeCspc(it.sku);
|
const sku6 = normalizeCspc(it.sku);
|
||||||
if (!sku6) continue;
|
if (!sku6) continue;
|
||||||
// If dup SKUs exist, keep the first one we saw (stable enough).
|
|
||||||
if (!prevByRealSku.has(sku6)) prevByRealSku.set(sku6, { url, item: it });
|
let set = prevUrlsByRealSku.get(sku6);
|
||||||
|
if (!set) prevUrlsByRealSku.set(sku6, (set = new Set()));
|
||||||
|
set.add(url);
|
||||||
|
|
||||||
|
const cur = prevByRealSku.get(sku6);
|
||||||
|
const next = { url, item: it };
|
||||||
|
if (!cur) prevByRealSku.set(sku6, next);
|
||||||
|
else prevByRealSku.set(sku6, pickBetter(cur, next));
|
||||||
}
|
}
|
||||||
|
|
||||||
const matchedPrevUrls = new Set(); // old URLs we "found" via SKU even if URL changed
|
const matchedPrevUrls = new Set(); // old URLs we "found" via SKU even if URL changed
|
||||||
|
|
@ -48,10 +75,24 @@ function mergeDiscoveredIntoDb(prevDb, discovered) {
|
||||||
if (hit && hit.url && hit.url !== url) {
|
if (hit && hit.url && hit.url !== url) {
|
||||||
prev = hit.item;
|
prev = hit.item;
|
||||||
prevUrlForThisItem = hit.url;
|
prevUrlForThisItem = hit.url;
|
||||||
matchedPrevUrls.add(hit.url);
|
|
||||||
|
|
||||||
// Move record key from old URL -> new URL in DB map (no New/Removed noise)
|
// Mark ALL prior URLs for this SKU as matched, so we don't later "remove" them.
|
||||||
if (merged.has(hit.url)) merged.delete(hit.url);
|
const allOld = prevUrlsByRealSku.get(nowSku6);
|
||||||
|
if (allOld) {
|
||||||
|
for (const u of allOld) matchedPrevUrls.add(u);
|
||||||
|
} else {
|
||||||
|
matchedPrevUrls.add(hit.url);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cleanup: remove any existing active duplicates for this SKU from the merged map.
|
||||||
|
// We'll re-add the chosen record at the new URL below.
|
||||||
|
if (allOld) {
|
||||||
|
for (const u of allOld) {
|
||||||
|
if (u !== url && merged.has(u)) merged.delete(u);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (merged.has(hit.url)) merged.delete(hit.url);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -70,7 +111,6 @@ function mergeDiscoveredIntoDb(prevDb, discovered) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the previous record was removed and we found it by the SAME URL, keep current behavior (restored).
|
// If the previous record was removed and we found it by the SAME URL, keep current behavior (restored).
|
||||||
// Note: if it "came back" under a different URL, we only de-dupe New/Removed for URL changes on active items.
|
|
||||||
if (prevUrlForThisItem === url && prev.removed) {
|
if (prevUrlForThisItem === url && prev.removed) {
|
||||||
const now = {
|
const now = {
|
||||||
...nowRaw,
|
...nowRaw,
|
||||||
|
|
@ -121,7 +161,7 @@ function mergeDiscoveredIntoDb(prevDb, discovered) {
|
||||||
|
|
||||||
for (const [url, prev] of prevDb.byUrl.entries()) {
|
for (const [url, prev] of prevDb.byUrl.entries()) {
|
||||||
if (discovered.has(url)) continue;
|
if (discovered.has(url)) continue;
|
||||||
if (matchedPrevUrls.has(url)) continue; // de-dupe URL changes for real-SKU items
|
if (matchedPrevUrls.has(url)) continue; // de-dupe URL changes for real-SKU items (and cleanup dupes)
|
||||||
if (!prev.removed) {
|
if (!prev.removed) {
|
||||||
const removed = { ...prev, removed: true };
|
const removed = { ...prev, removed: true };
|
||||||
merged.set(url, removed);
|
merged.set(url, removed);
|
||||||
|
|
|
||||||
|
|
@ -106,14 +106,35 @@ function mapBySku(obj, { includeRemoved } = { includeRemoved: false }) {
|
||||||
const removed = Boolean(it.removed);
|
const removed = Boolean(it.removed);
|
||||||
if (!includeRemoved && removed) continue;
|
if (!includeRemoved && removed) continue;
|
||||||
|
|
||||||
m.set(sku, {
|
const next = {
|
||||||
sku,
|
sku,
|
||||||
name: String(it.name || ""),
|
name: String(it.name || ""),
|
||||||
price: String(it.price || ""),
|
price: String(it.price || ""),
|
||||||
url: String(it.url || ""),
|
url: String(it.url || ""),
|
||||||
removed,
|
removed,
|
||||||
});
|
};
|
||||||
|
|
||||||
|
const prev = m.get(sku);
|
||||||
|
if (!prev) {
|
||||||
|
m.set(sku, next);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prefer the non-removed record if both exist.
|
||||||
|
if (prev.removed && !next.removed) {
|
||||||
|
m.set(sku, next);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!prev.removed && next.removed) {
|
||||||
|
continue; // keep the active one
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise keep the “better” one (more complete data), deterministic.
|
||||||
|
const prevScore = (prev.name ? 1 : 0) + (prev.price ? 1 : 0) + (prev.url ? 1 : 0);
|
||||||
|
const nextScore = (next.name ? 1 : 0) + (next.price ? 1 : 0) + (next.url ? 1 : 0);
|
||||||
|
if (nextScore > prevScore) m.set(sku, next);
|
||||||
}
|
}
|
||||||
|
|
||||||
return m;
|
return m;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue