mirror of
https://github.com/samsonjs/spirit-tracker.git
synced 2026-04-27 15:07:43 +00:00
feat: Stores will upgrade SKUs where possible
This commit is contained in:
parent
7df4e48b9f
commit
8b17d94516
1 changed file with 37 additions and 34 deletions
|
|
@@ -1,7 +1,7 @@
|
||||||
// src/tracker/merge.js
|
// src/tracker/merge.js
|
||||||
"use strict";
|
"use strict";
|
||||||
|
|
||||||
const { normalizeSkuKey, normalizeCspc } = require("../utils/sku");
|
const { normalizeSkuKey, normalizeCspc, pickBetterSku } = require("../utils/sku");
|
||||||
const { normPrice } = require("../utils/price");
|
const { normPrice } = require("../utils/price");
|
||||||
|
|
||||||
function normImg(v) {
|
function normImg(v) {
|
||||||
|
|
@@ -14,13 +14,7 @@ function normImg(v) {
|
||||||
function dbStoreLabel(prevDb) {
|
function dbStoreLabel(prevDb) {
|
||||||
return String(prevDb?.storeLabel || prevDb?.store || "").trim();
|
return String(prevDb?.storeLabel || prevDb?.store || "").trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
function isRealSku(v) {
|
|
||||||
return Boolean(normalizeCspc(v));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
function mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel } = {}) {
|
function mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel } = {}) {
|
||||||
const effectiveStoreLabel = String(storeLabel || dbStoreLabel(prevDb)).trim();
|
const effectiveStoreLabel = String(storeLabel || dbStoreLabel(prevDb)).trim();
|
||||||
if (!effectiveStoreLabel) {
|
if (!effectiveStoreLabel) {
|
||||||
|
|
@@ -28,6 +22,7 @@ function mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel } = {}) {
|
||||||
"mergeDiscoveredIntoDb: missing storeLabel; refusing to generate synthetic SKUs with fallback 'store'"
|
"mergeDiscoveredIntoDb: missing storeLabel; refusing to generate synthetic SKUs with fallback 'store'"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
function normalizeSkuForDb(raw, url) {
|
function normalizeSkuForDb(raw, url) {
|
||||||
return normalizeSkuKey(raw, { storeLabel: effectiveStoreLabel, url });
|
return normalizeSkuKey(raw, { storeLabel: effectiveStoreLabel, url });
|
||||||
}
|
}
|
||||||
|
|
@@ -58,49 +53,51 @@ function mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel } = {}) {
|
||||||
return String(urlA || "") <= String(urlB || "") ? { url: urlA, item: a } : { url: urlB, item: b };
|
return String(urlA || "") <= String(urlB || "") ? { url: urlA, item: a } : { url: urlB, item: b };
|
||||||
}
|
}
|
||||||
|
|
||||||
// Index active items by real SKU; also track *all* urls per SKU to cleanup dupes.
|
// Index active items by non-synthetic skuKey (CSPC / id:* / upc:* / etc).
|
||||||
const prevByRealSku = new Map(); // sku6 -> { url, item } (best)
|
// Also track *all* urls per skuKey to cleanup dupes.
|
||||||
const prevUrlsByRealSku = new Map(); // sku6 -> Set(urls)
|
const prevBySkuKey = new Map(); // skuKey -> { url, item } (best)
|
||||||
|
const prevUrlsBySkuKey = new Map(); // skuKey -> Set(urls)
|
||||||
|
|
||||||
for (const [url, it] of prevDb.byUrl.entries()) {
|
for (const [url, it] of prevDb.byUrl.entries()) {
|
||||||
if (!it || it.removed) continue;
|
if (!it || it.removed) continue;
|
||||||
const sku6 = normalizeCspc(it.sku);
|
|
||||||
if (!sku6) continue;
|
|
||||||
|
|
||||||
let set = prevUrlsByRealSku.get(sku6);
|
const skuKey = normalizeSkuForDb(it.sku, url);
|
||||||
if (!set) prevUrlsByRealSku.set(sku6, (set = new Set()));
|
if (!skuKey || /^u:/i.test(skuKey)) continue;
|
||||||
|
|
||||||
|
let set = prevUrlsBySkuKey.get(skuKey);
|
||||||
|
if (!set) prevUrlsBySkuKey.set(skuKey, (set = new Set()));
|
||||||
set.add(url);
|
set.add(url);
|
||||||
|
|
||||||
const cur = prevByRealSku.get(sku6);
|
const cur = prevBySkuKey.get(skuKey);
|
||||||
const next = { url, item: it };
|
const next = { url, item: it };
|
||||||
if (!cur) prevByRealSku.set(sku6, next);
|
if (!cur) prevBySkuKey.set(skuKey, next);
|
||||||
else prevByRealSku.set(sku6, pickBetter(cur, next));
|
else prevBySkuKey.set(skuKey, pickBetter(cur, next));
|
||||||
}
|
}
|
||||||
|
|
||||||
const matchedPrevUrls = new Set(); // old URLs we "found" via SKU even if URL changed
|
const matchedPrevUrls = new Set(); // old URLs we "found" via skuKey even if URL changed
|
||||||
|
|
||||||
for (const [url, nowRaw] of discovered.entries()) {
|
for (const [url, nowRaw] of discovered.entries()) {
|
||||||
let prev = prevDb.byUrl.get(url);
|
let prev = prevDb.byUrl.get(url);
|
||||||
let prevUrlForThisItem = url;
|
let prevUrlForThisItem = url;
|
||||||
|
|
||||||
// URL not found in previous DB: try to match by *real* SKU.
|
// URL not found in previous DB: try to match by non-synthetic skuKey.
|
||||||
if (!prev) {
|
if (!prev) {
|
||||||
const nowSku6 = normalizeCspc(nowRaw.sku);
|
const nowSkuKey = normalizeSkuForDb(nowRaw.sku, url);
|
||||||
if (nowSku6) {
|
if (nowSkuKey && !/^u:/i.test(nowSkuKey)) {
|
||||||
const hit = prevByRealSku.get(nowSku6);
|
const hit = prevBySkuKey.get(nowSkuKey);
|
||||||
if (hit && hit.url && hit.url !== url) {
|
if (hit && hit.url && hit.url !== url) {
|
||||||
prev = hit.item;
|
prev = hit.item;
|
||||||
prevUrlForThisItem = hit.url;
|
prevUrlForThisItem = hit.url;
|
||||||
|
|
||||||
// Mark ALL prior URLs for this SKU as matched, so we don't later "remove" them.
|
// Mark ALL prior URLs for this skuKey as matched, so we don't later "remove" them.
|
||||||
const allOld = prevUrlsByRealSku.get(nowSku6);
|
const allOld = prevUrlsBySkuKey.get(nowSkuKey);
|
||||||
if (allOld) {
|
if (allOld) {
|
||||||
for (const u of allOld) matchedPrevUrls.add(u);
|
for (const u of allOld) matchedPrevUrls.add(u);
|
||||||
} else {
|
} else {
|
||||||
matchedPrevUrls.add(hit.url);
|
matchedPrevUrls.add(hit.url);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Cleanup: remove any existing active duplicates for this SKU from the merged map.
|
// Cleanup: remove any existing active duplicates for this skuKey from the merged map.
|
||||||
// We'll re-add the chosen record at the new URL below.
|
// We'll re-add the chosen record at the new URL below.
|
||||||
if (allOld) {
|
if (allOld) {
|
||||||
for (const u of allOld) {
|
for (const u of allOld) {
|
||||||
|
|
@@ -113,11 +110,12 @@ function mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel } = {}) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Truly new (no URL match, no real-SKU match)
|
// Truly new (no URL match, no skuKey match)
|
||||||
if (!prev) {
|
if (!prev) {
|
||||||
|
const nowSku = normalizeSkuForDb(nowRaw.sku, url);
|
||||||
const now = {
|
const now = {
|
||||||
...nowRaw,
|
...nowRaw,
|
||||||
sku: normalizeSkuForDb(nowRaw.sku, url),
|
sku: nowSku,
|
||||||
img: normImg(nowRaw.img),
|
img: normImg(nowRaw.img),
|
||||||
removed: false,
|
removed: false,
|
||||||
};
|
};
|
||||||
|
|
@@ -128,31 +126,36 @@ function mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel } = {}) {
|
||||||
|
|
||||||
// If the previous record was removed and we found it by the SAME URL, keep current behavior (restored).
|
// If the previous record was removed and we found it by the SAME URL, keep current behavior (restored).
|
||||||
if (prevUrlForThisItem === url && prev.removed) {
|
if (prevUrlForThisItem === url && prev.removed) {
|
||||||
|
const prevSku = normalizeSkuForDb(prev.sku, prev.url);
|
||||||
|
const rawNowSku = normalizeSkuForDb(nowRaw.sku, url);
|
||||||
|
const nowSku = pickBetterSku(rawNowSku, prevSku);
|
||||||
|
|
||||||
const now = {
|
const now = {
|
||||||
...nowRaw,
|
...nowRaw,
|
||||||
sku:
|
sku: nowSku,
|
||||||
normalizeSkuForDb(nowRaw.sku, url) ||
|
|
||||||
normalizeSkuForDb(prev.sku, prev.url),
|
|
||||||
img: normImg(nowRaw.img) || normImg(prev.img),
|
img: normImg(nowRaw.img) || normImg(prev.img),
|
||||||
removed: false,
|
removed: false,
|
||||||
};
|
};
|
||||||
|
|
||||||
restoredItems.push({
|
restoredItems.push({
|
||||||
url,
|
url,
|
||||||
name: now.name || prev.name || "",
|
name: now.name || prev.name || "",
|
||||||
price: now.price || prev.price || "",
|
price: now.price || prev.price || "",
|
||||||
sku: now.sku || "",
|
sku: now.sku || "",
|
||||||
});
|
});
|
||||||
|
|
||||||
merged.set(url, now);
|
merged.set(url, now);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update-in-place (or URL-move-with-real-SKU): update DB, report price changes normally.
|
// Update-in-place (or URL-move-with-skuKey): update DB, report price changes normally.
|
||||||
const prevPrice = normPrice(prev.price);
|
const prevPrice = normPrice(prev.price);
|
||||||
const nowPrice = normPrice(nowRaw.price);
|
const nowPrice = normPrice(nowRaw.price);
|
||||||
|
|
||||||
const prevSku = normalizeSkuForDb(prev.sku, prev.url);
|
const prevSku = normalizeSkuForDb(prev.sku, prev.url);
|
||||||
const nowSku = normalizeSkuForDb(nowRaw.sku, url) || prevSku;
|
const rawNowSku = normalizeSkuForDb(nowRaw.sku, url);
|
||||||
|
const nowSku = pickBetterSku(rawNowSku, prevSku);
|
||||||
|
|
||||||
const prevImg = normImg(prev.img);
|
const prevImg = normImg(prev.img);
|
||||||
let nowImg = normImg(nowRaw.img);
|
let nowImg = normImg(nowRaw.img);
|
||||||
if (!nowImg) nowImg = prevImg;
|
if (!nowImg) nowImg = prevImg;
|
||||||
|
|
@@ -179,7 +182,7 @@ function mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel } = {}) {
|
||||||
|
|
||||||
for (const [url, prev] of prevDb.byUrl.entries()) {
|
for (const [url, prev] of prevDb.byUrl.entries()) {
|
||||||
if (discovered.has(url)) continue;
|
if (discovered.has(url)) continue;
|
||||||
if (matchedPrevUrls.has(url)) continue; // de-dupe URL changes for real-SKU items (and cleanup dupes)
|
if (matchedPrevUrls.has(url)) continue; // de-dupe URL changes for skuKey items (and cleanup dupes)
|
||||||
if (!prev.removed) {
|
if (!prev.removed) {
|
||||||
const removed = { ...prev, removed: true };
|
const removed = { ...prev, removed: true };
|
||||||
merged.set(url, removed);
|
merged.set(url, removed);
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue