diff --git a/src/utils/sku.js b/src/utils/sku.js index 5dd0bda..8182a74 100644 --- a/src/utils/sku.js +++ b/src/utils/sku.js @@ -10,6 +10,13 @@ function fnv1a32(str) { return (h >>> 0).toString(16).padStart(8, "0"); } +function idToCspc6(idDigits) { + const s = String(idDigits || "").trim(); + if (!/^\d{1,6}$/.test(s)) return ""; + return s.padStart(6, "0"); +} + + function normalizeCspc(v) { const m = String(v ?? "").match(/\b(\d{6})\b/); return m ? m[1] : ""; @@ -84,7 +91,9 @@ function normalizeSkuKey(v, { storeLabel, url } = {}) { } if (/^id:/i.test(raw)) { const id = normalizeIdDigits(raw); - return id ? `id:${id}` : ""; + if (!id) return ""; + const cspc = idToCspc6(id); + return cspc ? cspc : `id:${id}`; } if (raw.startsWith("u:")) return raw; diff --git a/src/utils/sku_map.js b/src/utils/sku_map.js index 9385725..7596af0 100644 --- a/src/utils/sku_map.js +++ b/src/utils/sku_map.js @@ -135,6 +135,13 @@ function findSkuLinksFile({ dbDir, mappingFile } = {}) { return ""; } +function normalizeImplicitSkuKey(k) { + const s = String(k || "").trim(); + const m = s.match(/^id:(\d{1,6})$/i); + if (m) return String(m[1]).padStart(6, "0"); + return s; +} + /* ---------------- Public API ---------------- */ function buildSkuMapFromLinksArray(links) { @@ -142,8 +149,8 @@ function buildSkuMapFromLinksArray(links) { const all = new Set(); for (const x of Array.isArray(links) ? links : []) { - const a = String(x?.fromSku || "").trim(); - const b = String(x?.toSku || "").trim(); + const a = normalizeImplicitSkuKey(x?.fromSku); + const b = normalizeImplicitSkuKey(x?.toSku); if (!a || !b) continue; all.add(a); @@ -179,7 +186,7 @@ function buildSkuMapFromLinksArray(links) { } function canonicalSku(sku) { - const s = String(sku || "").trim(); + const s = normalizeImplicitSkuKey(sku); if (!s) return s; return canonBySku.get(s) || s; } diff --git a/tools/dedupe_skulinks.js b/tools/dedupe_skulinks.js index f23a813..7dbca67 100644 --- a/tools/dedupe_skulinks.js +++ b/tools/dedupe_skulinks.js @@ -1,4 +1,5 @@ #!/usr/bin/env node +"use strict"; const fs = require("fs"); const path = require("path"); @@ -6,7 +7,14 @@ const path = require("path"); const DB_DIR = path.join(__dirname, "../data/db"); const LINKS_FILE = path.join(__dirname, "../data/sku_links.json"); -// collect all valid SKUs from db files +function normalizeImplicitSkuKey(k) { + const s = String(k || "").trim(); + const m = s.match(/^id:(\d{1,6})$/i); + if (m) return String(m[1]).padStart(6, "0"); + return s; +} + +// collect all valid SKUs from db files (normalized) const validSkus = new Set(); for (const file of fs.readdirSync(DB_DIR)) { @@ -14,26 +22,63 @@ for (const file of fs.readdirSync(DB_DIR)) { const data = JSON.parse(fs.readFileSync(path.join(DB_DIR, file), "utf8")); if (!Array.isArray(data.items)) continue; for (const item of data.items) { - if (item.sku) validSkus.add(String(item.sku)); + if (!item || !item.sku) continue; + const k = normalizeImplicitSkuKey(item.sku); + if (k) validSkus.add(k); } } // load links const linksData = JSON.parse(fs.readFileSync(LINKS_FILE, "utf8")); -const originalCount = linksData.links.length; +const originalCount = Array.isArray(linksData.links) ? linksData.links.length : 0; -// keep only links where BOTH skus exist -linksData.links = linksData.links.filter( - ({ fromSku, toSku }) => - validSkus.has(String(fromSku)) && validSkus.has(String(toSku)) -); +let prunedMissing = 0; +let prunedAuto = 0; +let prunedDup = 0; + +const seen = new Set(); // dedupe after normalization + +const nextLinks = []; +for (const x of Array.isArray(linksData.links) ? linksData.links : []) { + const a = normalizeImplicitSkuKey(x?.fromSku); + const b = normalizeImplicitSkuKey(x?.toSku); + if (!a || !b) { + prunedMissing++; + continue; + } + + // drop links that are now implicit (id:1234 <-> 001234 etc) + if (a === b) { + prunedAuto++; + continue; + } + + // keep only links where BOTH normalized skus exist in db + if (!validSkus.has(a) || !validSkus.has(b)) { + prunedMissing++; + continue; + } + + // dedupe (undirected) after normalization + const key = a < b ? `${a}|${b}` : `${b}|${a}`; + if (seen.has(key)) { + prunedDup++; + continue; + } + seen.add(key); + + nextLinks.push({ fromSku: a, toSku: b }); +} + +linksData.links = nextLinks; // write back in place -fs.writeFileSync( - LINKS_FILE, - JSON.stringify(linksData, null, 2) + "\n" -); +fs.writeFileSync(LINKS_FILE, JSON.stringify(linksData, null, 2) + "\n"); -console.log( - `Pruned ${originalCount - linksData.links.length} invalid links` -); +const totalPruned = originalCount - linksData.links.length; + +console.log(`Pruned ${totalPruned} total links`); +console.log(`- ${prunedAuto} now-implicit (id:<1-6> ↔ CSPC)`); +console.log(`- ${prunedMissing} missing/invalid vs db`); +console.log(`- ${prunedDup} duplicates after normalization`); +console.log(`Remaining ${linksData.links.length}`); diff --git a/viz/app/mapping.js b/viz/app/mapping.js index 0781078..cfefdb5 100644 --- a/viz/app/mapping.js +++ b/viz/app/mapping.js @@ -8,9 +8,16 @@ export function clearSkuRulesCache() { CACHED = null; } +function normalizeImplicitSkuKey(k) { + const s = String(k || "").trim(); + const m = s.match(/^id:(\d{1,6})$/i); + if (m) return String(m[1]).padStart(6, "0"); + return s; +} + function canonicalPairKey(a, b) { - const x = String(a || ""); - const y = String(b || ""); + const x = normalizeImplicitSkuKey(a); + const y = normalizeImplicitSkuKey(b); if (!x || !y) return ""; return x < y ? `${x}|${y}` : `${y}|${x}`; } @@ -19,8 +26,8 @@ function buildForwardMap(links) { // Keep this for reference/debug; grouping no longer depends on direction. const m = new Map(); for (const x of Array.isArray(links) ? links : []) { - const fromSku = String(x?.fromSku || "").trim(); - const toSku = String(x?.toSku || "").trim(); + const fromSku = normalizeImplicitSkuKey(x?.fromSku); + const toSku = normalizeImplicitSkuKey(x?.toSku); if (fromSku && toSku && fromSku !== toSku) m.set(fromSku, toSku); } return m; @@ -120,8 +127,8 @@ function buildGroupsAndCanonicalMap(links) { const all = new Set(); for (const x of Array.isArray(links) ? links : []) { - const a = String(x?.fromSku || "").trim(); - const b = String(x?.toSku || "").trim(); + const a = normalizeImplicitSkuKey(x?.fromSku); + const b = normalizeImplicitSkuKey(x?.toSku); if (!a || !b) continue; all.add(a); all.add(b); @@ -187,7 +194,7 @@ export async function loadSkuRules() { const ignoreSet = buildIgnoreSet(ignores); function canonicalSku(sku) { - const s = String(sku || "").trim(); + const s = normalizeImplicitSkuKey(sku); if (!s) return s; return canonBySku.get(s) || s; } diff --git a/viz/app/sku.js b/viz/app/sku.js index 4dcdf8a..4c0ef54 100644 --- a/viz/app/sku.js +++ b/viz/app/sku.js @@ -21,8 +21,12 @@ export function parsePriceToNumber(v) { } export function keySkuForRow(r) { - const real = String(r?.sku || "").trim(); - return real ? real : makeSyntheticSku(r); + const real0 = String(r?.sku || "").trim(); + if (real0) { + const m = real0.match(/^id:(\d{1,6})$/i); + return m ? String(m[1]).padStart(6, "0") : real0; + } + return makeSyntheticSku(r); } export function displaySku(key) {