fix: UPC Normalization

This commit is contained in:
Brennan Wilkes (Text Groove) 2026-01-29 12:51:02 -08:00
parent e66c0ebec1
commit 4b896fd90f
2 changed files with 36 additions and 54 deletions

View file

@ -161,10 +161,16 @@ function productFromApi(p) {
p?.CountDetails?.PriceText || p?.CountDetails?.PriceText ||
(Number.isFinite(p?.Price) ? `$${Number(p.Price).toFixed(2)}` : ""); (Number.isFinite(p?.Price) ? `$${Number(p.Price).toFixed(2)}` : "");
const upc = String(p.UPC || "").trim(); const upc = String(p.UPC || "").trim();
const rawKey = upc || String(p.ProductStoreID || "").trim() || String(p.ProductID || "").trim();
let rawKey = "";
if (upc) rawKey = `upc:${upc}`;
else if (p.ProductStoreID) rawKey = `id:${String(p.ProductStoreID).trim()}`;
else if (p.ProductID) rawKey = `id:${String(p.ProductID).trim()}`;
const sku = normalizeSkuKey(rawKey, { storeLabel: "Co-op World of Whisky", url }); const sku = normalizeSkuKey(rawKey, { storeLabel: "Co-op World of Whisky", url });
const img = normalizeAbsUrl(p.ImageURL); const img = normalizeAbsUrl(p.ImageURL);
return { return {

View file

@ -1,11 +1,6 @@
// src/utils/sku.js
"use strict"; "use strict";
// Alberta CSPC / product code is 6 digits. Some stores label it "SKU".
function normalizeCspc(v) {
const m = String(v ?? "").match(/\b(\d{6})\b/);
return m ? m[1] : "";
}
function fnv1a32(str) { function fnv1a32(str) {
let h = 0x811c9dc5; let h = 0x811c9dc5;
for (let i = 0; i < str.length; i++) { for (let i = 0; i < str.length; i++) {
@ -15,71 +10,52 @@ function fnv1a32(str) {
return (h >>> 0).toString(16).padStart(8, "0"); return (h >>> 0).toString(16).padStart(8, "0");
} }
function normalizeUpc(v) { function normalizeCspc(v) {
const m = String(v ?? "").match(/\b(\d{6})\b/);
return m ? m[1] : "";
}
function normalizeUpcDigits(v) {
const m = String(v ?? "").match(/\b(\d{12,14})\b/); const m = String(v ?? "").match(/\b(\d{12,14})\b/);
return m ? m[1] : ""; return m ? m[1] : "";
} }
// Other stable-ish numeric IDs (e.g. ProductStoreID), keep bounded function normalizeIdDigits(v) {
function normalizeNumericId(v) {
const m = String(v ?? "").match(/\b(\d{4,11})\b/); const m = String(v ?? "").match(/\b(\d{4,11})\b/);
return m ? m[1] : ""; return m ? m[1] : "";
} }
// IMPORTANT: keep old behavior exactly (no lowercasing, no url canonicalization)
function makeSyntheticSkuKey({ storeLabel, url }) { function makeSyntheticSkuKey({ storeLabel, url }) {
const store = String(storeLabel || "store").trim().toLowerCase(); const store = String(storeLabel || "store");
let u = String(url || "").trim(); const u = String(url || "");
if (!u) return ""; if (!u) return "";
// Normalize common "same product, different slug" cases.
// This is intentionally conservative and forward-only: it only changes the
// *synthetic* ID when we otherwise have no real SKU.
try {
const U = new URL(u);
// drop query/hash
U.search = "";
U.hash = "";
// normalize path
let p = U.pathname || "";
// Common pattern: /product/preorder-<slug>/ becomes /product/<slug>/
p = p.replace(/\/product\/preorder-([a-z0-9-]+)\/?$/i, "/product/$1/");
// also normalize trailing slash
if (!p.endsWith("/")) p += "/";
U.pathname = p;
u = U.toString();
} catch {
// If URL() parsing fails, do a minimal string normalize.
u = u.replace(/\/product\/preorder-([a-z0-9-]+)\/?$/i, "/product/$1/");
}
return `u:${fnv1a32(`${store}|${u}`)}`; return `u:${fnv1a32(`${store}|${u}`)}`;
} }
/** /**
* For DB + comparisons: * Behavior:
* - If we can extract a real 6-digit SKU, use it. * - CSPC 6-digit => "123456"
* - Else if UPC-ish digits exist, store as upc:<digits> (low priority but stable) * - explicit upc: => "upc:012345678901"
* - Else if other numeric id exists, store as id:<digits> * - explicit id: => "id:12345"
* - Else if v already looks like u:xxxx, keep it. * - existing u: => keep
* - Else if sku missing, generate u:hash(store|url) if possible. * - else => u:<fnv(store|url)> (old recipe)
*/ */
function normalizeSkuKey(v, { storeLabel, url } = {}) { function normalizeSkuKey(v, { storeLabel, url } = {}) {
const raw = String(v ?? "").trim(); const raw = String(v ?? "").trim();
const cspc = normalizeCspc(raw); const cspc = normalizeCspc(raw);
if (cspc) return cspc; if (cspc) return cspc;
const upc = normalizeUpc(raw); // NEW: only if explicitly tagged, so legacy behavior doesn't change
if (upc) return `upc:${upc}`; if (/^upc:/i.test(raw)) {
const upc = normalizeUpcDigits(raw);
const nid = normalizeNumericId(raw); return upc ? `upc:${upc}` : "";
if (nid) return `id:${nid}`; }
if (/^id:/i.test(raw)) {
const id = normalizeIdDigits(raw);
return id ? `id:${id}` : "";
}
if (raw.startsWith("u:")) return raw; if (raw.startsWith("u:")) return raw;
@ -87,4 +63,4 @@ function normalizeSkuKey(v, { storeLabel, url } = {}) {
return syn || ""; return syn || "";
} }
module.exports = { normalizeCspc, normalizeUpc, normalizeSkuKey, makeSyntheticSkuKey }; module.exports = { normalizeCspc, normalizeSkuKey, makeSyntheticSkuKey };