fix: UPC Normalization

This commit is contained in:
Brennan Wilkes (Text Groove) 2026-01-29 12:51:02 -08:00
parent e66c0ebec1
commit 4b896fd90f
2 changed files with 36 additions and 54 deletions

View file

@ -161,10 +161,16 @@ function productFromApi(p) {
p?.CountDetails?.PriceText ||
(Number.isFinite(p?.Price) ? `$${Number(p.Price).toFixed(2)}` : "");
const upc = String(p.UPC || "").trim();
const rawKey = upc || String(p.ProductStoreID || "").trim() || String(p.ProductID || "").trim();
let rawKey = "";
if (upc) rawKey = `upc:${upc}`;
else if (p.ProductStoreID) rawKey = `id:${String(p.ProductStoreID).trim()}`;
else if (p.ProductID) rawKey = `id:${String(p.ProductID).trim()}`;
const sku = normalizeSkuKey(rawKey, { storeLabel: "Co-op World of Whisky", url });
const img = normalizeAbsUrl(p.ImageURL);
return {

View file

@ -1,11 +1,6 @@
// src/utils/sku.js
"use strict";
// Alberta CSPC / product code is 6 digits. Some stores label it "SKU".
/**
 * Find the first six-digit run in a value that is delimited by word
 * boundaries (i.e. not adjacent to other digits, letters, or underscores).
 *
 * @param {*} v - Value to scan; null/undefined are treated as an empty string,
 *   anything else is converted with String().
 * @returns {string} The six digits, or "" when no such run is present.
 */
function normalizeCspc(v) {
  const text = v == null ? "" : String(v);
  const found = /\b(\d{6})\b/.exec(text);
  if (found === null) {
    return "";
  }
  return found[1];
}
function fnv1a32(str) {
let h = 0x811c9dc5;
for (let i = 0; i < str.length; i++) {
@ -15,71 +10,52 @@ function fnv1a32(str) {
return (h >>> 0).toString(16).padStart(8, "0");
}
function normalizeUpc(v) {
/**
 * Find the first six-digit run in a value that is delimited by word
 * boundaries (i.e. not adjacent to other digits, letters, or underscores).
 *
 * @param {*} v - Value to scan; null/undefined are treated as an empty string,
 *   anything else is converted with String().
 * @returns {string} The six digits, or "" when no such run is present.
 */
function normalizeCspc(v) {
  const text = v == null ? "" : String(v);
  const found = /\b(\d{6})\b/.exec(text);
  if (found === null) {
    return "";
  }
  return found[1];
}
/**
 * Find the first run of 12 to 14 digits in a value that is delimited by word
 * boundaries, so longer or shorter digit runs are not matched.
 *
 * @param {*} v - Value to scan; null/undefined are treated as an empty string,
 *   anything else is converted with String().
 * @returns {string} The matched digits, or "" when there is no such run.
 */
function normalizeUpcDigits(v) {
  const source = v == null ? "" : String(v);
  const hit = /\b(\d{12,14})\b/.exec(source);
  return hit === null ? "" : hit[1];
}
// Other stable-ish numeric IDs (e.g. ProductStoreID), keep bounded
function normalizeNumericId(v) {
/**
 * Find the first run of 4 to 11 digits in a value that is delimited by word
 * boundaries, so longer or shorter digit runs are not matched.
 *
 * @param {*} v - Value to scan; null/undefined are treated as an empty string,
 *   anything else is converted with String().
 * @returns {string} The matched digits, or "" when there is no such run.
 */
function normalizeIdDigits(v) {
  const text = v == null ? "" : String(v);
  const match = /\b(\d{4,11})\b/.exec(text);
  if (match === null) {
    return "";
  }
  return match[1];
}
// IMPORTANT: keep old behavior exactly (no lowercasing, no url canonicalization)
function makeSyntheticSkuKey({ storeLabel, url }) {
const store = String(storeLabel || "store").trim().toLowerCase();
let u = String(url || "").trim();
const store = String(storeLabel || "store");
const u = String(url || "");
if (!u) return "";
// Normalize common "same product, different slug" cases.
// This is intentionally conservative and forward-only: it only changes the
// *synthetic* ID when we otherwise have no real SKU.
try {
const U = new URL(u);
// drop query/hash
U.search = "";
U.hash = "";
// normalize path
let p = U.pathname || "";
// Common pattern: /product/preorder-<slug>/ becomes /product/<slug>/
p = p.replace(/\/product\/preorder-([a-z0-9-]+)\/?$/i, "/product/$1/");
// also normalize trailing slash
if (!p.endsWith("/")) p += "/";
U.pathname = p;
u = U.toString();
} catch {
// If URL() parsing fails, do a minimal string normalize.
u = u.replace(/\/product\/preorder-([a-z0-9-]+)\/?$/i, "/product/$1/");
}
return `u:${fnv1a32(`${store}|${u}`)}`;
}
/**
* For DB + comparisons:
* - If we can extract a real 6-digit SKU, use it.
* - Else if UPC-ish digits exist, store as upc:<digits> (low priority but stable)
* - Else if other numeric id exists, store as id:<digits>
* - Else if v already looks like u:xxxx, keep it.
* - Else if sku missing, generate u:hash(store|url) if possible.
* Behavior:
* - CSPC 6-digit => "123456"
* - explicit upc: => "upc:012345678901"
* - explicit id: => "id:12345"
* - existing u: => keep
* - else => u:<fnv(store|url)> (old recipe)
*/
function normalizeSkuKey(v, { storeLabel, url } = {}) {
const raw = String(v ?? "").trim();
const cspc = normalizeCspc(raw);
if (cspc) return cspc;
const upc = normalizeUpc(raw);
if (upc) return `upc:${upc}`;
const nid = normalizeNumericId(raw);
if (nid) return `id:${nid}`;
// NEW: only if explicitly tagged, so legacy behavior doesn't change
if (/^upc:/i.test(raw)) {
const upc = normalizeUpcDigits(raw);
return upc ? `upc:${upc}` : "";
}
if (/^id:/i.test(raw)) {
const id = normalizeIdDigits(raw);
return id ? `id:${id}` : "";
}
if (raw.startsWith("u:")) return raw;
@ -87,4 +63,4 @@ function normalizeSkuKey(v, { storeLabel, url } = {}) {
return syn || "";
}
module.exports = { normalizeCspc, normalizeUpc, normalizeSkuKey, makeSyntheticSkuKey };
module.exports = { normalizeCspc, normalizeSkuKey, makeSyntheticSkuKey };