/*
 * spirit-tracker/viz/app/linker/suggestions.js
 * Brennan Wilkes (Text Groove) 13e691f0d0 UX Improvements
 * 2026-02-09 22:07:13 -08:00
 *
 * 608 lines
 * 20 KiB
 * JavaScript
 * Raw Blame History
 *
 * This file contains ambiguous Unicode characters
 *
 * This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
 */
// viz/app/linker/suggestions.js
import { tokenizeQuery, normSearchText } from "../sku.js";
import {
smwsKeyFromName,
extractAgeFromText,
filterSimTokens,
tokenContainmentScore,
fastSimilarityScore,
similarityScore,
} from "./similarity.js";
/* ---------------- Randomization helpers ---------------- */
/**
 * Create a seeded mulberry32 pseudo-random number generator.
 *
 * The internal state is wrapped to a 32-bit integer on every step. Letting it
 * grow as a plain double (as a bare `+=` would) eventually pushes it past
 * 2^53, where additions start rounding and the output stream silently
 * diverges from mulberry32.
 *
 * @param {number} seed - Seed value; only its low 32 bits are used.
 * @returns {function(): number} Generator returning floats in [0, 1).
 */
function mulberry32(seed) {
  let state = seed >>> 0;
  return function next() {
    // `| 0` keeps the accumulator inside int32 so it never loses precision.
    state = (state + 0x6d2b79f5) | 0;
    let x = Math.imul(state ^ (state >>> 15), 1 | state);
    x ^= x + Math.imul(x ^ (x >>> 7), 61 | x);
    // Convert the mixed 32-bit value to an unsigned int, then scale to [0, 1).
    return ((x ^ (x >>> 14)) >>> 0) / 4294967296;
  };
}
/**
 * Shuffle an array in place with a Fisher–Yates pass driven by `rnd`.
 *
 * Walks from the last index down to 1, swapping each slot with a slot at or
 * below it chosen via `rnd()`. `rnd` is called exactly `arr.length - 1` times
 * (zero times for arrays shorter than two elements).
 *
 * @param {Array} arr - Array to shuffle; it is mutated.
 * @param {function(): number} rnd - Random source expected to return [0, 1).
 * @returns {Array} The same array instance, now shuffled.
 */
function shuffleInPlace(arr, rnd) {
  let hi = arr.length - 1;
  while (hi > 0) {
    // Truncate toward zero to get an index in [0, hi].
    const pick = (rnd() * (hi + 1)) | 0;
    [arr[hi], arr[pick]] = [arr[pick], arr[hi]];
    hi -= 1;
  }
  return arr;
}
/* ---------------- Suggestion helpers ---------------- */
/**
 * Return the `limit` highest-ranked catalog items using cheap "data quality"
 * signals only (no name similarity).
 *
 * Score = 2 per store + 1.2 if a price is present + 1.0 if a name is present
 * + 0.6 if the SKU starts with "u:". Ties keep their input order.
 *
 * A price counts as present only when `cheapestPriceNum` is neither `null`
 * nor `undefined`, matching the `!= null` checks used by the other rankers in
 * this file.
 *
 * @param {Iterable<Object>} allAgg - Items; falsy entries are skipped. Reads
 *   `sku`, `name`, `stores` (anything exposing `.size`) and `cheapestPriceNum`.
 * @param {number} limit - Maximum number of items to return.
 * @param {*} otherPinnedSku - SKU to exclude (compared as a string); falsy
 *   disables the exclusion.
 * @param {*} mappedSkus - Accepted for interface compatibility; currently
 *   unused because the mapped-SKU exclusion is disabled.
 * @returns {Array<Object>} Items from `allAgg`, best first.
 */
export function topSuggestions(allAgg, limit, otherPinnedSku, mappedSkus) {
  const excludedSku = otherPinnedSku ? String(otherPinnedSku) : "";
  const scored = [];

  for (const it of allAgg) {
    if (!it) continue;
    if (excludedSku && String(it.sku) === excludedSku) continue;

    const stores = it.stores ? it.stores.size : 0;
    // `!= null` rejects both null and undefined, so a missing field earns no boost.
    const hasPrice = it.cheapestPriceNum != null ? 1 : 0;
    const hasName = it.name ? 1 : 0;
    const unknown = String(it.sku || "").startsWith("u:") ? 1 : 0;

    scored.push({ it, s: stores * 2 + hasPrice * 1.2 + hasName * 1.0 + unknown * 0.6 });
  }

  scored.sort((a, b) => b.s - a.s);
  return scored.slice(0, limit).map((x) => x.it);
}
// viz/app/linker/suggestions.js
// (requires fnv1a32u(str) helper to exist in this file)
/**
 * Suggest catalog items that look like the same product as `pinned`.
 *
 * Pipeline:
 *  1. Without a pinned item/name, defer to `topSuggestions`.
 *  2. Cheap stage: scan the catalog (all of it when small, otherwise a window
 *     starting at a hash-derived offset), score with `fastSimilarityScore`
 *     plus penalties, and keep the best `MAX_CHEAP_KEEP` candidates.
 *  3. Fine stage: re-score the top `MAX_FINE` candidates with the more
 *     expensive `similarityScore` and the same family of penalties.
 *  4. If the fine stage yields nothing, fall back to a data-quality ranking
 *     of the first 250 non-blocked items.
 *
 * @param {Array<Object>} allAgg - Catalog items. Reads `sku`, `name`,
 *   `stores` (anything exposing `.size`) and `cheapestPriceNum`.
 * @param {Object} pinned - Item to find matches for.
 * @param {number} limit - Maximum number of suggestions.
 * @param {*} otherPinnedSku - SKU pinned on the other side; never suggested.
 * @param {*} mappedSkus - Only forwarded to `topSuggestions`; the mapped-SKU
 *   exclusion is disabled in this function.
 * @param {Function} [isIgnoredPairFn] - `(pinnedSku, sku)`; truthy blocks the pair.
 * @param {Function} [sizePenaltyFn] - `(pinnedSku, sku)`; score multiplier.
 * @param {Function} [pricePenaltyFn] - `(pinnedSku, sku)`; score multiplier.
 * @param {Function} [sameStoreFn] - `(pinnedSku, sku)`; truthy blocks the pair.
 * @param {Function} [sameGroupFn] - `(pinnedSku, sku)`; truthy blocks the pair.
 * @returns {Array<Object>} Up to `limit` items from `allAgg`, best first.
 */
export function recommendSimilar(
  allAgg,
  pinned,
  limit,
  otherPinnedSku,
  mappedSkus,
  isIgnoredPairFn,
  sizePenaltyFn,
  pricePenaltyFn,
  sameStoreFn,
  sameGroupFn
) {
  if (!pinned || !pinned.name) return topSuggestions(allAgg, limit, otherPinnedSku, mappedSkus);

  const pinnedSku = String(pinned.sku || "");
  const otherSku = otherPinnedSku ? String(otherPinnedSku) : "";
  const base = String(pinned.name || "");
  const pinNorm = normSearchText(pinned.name || "");
  const pinRawToks = tokenizeQuery(pinNorm);
  const pinToks = filterSimTokens(pinRawToks);
  const pinBrand = pinToks[0] || "";
  const pinAge = extractAgeFromText(pinNorm);
  const pinnedSmws = smwsKeyFromName(pinned.name || "");

  // ---- Tuning knobs ----
  const MAX_SCAN = 5000; // scan window for huge catalogs
  const FULL_SCAN_UNDER = 12000; // scan everything if the catalog is at most this big
  const MAX_CHEAP_KEEP = 320; // candidates kept from the cheap stage
  const MAX_FINE = 70; // expensive score only on the top N
  // ----------------------

  // Lazy top-K keeper: lets the array grow to 2k, then sorts and trims to k.
  function pushTopK(arr, item, k) {
    arr.push(item);
    if (arr.length >= k * 2) {
      arr.sort((a, b) => b.s - a.s);
      arr.length = k;
    }
  }

  // Hard blocks shared by the scan and the fallback; nothing here is a soft penalty.
  function isBlocked(itSku) {
    if (!itSku) return true;
    if (itSku === pinnedSku) return true;
    if (otherSku && itSku === otherSku) return true;
    if (typeof sameStoreFn === "function" && sameStoreFn(pinnedSku, itSku)) return true;
    if (typeof isIgnoredPairFn === "function" && isIgnoredPairFn(pinnedSku, itSku)) return true;
    if (typeof sameGroupFn === "function" && sameGroupFn(pinnedSku, itSku)) return true;
    return false;
  }

  // Soft penalty applied when the first filtered tokens differ: scales with
  // token containment, and is much harsher for short names with low overlap.
  function firstTokenMismatchMultiplier(itTokCount, contain) {
    const smallN = Math.min(pinToks.length || 0, itTokCount || 0);
    let mult = 0.10 + 0.95 * contain;
    if (smallN <= 3 && contain < 0.78) mult *= 0.22;
    return Math.min(1.0, mult);
  }

  const cheap = [];
  const nAll = allAgg.length || 0;
  if (!nAll) return [];

  // Scan the whole catalog when it is not huge.
  const scanN = nAll <= FULL_SCAN_UNDER ? nAll : Math.min(MAX_SCAN, nAll);

  // Rotate the start by a hash of the pinned item to avoid always scanning
  // the same leading region, while still covering scanN items sequentially.
  const start = (fnv1a32u(pinnedSku || pinNorm) % nAll) >>> 0;

  // Optional debug: uncomment to verify we're actually hitting the region you expect
  // console.log("[linker] recommendSimilar scan2", { pinnedSku, nAll, scanN, start, startName: allAgg[start]?.name });

  for (let i = 0; i < scanN; i++) {
    const it = allAgg[(start + i) % nAll];
    if (!it) continue;

    const itSku = String(it.sku || "");
    if (isBlocked(itSku)) continue;

    // SMWS exact code match: give it a huge cheap score so it always
    // survives into the fine stage.
    // NOTE(review): the fine stage re-scores with similarityScore, so the
    // final position depends on that function — confirm it ranks SMWS
    // code matches first.
    if (pinnedSmws) {
      const k = smwsKeyFromName(it.name || "");
      if (k && k === pinnedSmws) {
        const stores = it.stores ? it.stores.size : 0;
        const hasPrice = it.cheapestPriceNum != null ? 1 : 0;
        pushTopK(
          cheap,
          { it, s: 1e9 + stores * 10 + hasPrice, itNorm: "", itRawToks: null },
          MAX_CHEAP_KEEP
        );
        continue;
      }
    }

    const itNorm = normSearchText(it.name || "");
    if (!itNorm) continue;

    const itRawToks = tokenizeQuery(itNorm);
    const itToks = filterSimTokens(itRawToks);
    if (!itToks.length) continue;

    const itBrand = itToks[0] || "";
    const firstMatch = pinBrand && itBrand && pinBrand === itBrand;
    const contain = tokenContainmentScore(pinRawToks, itRawToks);

    // Cheap score first; a non-positive score gets a small floor so the
    // candidate can still compete on containment.
    let s0 = fastSimilarityScore(pinRawToks, itRawToks, pinNorm, itNorm);
    if (s0 <= 0) s0 = 0.01 + 0.25 * contain;

    // Soft first-token mismatch penalty (never blocks).
    if (!firstMatch) s0 *= firstTokenMismatchMultiplier(itToks.length, contain);

    if (typeof sizePenaltyFn === "function") s0 *= sizePenaltyFn(pinnedSku, itSku);
    if (typeof pricePenaltyFn === "function") s0 *= pricePenaltyFn(pinnedSku, itSku);

    // Age statements: boost agreement, punish disagreement, ignore if either is missing.
    const itAge = extractAgeFromText(itNorm);
    if (pinAge && itAge) {
      if (pinAge === itAge) s0 *= 1.6;
      else s0 *= 0.22;
    }

    // Small boost when either side is an unknown ("u:") SKU.
    if (pinnedSku.startsWith("u:") || itSku.startsWith("u:")) s0 *= 1.08;

    pushTopK(cheap, { it, s: s0, itNorm, itRawToks }, MAX_CHEAP_KEEP);
  }

  // Final sort/trim, since pushTopK only sorts occasionally.
  cheap.sort((a, b) => b.s - a.s);
  if (cheap.length > MAX_CHEAP_KEEP) cheap.length = MAX_CHEAP_KEEP;

  // Fine stage: expensive scoring only on the top candidates.
  const fine = [];
  for (const x of cheap.slice(0, MAX_FINE)) {
    const it = x.it;
    const itSku = String(it.sku || "");

    let s = similarityScore(base, it.name || "");
    if (s <= 0) continue;

    // SMWS entries were stored without normalized text/tokens; rebuild them here.
    const itNorm = x.itNorm || normSearchText(it.name || "");
    const itRawToks = x.itRawToks || tokenizeQuery(itNorm);
    const itToks = filterSimTokens(itRawToks);
    const itBrand = itToks[0] || "";
    const firstMatch = pinBrand && itBrand && pinBrand === itBrand;
    const contain = tokenContainmentScore(pinRawToks, itRawToks);

    if (!firstMatch) {
      s *= firstTokenMismatchMultiplier(itToks.length, contain);
      if (s <= 0) continue;
    }

    if (typeof sizePenaltyFn === "function") {
      s *= sizePenaltyFn(pinnedSku, itSku);
      if (s <= 0) continue;
    }

    if (typeof pricePenaltyFn === "function") {
      s *= pricePenaltyFn(pinnedSku, itSku);
      if (s <= 0) continue;
    }

    const itAge = extractAgeFromText(itNorm);
    if (pinAge && itAge) {
      if (pinAge === itAge) s *= 2.0;
      else s *= 0.15;
    }

    if (pinnedSku.startsWith("u:") || itSku.startsWith("u:")) s *= 1.12;

    fine.push({ it, s });
  }

  fine.sort((a, b) => b.s - a.s);
  const out = fine.slice(0, limit).map((x) => x.it);
  if (out.length) return out;

  // Fallback: rank the first 250 non-blocked items by data quality.
  const fallback = [];
  for (const it of allAgg) {
    if (!it) continue;

    const itSku = String(it.sku || "");
    if (isBlocked(itSku)) continue;

    const stores = it.stores ? it.stores.size : 0;
    // `!= null` so a missing (undefined) price is not counted as a price,
    // consistent with the SMWS branch above.
    const hasPrice = it.cheapestPriceNum != null ? 1 : 0;
    const hasName = it.name ? 1 : 0;
    fallback.push({ it, s: stores * 2 + hasPrice * 1.2 + hasName * 1.0 });
    if (fallback.length >= 250) break;
  }

  fallback.sort((a, b) => b.s - a.s);
  return fallback.slice(0, limit).map((x) => x.it);
}
/**
 * Build an initial list of likely-duplicate item pairs to present for linking.
 *
 * Steps:
 *  1. Shuffle the catalog with a freshly seeded PRNG, cap it at 9000 items,
 *     and drop items whose SKU is in `mappedSkus`.
 *  2. SMWS stage: bucket items by `smwsKeyFromName`, and pair each bucket's
 *     best-ranked item (the anchor) with the other bucket members.
 *  3. General stage: take the top items per `topSuggestions` as seeds, gather
 *     candidates sharing a filtered name token, score cheaply, re-score the
 *     best few with `similarityScore`, and keep each seed's best partner if
 *     its score is at least 0.50.
 *  4. Jitter the ranking of the best general pairs slightly, then greedily
 *     take pairs so that no SKU is reused, until `limitPairs` is reached.
 *
 * Results are intentionally non-deterministic (time/random seeded).
 *
 * @param {Array<Object>} allAgg - Catalog items. Reads `sku`, `name`,
 *   `stores` (anything exposing `.size`) and `cheapestPriceNum`.
 * @param {*} mappedSkus - Optional collection with `.has(sku)`; members are excluded.
 * @param {number} limitPairs - Maximum number of pairs to return.
 * @param {Function} [isIgnoredPairFn] - `(aSku, bSku)`; truthy blocks the pair.
 * @param {Function} [sameStoreFn] - `(aSku, bSku)`; truthy blocks the pair.
 * @param {Function} [sameGroupFn] - `(aSku, bSku)`; truthy blocks the pair.
 * @param {Function} [sizePenaltyFn] - `(aSku, bSku)`; score multiplier.
 * @param {Function} [pricePenaltyFn] - `(aSku, bSku)`; score multiplier.
 * @returns {Array<{a: Object, b: Object, score: number}>} Suggested pairs,
 *   SMWS pairs first.
 */
export function computeInitialPairsFast(
  allAgg,
  mappedSkus,
  limitPairs,
  isIgnoredPairFn,
  sameStoreFn,
  sameGroupFn,
  sizePenaltyFn,
  pricePenaltyFn
) {
  const itemsAll = allAgg.filter((it) => !!it);

  // New seed per call so repeated calls surface different pairs.
  const seed = (Date.now() ^ ((Math.random() * 1e9) | 0)) >>> 0;
  const rnd = mulberry32(seed);
  const itemsShuf = itemsAll.slice();
  shuffleInPlace(itemsShuf, rnd);

  // Bigger cap is fine; still bounded.
  const WORK_CAP = Math.min(9000, itemsShuf.length);
  const workAll = itemsShuf.length > WORK_CAP ? itemsShuf.slice(0, WORK_CAP) : itemsShuf;

  // Unmapped-only view used by every later stage.
  const work = workAll.filter((it) => !(mappedSkus && mappedSkus.has(String(it.sku))));

  // Data-quality rank used to choose anchors and order SMWS pairs.
  function itemRank(it) {
    const stores = it.stores ? it.stores.size : 0;
    const hasPrice = it.cheapestPriceNum != null ? 1 : 0;
    const hasName = it.name ? 1 : 0;
    const unknown = String(it.sku || "").startsWith("u:") ? 1 : 0;
    return stores * 3 + hasPrice * 2 + hasName * 0.5 + unknown * 0.25;
  }

  // Soft penalty applied when the first filtered tokens of two names differ:
  // scales with token containment, harsher for short names with low overlap.
  function firstTokenMismatchMultiplier(aTokCount, bTokCount, contain) {
    const smallN = Math.min(aTokCount || 0, bTokCount || 0);
    let mult = 0.10 + 0.95 * contain;
    if (smallN <= 3 && contain < 0.78) mult *= 0.22;
    return Math.min(1.0, mult);
  }

  // --- SMWS exact-code pairs first (blocks same-store, ignored, same-group, mapped) ---
  function smwsPairsFirst(workArr, limit) {
    const buckets = new Map(); // code -> items[]

    for (const it of workArr) {
      if (!it) continue;
      const sku = String(it.sku || "");
      if (!sku) continue;

      // Keep the SMWS stage unmapped-only even if called with a mixed array.
      if (mappedSkus && mappedSkus.has(sku)) continue;

      const code = smwsKeyFromName(it.name || "");
      if (!code) continue;

      let arr = buckets.get(code);
      if (!arr) buckets.set(code, (arr = []));
      arr.push(it);
    }

    const candPairs = [];

    for (const arr0 of buckets.values()) {
      if (!arr0 || arr0.length < 2) continue;

      // Best-ranked first, at most 80 members per code.
      const arr = arr0
        .slice()
        .sort((a, b) => itemRank(b) - itemRank(a))
        .slice(0, 80);

      // `arr` is already sorted by rank, so the anchor is simply its head;
      // no second sort is needed.
      const anchor = arr[0];
      if (!anchor) continue;

      for (const u of arr) {
        if (u === anchor) continue;
        const a = anchor;
        const b = u;
        const aSku = String(a.sku || "");
        const bSku = String(b.sku || "");
        if (!aSku || !bSku || aSku === bSku) continue;

        if (typeof sameStoreFn === "function" && sameStoreFn(aSku, bSku)) continue;
        if (typeof isIgnoredPairFn === "function" && isIgnoredPairFn(aSku, bSku)) continue;

        // Do not suggest pairs that are already linked.
        if (typeof sameGroupFn === "function" && sameGroupFn(aSku, bSku)) continue;

        // Extra safety (input should already be unmapped-only).
        if (mappedSkus && (mappedSkus.has(aSku) || mappedSkus.has(bSku))) continue;

        // 1e9 offset keeps SMWS pairs above any similarity-scored pair.
        const s = 1e9 + itemRank(a) + itemRank(b);
        candPairs.push({ a, b, score: s });
      }
    }

    candPairs.sort((x, y) => y.score - x.score);

    // Each non-anchor SKU appears in at most one pair; anchors may repeat.
    const usedUnmapped = new Set();
    const out0 = [];
    for (const p of candPairs) {
      const bSku = String(p.b.sku || "");
      if (!bSku) continue;
      if (usedUnmapped.has(bSku)) continue;
      usedUnmapped.add(bSku);
      out0.push(p);
      if (out0.length >= limit) break;
    }

    return { pairs: out0, usedUnmapped };
  }

  // SMWS stage runs on `work` (unmapped-only), not `workAll`.
  const smwsFirst = smwsPairsFirst(work, limitPairs);
  const used = new Set(smwsFirst.usedUnmapped);
  const out = smwsFirst.pairs.slice();
  if (out.length >= limitPairs) return out.slice(0, limitPairs);

  // --- General pairing ---
  const seeds = topSuggestions(work, Math.min(220, work.length), "", mappedSkus).filter(
    (it) => !used.has(String(it?.sku || ""))
  );

  // Token buckets over normalized names, plus per-SKU caches of the
  // normalized name and its raw/filtered tokens.
  const TOKEN_BUCKET_CAP = 700;
  const tokMap = new Map(); // token -> items[]
  const itemRawToks = new Map(); // sku -> raw tokens
  const itemNorm = new Map(); // sku -> norm name
  const itemFilt = new Map(); // sku -> filtered tokens

  for (const it of work) {
    const sku = String(it.sku || "");
    if (!sku) continue;

    const n = normSearchText(it.name || "");
    const raw = tokenizeQuery(n);
    const filt = filterSimTokens(raw);

    itemNorm.set(sku, n);
    itemRawToks.set(sku, raw);
    itemFilt.set(sku, filt);

    // Index only the first 12 filtered tokens; cap each bucket's size.
    for (const t of filt.slice(0, 12)) {
      let arr = tokMap.get(t);
      if (!arr) tokMap.set(t, (arr = []));
      if (arr.length < TOKEN_BUCKET_CAP) arr.push(it);
    }
  }

  const bestByPair = new Map(); // order-independent "skuA|skuB" -> best pair
  const MAX_CAND_TOTAL = 450;
  const MAX_FINE = 18;

  for (const a of seeds) {
    const aSku = String(a.sku || "");
    if (!aSku || used.has(aSku)) continue;

    const aNorm = itemNorm.get(aSku) || normSearchText(a.name || "");
    const aRaw = itemRawToks.get(aSku) || tokenizeQuery(aNorm);
    const aFilt = itemFilt.get(aSku) || filterSimTokens(aRaw);
    if (!aFilt.length) continue;

    const aBrand = aFilt[0] || "";
    const aAge = extractAgeFromText(aNorm);
    const aIsUnknown = aSku.startsWith("u:");

    // Gather candidates sharing any of the seed's first 10 filtered tokens.
    const cand = new Map(); // sku -> item
    for (const t of aFilt.slice(0, 10)) {
      const arr = tokMap.get(t);
      if (!arr) continue;

      for (let i = 0; i < arr.length && cand.size < MAX_CAND_TOTAL; i++) {
        const b = arr[i];
        if (!b) continue;
        const bSku = String(b.sku || "");
        if (!bSku || bSku === aSku) continue;
        if (used.has(bSku)) continue;

        if (typeof isIgnoredPairFn === "function" && isIgnoredPairFn(aSku, bSku)) continue;
        if (typeof sameStoreFn === "function" && sameStoreFn(aSku, bSku)) continue;

        // Block already-linked groups here too.
        if (typeof sameGroupFn === "function" && sameGroupFn(aSku, bSku)) continue;

        cand.set(bSku, b);
      }
      if (cand.size >= MAX_CAND_TOTAL) break;
    }
    if (!cand.size) continue;

    // Cheap score stage.
    const cheap = [];
    for (const b of cand.values()) {
      const bSku = String(b.sku || "");
      const bNorm = itemNorm.get(bSku) || normSearchText(b.name || "");
      const bRaw = itemRawToks.get(bSku) || tokenizeQuery(bNorm);
      const bFilt = itemFilt.get(bSku) || filterSimTokens(bRaw);
      if (!bFilt.length) continue;

      const contain = tokenContainmentScore(aRaw, bRaw);
      const bBrand = bFilt[0] || "";
      const firstMatch = aBrand && bBrand && aBrand === bBrand;

      let s = fastSimilarityScore(aRaw, bRaw, aNorm, bNorm);
      if (s <= 0) s = 0.01 + 0.25 * contain;

      if (!firstMatch) s *= firstTokenMismatchMultiplier(aFilt.length, bFilt.length, contain);

      if (typeof sizePenaltyFn === "function") s *= sizePenaltyFn(aSku, bSku);
      if (typeof pricePenaltyFn === "function") s *= pricePenaltyFn(aSku, bSku);

      const bAge = extractAgeFromText(bNorm);
      if (aAge && bAge) {
        if (aAge === bAge) s *= 1.6;
        else s *= 0.22;
      }

      if (aIsUnknown || bSku.startsWith("u:")) s *= 1.06;

      // Cache the per-candidate values so the fine stage does not recompute them.
      if (s > 0) cheap.push({ b, s, bNorm, bRaw, bFilt, contain, firstMatch, bAge });
    }

    if (!cheap.length) continue;
    cheap.sort((x, y) => y.s - x.s);

    // Fine stage: expensive score on the best few, keep only the single best.
    let bestB = null;
    let bestS = 0;
    for (const x of cheap.slice(0, MAX_FINE)) {
      const b = x.b;
      const bSku = String(b.sku || "");

      let s = similarityScore(a.name || "", b.name || "");
      if (s <= 0) continue;

      if (!x.firstMatch) {
        s *= firstTokenMismatchMultiplier(aFilt.length, (x.bFilt || []).length, x.contain);
        if (s <= 0) continue;
      }

      if (typeof sizePenaltyFn === "function") {
        s *= sizePenaltyFn(aSku, bSku);
        if (s <= 0) continue;
      }

      if (typeof pricePenaltyFn === "function") {
        s *= pricePenaltyFn(aSku, bSku);
        if (s <= 0) continue;
      }

      if (aAge && x.bAge) {
        if (aAge === x.bAge) s *= 2.0;
        else s *= 0.15;
      }

      if (aIsUnknown || bSku.startsWith("u:")) s *= 1.10;

      if (s > bestS) {
        bestS = s;
        bestB = b;
      }
    }

    // Minimum confidence for suggesting a pair at all.
    if (!bestB || bestS < 0.50) continue;

    const bSku = String(bestB.sku || "");
    if (!bSku || used.has(bSku)) continue;

    // Order-independent key so (a,b) and (b,a) collapse to one entry.
    const key = aSku < bSku ? `${aSku}|${bSku}` : `${bSku}|${aSku}`;
    const prev = bestByPair.get(key);
    if (!prev || bestS > prev.score) bestByPair.set(key, { a, b: bestB, score: bestS });
  }

  const pairs = Array.from(bestByPair.values());
  pairs.sort((x, y) => y.score - x.score);

  // ---- Light randomness inside a top band ----
  const need = Math.max(0, limitPairs - out.length);
  if (!need) return out.slice(0, limitPairs);

  const TOP_BAND = Math.min(700, pairs.length);
  const JITTER = 0.08; // ranks are perturbed by up to +/-4%

  const band = pairs.slice(0, TOP_BAND).map((p) => {
    const jitter = (rnd() - 0.5) * JITTER;
    return { ...p, _rank: p.score * (1 + jitter) };
  });
  band.sort((a, b) => b._rank - a._rank);

  // Accept a pair only if neither SKU is already used and no hard block applies.
  function tryTake(p) {
    const aSku = String(p.a.sku || "");
    const bSku = String(p.b.sku || "");
    if (!aSku || !bSku || aSku === bSku) return false;
    if (used.has(aSku) || used.has(bSku)) return false;
    if (typeof sameStoreFn === "function" && sameStoreFn(aSku, bSku)) return false;
    if (typeof isIgnoredPairFn === "function" && isIgnoredPairFn(aSku, bSku)) return false;

    // Block already-linked groups here too.
    if (typeof sameGroupFn === "function" && sameGroupFn(aSku, bSku)) return false;

    used.add(aSku);
    used.add(bSku);
    // Emit the true score, not the jittered rank.
    out.push({ a: p.a, b: p.b, score: p.score });
    return true;
  }

  for (const p of band) {
    if (out.length >= limitPairs) break;
    tryTake(p);
  }

  // Top up from the un-jittered remainder if the band did not fill the quota.
  if (out.length < limitPairs) {
    for (let i = TOP_BAND; i < pairs.length; i++) {
      if (out.length >= limitPairs) break;
      tryTake(pairs[i]);
    }
  }

  return out.slice(0, limitPairs);
}
/**
 * Compute the 32-bit FNV-1a hash of a string, one UTF-16 code unit at a time.
 *
 * Falsy inputs (null, undefined, "", 0, ...) are hashed as the empty string.
 *
 * @param {*} str - Value to hash; coerced with `String()`.
 * @returns {number} Unsigned 32-bit hash.
 */
function fnv1a32u(str) {
  const OFFSET_BASIS = 0x811c9dc5;
  const PRIME = 0x01000193;

  const text = String(str || "");
  let hash = OFFSET_BASIS;
  for (let idx = 0, len = text.length; idx < len; idx += 1) {
    // XOR the code unit in first, then multiply (FNV-1a order); imul keeps it 32-bit.
    hash = Math.imul(hash ^ text.charCodeAt(idx), PRIME);
  }
  return hash >>> 0;
}