This commit is contained in:
Brennan Wilkes (Text Groove) 2026-02-01 11:17:17 -08:00
parent 7b341c2e07
commit d7b6323e27

View file

@ -51,7 +51,6 @@ export function topSuggestions(allAgg, limit, otherPinnedSku, mappedSkus) {
return scored.slice(0, limit).map((x) => x.it); return scored.slice(0, limit).map((x) => x.it);
} }
// same behavior guarantees as your comment in linker_page.js
export function recommendSimilar( export function recommendSimilar(
allAgg, allAgg,
pinned, pinned,
@ -62,7 +61,7 @@ export function recommendSimilar(
sizePenaltyFn, sizePenaltyFn,
sameStoreFn, sameStoreFn,
sameGroupFn sameGroupFn
) { ) {
if (!pinned || !pinned.name) return topSuggestions(allAgg, limit, otherPinnedSku, mappedSkus); if (!pinned || !pinned.name) return topSuggestions(allAgg, limit, otherPinnedSku, mappedSkus);
const pinnedSku = String(pinned.sku || ""); const pinnedSku = String(pinned.sku || "");
@ -76,9 +75,11 @@ export function recommendSimilar(
const pinAge = extractAgeFromText(pinNorm); const pinAge = extractAgeFromText(pinNorm);
const pinnedSmws = smwsKeyFromName(pinned.name || ""); const pinnedSmws = smwsKeyFromName(pinned.name || "");
// ---- Tuning knobs (performance + not-overzealous) ----
const MAX_SCAN = 5000; const MAX_SCAN = 5000;
const MAX_CHEAP_KEEP = 320; const MAX_CHEAP_KEEP = 320;
const MAX_FINE = 70; const MAX_FINE = 70;
// ------------------------------------------------------
function pushTopK(arr, item, k) { function pushTopK(arr, item, k) {
arr.push(item); arr.push(item);
@ -89,11 +90,18 @@ export function recommendSimilar(
} }
const cheap = []; const cheap = [];
let scanned = 0;
for (const it of allAgg) { // ✅ FIX: avoid alphabetical bias by scanning a rotating window
const nAll = allAgg.length || 0;
const scanN = Math.min(MAX_SCAN, nAll);
const start = nAll ? (fnv1a32u(pinnedSku || pinNorm) % nAll) : 0;
// Optional debug:
// console.log("[linker] recommendSimilar scan", { pinnedSku, nAll, scanN, start, startName: allAgg[start]?.name });
for (let i = 0; i < scanN; i++) {
const it = allAgg[(start + i) % nAll];
if (!it) continue; if (!it) continue;
if (scanned++ > MAX_SCAN) break;
const itSku = String(it.sku || ""); const itSku = String(it.sku || "");
if (!itSku) continue; if (!itSku) continue;
@ -101,16 +109,22 @@ export function recommendSimilar(
if (itSku === pinnedSku) continue; if (itSku === pinnedSku) continue;
if (otherSku && itSku === otherSku) continue; if (otherSku && itSku === otherSku) continue;
// HARD BLOCKS ONLY:
if (typeof sameStoreFn === "function" && sameStoreFn(pinnedSku, itSku)) continue; if (typeof sameStoreFn === "function" && sameStoreFn(pinnedSku, itSku)) continue;
if (typeof isIgnoredPairFn === "function" && isIgnoredPairFn(pinnedSku, itSku)) continue; if (typeof isIgnoredPairFn === "function" && isIgnoredPairFn(pinnedSku, itSku)) continue;
if (typeof sameGroupFn === "function" && sameGroupFn(pinnedSku, itSku)) continue; if (typeof sameGroupFn === "function" && sameGroupFn(pinnedSku, itSku)) continue;
// SMWS exact NUM.NUM match => keep at top
if (pinnedSmws) { if (pinnedSmws) {
const k = smwsKeyFromName(it.name || ""); const k = smwsKeyFromName(it.name || "");
if (k && k === pinnedSmws) { if (k && k === pinnedSmws) {
const stores = it.stores ? it.stores.size : 0; const stores = it.stores ? it.stores.size : 0;
const hasPrice = it.cheapestPriceNum != null ? 1 : 0; const hasPrice = it.cheapestPriceNum != null ? 1 : 0;
pushTopK(cheap, { it, s: 1e9 + stores * 10 + hasPrice, itNorm: "", itRawToks: null }, MAX_CHEAP_KEEP); pushTopK(
cheap,
{ it, s: 1e9 + stores * 10 + hasPrice, itNorm: "", itRawToks: null },
MAX_CHEAP_KEEP
);
continue; continue;
} }
} }
@ -126,9 +140,13 @@ export function recommendSimilar(
const firstMatch = pinBrand && itBrand && pinBrand === itBrand; const firstMatch = pinBrand && itBrand && pinBrand === itBrand;
const contain = tokenContainmentScore(pinRawToks, itRawToks); const contain = tokenContainmentScore(pinRawToks, itRawToks);
// Cheap score first (no Levenshtein)
let s0 = fastSimilarityScore(pinRawToks, itRawToks, pinNorm, itNorm); let s0 = fastSimilarityScore(pinRawToks, itRawToks, pinNorm, itNorm);
// If fast score is 0, still allow it as "least bad"
if (s0 <= 0) s0 = 0.01 + 0.25 * contain; if (s0 <= 0) s0 = 0.01 + 0.25 * contain;
// Soft first-token mismatch penalty (never blocks)
if (!firstMatch) { if (!firstMatch) {
const smallN = Math.min(pinToks.length || 0, itToks.length || 0); const smallN = Math.min(pinToks.length || 0, itToks.length || 0);
let mult = 0.10 + 0.95 * contain; let mult = 0.10 + 0.95 * contain;
@ -136,16 +154,19 @@ export function recommendSimilar(
s0 *= Math.min(1.0, mult); s0 *= Math.min(1.0, mult);
} }
// Size penalty early
if (typeof sizePenaltyFn === "function") { if (typeof sizePenaltyFn === "function") {
s0 *= sizePenaltyFn(pinnedSku, itSku); s0 *= sizePenaltyFn(pinnedSku, itSku);
} }
// Age handling early
const itAge = extractAgeFromText(itNorm); const itAge = extractAgeFromText(itNorm);
if (pinAge && itAge) { if (pinAge && itAge) {
if (pinAge === itAge) s0 *= 1.6; if (pinAge === itAge) s0 *= 1.6;
else s0 *= 0.22; else s0 *= 0.22;
} }
// Unknown boost
if (pinnedSku.startsWith("u:") || itSku.startsWith("u:")) s0 *= 1.08; if (pinnedSku.startsWith("u:") || itSku.startsWith("u:")) s0 *= 1.08;
pushTopK(cheap, { it, s: s0, itNorm, itRawToks }, MAX_CHEAP_KEEP); pushTopK(cheap, { it, s: s0, itNorm, itRawToks }, MAX_CHEAP_KEEP);
@ -153,6 +174,7 @@ export function recommendSimilar(
cheap.sort((a, b) => b.s - a.s); cheap.sort((a, b) => b.s - a.s);
// Fine stage: expensive scoring only on top candidates
const fine = []; const fine = [];
for (const x of cheap.slice(0, MAX_FINE)) { for (const x of cheap.slice(0, MAX_FINE)) {
const it = x.it; const it = x.it;
@ -161,6 +183,7 @@ export function recommendSimilar(
let s = similarityScore(base, it.name || ""); let s = similarityScore(base, it.name || "");
if (s <= 0) continue; if (s <= 0) continue;
// Apply soft first-token mismatch penalty again
const itNorm = x.itNorm || normSearchText(it.name || ""); const itNorm = x.itNorm || normSearchText(it.name || "");
const itRawToks = x.itRawToks || tokenizeQuery(itNorm); const itRawToks = x.itRawToks || tokenizeQuery(itNorm);
const itToks = filterSimTokens(itRawToks); const itToks = filterSimTokens(itRawToks);
@ -194,8 +217,10 @@ export function recommendSimilar(
fine.sort((a, b) => b.s - a.s); fine.sort((a, b) => b.s - a.s);
const out = fine.slice(0, limit).map((x) => x.it); const out = fine.slice(0, limit).map((x) => x.it);
if (out.length) return out; if (out.length) return out;
// Fallback: "least bad" options with hard blocks only.
const fallback = []; const fallback = [];
for (const it of allAgg) { for (const it of allAgg) {
if (!it) continue; if (!it) continue;
@ -217,7 +242,7 @@ export function recommendSimilar(
fallback.sort((a, b) => b.s - a.s); fallback.sort((a, b) => b.s - a.s);
return fallback.slice(0, limit).map((x) => x.it); return fallback.slice(0, limit).map((x) => x.it);
} }
export function computeInitialPairsFast(allAgg, mappedSkus, limitPairs, isIgnoredPairFn, sameStoreFn) { export function computeInitialPairsFast(allAgg, mappedSkus, limitPairs, isIgnoredPairFn, sameStoreFn) {
const itemsAll = allAgg.filter((it) => !!it); const itemsAll = allAgg.filter((it) => !!it);
@ -456,3 +481,13 @@ export function computeInitialPairsFast(allAgg, mappedSkus, limitPairs, isIgnore
return out.slice(0, limitPairs); return out.slice(0, limitPairs);
} }
/**
 * Hash a value to an unsigned 32-bit integer using the FNV-1a mixing steps
 * (xor the input unit, then multiply by the 32-bit FNV prime).
 *
 * The input is coerced with `String(str || "")`, so every falsy value
 * (`null`, `undefined`, `0`, `""`, ...) hashes like the empty string.
 * The hash is fed UTF-16 code units (`charCodeAt`), not UTF-8 bytes, so
 * results only match byte-oriented FNV-1a implementations for ASCII text.
 *
 * @param {*} str - Value to hash; stringified before hashing.
 * @returns {number} Integer in the range [0, 2^32 - 1].
 */
function fnv1a32u(str) {
  const OFFSET_BASIS = 0x811c9dc5;
  const PRIME = 0x01000193;

  const text = String(str || "");
  let hash = OFFSET_BASIS;
  let idx = 0;
  while (idx < text.length) {
    // Math.imul keeps the multiplication in 32-bit integer arithmetic.
    hash = Math.imul(hash ^ text.charCodeAt(idx), PRIME);
    idx += 1;
  }

  // Reinterpret the signed 32-bit result as unsigned.
  return hash >>> 0;
}