This commit is contained in:
Brennan Wilkes (Text Groove) 2026-02-01 11:22:11 -08:00
parent d7b6323e27
commit 9e59fecc0d

View file

@ -51,6 +51,19 @@ export function topSuggestions(allAgg, limit, otherPinnedSku, mappedSkus) {
return scored.slice(0, limit).map((x) => x.it); return scored.slice(0, limit).map((x) => x.it);
} }
// viz/app/linker/suggestions.js
import { tokenizeQuery, normSearchText } from "../sku.js";
import {
smwsKeyFromName,
extractAgeFromText,
filterSimTokens,
tokenContainmentScore,
fastSimilarityScore,
similarityScore,
} from "./similarity.js";
import { topSuggestions } from "./suggestions.js"; // if in same file, remove this line
export function recommendSimilar( export function recommendSimilar(
allAgg, allAgg,
pinned, pinned,
@ -75,11 +88,12 @@ export function recommendSimilar(
const pinAge = extractAgeFromText(pinNorm); const pinAge = extractAgeFromText(pinNorm);
const pinnedSmws = smwsKeyFromName(pinned.name || ""); const pinnedSmws = smwsKeyFromName(pinned.name || "");
// ---- Tuning knobs (performance + not-overzealous) ---- // ---- Tuning knobs ----
const MAX_SCAN = 5000; const MAX_SCAN = 5000; // total work cap
const MAX_CHEAP_KEEP = 320; const MAX_CHEAP_KEEP = 320;
const MAX_FINE = 70; const MAX_FINE = 70;
// ------------------------------------------------------ const WINDOWS = 4; // scan several windows to cover the catalog
// ----------------------
function pushTopK(arr, item, k) { function pushTopK(arr, item, k) {
arr.push(item); arr.push(item);
@ -91,28 +105,39 @@ export function recommendSimilar(
const cheap = []; const cheap = [];
// ✅ FIX: avoid alphabetical bias by scanning a rotating window
const nAll = allAgg.length || 0; const nAll = allAgg.length || 0;
if (!nAll) return [];
// Multi-window starts: deterministic, spread around the array
const h = fnv1a32u(pinnedSku || pinNorm);
const starts = [
h % nAll,
(Math.imul(h ^ 0x9e3779b9, 0x85ebca6b) >>> 0) % nAll,
(Math.imul(h ^ 0xc2b2ae35, 0x27d4eb2f) >>> 0) % nAll,
((h + (nAll >>> 1)) >>> 0) % nAll,
];
const scanN = Math.min(MAX_SCAN, nAll); const scanN = Math.min(MAX_SCAN, nAll);
const start = nAll ? (fnv1a32u(pinnedSku || pinNorm) % nAll) : 0; const perWin = Math.max(1, Math.floor(scanN / WINDOWS));
// Optional debug: // Optional debug:
// console.log("[linker] recommendSimilar scan", { pinnedSku, nAll, scanN, start, startName: allAgg[start]?.name }); // console.log("[linker] recommendSimilar scan", { pinnedSku, nAll, scanN, perWin, starts: starts.map(s => allAgg[s]?.name) });
for (let i = 0; i < scanN; i++) { let scanned = 0;
const it = allAgg[(start + i) % nAll];
if (!it) continue; function consider(it) {
if (!it) return;
const itSku = String(it.sku || ""); const itSku = String(it.sku || "");
if (!itSku) continue; if (!itSku) return;
if (itSku === pinnedSku) continue; if (itSku === pinnedSku) return;
if (otherSku && itSku === otherSku) continue; if (otherSku && itSku === otherSku) return;
// HARD BLOCKS ONLY: // HARD BLOCKS ONLY:
if (typeof sameStoreFn === "function" && sameStoreFn(pinnedSku, itSku)) continue; if (typeof sameStoreFn === "function" && sameStoreFn(pinnedSku, itSku)) return;
if (typeof isIgnoredPairFn === "function" && isIgnoredPairFn(pinnedSku, itSku)) continue; if (typeof isIgnoredPairFn === "function" && isIgnoredPairFn(pinnedSku, itSku)) return;
if (typeof sameGroupFn === "function" && sameGroupFn(pinnedSku, itSku)) continue; if (typeof sameGroupFn === "function" && sameGroupFn(pinnedSku, itSku)) return;
// SMWS exact NUM.NUM match => keep at top // SMWS exact NUM.NUM match => keep at top
if (pinnedSmws) { if (pinnedSmws) {
@ -125,16 +150,16 @@ export function recommendSimilar(
{ it, s: 1e9 + stores * 10 + hasPrice, itNorm: "", itRawToks: null }, { it, s: 1e9 + stores * 10 + hasPrice, itNorm: "", itRawToks: null },
MAX_CHEAP_KEEP MAX_CHEAP_KEEP
); );
continue; return;
} }
} }
const itNorm = normSearchText(it.name || ""); const itNorm = normSearchText(it.name || "");
if (!itNorm) continue; if (!itNorm) return;
const itRawToks = tokenizeQuery(itNorm); const itRawToks = tokenizeQuery(itNorm);
const itToks = filterSimTokens(itRawToks); const itToks = filterSimTokens(itRawToks);
if (!itToks.length) continue; if (!itToks.length) return;
const itBrand = itToks[0] || ""; const itBrand = itToks[0] || "";
const firstMatch = pinBrand && itBrand && pinBrand === itBrand; const firstMatch = pinBrand && itBrand && pinBrand === itBrand;
@ -142,8 +167,6 @@ export function recommendSimilar(
// Cheap score first (no Levenshtein) // Cheap score first (no Levenshtein)
let s0 = fastSimilarityScore(pinRawToks, itRawToks, pinNorm, itNorm); let s0 = fastSimilarityScore(pinRawToks, itRawToks, pinNorm, itNorm);
// If fast score is 0, still allow it as "least bad"
if (s0 <= 0) s0 = 0.01 + 0.25 * contain; if (s0 <= 0) s0 = 0.01 + 0.25 * contain;
// Soft first-token mismatch penalty (never blocks) // Soft first-token mismatch penalty (never blocks)
@ -172,6 +195,18 @@ export function recommendSimilar(
pushTopK(cheap, { it, s: s0, itNorm, itRawToks }, MAX_CHEAP_KEEP); pushTopK(cheap, { it, s: s0, itNorm, itRawToks }, MAX_CHEAP_KEEP);
} }
// Scan several windows, total capped at MAX_SCAN (scanN).
// The window starts are hashed independently, so two windows may begin close
// together and overlap. Visited indices are tracked so that no item is passed
// to consider() twice, and each window walks forward past already-visited
// slots until it has used its share of the budget. As a result exactly scanN
// distinct items are examined, which is the whole catalog when
// nAll <= MAX_SCAN (matching the coverage of the earlier single-window scan).
const visitedIdx = new Set();
for (let w = 0; w < WINDOWS && scanned < scanN; w++) {
  const start = starts[w % starts.length];
  // perWin is floor(scanN / WINDOWS); the last window absorbs the rounding
  // remainder so the full scan budget is used.
  const budget = w === WINDOWS - 1 ? scanN - scanned : Math.min(perWin, scanN - scanned);
  let taken = 0;
  // Bounded by nAll steps: one full lap around the array sees every index.
  for (let i = 0; i < nAll && taken < budget; i++) {
    const idx = (start + i) % nAll;
    if (visitedIdx.has(idx)) continue;
    visitedIdx.add(idx);
    consider(allAgg[idx]);
    taken++;
  }
  scanned += taken;
}
cheap.sort((a, b) => b.s - a.s); cheap.sort((a, b) => b.s - a.s);
// Fine stage: expensive scoring only on top candidates // Fine stage: expensive scoring only on top candidates
@ -183,7 +218,6 @@ export function recommendSimilar(
let s = similarityScore(base, it.name || ""); let s = similarityScore(base, it.name || "");
if (s <= 0) continue; if (s <= 0) continue;
// Apply soft first-token mismatch penalty again
const itNorm = x.itNorm || normSearchText(it.name || ""); const itNorm = x.itNorm || normSearchText(it.name || "");
const itRawToks = x.itRawToks || tokenizeQuery(itNorm); const itRawToks = x.itRawToks || tokenizeQuery(itNorm);
const itToks = filterSimTokens(itRawToks); const itToks = filterSimTokens(itRawToks);
@ -217,10 +251,9 @@ export function recommendSimilar(
fine.sort((a, b) => b.s - a.s); fine.sort((a, b) => b.s - a.s);
const out = fine.slice(0, limit).map((x) => x.it); const out = fine.slice(0, limit).map((x) => x.it);
if (out.length) return out; if (out.length) return out;
// Fallback: "least bad" options with hard blocks only. // Fallback: hard blocks only
const fallback = []; const fallback = [];
for (const it of allAgg) { for (const it of allAgg) {
if (!it) continue; if (!it) continue;
@ -244,6 +277,7 @@ export function recommendSimilar(
return fallback.slice(0, limit).map((x) => x.it); return fallback.slice(0, limit).map((x) => x.it);
} }
export function computeInitialPairsFast(allAgg, mappedSkus, limitPairs, isIgnoredPairFn, sameStoreFn) { export function computeInitialPairsFast(allAgg, mappedSkus, limitPairs, isIgnoredPairFn, sameStoreFn) {
const itemsAll = allAgg.filter((it) => !!it); const itemsAll = allAgg.filter((it) => !!it);