This commit is contained in:
Brennan Wilkes (Text Groove) 2026-02-01 11:25:40 -08:00
parent 04d181da35
commit 2356eb8f8f

View file

@ -51,218 +51,211 @@ export function topSuggestions(allAgg, limit, otherPinnedSku, mappedSkus) {
return scored.slice(0, limit).map((x) => x.it); return scored.slice(0, limit).map((x) => x.it);
} }
// viz/app/linker/suggestions.js
// (requires fnv1a32u(str) helper to exist in this file)
/**
 * Suggest catalog items whose names are similar to a pinned item.
 *
 * Works in two stages: a cheap token-based score over a (possibly capped)
 * rotating scan of `allAgg`, followed by the expensive `similarityScore` on
 * only the best cheap candidates. When neither stage yields a result, falls
 * back to a simple stores/price/name ranking of the first unblocked items.
 *
 * @param {Array<object>} allAgg - Catalog items; each is read for `sku`,
 *   `name`, `stores` (its `.size` is used) and `cheapestPriceNum`.
 * @param {object} pinned - The pinned item (`name`, `sku`). When it is missing
 *   or has no name, the call is delegated to `topSuggestions`.
 * @param {number} limit - Maximum number of items to return.
 * @param {*} otherPinnedSku - SKU pinned on the other side; never suggested.
 * @param {{has: Function}} [mappedSkus] - Set-like collection of SKUs to leave
 *   out of the similarity scan (not applied in the fallback).
 * @param {Function} [isIgnoredPairFn] - `(pinnedSku, itSku) => truthy` hard block.
 * @param {Function} [sizePenaltyFn] - `(pinnedSku, itSku) => number` score multiplier.
 * @param {Function} [sameStoreFn] - `(pinnedSku, itSku) => truthy` hard block.
 * @param {Function} [sameGroupFn] - `(pinnedSku, itSku) => truthy` hard block.
 * @returns {Array<object>} Up to `limit` items from `allAgg`, best first.
 */
export function recommendSimilar(
  allAgg,
  pinned,
  limit,
  otherPinnedSku,
  mappedSkus,
  isIgnoredPairFn,
  sizePenaltyFn,
  sameStoreFn,
  sameGroupFn
) {
  if (!pinned || !pinned.name) return topSuggestions(allAgg, limit, otherPinnedSku, mappedSkus);

  const pinnedSku = String(pinned.sku || "");
  const otherSku = otherPinnedSku ? String(otherPinnedSku) : "";
  const base = String(pinned.name || "");

  const pinNorm = normSearchText(pinned.name || "");
  const pinRawToks = tokenizeQuery(pinNorm);
  const pinToks = filterSimTokens(pinRawToks);
  const pinBrand = pinToks[0] || "";
  const pinAge = extractAgeFromText(pinNorm);
  const pinnedSmws = smwsKeyFromName(pinned.name || "");

  // ---- Tuning knobs ----
  const MAX_SCAN = 5000; // cap for huge catalogs
  const FULL_SCAN_UNDER = 12000; // scan everything if the catalog is "small"
  const MAX_CHEAP_KEEP = 320; // keep top candidates from cheap stage
  const MAX_FINE = 70; // expensive score only on top-N
  // ----------------------

  // Top-K keeper that only sorts occasionally: the array may grow to 2*k
  // before it is sorted and cut back to k, so callers must do a final
  // sort/trim once the scan is over.
  function pushTopK(arr, item, k) {
    arr.push(item);
    if (arr.length >= k * 2) {
      arr.sort((a, b) => b.s - a.s);
      arr.length = k;
    }
  }

  // Hard blocks shared by the scan and the fallback; evaluated in the same
  // order in both places (same store, ignored pair, same group).
  const isHardBlocked = (itSku) =>
    (typeof sameStoreFn === "function" && sameStoreFn(pinnedSku, itSku)) ||
    (typeof isIgnoredPairFn === "function" && isIgnoredPairFn(pinnedSku, itSku)) ||
    (typeof sameGroupFn === "function" && sameGroupFn(pinnedSku, itSku));

  // Soft penalty applied when the first (brand) tokens differ. Never exceeds
  // 1.0, so it can only lower a score.
  const firstTokenMismatchMult = (itToks, contain) => {
    const smallN = Math.min(pinToks.length || 0, itToks.length || 0);
    let mult = 0.10 + 0.95 * contain;
    if (smallN <= 3 && contain < 0.78) mult *= 0.22;
    return Math.min(1.0, mult);
  };

  const cheap = [];
  const nAll = allAgg.length || 0;
  if (!nAll) return [];

  // Scan the whole catalog when it's not huge.
  const scanN = nAll <= FULL_SCAN_UNDER ? nAll : Math.min(MAX_SCAN, nAll);

  // Rotate the start to avoid alphabetical bias, but still cover scanN
  // items sequentially (wrapping around the end of the array).
  const start = (fnv1a32u(pinnedSku || pinNorm) % nAll) >>> 0;

  // Optional debug: uncomment to verify we're actually hitting the region you expect
  // console.log("[linker] recommendSimilar scan2", { pinnedSku, nAll, scanN, start, startName: allAgg[start]?.name });

  for (let i = 0; i < scanN; i++) {
    const it = allAgg[(start + i) % nAll];
    if (!it) continue;

    const itSku = String(it.sku || "");
    if (!itSku) continue;
    if (itSku === pinnedSku) continue;
    if (otherSku && itSku === otherSku) continue;

    // HARD BLOCKS ONLY:
    if (isHardBlocked(itSku)) continue;

    // Mapped exclusion. The `has` check keeps a non-Set-like value from
    // throwing; such a value simply excludes nothing.
    if (mappedSkus && typeof mappedSkus.has === "function" && mappedSkus.has(itSku)) continue;

    // SMWS exact NUM.NUM match => huge cheap score so it always survives the
    // cheap-stage trim. itNorm/itRawToks are left empty and recomputed in the
    // fine stage.
    if (pinnedSmws) {
      const k = smwsKeyFromName(it.name || "");
      if (k && k === pinnedSmws) {
        const stores = it.stores ? it.stores.size : 0;
        const hasPrice = it.cheapestPriceNum != null ? 1 : 0;
        pushTopK(
          cheap,
          { it, s: 1e9 + stores * 10 + hasPrice, itNorm: "", itRawToks: null },
          MAX_CHEAP_KEEP
        );
        continue;
      }
    }

    const itNorm = normSearchText(it.name || "");
    if (!itNorm) continue;

    const itRawToks = tokenizeQuery(itNorm);
    const itToks = filterSimTokens(itRawToks);
    if (!itToks.length) continue;

    const itBrand = itToks[0] || "";
    const firstMatch = pinBrand && itBrand && pinBrand === itBrand;
    const contain = tokenContainmentScore(pinRawToks, itRawToks);

    // Cheap score first (no Levenshtein)
    let s0 = fastSimilarityScore(pinRawToks, itRawToks, pinNorm, itNorm);
    if (s0 <= 0) s0 = 0.01 + 0.25 * contain;

    // Soft first-token mismatch penalty (never blocks)
    if (!firstMatch) s0 *= firstTokenMismatchMult(itToks, contain);

    // Size penalty early
    if (typeof sizePenaltyFn === "function") {
      s0 *= sizePenaltyFn(pinnedSku, itSku);
    }

    // Age handling early
    const itAge = extractAgeFromText(itNorm);
    if (pinAge && itAge) {
      if (pinAge === itAge) s0 *= 1.6;
      else s0 *= 0.22;
    }

    // Unknown boost
    if (pinnedSku.startsWith("u:") || itSku.startsWith("u:")) s0 *= 1.08;

    pushTopK(cheap, { it, s: s0, itNorm, itRawToks }, MAX_CHEAP_KEEP);
  }

  // Final trim/sort for cheap stage (pushTopK may leave up to 2*k - 1 entries).
  cheap.sort((a, b) => b.s - a.s);
  if (cheap.length > MAX_CHEAP_KEEP) cheap.length = MAX_CHEAP_KEEP;

  // Fine stage: expensive scoring only on top candidates.
  // NOTE(review): the cheap score (including the 1e9 SMWS boost) is discarded
  // here and every candidate is rescored from similarityScore, so an SMWS
  // exact match is not guaranteed to stay first in the final ordering.
  // Confirm whether that is intended.
  const fine = [];
  for (const x of cheap.slice(0, MAX_FINE)) {
    const it = x.it;
    const itSku = String(it.sku || "");

    let s = similarityScore(base, it.name || "");
    if (s <= 0) continue;

    const itNorm = x.itNorm || normSearchText(it.name || "");
    const itRawToks = x.itRawToks || tokenizeQuery(itNorm);
    const itToks = filterSimTokens(itRawToks);
    const itBrand = itToks[0] || "";
    const firstMatch = pinBrand && itBrand && pinBrand === itBrand;
    const contain = tokenContainmentScore(pinRawToks, itRawToks);

    if (!firstMatch) {
      s *= firstTokenMismatchMult(itToks, contain);
      if (s <= 0) continue;
    }

    if (typeof sizePenaltyFn === "function") {
      s *= sizePenaltyFn(pinnedSku, itSku);
      if (s <= 0) continue;
    }

    const itAge = extractAgeFromText(itNorm);
    if (pinAge && itAge) {
      if (pinAge === itAge) s *= 2.0;
      else s *= 0.15;
    }

    if (pinnedSku.startsWith("u:") || itSku.startsWith("u:")) s *= 1.12;

    fine.push({ it, s });
  }

  fine.sort((a, b) => b.s - a.s);
  const out = fine.slice(0, limit).map((x) => x.it);
  if (out.length) return out;

  // Fallback: hard blocks only, ranked by store count, price presence and
  // name presence. Collection stops after the first 250 unblocked items.
  const fallback = [];
  for (const it of allAgg) {
    if (!it) continue;

    const itSku = String(it.sku || "");
    if (!itSku) continue;
    if (itSku === pinnedSku) continue;
    if (otherSku && itSku === otherSku) continue;

    if (isHardBlocked(itSku)) continue;

    const stores = it.stores ? it.stores.size : 0;
    // `!= null` treats both null and undefined as "no price", matching the
    // SMWS branch of the scan above.
    const hasPrice = it.cheapestPriceNum != null ? 1 : 0;
    const hasName = it.name ? 1 : 0;
    fallback.push({ it, s: stores * 2 + hasPrice * 1.2 + hasName * 1.0 });
    if (fallback.length >= 250) break;
  }

  fallback.sort((a, b) => b.s - a.s);
  return fallback.slice(0, limit).map((x) => x.it);
}
export function computeInitialPairsFast(allAgg, mappedSkus, limitPairs, isIgnoredPairFn, sameStoreFn) { export function computeInitialPairsFast(allAgg, mappedSkus, limitPairs, isIgnoredPairFn, sameStoreFn) {