From 3d2feaa8eeee41fed64645b5d7d516393c0dd1d2 Mon Sep 17 00:00:00 2001 From: "Brennan Wilkes (Text Groove)" Date: Thu, 22 Jan 2026 16:57:22 -0800 Subject: [PATCH] feat: SMWS suggestions --- viz/app/linker_page.js | 173 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 165 insertions(+), 8 deletions(-) diff --git a/viz/app/linker_page.js b/viz/app/linker_page.js index de669a8..0d0992c 100644 --- a/viz/app/linker_page.js +++ b/viz/app/linker_page.js @@ -348,6 +348,79 @@ function recommendSimilar(allAgg, pinned, limit, otherPinnedSku, mappedSkus, isI return scored.slice(0, limit).map((x) => x.it); } +function computeSmwsPairsFirst(work, mappedSkus, limitPairs, isIgnoredPairFn) { + const buckets = new Map(); // code -> items[] + for (const it of work) { + if (!it) continue; + const sku = String(it.sku || ""); + if (!sku) continue; + if (mappedSkus && mappedSkus.has(sku)) continue; + + const code = smwsKeyFromName(it.name || ""); + if (!code) continue; + + let arr = buckets.get(code); + if (!arr) buckets.set(code, (arr = [])); + arr.push(it); + } + + // score within a code bucket: prefer “better”/more complete listings + function itemRank(it) { + const stores = it.stores ? it.stores.size : 0; + const hasPrice = it.cheapestPriceNum != null ? 1 : 0; + const hasName = it.name ? 1 : 0; + const unknown = String(it.sku || "").startsWith("u:") ? 1 : 0; + return stores * 3 + hasPrice * 2 + hasName * 0.5 + unknown * 0.25; + } + + const candPairs = []; + for (const arr of buckets.values()) { + if (!arr || arr.length < 2) continue; + + // keep buckets bounded + arr.sort((a, b) => itemRank(b) - itemRank(a)); + const top = arr.slice(0, 60); + + // generate pair candidates (bounded) + const MAX_PAIR_TRIES = 800; + let tries = 0; + for (let i = 0; i < top.length && tries < MAX_PAIR_TRIES; i++) { + const a = top[i]; + const aSku = String(a.sku || ""); + for (let j = i + 1; j < top.length && tries < MAX_PAIR_TRIES; j++) { + tries++; + const b = top[j]; + const bSku = String(b.sku || ""); + if (!aSku || !bSku || aSku === bSku) continue; + if (storesOverlap(a, b)) continue; + if (typeof isIgnoredPairFn === "function" && isIgnoredPairFn(aSku, bSku)) continue; + + // big base so these sort above non-SMWS + const s = 1e9 + itemRank(a) + itemRank(b); + candPairs.push({ a, b, score: s }); + } + } + } + + candPairs.sort((x, y) => y.score - x.score); + + const used = new Set(); + const out = []; + for (const p of candPairs) { + const aSku = String(p.a.sku || ""); + const bSku = String(p.b.sku || ""); + if (!aSku || !bSku) continue; + if (used.has(aSku) || used.has(bSku)) continue; + if (storesOverlap(p.a, p.b)) continue; + + used.add(aSku); + used.add(bSku); + out.push(p); + if (out.length >= limitPairs) break; + } + + return { pairs: out, used }; +} function computeInitialPairsFast(allAgg, mappedSkus, limitPairs, isIgnoredPairFn) { const items = allAgg.filter((it) => { @@ -364,7 +437,89 @@ function computeInitialPairsFast(allAgg, mappedSkus, limitPairs, isIgnoredPairFn const WORK_CAP = 5000; const work = itemsShuf.length > WORK_CAP ? itemsShuf.slice(0, WORK_CAP) : itemsShuf; - const seeds = topSuggestions(work, Math.min(400, work.length), "", mappedSkus); + // --- NEW: SMWS exact-code pairs first (guaranteed matches) --- + function itemRank(it) { + const stores = it.stores ? it.stores.size : 0; + const hasPrice = it.cheapestPriceNum != null ? 1 : 0; + const hasName = it.name ? 1 : 0; + const unknown = String(it.sku || "").startsWith("u:") ? 1 : 0; + return stores * 3 + hasPrice * 2 + hasName * 0.5 + unknown * 0.25; + } + + function smwsPairsFirst(workArr, limit) { + const buckets = new Map(); // code -> items[] + for (const it of workArr) { + if (!it) continue; + const sku = String(it.sku || ""); + if (!sku) continue; + if (mappedSkus && mappedSkus.has(sku)) continue; + + const code = smwsKeyFromName(it.name || ""); + if (!code) continue; + + let arr = buckets.get(code); + if (!arr) buckets.set(code, (arr = [])); + arr.push(it); + } + + const candPairs = []; + for (const arr of buckets.values()) { + if (!arr || arr.length < 2) continue; + + // Keep bucket bounded (avoid O(n^2) explosions) + arr.sort((a, b) => itemRank(b) - itemRank(a)); + const top = arr.slice(0, 60); + + const MAX_PAIR_TRIES = 800; + let tries = 0; + for (let i = 0; i < top.length && tries < MAX_PAIR_TRIES; i++) { + const a = top[i]; + const aSku = String(a.sku || ""); + for (let j = i + 1; j < top.length && tries < MAX_PAIR_TRIES; j++) { + tries++; + const b = top[j]; + const bSku = String(b.sku || ""); + if (!aSku || !bSku || aSku === bSku) continue; + if (storesOverlap(a, b)) continue; + if (typeof isIgnoredPairFn === "function" && isIgnoredPairFn(aSku, bSku)) + continue; + + // Big base so these float above non-SMWS pairs + const s = 1e9 + itemRank(a) + itemRank(b); + candPairs.push({ a, b, score: s }); + } + } + } + + candPairs.sort((x, y) => y.score - x.score); + + const used0 = new Set(); + const out0 = []; + for (const p of candPairs) { + const aSku = String(p.a.sku || ""); + const bSku = String(p.b.sku || ""); + if (!aSku || !bSku) continue; + if (used0.has(aSku) || used0.has(bSku)) continue; + if (storesOverlap(p.a, p.b)) continue; + + used0.add(aSku); + used0.add(bSku); + out0.push(p); + if (out0.length >= limit) break; + } + return { pairs: out0, used: used0 }; + } + + const smwsFirst = smwsPairsFirst(work, limitPairs); + const used = new Set(smwsFirst.used); + const out = smwsFirst.pairs.slice(); + + if (out.length >= limitPairs) return out.slice(0, limitPairs); + + // --- Existing logic continues (fills remaining slots), but avoid reusing SMWS-picked SKUs --- + const seeds = topSuggestions(work, Math.min(400, work.length), "", mappedSkus).filter( + (it) => !used.has(String(it?.sku || "")) + ); const TOKEN_BUCKET_CAP = 500; const tokMap = new Map(); @@ -372,9 +527,7 @@ function computeInitialPairsFast(allAgg, mappedSkus, limitPairs, isIgnoredPairFn const itemNormName = new Map(); for (const it of work) { - const toks = Array.from(new Set(tokenizeQuery(it.name || ""))) - .filter(Boolean) - .slice(0, 10); + const toks = Array.from(new Set(tokenizeQuery(it.name || ""))).filter(Boolean).slice(0, 10); itemTokens.set(it.sku, toks); itemNormName.set(it.sku, normSearchText(it.name || "")); for (const t of toks) { @@ -390,8 +543,10 @@ function computeInitialPairsFast(allAgg, mappedSkus, limitPairs, isIgnoredPairFn for (const a of seeds) { const aSku = String(a.sku || ""); + if (!aSku || used.has(aSku)) continue; + const aToks = itemTokens.get(aSku) || []; - if (!aSku || !aToks.length) continue; + if (!aToks.length) continue; const cand = new Map(); for (const t of aToks) { @@ -403,6 +558,7 @@ function computeInitialPairsFast(allAgg, mappedSkus, limitPairs, isIgnoredPairFn if (!b) continue; const bSku = String(b.sku || ""); if (!bSku || bSku === aSku) continue; + if (used.has(bSku)) continue; if (mappedSkus && mappedSkus.has(bSku)) continue; if (typeof isIgnoredPairFn === "function" && isIgnoredPairFn(aSku, bSku)) @@ -440,6 +596,8 @@ function computeInitialPairsFast(allAgg, mappedSkus, limitPairs, isIgnoredPairFn if (!bestB || bestS < 0.6) continue; const bSku = String(bestB.sku || ""); + if (!bSku || used.has(bSku)) continue; + const key = aSku < bSku ? `${aSku}|${bSku}` : `${bSku}|${aSku}`; const prev = bestByPair.get(key); if (!prev || bestS > prev.score) bestByPair.set(key, { a, b: bestB, score: bestS }); @@ -448,8 +606,6 @@ function computeInitialPairsFast(allAgg, mappedSkus, limitPairs, isIgnoredPairFn const pairs = Array.from(bestByPair.values()); pairs.sort((x, y) => y.score - x.score); - const used = new Set(); - const out = []; for (const p of pairs) { const aSku = String(p.a.sku || ""); const bSku = String(p.b.sku || ""); @@ -462,7 +618,8 @@ function computeInitialPairsFast(allAgg, mappedSkus, limitPairs, isIgnoredPairFn out.push({ a: p.a, b: p.b, score: p.score }); if (out.length >= limitPairs) break; } - return out; + + return out.slice(0, limitPairs); } /* ---------------- Page ---------------- */