feat: SMWS suggestions

This commit is contained in:
Brennan Wilkes (Text Groove) 2026-01-22 16:57:22 -08:00
parent 0d3cc8b70b
commit 3d2feaa8ee

View file

@ -348,6 +348,79 @@ function recommendSimilar(allAgg, pinned, limit, otherPinnedSku, mappedSkus, isI
return scored.slice(0, limit).map((x) => x.it);
}
function computeSmwsPairsFirst(work, mappedSkus, limitPairs, isIgnoredPairFn) {
const buckets = new Map(); // code -> items[]
for (const it of work) {
if (!it) continue;
const sku = String(it.sku || "");
if (!sku) continue;
if (mappedSkus && mappedSkus.has(sku)) continue;
const code = smwsKeyFromName(it.name || "");
if (!code) continue;
let arr = buckets.get(code);
if (!arr) buckets.set(code, (arr = []));
arr.push(it);
}
// score within a code bucket: prefer “better”/more complete listings
function itemRank(it) {
const stores = it.stores ? it.stores.size : 0;
const hasPrice = it.cheapestPriceNum != null ? 1 : 0;
const hasName = it.name ? 1 : 0;
const unknown = String(it.sku || "").startsWith("u:") ? 1 : 0;
return stores * 3 + hasPrice * 2 + hasName * 0.5 + unknown * 0.25;
}
const candPairs = [];
for (const arr of buckets.values()) {
if (!arr || arr.length < 2) continue;
// keep buckets bounded
arr.sort((a, b) => itemRank(b) - itemRank(a));
const top = arr.slice(0, 60);
// generate pair candidates (bounded)
const MAX_PAIR_TRIES = 800;
let tries = 0;
for (let i = 0; i < top.length && tries < MAX_PAIR_TRIES; i++) {
const a = top[i];
const aSku = String(a.sku || "");
for (let j = i + 1; j < top.length && tries < MAX_PAIR_TRIES; j++) {
tries++;
const b = top[j];
const bSku = String(b.sku || "");
if (!aSku || !bSku || aSku === bSku) continue;
if (storesOverlap(a, b)) continue;
if (typeof isIgnoredPairFn === "function" && isIgnoredPairFn(aSku, bSku)) continue;
// big base so these sort above non-SMWS
const s = 1e9 + itemRank(a) + itemRank(b);
candPairs.push({ a, b, score: s });
}
}
}
candPairs.sort((x, y) => y.score - x.score);
const used = new Set();
const out = [];
for (const p of candPairs) {
const aSku = String(p.a.sku || "");
const bSku = String(p.b.sku || "");
if (!aSku || !bSku) continue;
if (used.has(aSku) || used.has(bSku)) continue;
if (storesOverlap(p.a, p.b)) continue;
used.add(aSku);
used.add(bSku);
out.push(p);
if (out.length >= limitPairs) break;
}
return { pairs: out, used };
}
function computeInitialPairsFast(allAgg, mappedSkus, limitPairs, isIgnoredPairFn) {
const items = allAgg.filter((it) => {
@ -364,7 +437,89 @@ function computeInitialPairsFast(allAgg, mappedSkus, limitPairs, isIgnoredPairFn
const WORK_CAP = 5000;
const work = itemsShuf.length > WORK_CAP ? itemsShuf.slice(0, WORK_CAP) : itemsShuf;
const seeds = topSuggestions(work, Math.min(400, work.length), "", mappedSkus);
// --- NEW: SMWS exact-code pairs first (guaranteed matches) ---
function itemRank(it) {
const stores = it.stores ? it.stores.size : 0;
const hasPrice = it.cheapestPriceNum != null ? 1 : 0;
const hasName = it.name ? 1 : 0;
const unknown = String(it.sku || "").startsWith("u:") ? 1 : 0;
return stores * 3 + hasPrice * 2 + hasName * 0.5 + unknown * 0.25;
}
function smwsPairsFirst(workArr, limit) {
const buckets = new Map(); // code -> items[]
for (const it of workArr) {
if (!it) continue;
const sku = String(it.sku || "");
if (!sku) continue;
if (mappedSkus && mappedSkus.has(sku)) continue;
const code = smwsKeyFromName(it.name || "");
if (!code) continue;
let arr = buckets.get(code);
if (!arr) buckets.set(code, (arr = []));
arr.push(it);
}
const candPairs = [];
for (const arr of buckets.values()) {
if (!arr || arr.length < 2) continue;
// Keep bucket bounded (avoid O(n^2) explosions)
arr.sort((a, b) => itemRank(b) - itemRank(a));
const top = arr.slice(0, 60);
const MAX_PAIR_TRIES = 800;
let tries = 0;
for (let i = 0; i < top.length && tries < MAX_PAIR_TRIES; i++) {
const a = top[i];
const aSku = String(a.sku || "");
for (let j = i + 1; j < top.length && tries < MAX_PAIR_TRIES; j++) {
tries++;
const b = top[j];
const bSku = String(b.sku || "");
if (!aSku || !bSku || aSku === bSku) continue;
if (storesOverlap(a, b)) continue;
if (typeof isIgnoredPairFn === "function" && isIgnoredPairFn(aSku, bSku))
continue;
// Big base so these float above non-SMWS pairs
const s = 1e9 + itemRank(a) + itemRank(b);
candPairs.push({ a, b, score: s });
}
}
}
candPairs.sort((x, y) => y.score - x.score);
const used0 = new Set();
const out0 = [];
for (const p of candPairs) {
const aSku = String(p.a.sku || "");
const bSku = String(p.b.sku || "");
if (!aSku || !bSku) continue;
if (used0.has(aSku) || used0.has(bSku)) continue;
if (storesOverlap(p.a, p.b)) continue;
used0.add(aSku);
used0.add(bSku);
out0.push(p);
if (out0.length >= limit) break;
}
return { pairs: out0, used: used0 };
}
const smwsFirst = smwsPairsFirst(work, limitPairs);
const used = new Set(smwsFirst.used);
const out = smwsFirst.pairs.slice();
if (out.length >= limitPairs) return out.slice(0, limitPairs);
// --- Existing logic continues (fills remaining slots), but avoid reusing SMWS-picked SKUs ---
const seeds = topSuggestions(work, Math.min(400, work.length), "", mappedSkus).filter(
(it) => !used.has(String(it?.sku || ""))
);
const TOKEN_BUCKET_CAP = 500;
const tokMap = new Map();
@ -372,9 +527,7 @@ function computeInitialPairsFast(allAgg, mappedSkus, limitPairs, isIgnoredPairFn
const itemNormName = new Map();
for (const it of work) {
const toks = Array.from(new Set(tokenizeQuery(it.name || "")))
.filter(Boolean)
.slice(0, 10);
const toks = Array.from(new Set(tokenizeQuery(it.name || ""))).filter(Boolean).slice(0, 10);
itemTokens.set(it.sku, toks);
itemNormName.set(it.sku, normSearchText(it.name || ""));
for (const t of toks) {
@ -390,8 +543,10 @@ function computeInitialPairsFast(allAgg, mappedSkus, limitPairs, isIgnoredPairFn
for (const a of seeds) {
const aSku = String(a.sku || "");
if (!aSku || used.has(aSku)) continue;
const aToks = itemTokens.get(aSku) || [];
if (!aSku || !aToks.length) continue;
if (!aToks.length) continue;
const cand = new Map();
for (const t of aToks) {
@ -403,6 +558,7 @@ function computeInitialPairsFast(allAgg, mappedSkus, limitPairs, isIgnoredPairFn
if (!b) continue;
const bSku = String(b.sku || "");
if (!bSku || bSku === aSku) continue;
if (used.has(bSku)) continue;
if (mappedSkus && mappedSkus.has(bSku)) continue;
if (typeof isIgnoredPairFn === "function" && isIgnoredPairFn(aSku, bSku))
@ -440,6 +596,8 @@ function computeInitialPairsFast(allAgg, mappedSkus, limitPairs, isIgnoredPairFn
if (!bestB || bestS < 0.6) continue;
const bSku = String(bestB.sku || "");
if (!bSku || used.has(bSku)) continue;
const key = aSku < bSku ? `${aSku}|${bSku}` : `${bSku}|${aSku}`;
const prev = bestByPair.get(key);
if (!prev || bestS > prev.score) bestByPair.set(key, { a, b: bestB, score: bestS });
@ -448,8 +606,6 @@ function computeInitialPairsFast(allAgg, mappedSkus, limitPairs, isIgnoredPairFn
const pairs = Array.from(bestByPair.values());
pairs.sort((x, y) => y.score - x.score);
const used = new Set();
const out = [];
for (const p of pairs) {
const aSku = String(p.a.sku || "");
const bSku = String(p.b.sku || "");
@ -462,7 +618,8 @@ function computeInitialPairsFast(allAgg, mappedSkus, limitPairs, isIgnoredPairFn
out.push({ a: p.a, b: p.b, score: p.score });
if (out.length >= limitPairs) break;
}
return out;
return out.slice(0, limitPairs);
}
/* ---------------- Page ---------------- */