mirror of
https://github.com/samsonjs/spirit-tracker.git
synced 2026-04-27 15:07:43 +00:00
feat: Improvements to link suggestions
This commit is contained in:
parent
b8bbb091b7
commit
af6d268560
1 changed file with 156 additions and 33 deletions
|
|
@ -37,9 +37,31 @@ const SIM_STOP_TOKENS = new Set([
|
||||||
"in",
|
"in",
|
||||||
"for",
|
"for",
|
||||||
"with",
|
"with",
|
||||||
|
|
||||||
|
// age words (we extract age separately)
|
||||||
"year",
|
"year",
|
||||||
"years",
|
"years",
|
||||||
|
"yr",
|
||||||
|
"yrs",
|
||||||
"old",
|
"old",
|
||||||
|
|
||||||
|
// whisky noise words
|
||||||
|
"whisky",
|
||||||
|
"whiskey",
|
||||||
|
"scotch",
|
||||||
|
"single",
|
||||||
|
"malt",
|
||||||
|
"cask",
|
||||||
|
"finish",
|
||||||
|
"edition",
|
||||||
|
"release",
|
||||||
|
"batch",
|
||||||
|
"strength",
|
||||||
|
"abv",
|
||||||
|
"proof",
|
||||||
|
|
||||||
|
// helps your Benromach “20th Anniversary” case
|
||||||
|
"anniversary",
|
||||||
]);
|
]);
|
||||||
|
|
||||||
const SMWS_WORD_RE = /\bsmws\b/i;
|
const SMWS_WORD_RE = /\bsmws\b/i;
|
||||||
|
|
@ -52,8 +74,34 @@ function smwsKeyFromName(name) {
|
||||||
return m ? m[1] : "";
|
return m ? m[1] : "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Treat ordinal tokens like "20th" as numbers, and detect ages explicitly.
const ORDINAL_RE = /^(\d+)(st|nd|rd|th)$/i;

/**
 * Reduce a token to its numeric key.
 *
 * - A token made only of digits ("12") is returned as-is.
 * - An ordinal ("20th", "3RD") is returned without its suffix ("20", "3").
 * - Anything else (words, decimals such as "1.5", empty/nullish input) yields "".
 *
 * The input is stringified, trimmed and lower-cased before matching.
 *
 * @param {*} t - Token to inspect.
 * @returns {string} The digit string, or "" when the token is not numeric.
 */
function numKey(t) {
  // `??` instead of `||`: a numeric 0 must become "0", not be treated as empty.
  const s = String(t ?? "").trim().toLowerCase();
  if (!s) return "";
  if (/^\d+$/.test(s)) return s;
  const m = s.match(ORDINAL_RE);
  return m ? m[1] : "";
}
|
||||||
|
|
||||||
/**
 * Report whether a token counts as a number for similarity purposes.
 *
 * Delegates to numKey, so plain digit tokens ("12") and ordinals ("20th")
 * are both accepted; every other token is rejected.
 *
 * @param {*} t - Token to inspect.
 * @returns {boolean} true when numKey(t) produces a non-empty key.
 */
function isNumberToken(t) {
  return numKey(t) !== "";
}
|
||||||
|
|
||||||
|
// "10 years", "10 year", "10 yr", "10 yrs", "aged 10 years", and the
// hyphenated spellings "10-year", "10-yr".
const AGE_YEARS_RE = /\b(?:aged[\s-]*)?(\d{1,2})[\s-]*(?:yr|yrs|year|years)\b/i;

// "10 yo", "10yo", "10-yo"
const AGE_YO_RE = /\b(\d{1,2})[\s-]*yo\b/i;

/**
 * Pull an age statement (a one- or two-digit number followed by a year unit)
 * out of a product name.
 *
 * The year-unit pattern is tried first; the "yo" pattern is only consulted
 * when the first one finds nothing. The number and its unit may be separated
 * by whitespace, hyphens, or nothing at all.
 *
 * @param {*} normName - Name text to search (stringified; nullish -> "").
 * @returns {string} The age without leading zeros (e.g. "8" for "08 yrs"),
 *   or "" when no age statement is present.
 */
function extractAgeFromText(normName) {
  const s = String(normName || "");
  if (!s) return "";

  const m = s.match(AGE_YEARS_RE) || s.match(AGE_YO_RE);
  // parseInt + String drops leading zeros so "08" and "8" compare equal.
  return m ? String(Number.parseInt(m[1], 10)) : "";
}
|
||||||
|
|
||||||
function filterSimTokens(tokens) {
|
function filterSimTokens(tokens) {
|
||||||
|
|
@ -69,6 +117,7 @@ function filterSimTokens(tokens) {
|
||||||
|
|
||||||
// small safe extras
|
// small safe extras
|
||||||
["whiskey", "whisky"],
|
["whiskey", "whisky"],
|
||||||
|
["whisky", "whisky"],
|
||||||
["bourbon", "bourbon"],
|
["bourbon", "bourbon"],
|
||||||
]);
|
]);
|
||||||
|
|
||||||
|
|
@ -93,6 +142,9 @@ function filterSimTokens(tokens) {
|
||||||
let t = String(raw || "").trim().toLowerCase();
|
let t = String(raw || "").trim().toLowerCase();
|
||||||
if (!t) continue;
|
if (!t) continue;
|
||||||
|
|
||||||
|
// Drop tokens that are just punctuation / separators (e.g. "-")
|
||||||
|
if (!/[a-z0-9]/i.test(t)) continue;
|
||||||
|
|
||||||
// Drop inline volume + inline percentages
|
// Drop inline volume + inline percentages
|
||||||
if (VOL_INLINE_RE.test(t)) continue;
|
if (VOL_INLINE_RE.test(t)) continue;
|
||||||
if (PCT_INLINE_RE.test(t)) continue;
|
if (PCT_INLINE_RE.test(t)) continue;
|
||||||
|
|
@ -100,11 +152,15 @@ function filterSimTokens(tokens) {
|
||||||
// Normalize
|
// Normalize
|
||||||
t = SIM_EQUIV.get(t) || t;
|
t = SIM_EQUIV.get(t) || t;
|
||||||
|
|
||||||
|
// Normalize ordinals like "20th" -> "20"
|
||||||
|
const nk = numKey(t);
|
||||||
|
if (nk) t = nk;
|
||||||
|
|
||||||
// Drop unit tokens (ml/l/oz/etc) and ABV-ish
|
// Drop unit tokens (ml/l/oz/etc) and ABV-ish
|
||||||
if (VOL_UNIT.has(t) || t === "abv" || t === "proof") continue;
|
if (VOL_UNIT.has(t) || t === "abv" || t === "proof") continue;
|
||||||
|
|
||||||
// Drop "number + unit" volume patterns: "750 ml", "1.14 l"
|
// Drop "number + unit" volume patterns: "750 ml", "1.14 l"
|
||||||
if (isNumberToken(t)) {
|
if (/^\d+(?:\.\d+)?$/.test(t)) {
|
||||||
const next = String(arr[i + 1] || "").trim().toLowerCase();
|
const next = String(arr[i + 1] || "").trim().toLowerCase();
|
||||||
const nextNorm = SIM_EQUIV.get(next) || next;
|
const nextNorm = SIM_EQUIV.get(next) || next;
|
||||||
if (VOL_UNIT.has(nextNorm)) {
|
if (VOL_UNIT.has(nextNorm)) {
|
||||||
|
|
@ -124,13 +180,12 @@ function filterSimTokens(tokens) {
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
 * Multiplicative factor that penalises two token lists whose numbers disagree.
 *
 * Each token is reduced with numKey, so "20" and "20th" count as the same
 * number. The factor is 1.0 (no penalty) when either side has no numeric
 * tokens or when the two sides share at least one number; otherwise the
 * mismatch factor is returned.
 *
 * @param {Array} aTokens - Tokens of the first name (nullish treated as empty).
 * @param {Array} bTokens - Tokens of the second name (nullish treated as empty).
 * @param {number} [mismatchFactor=0.28] - Factor returned when both sides
 *   have numbers but none in common (e.g. "18" vs "12").
 * @returns {number} 1.0 or `mismatchFactor`.
 */
function numberMismatchPenalty(aTokens, bTokens, mismatchFactor = 0.28) {
  const aNums = new Set((aTokens || []).map((t) => numKey(t)).filter(Boolean));
  const bNums = new Set((bTokens || []).map((t) => numKey(t)).filter(Boolean));

  if (!aNums.size || !bNums.size) return 1.0; // no penalty if either has no numbers

  for (const n of aNums) {
    if (bNums.has(n)) return 1.0; // at least one number matches
  }

  return mismatchFactor;
}
|
||||||
|
|
||||||
function levenshtein(a, b) {
|
function levenshtein(a, b) {
|
||||||
|
|
@ -158,15 +213,50 @@ function levenshtein(a, b) {
|
||||||
return dp[m];
|
return dp[m];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Score how strongly two token lists overlap after similarity filtering.
 *
 * Both lists are passed through filterSimTokens and de-duplicated. With
 * `hit` = number of tokens of the smaller set that also occur in the larger:
 *   recall    = hit / size of the smaller set
 *   precision = hit / size of the larger set
 * and the result is their F1 (harmonic mean).
 *
 * Note that the value reaches 1 only when the two filtered sets are equal;
 * a small set fully contained in a bigger one scores below 1 because the
 * precision term shrinks with the size of the larger set.
 *
 * @param {Array} aTokens - Raw tokens of the first name (nullish -> []).
 * @param {Array} bTokens - Raw tokens of the second name (nullish -> []).
 * @returns {number} Value in 0..1; 0 when either filtered list is empty.
 */
function tokenContainmentScore(aTokens, bTokens) {
  const aSet = new Set(filterSimTokens(aTokens || []));
  const bSet = new Set(filterSimTokens(bTokens || []));
  if (aSet.size === 0 || bSet.size === 0) return 0;

  const [small, big] = aSet.size <= bSet.size ? [aSet, bSet] : [bSet, aSet];

  let hit = 0;
  small.forEach((tok) => {
    if (big.has(tok)) hit += 1;
  });

  // Both sets are non-empty here, so the divisions are safe.
  const recall = hit / small.size;
  const precision = hit / big.size;

  // The 1e-9 floor avoids 0/0 when there is no overlap at all.
  return (2 * precision * recall) / Math.max(1e-9, precision + recall);
}
|
||||||
|
|
||||||
function similarityScore(aName, bName) {
|
function similarityScore(aName, bName) {
|
||||||
const a = normSearchText(aName);
|
const a = normSearchText(aName);
|
||||||
const b = normSearchText(bName);
|
const b = normSearchText(bName);
|
||||||
if (!a || !b) return 0;
|
if (!a || !b) return 0;
|
||||||
|
|
||||||
const aToks = filterSimTokens(tokenizeQuery(a));
|
// Explicit age handling
|
||||||
const bToks = filterSimTokens(tokenizeQuery(b));
|
const aAge = extractAgeFromText(a);
|
||||||
|
const bAge = extractAgeFromText(b);
|
||||||
|
const ageBoth = !!(aAge && bAge);
|
||||||
|
const ageMatch = ageBoth && aAge === bAge;
|
||||||
|
const ageMismatch = ageBoth && aAge !== bAge;
|
||||||
|
|
||||||
|
const aToksRaw = tokenizeQuery(a);
|
||||||
|
const bToksRaw = tokenizeQuery(b);
|
||||||
|
|
||||||
|
const aToks = filterSimTokens(aToksRaw);
|
||||||
|
const bToks = filterSimTokens(bToksRaw);
|
||||||
if (!aToks.length || !bToks.length) return 0;
|
if (!aToks.length || !bToks.length) return 0;
|
||||||
|
|
||||||
|
const contain = tokenContainmentScore(aToksRaw, bToksRaw);
|
||||||
|
|
||||||
const aFirst = aToks[0] || "";
|
const aFirst = aToks[0] || "";
|
||||||
const bFirst = bToks[0] || "";
|
const bFirst = bToks[0] || "";
|
||||||
const firstMatch = aFirst && bFirst && aFirst === bFirst ? 1 : 0;
|
const firstMatch = aFirst && bFirst && aFirst === bFirst ? 1 : 0;
|
||||||
|
|
@ -185,17 +275,41 @@ function similarityScore(aName, bName) {
|
||||||
const gate = firstMatch ? 1.0 : 0.12;
|
const gate = firstMatch ? 1.0 : 0.12;
|
||||||
const numGate = numberMismatchPenalty(aToks, bToks);
|
const numGate = numberMismatchPenalty(aToks, bToks);
|
||||||
|
|
||||||
return (
|
let s =
|
||||||
numGate *
|
numGate *
|
||||||
(firstMatch * 3.0 + overlapTail * 2.2 * gate + levSim * (firstMatch ? 1.0 : 0.15))
|
(firstMatch * 3.0 +
|
||||||
);
|
overlapTail * 2.2 * gate +
|
||||||
|
levSim * (firstMatch ? 1.0 : 0.15));
|
||||||
|
|
||||||
|
// Age boosts/penalties
|
||||||
|
if (ageMatch) s *= 2.2;
|
||||||
|
else if (ageMismatch) s *= 0.18;
|
||||||
|
|
||||||
|
// Bundle/containment boost (short name contained in long name)
|
||||||
|
s *= 1 + 0.9 * contain;
|
||||||
|
|
||||||
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
function fastSimilarityScore(aTokens, bTokens, aNormName, bNormName) {
|
function fastSimilarityScore(aTokens, bTokens, aNormName, bNormName) {
|
||||||
aTokens = filterSimTokens(aTokens);
|
const aTokensRaw = aTokens || [];
|
||||||
bTokens = filterSimTokens(bTokens);
|
const bTokensRaw = bTokens || [];
|
||||||
|
|
||||||
|
aTokens = filterSimTokens(aTokensRaw);
|
||||||
|
bTokens = filterSimTokens(bTokensRaw);
|
||||||
if (!aTokens.length || !bTokens.length) return 0;
|
if (!aTokens.length || !bTokens.length) return 0;
|
||||||
|
|
||||||
|
const a = String(aNormName || "");
|
||||||
|
const b = String(bNormName || "");
|
||||||
|
|
||||||
|
const aAge = extractAgeFromText(a);
|
||||||
|
const bAge = extractAgeFromText(b);
|
||||||
|
const ageBoth = !!(aAge && bAge);
|
||||||
|
const ageMatch = ageBoth && aAge === bAge;
|
||||||
|
const ageMismatch = ageBoth && aAge !== bAge;
|
||||||
|
|
||||||
|
const contain = tokenContainmentScore(aTokensRaw, bTokensRaw);
|
||||||
|
|
||||||
const aFirst = aTokens[0] || "";
|
const aFirst = aTokens[0] || "";
|
||||||
const bFirst = bTokens[0] || "";
|
const bFirst = bTokens[0] || "";
|
||||||
const firstMatch = aFirst && bFirst && aFirst === bFirst ? 1 : 0;
|
const firstMatch = aFirst && bFirst && aFirst === bFirst ? 1 : 0;
|
||||||
|
|
@ -210,8 +324,6 @@ function fastSimilarityScore(aTokens, bTokens, aNormName, bNormName) {
|
||||||
const denom = Math.max(1, Math.max(aTail.length, bTail.length));
|
const denom = Math.max(1, Math.max(aTail.length, bTail.length));
|
||||||
const overlapTail = inter / denom;
|
const overlapTail = inter / denom;
|
||||||
|
|
||||||
const a = String(aNormName || "");
|
|
||||||
const b = String(bNormName || "");
|
|
||||||
const pref =
|
const pref =
|
||||||
firstMatch &&
|
firstMatch &&
|
||||||
a.slice(0, 10) &&
|
a.slice(0, 10) &&
|
||||||
|
|
@ -223,7 +335,14 @@ function fastSimilarityScore(aTokens, bTokens, aNormName, bNormName) {
|
||||||
const gate = firstMatch ? 1.0 : 0.12;
|
const gate = firstMatch ? 1.0 : 0.12;
|
||||||
const numGate = numberMismatchPenalty(aTokens, bTokens);
|
const numGate = numberMismatchPenalty(aTokens, bTokens);
|
||||||
|
|
||||||
return numGate * (firstMatch * 2.4 + overlapTail * 2.0 * gate + pref);
|
let s = numGate * (firstMatch * 2.4 + overlapTail * 2.0 * gate + pref);
|
||||||
|
|
||||||
|
if (ageMatch) s *= 2.0;
|
||||||
|
else if (ageMismatch) s *= 0.2;
|
||||||
|
|
||||||
|
s *= 1 + 0.9 * contain;
|
||||||
|
|
||||||
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------- Store-overlap rule ---------------- */
|
/* ---------------- Store-overlap rule ---------------- */
|
||||||
|
|
@ -261,7 +380,15 @@ function buildMappedSkuSet(links, rules) {
|
||||||
|
|
||||||
// Substrings that mark a store label as a "BC" store. Matching is done
// against the lower-cased label, so every entry must be lower-case.
const BC_STORE_LABEL_MARKERS = [
  "bcl",
  "strath",
  "gull",
  "legacy",
  "tudor",
  "vessel",
  "vintagespirits",
];

/**
 * Report whether a store label belongs to one of the known BC stores.
 *
 * The check is a case-insensitive substring match against
 * BC_STORE_LABEL_MARKERS.
 *
 * @param {*} label - Store label (stringified; nullish -> "").
 * @returns {boolean} true when the label contains any marker.
 */
function isBCStoreLabel(label) {
  const s = String(label || "").toLowerCase();
  return BC_STORE_LABEL_MARKERS.some((marker) => s.includes(marker));
}
|
||||||
|
|
||||||
function skuIsBC(allRows, skuKey) {
|
function skuIsBC(allRows, skuKey) {
|
||||||
|
|
@ -285,12 +412,9 @@ function isSoftSkuKey(k) {
|
||||||
return s.startsWith("upc:") || s.startsWith("id:");
|
return s.startsWith("upc:") || s.startsWith("id:");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
 * Report whether a SKU key carries the "u:" prefix.
 *
 * Falsy input is treated as an empty key; surrounding whitespace is ignored.
 * The prefix comparison is case-sensitive.
 *
 * @param {*} k - SKU key to inspect.
 * @returns {boolean} true when the trimmed key starts with "u:".
 */
function isUnknownSkuKey2(k) {
  const key = k ? String(k) : "";
  return key.trim().startsWith("u:");
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
function isABStoreLabel(label) {
|
function isABStoreLabel(label) {
|
||||||
const s = String(label || "").toLowerCase();
|
const s = String(label || "").toLowerCase();
|
||||||
|
|
@ -328,7 +452,7 @@ function scoreCanonical(allRows, skuKey) {
|
||||||
else base = -1000;
|
else base = -1000;
|
||||||
|
|
||||||
return base + ab * 25 - bc * 10;
|
return base + ab * 25 - bc * 10;
|
||||||
}
|
}
|
||||||
|
|
||||||
function pickPreferredCanonical(allRows, skuKeys) {
|
function pickPreferredCanonical(allRows, skuKeys) {
|
||||||
let best = "";
|
let best = "";
|
||||||
|
|
@ -812,7 +936,6 @@ export async function renderSkuLinker($app) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
const allAgg = aggregateBySku(allRows, (x) => x);
|
const allAgg = aggregateBySku(allRows, (x) => x);
|
||||||
|
|
||||||
const meta = await loadSkuMetaBestEffort();
|
const meta = await loadSkuMetaBestEffort();
|
||||||
|
|
@ -894,7 +1017,8 @@ export async function renderSkuLinker($app) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// auto-suggestions: never include mapped skus
|
// auto-suggestions: never include mapped skus
|
||||||
if (otherPinned) return recommendSimilar(allAgg, otherPinned, 60, otherSku, mappedSkus, isIgnoredPair);
|
if (otherPinned)
|
||||||
|
return recommendSimilar(allAgg, otherPinned, 60, otherSku, mappedSkus, isIgnoredPair);
|
||||||
|
|
||||||
if (initialPairs && initialPairs.length) {
|
if (initialPairs && initialPairs.length) {
|
||||||
const list = side === "L" ? initialPairs.map((p) => p.a) : initialPairs.map((p) => p.b);
|
const list = side === "L" ? initialPairs.map((p) => p.a) : initialPairs.map((p) => p.b);
|
||||||
|
|
@ -1117,18 +1241,18 @@ export async function renderSkuLinker($app) {
|
||||||
function findAggForPreselectSku(rawSku) {
|
function findAggForPreselectSku(rawSku) {
|
||||||
const want = String(rawSku || "").trim();
|
const want = String(rawSku || "").trim();
|
||||||
if (!want) return null;
|
if (!want) return null;
|
||||||
|
|
||||||
// exact match first
|
// exact match first
|
||||||
let it = allAgg.find((x) => String(x?.sku || "") === want);
|
let it = allAgg.find((x) => String(x?.sku || "") === want);
|
||||||
if (it) return it;
|
if (it) return it;
|
||||||
|
|
||||||
// try canonical group match
|
// try canonical group match
|
||||||
const canonWant = String(rules.canonicalSku(want) || want).trim();
|
const canonWant = String(rules.canonicalSku(want) || want).trim();
|
||||||
if (!canonWant) return null;
|
if (!canonWant) return null;
|
||||||
|
|
||||||
it = allAgg.find((x) => String(x?.sku || "") === canonWant);
|
it = allAgg.find((x) => String(x?.sku || "") === canonWant);
|
||||||
if (it) return it;
|
if (it) return it;
|
||||||
|
|
||||||
// any member whose canonicalSku matches
|
// any member whose canonicalSku matches
|
||||||
return (
|
return (
|
||||||
allAgg.find((x) => String(rules.canonicalSku(String(x?.sku || "")) || "") === canonWant) ||
|
allAgg.find((x) => String(rules.canonicalSku(String(x?.sku || "")) || "") === canonWant) ||
|
||||||
|
|
@ -1142,7 +1266,7 @@ export async function renderSkuLinker($app) {
|
||||||
// (works with your router because "link" stays as the first path segment)
|
// (works with your router because "link" stays as the first path segment)
|
||||||
if (!updateAll._didPreselect) {
|
if (!updateAll._didPreselect) {
|
||||||
updateAll._didPreselect = true;
|
updateAll._didPreselect = true;
|
||||||
|
|
||||||
const h = String(location.hash || "");
|
const h = String(location.hash || "");
|
||||||
const qi = h.indexOf("?");
|
const qi = h.indexOf("?");
|
||||||
if (qi !== -1) {
|
if (qi !== -1) {
|
||||||
|
|
@ -1154,12 +1278,11 @@ export async function renderSkuLinker($app) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
renderSide("L");
|
renderSide("L");
|
||||||
renderSide("R");
|
renderSide("R");
|
||||||
updateButtons();
|
updateButtons();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
let tL = null,
|
let tL = null,
|
||||||
tR = null;
|
tR = null;
|
||||||
|
|
@ -1265,9 +1388,9 @@ export async function renderSkuLinker($app) {
|
||||||
try {
|
try {
|
||||||
for (let i = 0; i < uniq.length; i++) {
|
for (let i = 0; i < uniq.length; i++) {
|
||||||
const w = uniq[i];
|
const w = uniq[i];
|
||||||
$status.textContent = `Writing (${i + 1}/${uniq.length}): ${displaySku(w.fromSku)} → ${displaySku(
|
$status.textContent = `Writing (${i + 1}/${uniq.length}): ${displaySku(
|
||||||
w.toSku
|
w.fromSku
|
||||||
)} …`;
|
)} → ${displaySku(w.toSku)} …`;
|
||||||
await apiWriteSkuLink(w.fromSku, w.toSku);
|
await apiWriteSkuLink(w.fromSku, w.toSku);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue