mirror of
https://github.com/samsonjs/spirit-tracker.git
synced 2026-04-27 15:07:43 +00:00
feat: Better recs
This commit is contained in:
parent
39f460e0b0
commit
35d437f22f
1 changed files with 50 additions and 4 deletions
|
|
@ -25,6 +25,48 @@ import {
|
||||||
} from "./pending.js";
|
} from "./pending.js";
|
||||||
|
|
||||||
/* ---------------- Similarity helpers ---------------- */
|
/* ---------------- Similarity helpers ---------------- */
|
||||||
|
// Low-signal words that appear in many bottle names. filterSimTokens drops
// them so they cannot inflate token overlap between unrelated products.
const SIM_STOP_TOKENS = new Set([
  // articles
  "the", "a", "an",
  // conjunctions / prepositions
  "and", "of", "to", "in", "for", "with",
  // age wording
  "year", "years", "old",
]);
|
||||||
|
|
||||||
|
/**
 * Tell whether a token is made up solely of decimal digits.
 *
 * The value is coerced to a string first, so numeric inputs are accepted as
 * well as strings. Only null/undefined fall back to the empty string; a plain
 * truthiness check would also discard the number 0, which is a valid
 * all-digit token.
 *
 * @param {*} t - Candidate token.
 * @returns {boolean} True when the string form of `t` is one or more digits
 *   and nothing else (no sign, decimal point, or surrounding whitespace).
 */
function isNumberToken(t) {
  const text = String(t == null ? "" : t);
  return /^\d+$/.test(text);
}
|
||||||
|
|
||||||
|
/**
 * Normalize a token list for similarity scoring.
 *
 * Each entry is stringified, trimmed and lower-cased. Empty results and
 * entries found in SIM_STOP_TOKENS are dropped, and duplicates are removed
 * while keeping first-seen order. All-digit tokens are always kept; number
 * mismatches are penalized separately by numberMismatchPenalty.
 *
 * @param {*} tokens - Token array; any non-array value yields an empty list.
 * @returns {string[]} New array of unique, normalized tokens.
 */
function filterSimTokens(tokens) {
  if (!Array.isArray(tokens)) return [];

  // A Set iterates in insertion order, so it handles both de-duplication
  // and first-seen ordering.
  const kept = new Set();

  for (const entry of tokens) {
    const token = String(entry || "").trim().toLowerCase();
    if (token === "") continue;

    const isStopWord = SIM_STOP_TOKENS.has(token) && !isNumberToken(token);
    if (isStopWord) continue;

    kept.add(token);
  }

  return [...kept];
}
|
||||||
|
|
||||||
|
/**
 * Multiplier that down-weights a pair of names whose numbers disagree.
 *
 * @param {string[]} aTokens - Tokens of the first name.
 * @param {string[]} bTokens - Tokens of the second name.
 * @returns {number} 1.0 when either side has no all-digit token or when the
 *   two sides share at least one; 0.55 when both sides have numbers but none
 *   in common (e.g. "18" vs "12").
 */
function numberMismatchPenalty(aTokens, bTokens) {
  const leftNumbers = new Set(aTokens.filter((tok) => isNumberToken(tok)));
  const rightNumbers = new Set(bTokens.filter((tok) => isNumberToken(tok)));

  // Numbers on only one side (or on neither) cannot contradict each other.
  const comparable = leftNumbers.size > 0 && rightNumbers.size > 0;
  if (!comparable) return 1.0;

  const sharesNumber = [...leftNumbers].some((num) => rightNumbers.has(num));
  return sharesNumber ? 1.0 : 0.55;
}
|
||||||
|
|
||||||
function levenshtein(a, b) {
|
function levenshtein(a, b) {
|
||||||
a = String(a || "");
|
a = String(a || "");
|
||||||
|
|
@ -56,8 +98,8 @@ function similarityScore(aName, bName) {
|
||||||
const b = normSearchText(bName);
|
const b = normSearchText(bName);
|
||||||
if (!a || !b) return 0;
|
if (!a || !b) return 0;
|
||||||
|
|
||||||
const aToks = tokenizeQuery(a);
|
const aToks = filterSimTokens(tokenizeQuery(a));
|
||||||
const bToks = tokenizeQuery(b);
|
const bToks = filterSimTokens(tokenizeQuery(b));
|
||||||
if (!aToks.length || !bToks.length) return 0;
|
if (!aToks.length || !bToks.length) return 0;
|
||||||
|
|
||||||
const aFirst = aToks[0] || "";
|
const aFirst = aToks[0] || "";
|
||||||
|
|
@ -76,8 +118,9 @@ function similarityScore(aName, bName) {
|
||||||
const levSim = 1 - d / maxLen;
|
const levSim = 1 - d / maxLen;
|
||||||
|
|
||||||
const gate = firstMatch ? 1.0 : 0.12;
|
const gate = firstMatch ? 1.0 : 0.12;
|
||||||
|
const numGate = numberMismatchPenalty(aToks, bToks);
|
||||||
|
|
||||||
return (
|
return numGate * (
|
||||||
firstMatch * 3.0 +
|
firstMatch * 3.0 +
|
||||||
overlapTail * 2.2 * gate +
|
overlapTail * 2.2 * gate +
|
||||||
levSim * (firstMatch ? 1.0 : 0.15)
|
levSim * (firstMatch ? 1.0 : 0.15)
|
||||||
|
|
@ -85,6 +128,8 @@ function similarityScore(aName, bName) {
|
||||||
}
|
}
|
||||||
|
|
||||||
function fastSimilarityScore(aTokens, bTokens, aNormName, bNormName) {
|
function fastSimilarityScore(aTokens, bTokens, aNormName, bNormName) {
|
||||||
|
aTokens = filterSimTokens(aTokens);
|
||||||
|
bTokens = filterSimTokens(bTokens);
|
||||||
if (!aTokens.length || !bTokens.length) return 0;
|
if (!aTokens.length || !bTokens.length) return 0;
|
||||||
|
|
||||||
const aFirst = aTokens[0] || "";
|
const aFirst = aTokens[0] || "";
|
||||||
|
|
@ -112,8 +157,9 @@ function fastSimilarityScore(aTokens, bTokens, aNormName, bNormName) {
|
||||||
: 0;
|
: 0;
|
||||||
|
|
||||||
const gate = firstMatch ? 1.0 : 0.12;
|
const gate = firstMatch ? 1.0 : 0.12;
|
||||||
|
const numGate = numberMismatchPenalty(aTokens, bTokens);
|
||||||
|
|
||||||
return firstMatch * 2.4 + overlapTail * 2.0 * gate + pref;
|
return numGate * (firstMatch * 2.4 + overlapTail * 2.0 * gate + pref);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------- Store-overlap rule ---------------- */
|
/* ---------------- Store-overlap rule ---------------- */
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue