mirror of
https://github.com/samsonjs/spirit-tracker.git
synced 2026-04-27 15:07:43 +00:00
UX Improvements
This commit is contained in:
parent
b2a4afa890
commit
c4673a05b0
1 changed file with 261 additions and 48 deletions
|
|
@ -3,20 +3,23 @@
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Print local link URLs for SKUs with largest rank discrepancy between AB and BC lists,
|
Print local link URLs for SKUs with largest rank discrepancy between AB and BC lists,
|
||||||
but ONLY when there exists another *different* listing (not in same linked group)
|
BUT only when there exists another *different* listing (not in same linked group)
|
||||||
with a reasonably high similarity score by name.
|
with a reasonably high similarity score by name.
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
node scripts/rank_discrepency_links.js \
|
node ./tools/rank_discrepency.js \
|
||||||
--ab reports/common_listings_ab_top1000.json \
|
--ab reports/common_listings_ab_top1000.json \
|
||||||
--bc reports/common_listings_bc_top1000.json \
|
--bc reports/common_listings_bc_top1000.json \
|
||||||
--meta viz/data/sku_meta.json \
|
--meta viz/data/sku_meta.json \
|
||||||
--min-score 0.75 \
|
--min 10 \
|
||||||
|
--min-score 0.7 \
|
||||||
--top 50 \
|
--top 50 \
|
||||||
--base "http://127.0.0.1:8080/#/link/?left="
|
--base "http://127.0.0.1:8080/#/link/?left=" \
|
||||||
|
--debug
|
||||||
|
|
||||||
Output:
|
Notes:
|
||||||
http://127.0.0.1:8080/#/link/?left=<urlencoded canonSku>
|
- If --meta is not provided, "same-linked" filtering is disabled (each SKU is its own group).
|
||||||
|
- Debug output goes to STDERR so your STDOUT stays as just links.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
const fs = require("fs");
|
const fs = require("fs");
|
||||||
|
|
@ -32,13 +35,21 @@ function parseArgs(argv) {
|
||||||
const out = {
|
const out = {
|
||||||
ab: "reports/common_listings_ab_top1000.json",
|
ab: "reports/common_listings_ab_top1000.json",
|
||||||
bc: "reports/common_listings_bc_top1000.json",
|
bc: "reports/common_listings_bc_top1000.json",
|
||||||
meta: "", // optional sku_meta containing {links:[{fromSku,toSku}], ignores:...}
|
meta: "",
|
||||||
|
|
||||||
top: 50,
|
top: 50,
|
||||||
minDiscrep: 1,
|
minDiscrep: 1,
|
||||||
includeMissing: false,
|
includeMissing: false,
|
||||||
|
|
||||||
|
minScore: 0.75,
|
||||||
base: "http://127.0.0.1:8080/#/link/?left=",
|
base: "http://127.0.0.1:8080/#/link/?left=",
|
||||||
minScore: 0.75, // similarity threshold for "reasonably high"
|
|
||||||
|
debug: false,
|
||||||
|
debugN: 20, // how many discrepancy candidates to dump debug lines for
|
||||||
|
debugPayload: false, // show payload structure details
|
||||||
|
dumpScores: false, // dump best match info per emitted link
|
||||||
};
|
};
|
||||||
|
|
||||||
for (let i = 0; i < argv.length; i++) {
|
for (let i = 0; i < argv.length; i++) {
|
||||||
const a = argv[i];
|
const a = argv[i];
|
||||||
if (a === "--ab" && argv[i + 1]) out.ab = argv[++i];
|
if (a === "--ab" && argv[i + 1]) out.ab = argv[++i];
|
||||||
|
|
@ -49,25 +60,69 @@ function parseArgs(argv) {
|
||||||
else if (a === "--min-score" && argv[i + 1]) out.minScore = Number(argv[++i]) || out.minScore;
|
else if (a === "--min-score" && argv[i + 1]) out.minScore = Number(argv[++i]) || out.minScore;
|
||||||
else if (a === "--include-missing") out.includeMissing = true;
|
else if (a === "--include-missing") out.includeMissing = true;
|
||||||
else if (a === "--base" && argv[i + 1]) out.base = String(argv[++i] || out.base);
|
else if (a === "--base" && argv[i + 1]) out.base = String(argv[++i] || out.base);
|
||||||
|
|
||||||
|
else if (a === "--debug") out.debug = true;
|
||||||
|
else if (a === "--debug-n" && argv[i + 1]) out.debugN = Number(argv[++i]) || out.debugN;
|
||||||
|
else if (a === "--debug-payload") out.debugPayload = true;
|
||||||
|
else if (a === "--dump-scores") out.dumpScores = true;
|
||||||
}
|
}
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
function buildRankMap(payload) {
|
/* ---------------- row extraction ---------------- */
|
||||||
const rows = Array.isArray(payload?.rows) ? payload.rows : [];
|
|
||||||
const map = new Map();
|
/**
 * Locate the array of ranked rows inside a parsed report payload.
 *
 * Shapes are tried in this order and the first array found wins:
 *   - the payload itself is an array
 *   - { rows: [...] }
 *   - { data: { rows: [...] } }
 *   - { data: [...] }
 *   - { items: [...] } / { list: [...] } / { results: [...] }
 *
 * @param {*} payload - Parsed JSON of a listings report (any shape, may be nullish).
 * @returns {Array} The rows array itself (not a copy), or a new empty array
 *   when none of the known shapes match.
 */
function extractRows(payload) {
  if (Array.isArray(payload)) return payload;

  // Order matters: more specific containers are checked before generic ones.
  const candidates = [
    payload?.rows,
    payload?.data?.rows,
    payload?.data,
    payload?.items,
    payload?.list,
    payload?.results,
  ];

  return candidates.find((candidate) => Array.isArray(candidate)) ?? [];
}
|
||||||
|
|
||||||
|
/**
 * Derive the lookup key for a report row.
 *
 * Fields are tried in order: canonSku, sku, canon, id, key. The first one
 * holding a usable (truthy) value is used. An empty-string or otherwise falsy
 * field does NOT block the fallback to later fields, so a row with
 * `canonSku: ""` and a valid `sku` is still keyed by its `sku`.
 *
 * @param {*} r - A row object (may be nullish).
 * @returns {string} The key as a string, or "" when no field holds a usable value.
 */
function rowKey(r) {
  const candidates = [r?.canonSku, r?.sku, r?.canon, r?.id, r?.key];
  const k = candidates.find((value) => Boolean(value));
  return k ? String(k) : "";
}
|
||||||
|
|
||||||
/**
 * Pick a human-readable product name from a report row.
 *
 * Fields are tried in order: name, title, productName, displayName, itemName,
 * text. The first non-empty (truthy) value wins. An empty-string field falls
 * through to the next one; it does not end the search.
 *
 * @param {*} row - A row object (may be nullish).
 * @returns {string} The chosen name as a string, or "" when the row is
 *   missing or none of the fields holds a usable value.
 */
function pickName(row) {
  if (!row) return "";

  const candidates = [
    row.name,
    row.title,
    row.productName,
    row.displayName,
    row.itemName,
    row.text,
  ];
  const chosen = candidates.find((value) => Boolean(value));
  return chosen ? String(chosen) : "";
}
|
||||||
|
|
||||||
|
/**
 * Index a report payload by row key, recording each row's rank.
 *
 * Rank is the row's 1-based position in the extracted rows array. Rows for
 * which rowKey() yields "" are left out of the map but still occupy a
 * position, so they do not shift the ranks of later rows. When the same key
 * occurs more than once, the later occurrence replaces the earlier entry.
 *
 * @param {*} payload - Parsed JSON of a listings report (see extractRows for shapes).
 * @returns {{ map: Map<string, { rank: number, row: * }>, rowsLen: number }}
 *   `map` holds the keyed rows; `rowsLen` is the number of rows extracted
 *   (including rows that had no key), useful for diagnostics.
 */
function buildRankMap(payload) {
  const rows = extractRows(payload);
  const map = new Map();

  for (const [index, row] of rows.entries()) {
    const key = rowKey(row); // already a string
    if (!key) continue;
    map.set(key, { rank: index + 1, row });
  }

  return { map, rowsLen: rows.length };
}
|
||||||
|
|
||||||
/* ---------------- sku_meta grouping (optional) ---------------- */
|
/* ---------------- sku_meta grouping (optional) ---------------- */
|
||||||
|
|
@ -121,7 +176,6 @@ class DSU {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Choose a stable representative (good enough for filtering “same-linked”)
|
|
||||||
function compareSku(a, b) {
|
function compareSku(a, b) {
|
||||||
a = String(a || "").trim();
|
a = String(a || "").trim();
|
||||||
b = String(b || "").trim();
|
b = String(b || "").trim();
|
||||||
|
|
@ -134,7 +188,8 @@ function compareSku(a, b) {
|
||||||
const aNum = /^\d+$/.test(a);
|
const aNum = /^\d+$/.test(a);
|
||||||
const bNum = /^\d+$/.test(b);
|
const bNum = /^\d+$/.test(b);
|
||||||
if (aNum && bNum) {
|
if (aNum && bNum) {
|
||||||
const na = Number(a), nb = Number(b);
|
const na = Number(a),
|
||||||
|
nb = Number(b);
|
||||||
if (Number.isFinite(na) && Number.isFinite(nb) && na !== nb) return na < nb ? -1 : 1;
|
if (Number.isFinite(na) && Number.isFinite(nb) && na !== nb) return na < nb ? -1 : 1;
|
||||||
}
|
}
|
||||||
return a < b ? -1 : 1;
|
return a < b ? -1 : 1;
|
||||||
|
|
@ -156,7 +211,6 @@ function buildCanonicalSkuFnFromMeta(meta) {
|
||||||
dsu.union(a, b);
|
dsu.union(a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
// root -> members
|
|
||||||
const groupsByRoot = new Map();
|
const groupsByRoot = new Map();
|
||||||
for (const s of all) {
|
for (const s of all) {
|
||||||
const r = dsu.find(s);
|
const r = dsu.find(s);
|
||||||
|
|
@ -166,7 +220,6 @@ function buildCanonicalSkuFnFromMeta(meta) {
|
||||||
set.add(s);
|
set.add(s);
|
||||||
}
|
}
|
||||||
|
|
||||||
// root -> representative
|
|
||||||
const repByRoot = new Map();
|
const repByRoot = new Map();
|
||||||
for (const [root, members] of groupsByRoot.entries()) {
|
for (const [root, members] of groupsByRoot.entries()) {
|
||||||
const arr = Array.from(members);
|
const arr = Array.from(members);
|
||||||
|
|
@ -174,7 +227,6 @@ function buildCanonicalSkuFnFromMeta(meta) {
|
||||||
repByRoot.set(root, arr[0] || root);
|
repByRoot.set(root, arr[0] || root);
|
||||||
}
|
}
|
||||||
|
|
||||||
// sku -> rep
|
|
||||||
const canonBySku = new Map();
|
const canonBySku = new Map();
|
||||||
for (const [root, members] of groupsByRoot.entries()) {
|
for (const [root, members] of groupsByRoot.entries()) {
|
||||||
const rep = repByRoot.get(root) || root;
|
const rep = repByRoot.get(root) || root;
|
||||||
|
|
@ -190,7 +242,6 @@ function buildCanonicalSkuFnFromMeta(meta) {
|
||||||
|
|
||||||
/* ---------------- similarity (copied from viz/app) ---------------- */
|
/* ---------------- similarity (copied from viz/app) ---------------- */
|
||||||
|
|
||||||
// Normalize for search: lowercase, punctuation -> space, collapse spaces
|
|
||||||
function normSearchText(s) {
|
function normSearchText(s) {
|
||||||
return String(s ?? "")
|
return String(s ?? "")
|
||||||
.toLowerCase()
|
.toLowerCase()
|
||||||
|
|
@ -205,9 +256,33 @@ function tokenizeQuery(q) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tokens listed here as stop words for name similarity: generic words that
// appear in many product names.
const SIM_STOP_TOKENS = new Set([
  // articles, conjunctions, prepositions
  "the",
  "a",
  "an",
  "and",
  "of",
  "to",
  "in",
  "for",
  "with",
  // age-statement filler words
  "year",
  "years",
  "yr",
  "yrs",
  "old",
  // generic category / bottling vocabulary
  "whisky",
  "whiskey",
  "scotch",
  "single",
  "malt",
  "cask",
  "finish",
  "edition",
  "release",
  "batch",
  "strength",
  "abv",
  "proof",
  "anniversary",
]);
|
||||||
|
|
||||||
|
|
@ -248,7 +323,7 @@ function filterSimTokens(tokens) {
|
||||||
["bourbon", "bourbon"],
|
["bourbon", "bourbon"],
|
||||||
]);
|
]);
|
||||||
|
|
||||||
const VOL_UNIT = new Set(["ml","l","cl","oz","liter","liters","litre","litres"]);
|
const VOL_UNIT = new Set(["ml", "l", "cl", "oz", "liter", "liters", "litre", "litres"]);
|
||||||
const VOL_INLINE_RE = /^\d+(?:\.\d+)?(?:ml|l|cl|oz)$/i;
|
const VOL_INLINE_RE = /^\d+(?:\.\d+)?(?:ml|l|cl|oz)$/i;
|
||||||
const PCT_INLINE_RE = /^\d+(?:\.\d+)?%$/;
|
const PCT_INLINE_RE = /^\d+(?:\.\d+)?%$/;
|
||||||
|
|
||||||
|
|
@ -314,7 +389,8 @@ function tokenContainmentScore(aTokens, bTokens) {
|
||||||
function levenshtein(a, b) {
|
function levenshtein(a, b) {
|
||||||
a = String(a || "");
|
a = String(a || "");
|
||||||
b = String(b || "");
|
b = String(b || "");
|
||||||
const n = a.length, m = b.length;
|
const n = a.length,
|
||||||
|
m = b.length;
|
||||||
if (!n) return m;
|
if (!n) return m;
|
||||||
if (!m) return n;
|
if (!m) return n;
|
||||||
|
|
||||||
|
|
@ -343,7 +419,6 @@ function numberMismatchPenalty(aTokens, bTokens) {
|
||||||
return 0.28;
|
return 0.28;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Same structure/weights as viz/app/linker/similarity.js
|
|
||||||
function similarityScore(aName, bName) {
|
function similarityScore(aName, bName) {
|
||||||
const a = normSearchText(aName);
|
const a = normSearchText(aName);
|
||||||
const b = normSearchText(bName);
|
const b = normSearchText(bName);
|
||||||
|
|
@ -379,7 +454,7 @@ function similarityScore(aName, bName) {
|
||||||
const maxLen = Math.max(1, Math.max(a.length, b.length));
|
const maxLen = Math.max(1, Math.max(a.length, b.length));
|
||||||
const levSim = 1 - d / maxLen;
|
const levSim = 1 - d / maxLen;
|
||||||
|
|
||||||
let gate = firstMatch ? 1.0 : Math.min(0.80, 0.06 + 0.95 * contain);
|
let gate = firstMatch ? 1.0 : Math.min(0.8, 0.06 + 0.95 * contain);
|
||||||
|
|
||||||
const smallN = Math.min(aToks.length, bToks.length);
|
const smallN = Math.min(aToks.length, bToks.length);
|
||||||
if (!firstMatch && smallN <= 3 && contain < 0.78) gate *= 0.18;
|
if (!firstMatch && smallN <= 3 && contain < 0.78) gate *= 0.18;
|
||||||
|
|
@ -390,7 +465,7 @@ function similarityScore(aName, bName) {
|
||||||
numGate *
|
numGate *
|
||||||
(firstMatch * 3.0 +
|
(firstMatch * 3.0 +
|
||||||
overlapTail * 2.2 * gate +
|
overlapTail * 2.2 * gate +
|
||||||
levSim * (firstMatch ? 1.0 : (0.10 + 0.70 * contain)));
|
levSim * (firstMatch ? 1.0 : 0.1 + 0.7 * contain));
|
||||||
|
|
||||||
if (ageMatch) s *= 2.2;
|
if (ageMatch) s *= 2.2;
|
||||||
else if (ageMismatch) s *= 0.18;
|
else if (ageMismatch) s *= 0.18;
|
||||||
|
|
@ -400,7 +475,24 @@ function similarityScore(aName, bName) {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------- main logic ---------------- */
|
/* ---------------- debug helpers ---------------- */
|
||||||
|
|
||||||
|
/**
 * Summarize the top-level shape of a value for debug output.
 *
 * @param {*} x - Any value (typically a parsed JSON payload).
 * @returns {{ type: string, len?: number, keys?: string[] }}
 *   - arrays:  { type: "array", len }
 *   - objects: { type: "object", keys } with at most the first 30 own keys
 *   - null:    { type: "null" } (reported explicitly; `typeof null` is "object")
 *   - others:  { type: typeof x }
 */
function briefObjShape(x) {
  if (x === null) return { type: "null" };
  if (Array.isArray(x)) return { type: "array", len: x.length };
  if (typeof x === "object") return { type: "object", keys: Object.keys(x).slice(0, 30) };
  return { type: typeof x };
}
|
||||||
|
|
||||||
|
/**
 * Write a diagnostic line via console.error, which goes to STDERR in Node.
 *
 * @param {...*} args - Values forwarded unchanged to console.error.
 * @returns {void}
 */
function eprintln(...args) {
  console.error(...args);
}
|
||||||
|
|
||||||
|
/**
 * Shorten a string to at most `n` characters, marking the cut with "…".
 *
 * @param {*} s - Value to shorten; falsy values are treated as "".
 * @param {number} n - Maximum length of the result, ellipsis included.
 * @returns {string} The text unchanged when it already fits; otherwise its
 *   first `n - 1` characters followed by "…". When `n` is less than 1 (or
 *   NaN) there is no room even for the ellipsis, so "" is returned.
 */
function truncate(s, n) {
  const text = String(s || "");
  if (text.length <= n) return text;
  // Without this guard a non-positive n makes slice() count from the end and
  // the result comes out longer than the requested limit.
  if (!(n >= 1)) return "";
  return `${text.slice(0, n - 1)}…`;
}
|
||||||
|
|
||||||
|
/* ---------------- main ---------------- */
|
||||||
|
|
||||||
function main() {
|
function main() {
|
||||||
const args = parseArgs(process.argv.slice(2));
|
const args = parseArgs(process.argv.slice(2));
|
||||||
|
|
@ -409,7 +501,9 @@ function main() {
|
||||||
const abPath = path.isAbsolute(args.ab) ? args.ab : path.join(repoRoot, args.ab);
|
const abPath = path.isAbsolute(args.ab) ? args.ab : path.join(repoRoot, args.ab);
|
||||||
const bcPath = path.isAbsolute(args.bc) ? args.bc : path.join(repoRoot, args.bc);
|
const bcPath = path.isAbsolute(args.bc) ? args.bc : path.join(repoRoot, args.bc);
|
||||||
const metaPath = args.meta
|
const metaPath = args.meta
|
||||||
? (path.isAbsolute(args.meta) ? args.meta : path.join(repoRoot, args.meta))
|
? path.isAbsolute(args.meta)
|
||||||
|
? args.meta
|
||||||
|
: path.join(repoRoot, args.meta)
|
||||||
: "";
|
: "";
|
||||||
|
|
||||||
const ab = readJson(abPath);
|
const ab = readJson(abPath);
|
||||||
|
|
@ -419,8 +513,49 @@ function main() {
|
||||||
? buildCanonicalSkuFnFromMeta(readJson(metaPath))
|
? buildCanonicalSkuFnFromMeta(readJson(metaPath))
|
||||||
: (sku) => normalizeImplicitSkuKey(sku);
|
: (sku) => normalizeImplicitSkuKey(sku);
|
||||||
|
|
||||||
const abMap = buildRankMap(ab);
|
const abBuilt = buildRankMap(ab);
|
||||||
const bcMap = buildRankMap(bc);
|
const bcBuilt = buildRankMap(bc);
|
||||||
|
const abMap = abBuilt.map;
|
||||||
|
const bcMap = bcBuilt.map;
|
||||||
|
|
||||||
|
if (args.debug || args.debugPayload) {
|
||||||
|
eprintln("[rank_discrepency] inputs:", {
|
||||||
|
abPath,
|
||||||
|
bcPath,
|
||||||
|
metaPath: metaPath || "(none)",
|
||||||
|
minDiscrep: args.minDiscrep,
|
||||||
|
minScore: args.minScore,
|
||||||
|
top: args.top,
|
||||||
|
includeMissing: args.includeMissing,
|
||||||
|
});
|
||||||
|
eprintln("[rank_discrepency] payload shapes:", {
|
||||||
|
ab: briefObjShape(ab),
|
||||||
|
bc: briefObjShape(bc),
|
||||||
|
});
|
||||||
|
eprintln("[rank_discrepency] extracted rows:", {
|
||||||
|
abRows: abBuilt.rowsLen,
|
||||||
|
bcRows: bcBuilt.rowsLen,
|
||||||
|
abKeys: abMap.size,
|
||||||
|
bcKeys: bcMap.size,
|
||||||
|
});
|
||||||
|
|
||||||
|
if (args.debugPayload) {
|
||||||
|
// show a tiny sample row keys + fields
|
||||||
|
const abRows = extractRows(ab);
|
||||||
|
const bcRows = extractRows(bc);
|
||||||
|
eprintln("[rank_discrepency] sample AB row[0] keys:", abRows[0] && typeof abRows[0] === "object" ? Object.keys(abRows[0]).slice(0, 40) : abRows[0]);
|
||||||
|
eprintln("[rank_discrepency] sample BC row[0] keys:", bcRows[0] && typeof bcRows[0] === "object" ? Object.keys(bcRows[0]).slice(0, 40) : bcRows[0]);
|
||||||
|
eprintln("[rank_discrepency] sample AB rowKey:", rowKey(abRows[0]));
|
||||||
|
eprintln("[rank_discrepency] sample BC rowKey:", rowKey(bcRows[0]));
|
||||||
|
eprintln("[rank_discrepency] sample AB name:", truncate(pickName(abRows[0]), 120));
|
||||||
|
eprintln("[rank_discrepency] sample BC name:", truncate(pickName(bcRows[0]), 120));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!abMap.size || !bcMap.size) {
|
||||||
|
eprintln("[rank_discrepency] ERROR: empty rank maps. Your JSON shape probably isn't {rows:[...]}. Try --debug-payload.");
|
||||||
|
process.exit(2);
|
||||||
|
}
|
||||||
|
|
||||||
// Build a flat pool of candidates from AB+BC (unique by canonSku)
|
// Build a flat pool of candidates from AB+BC (unique by canonSku)
|
||||||
const rowBySku = new Map();
|
const rowBySku = new Map();
|
||||||
|
|
@ -446,14 +581,14 @@ function main() {
|
||||||
const rankAB = a ? a.rank : null;
|
const rankAB = a ? a.rank : null;
|
||||||
const rankBC = b ? b.rank : null;
|
const rankBC = b ? b.rank : null;
|
||||||
|
|
||||||
const discrep =
|
const discrep = rankAB !== null && rankBC !== null ? Math.abs(rankAB - rankBC) : Infinity;
|
||||||
rankAB !== null && rankBC !== null ? Math.abs(rankAB - rankBC) : Infinity;
|
|
||||||
|
|
||||||
if (discrep !== Infinity && discrep < args.minDiscrep) continue;
|
if (discrep !== Infinity && discrep < args.minDiscrep) continue;
|
||||||
|
|
||||||
diffs.push({
|
diffs.push({
|
||||||
canonSku,
|
canonSku,
|
||||||
discrep,
|
discrep,
|
||||||
|
rankAB,
|
||||||
|
rankBC,
|
||||||
sumRank: (rankAB ?? 1e9) + (rankBC ?? 1e9),
|
sumRank: (rankAB ?? 1e9) + (rankBC ?? 1e9),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
@ -464,35 +599,113 @@ function main() {
|
||||||
return String(x.canonSku).localeCompare(String(y.canonSku));
|
return String(x.canonSku).localeCompare(String(y.canonSku));
|
||||||
});
|
});
|
||||||
|
|
||||||
// Keep only discrepancies that have a high-scoring "other" candidate not in same linked group
|
if (args.debug) {
|
||||||
|
eprintln("[rank_discrepency] discrepancy candidates:", {
|
||||||
|
unionKeys: keys.size,
|
||||||
|
diffsAfterMin: diffs.length,
|
||||||
|
topDiscrepSample: diffs.slice(0, 5).map((d) => ({
|
||||||
|
sku: d.canonSku,
|
||||||
|
discrep: d.discrep,
|
||||||
|
rankAB: d.rankAB,
|
||||||
|
rankBC: d.rankBC,
|
||||||
|
name: truncate(allNames.get(String(d.canonSku)) || "", 90),
|
||||||
|
})),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
const filtered = [];
|
const filtered = [];
|
||||||
|
const debugLines = [];
|
||||||
|
|
||||||
for (const d of diffs) {
|
for (const d of diffs) {
|
||||||
const skuA = String(d.canonSku);
|
const skuA = String(d.canonSku);
|
||||||
const nameA = allNames.get(skuA) || pickName(abMap.get(skuA)?.row) || pickName(bcMap.get(skuA)?.row);
|
const nameA =
|
||||||
if (!nameA) continue;
|
allNames.get(skuA) ||
|
||||||
|
pickName(abMap.get(skuA)?.row) ||
|
||||||
|
pickName(bcMap.get(skuA)?.row) ||
|
||||||
|
"";
|
||||||
|
if (!nameA) {
|
||||||
|
if (args.debug && debugLines.length < args.debugN) {
|
||||||
|
debugLines.push({ sku: skuA, reason: "no-name" });
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
const groupA = canonicalSku(skuA);
|
const groupA = canonicalSku(skuA);
|
||||||
|
|
||||||
let best = 0;
|
let best = 0;
|
||||||
|
let bestSku = "";
|
||||||
|
let bestName = "";
|
||||||
|
|
||||||
for (const skuB of allSkus) {
|
for (const skuB of allSkus) {
|
||||||
if (skuB === skuA) continue;
|
if (skuB === skuA) continue;
|
||||||
|
|
||||||
// not same-linked group
|
|
||||||
if (canonicalSku(skuB) === groupA) continue;
|
if (canonicalSku(skuB) === groupA) continue;
|
||||||
|
|
||||||
const nameB = allNames.get(skuB) || "";
|
const nameB = allNames.get(skuB) || "";
|
||||||
if (!nameB) continue;
|
if (!nameB) continue;
|
||||||
|
|
||||||
const s = similarityScore(nameA, nameB);
|
const s = similarityScore(nameA, nameB);
|
||||||
if (s > best) best = s;
|
if (s > best) {
|
||||||
|
best = s;
|
||||||
|
bestSku = skuB;
|
||||||
|
bestName = nameB;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (best >= args.minScore) filtered.push(d);
|
const pass = best >= args.minScore;
|
||||||
|
if (args.debug && debugLines.length < args.debugN) {
|
||||||
|
debugLines.push({
|
||||||
|
sku: skuA,
|
||||||
|
discrep: d.discrep,
|
||||||
|
rankAB: d.rankAB,
|
||||||
|
rankBC: d.rankBC,
|
||||||
|
nameA: truncate(nameA, 80),
|
||||||
|
groupA,
|
||||||
|
best,
|
||||||
|
bestSku,
|
||||||
|
bestGroup: bestSku ? canonicalSku(bestSku) : "",
|
||||||
|
bestName: truncate(bestName, 80),
|
||||||
|
pass,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!pass) continue;
|
||||||
|
|
||||||
|
filtered.push({ ...d, best, bestSku, bestName });
|
||||||
if (filtered.length >= args.top) break;
|
if (filtered.length >= args.top) break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (args.debug) {
|
||||||
|
eprintln("[rank_discrepency] filter results:", {
|
||||||
|
filtered: filtered.length,
|
||||||
|
minScore: args.minScore,
|
||||||
|
minDiscrep: args.minDiscrep,
|
||||||
|
});
|
||||||
|
eprintln("[rank_discrepency] debug sample (first N checked):");
|
||||||
|
for (const x of debugLines) eprintln(" ", x);
|
||||||
|
}
|
||||||
|
|
||||||
|
// STDOUT: links (and optionally score dumps)
|
||||||
for (const d of filtered) {
|
for (const d of filtered) {
|
||||||
console.log(args.base + encodeURIComponent(d.canonSku));
|
if (args.dumpScores) {
|
||||||
|
// keep link first so it's easy to pipe
|
||||||
|
eprintln(
|
||||||
|
"[rank_discrepency] emit",
|
||||||
|
JSON.stringify({
|
||||||
|
sku: d.canonSku,
|
||||||
|
discrep: d.discrep,
|
||||||
|
rankAB: d.rankAB,
|
||||||
|
rankBC: d.rankBC,
|
||||||
|
best: d.best,
|
||||||
|
bestSku: d.bestSku,
|
||||||
|
bestName: truncate(d.bestName, 120),
|
||||||
|
})
|
||||||
|
);
|
||||||
|
}
|
||||||
|
console.log(args.base + encodeURIComponent(String(d.canonSku)));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (args.debug) {
|
||||||
|
eprintln("[rank_discrepency] done.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue