From a4dba47295a2c95aa7e9a84e426f75cb1204261f Mon Sep 17 00:00:00 2001
From: "Brennan Wilkes (Text Groove)" <brennan@textgroove.com>
Date: Tue, 3 Feb 2026 10:21:43 -0800
Subject: [PATCH] UX Improvements

---
 tools/rank_discrepency.js | 222 ++++++++++++++++----------------------
 1 file changed, 90 insertions(+), 132 deletions(-)

diff --git a/tools/rank_discrepency.js b/tools/rank_discrepency.js
index 408812e..e316f90 100644
--- a/tools/rank_discrepency.js
+++ b/tools/rank_discrepency.js
@@ -1,16 +1,6 @@
 #!/usr/bin/env node
 "use strict";
 
-/*
-  Rank discrepancy links, filtered by existence of a high-similarity "other" listing
-  that is NOT in the same linked group (using sku_links.json union-find).
-
-  Examples:
-    node ./tools/rank_discrepency.js --debug
-    node ./tools/rank_discrepency.js --min-score 0.35 --top 100 --debug
-    node ./tools/rank_discrepency.js --meta data/sku_links.json --debug-best --debug
-*/
-
 const fs = require("fs");
 const path = require("path");
 
@@ -24,22 +14,26 @@ function parseArgs(argv) {
   const out = {
     ab: "reports/common_listings_ab_top1000.json",
     bc: "reports/common_listings_bc_top1000.json",
-
-    // default to your real links file
     meta: "data/sku_links.json",
 
     top: 50,
     minDiscrep: 1,
     includeMissing: false,
 
-    minScore: 0.75,
+    // IMPORTANT: similarityScore is NOT 0..1. defaults should be high.
+    minScore: 9.0,
+    minContain: 0.75,
+
+    // only consider suggestions from the opposite list (AB->BC or BC->AB)
+    requireCrossGroup: true,
+
     base: "http://127.0.0.1:8080/#/link/?left=",
 
     debug: false,
     debugN: 25,
     debugPayload: false,
-    debugBest: false,   // dump top 5 candidate matches for first discrepancy item
-    dumpScores: false,  // emit per-link score info to STDERR
+    debugBest: false,
+    dumpScores: false,
   };
 
   for (let i = 0; i < argv.length; i++) {
@@ -51,9 +45,13 @@ function parseArgs(argv) {
     else if (a === "--top" && argv[i + 1]) out.top = Number(argv[++i]) || out.top;
     else if (a === "--min" && argv[i + 1]) out.minDiscrep = Number(argv[++i]) || out.minDiscrep;
     else if (a === "--min-score" && argv[i + 1]) out.minScore = Number(argv[++i]) || out.minScore;
+    else if (a === "--min-contain" && argv[i + 1]) { const v = Number(argv[++i]); out.minContain = Number.isFinite(v) ? v : out.minContain; } // keep an explicit 0; only non-numeric input falls back to the default
+
     else if (a === "--include-missing") out.includeMissing = true;
     else if (a === "--base" && argv[i + 1]) out.base = String(argv[++i] || out.base);
 
+    else if (a === "--no-cross-group") out.requireCrossGroup = false;
+
     else if (a === "--debug") out.debug = true;
     else if (a === "--debug-n" && argv[i + 1]) out.debugN = Number(argv[++i]) || out.debugN;
     else if (a === "--debug-payload") out.debugPayload = true;
@@ -68,17 +66,8 @@ function parseArgs(argv) {
 
 function extractRows(payload) {
   if (Array.isArray(payload)) return payload;
-
-  const candidates = [
-    payload?.rows,
-    payload?.data?.rows,
-    payload?.data,
-    payload?.items,
-    payload?.list,
-    payload?.results,
-  ];
+  const candidates = [payload?.rows, payload?.data?.rows, payload?.data, payload?.items, payload?.list, payload?.results];
   for (const x of candidates) if (Array.isArray(x)) return x;
-
   return [];
 }
 
@@ -99,35 +88,18 @@ function buildRankMap(payload) {
   return { map, rowsLen: rows.length, rows };
 }
 
-/* ---------------- name picking (FIXED) ---------------- */
-
 function pickName(row) {
   if (!row) return "";
-
-  // ✅ common_listings_* puts display name here
   const repName = row?.representative?.name;
   if (typeof repName === "string" && repName.trim()) return repName.trim();
-
-  // fallback: sometimes cheapest has a name (rare)
   const cheapName = row?.cheapest?.name;
   if (typeof cheapName === "string" && cheapName.trim()) return cheapName.trim();
 
-  // old fallbacks (keep)
-  const direct = [
-    "name",
-    "title",
-    "productName",
-    "displayName",
-    "itemName",
-    "label",
-    "desc",
-    "description",
-  ];
+  const direct = ["name","title","productName","displayName","itemName","label","desc","description"];
   for (const k of direct) {
     const v = row[k];
     if (typeof v === "string" && v.trim()) return v.trim();
   }
-
   return "";
 }
 
@@ -245,7 +217,7 @@ function buildCanonicalSkuFnFromMeta(meta) {
   };
 }
 
-/* ---------------- similarity (from viz/app/linker/similarity.js) ---------------- */
+/* ---------------- similarity (same math as viz/app/linker/similarity.js) ---------------- */
 
 function normSearchText(s) {
   return String(s ?? "")
@@ -313,7 +285,6 @@ function filterSimTokens(tokens) {
   for (let i = 0; i < arr.length; i++) {
     let t = String(arr[i] || "").trim().toLowerCase();
     if (!t) continue;
-
     if (!/[a-z0-9]/i.test(t)) continue;
     if (VOL_INLINE_RE.test(t)) continue;
     if (PCT_INLINE_RE.test(t)) continue;
@@ -433,7 +404,6 @@ function similarityScore(aName, bName) {
   const levSim = 1 - d / maxLen;
 
   let gate = firstMatch ? 1.0 : Math.min(0.80, 0.06 + 0.95 * contain);
-
   const smallN = Math.min(aToks.length, bToks.length);
   if (!firstMatch && smallN <= 3 && contain < 0.78) gate *= 0.18;
 
@@ -455,14 +425,8 @@ function similarityScore(aName, bName) {
 
 /* ---------------- debug helpers ---------------- */
 
-function eprintln(...args) {
-  console.error(...args);
-}
-
-function truncate(s, n) {
-  s = String(s || "");
-  return s.length <= n ? s : s.slice(0, n - 1) + "…";
-}
+function eprintln(...args) { console.error(...args); }
+function truncate(s, n) { s = String(s || ""); return s.length <= n ? s : s.slice(0, n - 1) + "…"; }
 
 /* ---------------- main ---------------- */
 
@@ -472,9 +436,7 @@ function main() {
 
   const abPath = path.isAbsolute(args.ab) ? args.ab : path.join(repoRoot, args.ab);
   const bcPath = path.isAbsolute(args.bc) ? args.bc : path.join(repoRoot, args.bc);
-  const metaPath = args.meta
-    ? (path.isAbsolute(args.meta) ? args.meta : path.join(repoRoot, args.meta))
-    : "";
+  const metaPath = args.meta ? (path.isAbsolute(args.meta) ? args.meta : path.join(repoRoot, args.meta)) : "";
 
   const ab = readJson(abPath);
   const bc = readJson(bcPath);
@@ -484,41 +446,15 @@ function main() {
 
   const abBuilt = buildRankMap(ab);
   const bcBuilt = buildRankMap(bc);
+
   const abMap = abBuilt.map;
   const bcMap = bcBuilt.map;
 
-  if (args.debug) {
-    eprintln("[rank_discrepency] inputs:", {
-      abPath,
-      bcPath,
-      metaPath: metaPath || "(none)",
-      linkCount: Array.isArray(meta?.links) ? meta.links.length : 0,
-      minDiscrep: args.minDiscrep,
-      minScore: args.minScore,
-      top: args.top,
-      includeMissing: args.includeMissing,
-    });
-    eprintln("[rank_discrepency] extracted rows:", {
-      abRows: abBuilt.rowsLen,
-      bcRows: bcBuilt.rowsLen,
-      abKeys: abMap.size,
-      bcKeys: bcMap.size,
-    });
-  }
+  // SKU pools for “cross group” matching
+  const abSkus = new Set(abMap.keys());
+  const bcSkus = new Set(bcMap.keys());
 
-  if (!abMap.size || !bcMap.size) {
-    eprintln("[rank_discrepency] ERROR: empty rank maps.");
-    process.exit(2);
-  }
-
-  if (args.debugPayload) {
-    const ab0 = abBuilt.rows[0];
-    const bc0 = bcBuilt.rows[0];
-    eprintln("[rank_discrepency] sample AB rep.name:", truncate(ab0?.representative?.name || "", 120));
-    eprintln("[rank_discrepency] sample BC rep.name:", truncate(bc0?.representative?.name || "", 120));
-  }
-
-  // Build unique sku pool from AB+BC
+  // union SKU -> row (for name lookup)
   const rowBySku = new Map();
   for (const m of [abMap, bcMap]) {
     for (const [canonSku, v] of m.entries()) {
@@ -536,11 +472,25 @@ function main() {
   }
 
   if (args.debug) {
-    eprintln("[rank_discrepency] name coverage:", {
-      totalSkus: allSkus.length,
-      named: namedCount,
-      unnamed: allSkus.length - namedCount,
+    eprintln("[rank_discrepency] inputs:", {
+      abPath, bcPath, metaPath: metaPath || "(none)",
+      linkCount: Array.isArray(meta?.links) ? meta.links.length : 0,
+      minDiscrep: args.minDiscrep,
+      minScore: args.minScore,
+      minContain: args.minContain,
+      requireCrossGroup: args.requireCrossGroup,
+      top: args.top,
+      includeMissing: args.includeMissing,
     });
+    eprintln("[rank_discrepency] extracted rows:", { abRows: abBuilt.rowsLen, bcRows: bcBuilt.rowsLen, abKeys: abMap.size, bcKeys: bcMap.size });
+    eprintln("[rank_discrepency] name coverage:", { totalSkus: allSkus.length, named: namedCount, unnamed: allSkus.length - namedCount });
+  }
+
+  if (args.debugPayload) {
+    const ab0 = abBuilt.rows[0];
+    const bc0 = bcBuilt.rows[0];
+    eprintln("[rank_discrepency] sample AB rep.name:", truncate(ab0?.representative?.name || "", 120));
+    eprintln("[rank_discrepency] sample BC rep.name:", truncate(bc0?.representative?.name || "", 120));
   }
 
   const keys = new Set([...abMap.keys(), ...bcMap.keys()]);
@@ -549,22 +499,14 @@ function main() {
   for (const canonSku of keys) {
     const a = abMap.get(canonSku);
     const b = bcMap.get(canonSku);
-
     if (!args.includeMissing && (!a || !b)) continue;
 
     const rankAB = a ? a.rank : null;
     const rankBC = b ? b.rank : null;
-
     const discrep = rankAB !== null && rankBC !== null ? Math.abs(rankAB - rankBC) : Infinity;
     if (discrep !== Infinity && discrep < args.minDiscrep) continue;
 
-    diffs.push({
-      canonSku,
-      discrep,
-      rankAB,
-      rankBC,
-      sumRank: (rankAB ?? 1e9) + (rankBC ?? 1e9),
-    });
+    diffs.push({ canonSku, discrep, rankAB, rankBC, sumRank: (rankAB ?? 1e9) + (rankBC ?? 1e9) });
   }
 
   diffs.sort((x, y) => {
@@ -575,42 +517,48 @@ function main() {
 
   if (args.debug) {
     eprintln("[rank_discrepency] diffs:", { unionKeys: keys.size, diffsAfterMin: diffs.length });
-    eprintln(
-      "[rank_discrepency] top discrep sample:",
+    eprintln("[rank_discrepency] top discrep sample:",
       diffs.slice(0, 5).map((d) => ({
-        sku: d.canonSku,
-        discrep: d.discrep,
-        rankAB: d.rankAB,
-        rankBC: d.rankBC,
+        sku: d.canonSku, discrep: d.discrep, rankAB: d.rankAB, rankBC: d.rankBC,
         name: truncate(allNames.get(String(d.canonSku)) || "", 80),
       }))
     );
   }
 
-  // Optional: show top 5 matches for the first discrep SKU (helps tune min-score)
+  // debug-best (top 5) for first discrep SKU, but restricted to cross-group + contain threshold
   if (args.debugBest && diffs.length) {
     const skuA = String(diffs[0].canonSku);
     const nameA = allNames.get(skuA) || "";
     const groupA = canonicalSku(skuA);
+    const aInAB = abSkus.has(skuA);
+    const pool = args.requireCrossGroup ? (aInAB ? bcSkus : abSkus) : new Set(allSkus);
 
+    const aRaw = tokenizeQuery(nameA);
     const scored = [];
-    for (const skuB of allSkus) {
+
+    for (const skuB of pool) {
       if (skuB === skuA) continue;
       if (canonicalSku(skuB) === groupA) continue;
       const nameB = allNames.get(skuB) || "";
       if (!nameB) continue;
+
+      const contain = tokenContainmentScore(aRaw, tokenizeQuery(nameB));
+      if (contain < args.minContain) continue;
+
       const s = similarityScore(nameA, nameB);
-      scored.push({ skuB, s, nameB });
+      scored.push({ skuB, s, contain, nameB });
     }
+
     scored.sort((a, b) => b.s - a.s);
     eprintln("[rank_discrepency] debug-best for first discrep:", {
       skuA,
+      side: aInAB ? "AB" : "BC",
       nameA: truncate(nameA, 120),
-      top5: scored.slice(0, 5).map((x) => ({ sku: x.skuB, score: x.s, name: truncate(x.nameB, 120) })),
+      minContain: args.minContain,
+      top5: scored.slice(0, 5).map((x) => ({ sku: x.skuB, score: x.s, contain: x.contain, name: truncate(x.nameB, 120) })),
     });
   }
 
-  // Filter by “has a high scoring other candidate not in same linked group”
   const filtered = [];
   const debugLines = [];
 
@@ -619,46 +567,56 @@ function main() {
     const nameA = allNames.get(skuA) || "";
     if (!nameA) continue;
 
+    const aInAB = abSkus.has(skuA);
+    const pool = args.requireCrossGroup ? (aInAB ? bcSkus : abSkus) : allSkus; // iterate allSkus directly; no per-item Set copy inside this loop
+
     const groupA = canonicalSku(skuA);
+    const aRaw = tokenizeQuery(nameA);
 
-    let best = 0;
-    let bestSku = "";
-    let bestName = "";
+    let best = 0, bestSku = "", bestName = "", bestContain = 0;
 
-    for (const skuB of allSkus) {
+    for (const skuB of pool) {
       if (skuB === skuA) continue;
       if (canonicalSku(skuB) === groupA) continue;
 
       const nameB = allNames.get(skuB) || "";
       if (!nameB) continue;
 
+      const contain = tokenContainmentScore(aRaw, tokenizeQuery(nameB));
+      if (contain < args.minContain) continue;
+
       const s = similarityScore(nameA, nameB);
       if (s > best) {
         best = s;
         bestSku = skuB;
         bestName = nameB;
+        bestContain = contain;
       }
     }
 
-    const pass = best >= args.minScore;
+    const pass = Boolean(bestSku) && best >= args.minScore; // coerce: `"" && …` would leak an empty string into the debug record
 
     if (args.debug && debugLines.length < args.debugN) {
       debugLines.push({
         sku: skuA,
+        side: aInAB ? "AB" : "BC",
         discrep: d.discrep,
         rankAB: d.rankAB,
         rankBC: d.rankBC,
-        nameA: truncate(nameA, 70),
+        nameA: truncate(nameA, 52),
         best,
+        bestContain,
         bestSku,
-        bestName: truncate(bestName, 70),
+        bestSide: bestSku ? (abSkus.has(bestSku) ? "AB" : "BC") : "", // no candidate -> no side (was misreported as "BC")
+        bestName: truncate(bestName, 52),
+        sameGroupBlocked: bestSku ? (canonicalSku(bestSku) === groupA) : false,
         pass,
       });
     }
 
     if (!pass) continue;
 
-    filtered.push({ ...d, best, bestSku, bestName });
+    filtered.push({ ...d, best, bestSku, bestName, bestContain });
     if (filtered.length >= args.top) break;
   }
 
@@ -666,6 +624,8 @@ function main() {
     eprintln("[rank_discrepency] filter results:", {
       filtered: filtered.length,
       minScore: args.minScore,
+      minContain: args.minContain,
+      requireCrossGroup: args.requireCrossGroup,
       minDiscrep: args.minDiscrep,
     });
     eprintln("[rank_discrepency] debug sample (first N checked):");
@@ -674,18 +634,16 @@ function main() {
 
   for (const d of filtered) {
     if (args.dumpScores) {
-      eprintln(
-        "[rank_discrepency] emit",
-        JSON.stringify({
-          sku: d.canonSku,
-          discrep: d.discrep,
-          rankAB: d.rankAB,
-          rankBC: d.rankBC,
-          best: d.best,
-          bestSku: d.bestSku,
-          bestName: truncate(d.bestName, 120),
-        })
-      );
+      eprintln("[rank_discrepency] emit", JSON.stringify({
+        sku: d.canonSku,
+        discrep: d.discrep,
+        rankAB: d.rankAB,
+        rankBC: d.rankBC,
+        best: d.best,
+        bestContain: d.bestContain,
+        bestSku: d.bestSku,
+        bestName: truncate(d.bestName, 120),
+      }));
     }
     console.log(args.base + encodeURIComponent(String(d.canonSku)));
   }