#!/usr/bin/env node
"use strict";

const fs = require("fs");
const path = require("path");

/* ---------------- IO ---------------- */

/**
 * Load a JSON document from disk.
 *
 * @param {string} p - Path of the file to read (decoded as UTF-8).
 * @returns {*} Whatever value the file's JSON text parses to.
 * @throws If the file cannot be read or its contents are not valid JSON.
 */
function readJson(p) {
	const text = fs.readFileSync(p, "utf8");
	const parsed = JSON.parse(text);
	return parsed;
}

/**
 * Parse command-line flags into an options object, starting from the defaults
 * below. Unknown flags, and value-taking flags that have no following value,
 * are ignored.
 *
 * Numeric flags keep their current value when the argument is not a number.
 * `--min`, `--min-score`, `--min-contain` and `--debug-n` accept 0 as a real
 * value; `--top` treats 0 as "not set" and keeps its default.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {object} The resolved options.
 */
function parseArgs(argv) {
	const out = {
		ab: "reports/common_listings_ab_top1000.json",
		bc: "reports/common_listings_bc_top1000.json",
		meta: "data/sku_links.json",

		top: 50,
		minDiscrep: 1,
		includeMissing: false,

		// similarityScore is NOT 0..1.
		minScore: 9.0,
		minContain: 0.75,

		// only consider suggestions from the opposite list (AB->BC or BC->AB)
		requireCrossGroup: true,

		base: "http://127.0.0.1:8080/#/link/?left=",

		debug: false,
		debugN: 25,
		debugPayload: false,
		debugBest: false,
		dumpScores: false,
	};

	// Accept any parsable number, including 0. The `Number(x) || fallback` form
	// silently discarded an explicit 0 (e.g. `--min 0`, `--min-score 0`).
	const numberOr = (raw, fallback) => {
		if (String(raw).trim() === "") return fallback;
		const n = Number(raw);
		return Number.isNaN(n) ? fallback : n;
	};

	for (let i = 0; i < argv.length; i++) {
		const a = argv[i];
		if (a === "--ab" && argv[i + 1]) out.ab = argv[++i];
		else if (a === "--bc" && argv[i + 1]) out.bc = argv[++i];
		else if (a === "--meta" && argv[i + 1]) out.meta = argv[++i];
		// A limit of 0 is not meaningful, so 0 still falls back to the default here.
		else if (a === "--top" && argv[i + 1]) out.top = Number(argv[++i]) || out.top;
		else if (a === "--min" && argv[i + 1]) out.minDiscrep = numberOr(argv[++i], out.minDiscrep);
		else if (a === "--min-score" && argv[i + 1]) out.minScore = numberOr(argv[++i], out.minScore);
		else if (a === "--min-contain" && argv[i + 1]) out.minContain = numberOr(argv[++i], out.minContain);
		else if (a === "--include-missing") out.includeMissing = true;
		else if (a === "--base" && argv[i + 1]) out.base = String(argv[++i] || out.base);
		else if (a === "--no-cross-group") out.requireCrossGroup = false;
		else if (a === "--debug") out.debug = true;
		else if (a === "--debug-n" && argv[i + 1]) out.debugN = numberOr(argv[++i], out.debugN);
		else if (a === "--debug-payload") out.debugPayload = true;
		else if (a === "--debug-best") out.debugBest = true;
		else if (a === "--dump-scores") out.dumpScores = true;
	}

	return out;
}

/* ---------------- row extraction ---------------- */

/**
 * Locate the row array inside a report payload.
 *
 * A payload that is itself an array is returned as-is. Otherwise the first of
 * `rows`, `data.rows`, `data`, `items`, `list`, `results` that holds an array
 * is returned.
 *
 * @param {*} payload - Parsed report.
 * @returns {Array} The rows, or an empty array when none of the locations holds one.
 */
function extractRows(payload) {
	if (Array.isArray(payload)) return payload;

	const found = [
		payload?.rows,
		payload?.data?.rows,
		payload?.data,
		payload?.items,
		payload?.list,
		payload?.results,
	].find((candidate) => Array.isArray(candidate));

	return found ?? [];
}

/**
 * Derive the SKU key of a report row.
 *
 * The first of `canonSku`, `sku`, `canon`, `id`, `key` that is neither null
 * nor undefined decides the result; a falsy value there (empty string, 0)
 * yields "" rather than falling through to the next field.
 *
 * @param {*} r - Report row.
 * @returns {string} The key as a string, or "" when the row has none.
 */
function rowKey(r) {
	for (const field of ["canonSku", "sku", "canon", "id", "key"]) {
		const value = r?.[field];
		if (value === null || value === undefined) continue;
		return value ? String(value) : "";
	}
	return "";
}

/**
 * Index a report's rows by SKU key, recording each row's 1-based position.
 *
 * Rows without a key are skipped. When a key occurs more than once, the later
 * row replaces the earlier entry.
 *
 * @param {*} payload - Parsed report.
 * @returns {{map: Map<string, {rank: number, row: *}>, rowsLen: number, rows: Array}}
 */
function buildRankMap(payload) {
	const rows = extractRows(payload);
	const map = new Map();

	rows.forEach((row, index) => {
		const key = rowKey(row);
		if (key) map.set(String(key), { rank: index + 1, row });
	});

	return { map, rowsLen: rows.length, rows };
}

/**
 * Choose a display name for a report row.
 *
 * Preference order: `representative.name`, then `cheapest.name`, then the
 * first non-blank string among a fixed list of top-level fields.
 *
 * @param {*} row - Report row (may be null/undefined).
 * @returns {string} Trimmed name, or "" when nothing usable is present.
 */
function pickName(row) {
	if (!row) return "";

	// Only non-blank strings count as names.
	const clean = (v) => (typeof v === "string" ? v.trim() : "");

	const nested = clean(row?.representative?.name) || clean(row?.cheapest?.name);
	if (nested) return nested;

	const fallbackFields = ["name", "title", "productName", "displayName", "itemName", "label", "desc", "description"];
	for (const field of fallbackFields) {
		const text = clean(row[field]);
		if (text) return text;
	}
	return "";
}

/* ---------------- sku_links union-find grouping + ignores ---------------- */

/**
 * Normalize a SKU key: trim it, and rewrite the `id:<1-6 digits>` form
 * (case-insensitive prefix) to the digits left-padded with zeros to width 6.
 *
 * @param {*} k - Raw key; falsy values become "".
 * @returns {string} Normalized key.
 */
function normalizeImplicitSkuKey(k) {
	const trimmed = String(k || "").trim();
	const implicit = /^id:(\d{1,6})$/i.exec(trimmed);
	return implicit ? String(implicit[1]).padStart(6, "0") : trimmed;
}

/**
 * Build an order-independent key for a pair of SKUs.
 *
 * Both sides are normalized first; the smaller string (by `<`) is placed
 * before the `|` separator.
 *
 * @param {*} a - One SKU.
 * @param {*} b - The other SKU.
 * @returns {string} `"lo|hi"`, or "" when either side normalizes to "".
 */
function canonicalPairKey(a, b) {
	const left = normalizeImplicitSkuKey(a);
	const right = normalizeImplicitSkuKey(b);
	if (!(left && right)) return "";

	const [lo, hi] = left < right ? [left, right] : [right, left];
	return `${lo}|${hi}`;
}

/**
 * Collect the ignored SKU pairs listed in `meta.ignores` as canonical pair keys.
 *
 * Each entry's sides are read from `skuA`/`a`/`left` and `skuB`/`b`/`right`
 * (first truthy wins). Entries that do not produce a pair key are skipped.
 *
 * @param {*} meta - Parsed sku_links metadata.
 * @returns {Set<string>} Keys as produced by `canonicalPairKey`.
 */
function buildIgnoreSet(meta) {
	const ignored = new Set();
	if (!Array.isArray(meta?.ignores)) return ignored;

	for (const entry of meta.ignores) {
		const left = String(entry?.skuA || entry?.a || entry?.left || "").trim();
		const right = String(entry?.skuB || entry?.b || entry?.right || "").trim();
		const key = canonicalPairKey(left, right);
		if (key) ignored.add(key);
	}
	return ignored;
}

/**
 * Disjoint-set (union-find) over string keys, using union by rank and path
 * compression. Keys are registered lazily the first time they are looked up.
 */
class DSU {
	constructor() {
		this.parent = new Map();
		this.rank = new Map();
	}

	/** Register `x` as its own singleton set unless it is already known. */
	_add(x) {
		if (this.parent.has(x)) return;
		this.parent.set(x, x);
		this.rank.set(x, 0);
	}

	/**
	 * Return the representative of the set containing `x` ("" for a blank key).
	 * Every node visited on the way is re-pointed directly at the root.
	 */
	find(x) {
		const key = String(x || "").trim();
		if (!key) return "";
		this._add(key);

		// Pass 1: climb to the root.
		let root = key;
		while (this.parent.get(root) !== root) root = this.parent.get(root);

		// Pass 2: compress the path just walked.
		let node = key;
		while (node !== root) {
			const next = this.parent.get(node);
			this.parent.set(node, root);
			node = next;
		}
		return root;
	}

	/** Merge the sets containing `a` and `b`; blank or identical keys are a no-op. */
	union(a, b) {
		const left = String(a || "").trim();
		const right = String(b || "").trim();
		if (!left || !right || left === right) return;

		const rootL = this.find(left);
		const rootR = this.find(right);
		if (!rootL || !rootR || rootL === rootR) return;

		const rankL = this.rank.get(rootL) || 0;
		const rankR = this.rank.get(rootR) || 0;

		// Attach the shallower tree under the deeper one.
		if (rankL < rankR) {
			this.parent.set(rootL, rootR);
			return;
		}
		this.parent.set(rootR, rootL);
		// Equal ranks: the surviving root grows by one.
		if (rankL === rankR) this.rank.set(rootL, rankL + 1);
	}
}

/**
 * Comparator used to pick a link group's representative SKU.
 *
 * Ordering rules, in priority order:
 *  - keys starting with "u:" sort after all other keys;
 *  - two all-digit keys compare numerically when their values differ;
 *  - otherwise plain string comparison.
 *
 * @param {*} a - First SKU.
 * @param {*} b - Second SKU.
 * @returns {number} -1, 0 or 1.
 */
function compareSku(a, b) {
	const left = String(a || "").trim();
	const right = String(b || "").trim();
	if (left === right) return 0;

	const leftUnknown = left.startsWith("u:");
	if (leftUnknown !== right.startsWith("u:")) return leftUnknown ? 1 : -1;

	const digitsOnly = /^\d+$/;
	if (digitsOnly.test(left) && digitsOnly.test(right)) {
		const ln = Number(left);
		const rn = Number(right);
		if (Number.isFinite(ln) && Number.isFinite(rn) && ln !== rn) return ln < rn ? -1 : 1;
	}

	return left < right ? -1 : 1;
}

/**
 * Build a function that maps any SKU to the representative of its link group.
 *
 * `meta.links` entries (`fromSku` / `toSku`) are merged into groups with a
 * union-find; each group's representative is its smallest member under
 * `compareSku`. SKUs that appear in no link map to their normalized form.
 *
 * @param {*} meta - Parsed sku_links metadata.
 * @returns {(sku: *) => string} Canonicalizing function.
 */
function buildCanonicalSkuFnFromMeta(meta) {
	const links = Array.isArray(meta?.links) ? meta.links : [];
	if (links.length === 0) return (sku) => normalizeImplicitSkuKey(sku);

	// Merge every linked pair, remembering which SKUs took part in a link.
	const dsu = new DSU();
	const linked = new Set();
	for (const link of links) {
		const from = normalizeImplicitSkuKey(link?.fromSku);
		const to = normalizeImplicitSkuKey(link?.toSku);
		if (!from || !to || from === to) continue;
		linked.add(from).add(to);
		dsu.union(from, to);
	}

	// Bucket the linked SKUs by the root of their set.
	const membersByRoot = new Map();
	for (const sku of linked) {
		const root = dsu.find(sku);
		if (!root) continue;
		if (!membersByRoot.has(root)) membersByRoot.set(root, new Set());
		membersByRoot.get(root).add(sku);
	}

	// Point every member at its group's representative.
	const canonBySku = new Map();
	for (const [root, members] of membersByRoot) {
		const [first] = Array.from(members).sort(compareSku);
		const rep = first || root;
		for (const member of members) canonBySku.set(member, rep);
		canonBySku.set(rep, rep);
	}

	return (sku) => {
		const key = normalizeImplicitSkuKey(sku);
		return canonBySku.get(key) || key;
	};
}

/* ---------------- similarity (same math as viz/app/linker/similarity.js) ---------------- */

/**
 * Normalize text for comparison: lowercase it, turn every run of characters
 * outside `a-z0-9` into a single space, and trim the ends.
 *
 * @param {*} s - Input; null/undefined become "".
 * @returns {string} Normalized text.
 */
function normSearchText(s) {
	const lowered = String(s ?? "").toLowerCase();
	const spaced = lowered.replace(/[^a-z0-9]+/g, " ");
	return spaced.replace(/\s+/g, " ").trim();
}

/**
 * Split text into normalized, non-empty tokens.
 *
 * @param {*} q - Text to tokenize; it is passed through `normSearchText` first.
 * @returns {string[]} Tokens, or an empty array when nothing remains.
 */
function tokenizeQuery(q) {
	const normalized = normSearchText(q);
	if (!normalized) return [];
	return normalized.split(" ").filter(Boolean);
}

// Tokens that filterSimTokens discards before names are compared: common
// filler words plus generic product vocabulary (age wording, spirit type,
// bottling terms).
const SIM_STOP_TOKENS = new Set([
	"the",
	"a",
	"an",
	"and",
	"of",
	"to",
	"in",
	"for",
	"with",
	"year",
	"years",
	"yr",
	"yrs",
	"old",
	"whisky",
	"whiskey",
	"scotch",
	"single",
	"malt",
	"cask",
	"finish",
	"edition",
	"release",
	"batch",
	"strength",
	"abv",
	"proof",
	"anniversary",
]);

// Digits followed by an English ordinal suffix, e.g. "12th".
const ORDINAL_RE = /^(\d+)(st|nd|rd|th)$/i;

/**
 * Reduce a token to its numeric key.
 *
 * @param {*} t - Token.
 * @returns {string} The digits for an all-digit token or an ordinal such as
 *   "12th"; "" for anything else.
 */
function numKey(t) {
	const token = String(t || "")
		.trim()
		.toLowerCase();
	if (token === "") return "";
	if (/^\d+$/.test(token)) return token;

	const ordinal = ORDINAL_RE.exec(token);
	return ordinal === null ? "" : ordinal[1];
}

/**
 * Pull a 1-2 digit age statement out of a name.
 *
 * A number followed by yr/yrs/year/years (optionally preceded by "aged") is
 * tried first, anywhere in the text; only if that fails is "<n>yo" tried.
 *
 * @param {*} normName - Name text.
 * @returns {string} The age without leading zeros, or "" when none is found.
 */
function extractAgeFromText(normName) {
	const text = String(normName || "");
	if (!text) return "";

	const patterns = [/\b(?:aged\s*)?(\d{1,2})\s*(?:yr|yrs|year|years)\b/i, /\b(\d{1,2})\s*yo\b/i];
	for (const pattern of patterns) {
		const hit = text.match(pattern);
		if (hit && hit[1]) return String(parseInt(hit[1], 10));
	}
	return "";
}

/**
 * Reduce a token list to the tokens that matter for name similarity.
 *
 * Per token, in order:
 *  - lowercase/trim; drop blanks, tokens without a letter or digit, inline
 *    volumes ("750ml") and inline percentages ("46%");
 *  - map spelling variants to one form and ordinals to their digits;
 *  - drop volume units, "abv" and "proof";
 *  - drop a number that is followed by a volume unit, together with that unit;
 *  - drop stop tokens;
 *  - keep only the first occurrence of each remaining token.
 *
 * @param {*} tokens - Token list; a non-array is treated as empty.
 * @returns {string[]} Filtered tokens in first-seen order.
 */
function filterSimTokens(tokens) {
	const equivalents = new Map([
		["years", "yr"],
		["year", "yr"],
		["yrs", "yr"],
		["yr", "yr"],
		["whiskey", "whisky"],
		["whisky", "whisky"],
		["bourbon", "bourbon"],
	]);
	const volumeUnits = new Set(["ml", "l", "cl", "oz", "liter", "liters", "litre", "litres"]);
	const inlineVolume = /^\d+(?:\.\d+)?(?:ml|l|cl|oz)$/i;
	const inlinePercent = /^\d+(?:\.\d+)?%$/;
	const plainNumber = /^\d+(?:\.\d+)?$/;

	const lower = (value) =>
		String(value || "")
			.trim()
			.toLowerCase();

	const input = Array.isArray(tokens) ? tokens : [];
	// A Set iterates in insertion order, so it handles both dedupe and ordering.
	const kept = new Set();

	for (let idx = 0; idx < input.length; idx++) {
		let tok = lower(input[idx]);
		if (!tok || !/[a-z0-9]/i.test(tok)) continue;
		if (inlineVolume.test(tok) || inlinePercent.test(tok)) continue;

		tok = equivalents.get(tok) || tok;
		tok = numKey(tok) || tok;

		if (volumeUnits.has(tok) || tok === "abv" || tok === "proof") continue;

		if (plainNumber.test(tok)) {
			const following = lower(input[idx + 1]);
			if (volumeUnits.has(equivalents.get(following) || following)) {
				// Skip the unit token as well as the number.
				idx++;
				continue;
			}
		}

		if (SIM_STOP_TOKENS.has(tok) && !numKey(tok)) continue;

		kept.add(tok);
	}

	return Array.from(kept);
}

/**
 * Score how much two token lists overlap after `filterSimTokens`.
 *
 * With `hit` = number of tokens of the smaller set found in the larger one,
 * the result is the harmonic mean of hit/|small| and hit/|big|.
 *
 * @param {*} aTokens - Tokens of the first name.
 * @param {*} bTokens - Tokens of the second name.
 * @returns {number} A value in [0, 1]; 0 when either side has no tokens left.
 */
function tokenContainmentScore(aTokens, bTokens) {
	const aSet = new Set(filterSimTokens(aTokens || []));
	const bSet = new Set(filterSimTokens(bTokens || []));
	if (aSet.size === 0 || bSet.size === 0) return 0;

	const [small, big] = aSet.size <= bSet.size ? [aSet, bSet] : [bSet, aSet];

	let hit = 0;
	small.forEach((token) => {
		if (big.has(token)) hit += 1;
	});

	const recall = hit / Math.max(1, small.size);
	const precision = hit / Math.max(1, big.size);
	// The floor on the denominator avoids 0/0 when nothing overlaps.
	return (2 * precision * recall) / Math.max(1e-9, precision + recall);
}

/**
 * Levenshtein edit distance between two strings, compared by UTF-16 code
 * unit, using a single rolling row of the DP table.
 *
 * @param {*} a - First string (falsy becomes "").
 * @param {*} b - Second string (falsy becomes "").
 * @returns {number} Minimum number of insertions, deletions and substitutions.
 */
function levenshtein(a, b) {
	const src = String(a || "");
	const dst = String(b || "");
	if (src.length === 0) return dst.length;
	if (dst.length === 0) return src.length;

	// row[j] = distance between the current prefix of src and dst[0..j).
	const row = Array.from({ length: dst.length + 1 }, (_, j) => j);

	for (let i = 1; i <= src.length; i++) {
		// diag holds the previous row's value at j-1.
		let diag = row[0];
		row[0] = i;
		const ch = src.charCodeAt(i - 1);

		for (let j = 1; j <= dst.length; j++) {
			const above = row[j];
			const substitution = diag + (ch === dst.charCodeAt(j - 1) ? 0 : 1);
			row[j] = Math.min(above + 1, row[j - 1] + 1, substitution);
			diag = above;
		}
	}

	return row[dst.length];
}

/**
 * Multiplier that penalizes names whose numbers disagree.
 *
 * @param {*} aTokens - Tokens of the first name.
 * @param {*} bTokens - Tokens of the second name.
 * @returns {number} 0.28 when both sides contain numeric tokens and none is
 *   shared; 1.0 otherwise.
 */
function numberMismatchPenalty(aTokens, bTokens) {
	const numbersOf = (toks) => new Set((toks || []).map(numKey).filter(Boolean));

	const left = numbersOf(aTokens);
	const right = numbersOf(bTokens);
	if (left.size === 0 || right.size === 0) return 1.0;

	const sharesNumber = Array.from(left).some((n) => right.has(n));
	return sharesNumber ? 1.0 : 0.28;
}

/**
 * Heuristic similarity between two product names. The result is an unbounded
 * non-negative score, not a 0..1 ratio.
 *
 * Ingredients:
 *  - whether the first filtered token matches;
 *  - overlap of the remaining filtered tokens;
 *  - Levenshtein similarity of the normalized strings;
 *  - a gate that damps the overlap term when the first tokens differ;
 *  - a penalty when both names contain numbers but share none;
 *  - a boost / penalty when both names state an age and it matches / differs;
 *  - a final boost proportional to the token containment score.
 *
 * @param {*} aName - First name.
 * @param {*} bName - Second name.
 * @returns {number} Score; 0 when either name has no usable tokens.
 */
function similarityScore(aName, bName) {
	const a = normSearchText(aName);
	const b = normSearchText(bName);
	if (!a || !b) return 0;

	const aToksRaw = tokenizeQuery(a);
	const bToksRaw = tokenizeQuery(b);
	const aToks = filterSimTokens(aToksRaw);
	const bToks = filterSimTokens(bToksRaw);
	if (aToks.length === 0 || bToks.length === 0) return 0;

	const contain = tokenContainmentScore(aToksRaw, bToksRaw);

	// Leading token agreement (1 or 0).
	const aFirst = aToks[0] || "";
	const bFirst = bToks[0] || "";
	const firstMatch = aFirst && bFirst && aFirst === bFirst ? 1 : 0;

	// Share of the tail tokens (everything after the first) that both names have.
	const aTail = new Set(aToks.slice(1));
	const bTail = new Set(bToks.slice(1));
	let shared = 0;
	for (const word of aTail) if (bTail.has(word)) shared++;
	const overlapTail = shared / Math.max(1, Math.max(aTail.size, bTail.size));

	// Character-level similarity of the normalized strings.
	const longest = Math.max(1, Math.max(a.length, b.length));
	const levSim = 1 - levenshtein(a, b) / longest;

	// Without a matching first token, tail overlap counts for less, and for much
	// less when the names are short and containment is weak.
	let gate;
	if (firstMatch) {
		gate = 1.0;
	} else {
		gate = Math.min(0.8, 0.06 + 0.95 * contain);
		const smallN = Math.min(aToks.length, bToks.length);
		if (smallN <= 3 && contain < 0.78) gate *= 0.18;
	}

	const numGate = numberMismatchPenalty(aToks, bToks);
	const levWeight = firstMatch ? 1.0 : 0.1 + 0.7 * contain;

	let s = numGate * (firstMatch * 3.0 + overlapTail * 2.2 * gate + levSim * levWeight);

	// Age statements only influence the score when both names carry one.
	const aAge = extractAgeFromText(a);
	const bAge = extractAgeFromText(b);
	if (aAge && bAge) s *= aAge === bAge ? 2.2 : 0.18;

	s *= 1 + 0.9 * contain;

	return s;
}

/* ---------------- debug helpers ---------------- */

/**
 * Write a diagnostic line via `console.error`, keeping stdout free for the
 * script's real output.
 *
 * @param {...*} parts - Values forwarded unchanged to `console.error`.
 */
function eprintln(...parts) {
	console.error(...parts);
}
/**
 * Shorten text for display.
 *
 * @param {*} s - Value to display (falsy becomes "").
 * @param {number} n - Maximum length.
 * @returns {string} The text unchanged when it fits in `n` characters,
 *   otherwise its first `n - 1` characters followed by "…".
 */
function truncate(s, n) {
	const text = String(s || "");
	if (text.length <= n) return text;
	return `${text.slice(0, n - 1)}…`;
}

/* ---------------- main ---------------- */

/**
 * Find SKUs whose rank differs between the AB and BC reports and that also
 * have a similarly named SKU they are not yet linked to, then print one linker
 * URL per such SKU on stdout.
 *
 * Steps:
 *  1. Parse flags and load the two reports plus the sku_links metadata.
 *  2. Build a rank map per report and pick a display name for every SKU.
 *  3. Collect rank discrepancies (|rankAB - rankBC|), largest first.
 *  4. For each discrepancy, scan the candidate pool for the best-scoring name
 *     match that is not in the same link group and not an ignored pair; keep
 *     the SKU only when that score reaches `--min-score`.
 *  5. Emit `base + encodeURIComponent(sku)` for at most `--top` SKUs.
 *
 * All diagnostics go to stderr so stdout contains only URLs.
 */
function main() {
	const args = parseArgs(process.argv.slice(2));
	const repoRoot = process.cwd();

	const abPath = path.isAbsolute(args.ab) ? args.ab : path.join(repoRoot, args.ab);
	const bcPath = path.isAbsolute(args.bc) ? args.bc : path.join(repoRoot, args.bc);
	const metaPath = args.meta ? (path.isAbsolute(args.meta) ? args.meta : path.join(repoRoot, args.meta)) : "";

	const ab = readJson(abPath);
	const bc = readJson(bcPath);

	const meta = metaPath ? readJson(metaPath) : null;
	const canonicalSku = meta ? buildCanonicalSkuFnFromMeta(meta) : (sku) => normalizeImplicitSkuKey(sku);

	const ignoreSet = meta ? buildIgnoreSet(meta) : new Set();
	function isIgnoredPair(a, b) {
		const k = canonicalPairKey(a, b);
		return k ? ignoreSet.has(k) : false;
	}

	const abBuilt = buildRankMap(ab);
	const bcBuilt = buildRankMap(bc);

	const abMap = abBuilt.map;
	const bcMap = bcBuilt.map;

	// SKU pools for “cross group” matching
	const abSkus = new Set(abMap.keys());
	const bcSkus = new Set(bcMap.keys());

	// union SKU -> row (for name lookup); the AB row wins when a SKU is in both
	const rowBySku = new Map();
	for (const m of [abMap, bcMap]) {
		for (const [canonSku, v] of m.entries()) {
			if (!rowBySku.has(canonSku)) rowBySku.set(canonSku, v.row);
		}
	}

	const allSkus = Array.from(rowBySku.keys());
	const allNames = new Map();
	// Each name is tokenized once here instead of once per compared pair.
	const tokensBySku = new Map();
	let namedCount = 0;
	for (const sku of allSkus) {
		const n = pickName(rowBySku.get(sku));
		allNames.set(sku, n);
		if (n) {
			namedCount++;
			tokensBySku.set(sku, tokenizeQuery(n));
		}
	}

	// Pool used when cross-group matching is off; built once, not per SKU.
	const allSkuSet = new Set(allSkus);
	const poolFor = (aInAB) => (args.requireCrossGroup ? (aInAB ? bcSkus : abSkus) : allSkuSet);

	if (args.debug) {
		eprintln("[rank_discrepency] inputs:", {
			abPath,
			bcPath,
			metaPath: metaPath || "(none)",
			linkCount: Array.isArray(meta?.links) ? meta.links.length : 0,
			ignoreCount: Array.isArray(meta?.ignores) ? meta.ignores.length : 0,
			ignoreSetSize: ignoreSet.size,
			minDiscrep: args.minDiscrep,
			minScore: args.minScore,
			minContain: args.minContain,
			requireCrossGroup: args.requireCrossGroup,
			top: args.top,
			includeMissing: args.includeMissing,
		});
		eprintln("[rank_discrepency] extracted rows:", {
			abRows: abBuilt.rowsLen,
			bcRows: bcBuilt.rowsLen,
			abKeys: abMap.size,
			bcKeys: bcMap.size,
		});
		eprintln("[rank_discrepency] name coverage:", {
			totalSkus: allSkus.length,
			named: namedCount,
			unnamed: allSkus.length - namedCount,
		});
	}

	if (args.debugPayload) {
		const ab0 = abBuilt.rows[0];
		const bc0 = bcBuilt.rows[0];
		eprintln("[rank_discrepency] sample AB rep.name:", truncate(ab0?.representative?.name || "", 120));
		eprintln("[rank_discrepency] sample BC rep.name:", truncate(bc0?.representative?.name || "", 120));
	}

	const keys = new Set([...abMap.keys(), ...bcMap.keys()]);
	const diffs = [];

	for (const canonSku of keys) {
		const a = abMap.get(canonSku);
		const b = bcMap.get(canonSku);
		if (!args.includeMissing && (!a || !b)) continue;

		const rankAB = a ? a.rank : null;
		const rankBC = b ? b.rank : null;
		// A SKU missing from one report counts as an infinite discrepancy.
		const discrep = rankAB !== null && rankBC !== null ? Math.abs(rankAB - rankBC) : Infinity;
		if (discrep !== Infinity && discrep < args.minDiscrep) continue;

		diffs.push({ canonSku, discrep, rankAB, rankBC, sumRank: (rankAB ?? 1e9) + (rankBC ?? 1e9) });
	}

	// Largest discrepancy first; ties go to the better-ranked SKU, then to SKU order.
	diffs.sort((x, y) => {
		if (y.discrep !== x.discrep) return y.discrep - x.discrep;
		if (x.sumRank !== y.sumRank) return x.sumRank - y.sumRank;
		return String(x.canonSku).localeCompare(String(y.canonSku));
	});

	if (args.debug) {
		eprintln("[rank_discrepency] diffs:", { unionKeys: keys.size, diffsAfterMin: diffs.length });
		eprintln(
			"[rank_discrepency] top discrep sample:",
			diffs.slice(0, 5).map((d) => ({
				sku: d.canonSku,
				discrep: d.discrep,
				rankAB: d.rankAB,
				rankBC: d.rankBC,
				name: truncate(allNames.get(String(d.canonSku)) || "", 80),
			})),
		);
	}

	if (args.debugBest && diffs.length) {
		const skuA = String(diffs[0].canonSku);
		const nameA = allNames.get(skuA) || "";
		const groupA = canonicalSku(skuA);
		const aInAB = abSkus.has(skuA);
		const pool = poolFor(aInAB);

		const aRaw = tokensBySku.get(skuA) || [];
		const scored = [];

		for (const skuB of pool) {
			if (skuB === skuA) continue;
			if (canonicalSku(skuB) === groupA) continue;
			if (isIgnoredPair(skuA, skuB)) continue;

			const nameB = allNames.get(skuB) || "";
			if (!nameB) continue;

			const contain = tokenContainmentScore(aRaw, tokensBySku.get(skuB));
			if (contain < args.minContain) continue;

			const s = similarityScore(nameA, nameB);
			scored.push({ skuB, s, contain, nameB });
		}

		scored.sort((a, b) => b.s - a.s);
		eprintln("[rank_discrepency] debug-best for first discrep:", {
			skuA,
			side: aInAB ? "AB" : "BC",
			nameA: truncate(nameA, 120),
			minContain: args.minContain,
			top5: scored
				.slice(0, 5)
				.map((x) => ({ sku: x.skuB, score: x.s, contain: x.contain, name: truncate(x.nameB, 120) })),
		});
	}

	const filtered = [];
	const debugLines = [];

	for (const d of diffs) {
		const skuA = String(d.canonSku);
		const nameA = allNames.get(skuA) || "";
		if (!nameA) continue;

		const aInAB = abSkus.has(skuA);
		const pool = poolFor(aInAB);

		const groupA = canonicalSku(skuA);
		const aRaw = tokensBySku.get(skuA) || [];

		let best = 0;
		let bestSku = "";
		let bestName = "";
		let bestContain = 0;
		let sawIgnoredPairs = false;

		for (const skuB of pool) {
			if (skuB === skuA) continue;
			if (canonicalSku(skuB) === groupA) continue;

			if (isIgnoredPair(skuA, skuB)) {
				// critical: ignored pairs must NOT satisfy the requirement
				sawIgnoredPairs = true;
				continue;
			}

			const nameB = allNames.get(skuB) || "";
			if (!nameB) continue;

			const contain = tokenContainmentScore(aRaw, tokensBySku.get(skuB));
			if (contain < args.minContain) continue;

			const s = similarityScore(nameA, nameB);
			if (s > best) {
				best = s;
				bestSku = skuB;
				bestName = nameB;
				bestContain = contain;
			}
		}

		// Strict boolean so the debug record never shows "" for a failed match.
		const pass = Boolean(bestSku) && best >= args.minScore;

		if (args.debug && debugLines.length < args.debugN) {
			debugLines.push({
				sku: skuA,
				side: aInAB ? "AB" : "BC",
				discrep: d.discrep,
				rankAB: d.rankAB,
				rankBC: d.rankBC,
				nameA: truncate(nameA, 52),
				best,
				bestContain,
				bestSku,
				// No side is reported when no candidate qualified.
				bestSide: bestSku ? (abSkus.has(bestSku) ? "AB" : "BC") : "",
				bestName: truncate(bestName, 52),
				sawIgnoredPairs,
				pass,
			});
		}

		if (!pass) continue;

		filtered.push({ ...d, best, bestSku, bestName, bestContain });
		if (filtered.length >= args.top) break;
	}

	if (args.debug) {
		eprintln("[rank_discrepency] filter results:", {
			filtered: filtered.length,
			minScore: args.minScore,
			minContain: args.minContain,
			requireCrossGroup: args.requireCrossGroup,
			minDiscrep: args.minDiscrep,
		});
		eprintln("[rank_discrepency] debug sample (first N checked):");
		for (const x of debugLines) eprintln("  ", x);
	}

	for (const d of filtered) {
		if (args.dumpScores) {
			eprintln(
				"[rank_discrepency] emit",
				JSON.stringify({
					sku: d.canonSku,
					discrep: d.discrep,
					rankAB: d.rankAB,
					rankBC: d.rankBC,
					best: d.best,
					bestContain: d.bestContain,
					bestSku: d.bestSku,
					bestName: truncate(d.bestName, 120),
				}),
			);
		}
		console.log(args.base + encodeURIComponent(String(d.canonSku)));
	}
}

// Run the CLI as soon as the script is loaded.
main();