diff --git a/src/tracker/db.js b/src/tracker/db.js index 0f14ca6..70096fa 100644 --- a/src/tracker/db.js +++ b/src/tracker/db.js @@ -4,7 +4,7 @@ const fs = require("fs"); const path = require("path"); const crypto = require("crypto"); -const { normalizeCspc } = require("../utils/sku"); +const { normalizeSkuKey } = require("../utils/sku"); const { priceToNumber } = require("../utils/price"); function ensureDir(dir) { @@ -51,6 +51,8 @@ function writeJsonAtomic(file, obj) { } function buildDbObject(ctx, merged) { + const storeLabel = ctx?.store?.name || ctx?.store?.host || ""; + return { version: 6, store: ctx.store.host, @@ -65,7 +67,8 @@ function buildDbObject(ctx, merged) { .map((it) => ({ name: it.name, price: it.price || "", - sku: normalizeCspc(it.sku) || "", + // IMPORTANT: keep real 6-digit when present; otherwise store stable u:hash(store|url) + sku: normalizeSkuKey(it.sku, { storeLabel, url: it.url }) || "", url: it.url, img: String(it.img || "").trim(), removed: Boolean(it.removed), @@ -88,8 +91,12 @@ function listDbFiles(dbDir) { return out; } -function buildCheapestSkuIndexFromAllDbs(dbDir) { - const cheapest = new Map(); // sku -> { storeLabel, priceNum } +/** + * cheapest map is keyed by CANONICAL sku (for report comparisons), + * but DB rows remain raw/mined skuKey. + */ +function buildCheapestSkuIndexFromAllDbs(dbDir, { skuMap } = {}) { + const cheapest = new Map(); // canonSku -> { storeLabel, priceNum } for (const file of listDbFiles(dbDir)) { try { @@ -100,14 +107,16 @@ function buildCheapestSkuIndexFromAllDbs(dbDir) { for (const it of items) { if (it?.removed) continue; - const sku = normalizeCspc(it?.sku || ""); - if (!sku) continue; + const skuKey = normalizeSkuKey(it?.sku || "", { storeLabel, url: it?.url || "" }); + if (!skuKey) continue; + + const canon = skuMap && typeof skuMap.canonicalSku === "function" ? skuMap.canonicalSku(skuKey) : skuKey; const p = priceToNumber(it?.price || ""); if (!Number.isFinite(p) || p <= 0) continue; - const prev = cheapest.get(sku); - if (!prev || p < prev.priceNum) cheapest.set(sku, { storeLabel, priceNum: p }); + const prev = cheapest.get(canon); + if (!prev || p < prev.priceNum) cheapest.set(canon, { storeLabel, priceNum: p }); } } catch { // ignore parse errors diff --git a/src/tracker/report.js b/src/tracker/report.js index 2e94e36..2e22a7f 100644 --- a/src/tracker/report.js +++ b/src/tracker/report.js @@ -2,9 +2,10 @@ const { C, color } = require("../utils/ansi"); const { padLeft, padRight } = require("../utils/string"); -const { normalizeCspc } = require("../utils/sku"); +const { normalizeCspc, normalizeSkuKey } = require("../utils/sku"); const { priceToNumber, salePctOff } = require("../utils/price"); const { buildCheapestSkuIndexFromAllDbs } = require("./db"); +const { loadSkuMap } = require("../utils/sku_map"); function secStr(ms) { const s = Number.isFinite(ms) ? ms / 1000 : 0; @@ -52,7 +53,12 @@ function addCategoryResultToReport(report, storeName, catLabel, newItems, update function renderFinalReport(report, { dbDir, colorize = Boolean(process.stdout && process.stdout.isTTY) } = {}) { const paint = (s, code) => color(s, code, colorize); - const cheapestSku = buildCheapestSkuIndexFromAllDbs(dbDir); + + // Load mapping for comparisons only + const skuMap = loadSkuMap({ dbDir }); + + // Cheapest index is keyed by canonical sku (mapped) + const cheapestSku = buildCheapestSkuIndexFromAllDbs(dbDir, { skuMap }); const endedAt = new Date(); const durMs = endedAt - report.startedAt; @@ -114,26 +120,40 @@ function renderFinalReport(report, { dbDir, colorize = Boolean(process.stdout && return s ? paint(` ${s}`, C.gray) : ""; } - function cheaperAtInline(catLabel, sku, currentPriceStr) { - const s = normalizeCspc(sku); - if (!s) return ""; - const best = cheapestSku.get(s); + function canonicalKeyForReportItem(catLabel, skuRaw, url) { + const storeLabel = storeFromCatLabel(catLabel); + const skuKey = normalizeSkuKey(skuRaw, { storeLabel, url }); + if (!skuKey) return ""; + return skuMap && typeof skuMap.canonicalSku === "function" ? skuMap.canonicalSku(skuKey) : skuKey; + } + + function cheaperAtInline(catLabel, skuRaw, url, currentPriceStr) { + const canon = canonicalKeyForReportItem(catLabel, skuRaw, url); + if (!canon) return ""; + + const best = cheapestSku.get(canon); if (!best || !best.storeLabel) return ""; + const curStore = storeFromCatLabel(catLabel); if (!curStore || best.storeLabel === curStore) return ""; + const curP = priceToNumber(currentPriceStr); if (!Number.isFinite(curP)) return ""; if (best.priceNum >= curP) return ""; + return paint(` (Cheaper at ${best.storeLabel})`, C.gray); } - function availableAtInline(catLabel, sku) { - const s = normalizeCspc(sku); - if (!s) return ""; - const best = cheapestSku.get(s); + function availableAtInline(catLabel, skuRaw, url) { + const canon = canonicalKeyForReportItem(catLabel, skuRaw, url); + if (!canon) return ""; + + const best = cheapestSku.get(canon); if (!best || !best.storeLabel) return ""; + const curStore = storeFromCatLabel(catLabel); if (curStore && best.storeLabel === curStore) return ""; + return paint(` (Available at ${best.storeLabel})`, C.gray); } @@ -141,11 +161,9 @@ function renderFinalReport(report, { dbDir, colorize = Boolean(process.stdout && ln(paint(`NEW LISTINGS (${report.newItems.length})`, C.bold + C.green)); for (const it of report.newItems.sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name))) { const price = it.price ? paint(it.price, C.cyan) : paint("(no price)", C.gray); - const sku = normalizeCspc(it.sku || ""); - const cheapTag = cheaperAtInline(it.catLabel, sku, it.price || ""); - ln( - `${paint("+", C.green)} ${padRight(it.catLabel, reportLabelW)} | ${paint(it.name, C.bold)}${skuInline(sku)} ${price}${cheapTag}` - ); + const sku = String(it.sku || ""); + const cheapTag = cheaperAtInline(it.catLabel, sku, it.url, it.price || ""); + ln(`${paint("+", C.green)} ${padRight(it.catLabel, reportLabelW)} | ${paint(it.name, C.bold)}${skuInline(sku)} ${price}${cheapTag}`); ln(` ${paint(it.url, C.dim)}`); } ln(""); @@ -158,11 +176,9 @@ function renderFinalReport(report, { dbDir, colorize = Boolean(process.stdout && ln(paint(`RESTORED (${report.restoredItems.length})`, C.bold + C.green)); for (const it of report.restoredItems.sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name))) { const price = it.price ? paint(it.price, C.cyan) : paint("(no price)", C.gray); - const sku = normalizeCspc(it.sku || ""); - const cheapTag = cheaperAtInline(it.catLabel, sku, it.price || ""); - ln( - `${paint("R", C.green)} ${padRight(it.catLabel, reportLabelW)} | ${paint(it.name, C.bold)}${skuInline(sku)} ${price}${cheapTag}` - ); + const sku = String(it.sku || ""); + const cheapTag = cheaperAtInline(it.catLabel, sku, it.url, it.price || ""); + ln(`${paint("R", C.green)} ${padRight(it.catLabel, reportLabelW)} | ${paint(it.name, C.bold)}${skuInline(sku)} ${price}${cheapTag}`); ln(` ${paint(it.url, C.dim)}`); } ln(""); @@ -175,11 +191,9 @@ function renderFinalReport(report, { dbDir, colorize = Boolean(process.stdout && ln(paint(`REMOVED (${report.removedItems.length})`, C.bold + C.yellow)); for (const it of report.removedItems.sort((a, b) => (a.catLabel + a.name).localeCompare(b.catLabel + b.name))) { const price = it.price ? paint(it.price, C.cyan) : paint("(no price)", C.gray); - const sku = normalizeCspc(it.sku || ""); - const availTag = availableAtInline(it.catLabel, sku); - ln( - `${paint("-", C.yellow)} ${padRight(it.catLabel, reportLabelW)} | ${paint(it.name, C.bold)}${skuInline(sku)} ${price}${availTag}` - ); + const sku = String(it.sku || ""); + const availTag = availableAtInline(it.catLabel, sku, it.url); + ln(`${paint("-", C.yellow)} ${padRight(it.catLabel, reportLabelW)} | ${paint(it.name, C.bold)}${skuInline(sku)} ${price}${availTag}`); ln(` ${paint(it.url, C.dim)}`); } ln(""); @@ -217,8 +231,8 @@ function renderFinalReport(report, { dbDir, colorize = Boolean(process.stdout && newP = paint(newP, C.cyan); } - const sku = normalizeCspc(u.sku || ""); - const cheapTag = cheaperAtInline(u.catLabel, sku, newRaw || ""); + const sku = String(u.sku || ""); + const cheapTag = cheaperAtInline(u.catLabel, sku, u.url, newRaw || ""); ln( `${paint("~", C.cyan)} ${padRight(u.catLabel, reportLabelW)} | ${paint(u.name, C.bold)}${skuInline(sku)} ${oldP} ${paint("->", C.gray)} ${newP}${offTag}${cheapTag}` diff --git a/src/utils/sku.js b/src/utils/sku.js index 4ec0e43..9d488f8 100644 --- a/src/utils/sku.js +++ b/src/utils/sku.js @@ -6,4 +6,37 @@ function normalizeCspc(v) { return m ? m[1] : ""; } -module.exports = { normalizeCspc }; +function fnv1a32(str) { + let h = 0x811c9dc5; + for (let i = 0; i < str.length; i++) { + h ^= str.charCodeAt(i); + h = Math.imul(h, 0x01000193); + } + return (h >>> 0).toString(16).padStart(8, "0"); +} + +function makeSyntheticSkuKey({ storeLabel, url }) { + const store = String(storeLabel || "store"); + const u = String(url || ""); + if (!u) return ""; + return `u:${fnv1a32(`${store}|${u}`)}`; +} + +/** + * For DB + comparisons: + * - If we can extract a real 6-digit SKU, use it. + * - Else if v already looks like u:xxxx, keep it. + * - Else if sku missing, generate u:hash(store|url) if possible. + */ +function normalizeSkuKey(v, { storeLabel, url } = {}) { + const raw = String(v ?? "").trim(); + const cspc = normalizeCspc(raw); + if (cspc) return cspc; + + if (raw.startsWith("u:")) return raw; + + const syn = makeSyntheticSkuKey({ storeLabel, url }); + return syn || ""; +} + +module.exports = { normalizeCspc, normalizeSkuKey, makeSyntheticSkuKey }; diff --git a/src/utils/sku_map.js b/src/utils/sku_map.js new file mode 100644 index 0000000..29256e4 --- /dev/null +++ b/src/utils/sku_map.js @@ -0,0 +1,188 @@ +"use strict"; + +const fs = require("fs"); +const path = require("path"); + +/* ---------------- Union-Find (undirected grouping) ---------------- */ + +class DSU { + constructor() { + this.parent = new Map(); + this.rank = new Map(); + } + _add(x) { + if (!this.parent.has(x)) { + this.parent.set(x, x); + this.rank.set(x, 0); + } + } + find(x) { + x = String(x || "").trim(); + if (!x) return ""; + this._add(x); + let p = this.parent.get(x); + if (p !== x) { + p = this.find(p); + this.parent.set(x, p); + } + return p; + } + union(a, b) { + a = String(a || "").trim(); + b = String(b || "").trim(); + if (!a || !b || a === b) return; + const ra = this.find(a); + const rb = this.find(b); + if (!ra || !rb || ra === rb) return; + + const rka = this.rank.get(ra) || 0; + const rkb = this.rank.get(rb) || 0; + + if (rka < rkb) this.parent.set(ra, rb); + else if (rkb < rka) this.parent.set(rb, ra); + else { + this.parent.set(rb, ra); + this.rank.set(ra, rka + 1); + } + } +} + +function isUnknownSkuKey(k) { + return String(k || "").startsWith("u:"); +} + +function isNumericSku(k) { + return /^\d+$/.test(String(k || "").trim()); +} + +function compareSku(a, b) { + a = String(a || "").trim(); + b = String(b || "").trim(); + if (a === b) return 0; + + const au = isUnknownSkuKey(a); + const bu = isUnknownSkuKey(b); + if (au !== bu) return au ? 1 : -1; // real first + + const an = isNumericSku(a); + const bn = isNumericSku(b); + if (an && bn) { + const na = Number(a); + const nb = Number(b); + if (Number.isFinite(na) && Number.isFinite(nb) && na !== nb) return na < nb ? -1 : 1; + } + + return a < b ? -1 : 1; +} + +/* ---------------- File discovery ---------------- */ + +function tryReadJson(file) { + try { + const txt = fs.readFileSync(file, "utf8"); + return JSON.parse(txt); + } catch { + return null; + } +} + +function defaultSkuLinksCandidates(dbDir) { + const out = []; + + // 1) next to db dir: /../sku_links.json (common when dbDir is .../data/db) + if (dbDir) { + out.push(path.join(dbDir, "..", "sku_links.json")); + } + + // 2) repo root conventional location + out.push(path.join(process.cwd(), "data", "sku_links.json")); + + // 3) common worktree location + out.push(path.join(process.cwd(), ".worktrees", "data", "data", "sku_links.json")); + + return out; +} + +function findSkuLinksFile({ dbDir, mappingFile } = {}) { + // env override + const env = String(process.env.SPIRIT_TRACKER_SKU_LINKS || "").trim(); + if (env) return env; + + if (mappingFile) return mappingFile; + + for (const f of defaultSkuLinksCandidates(dbDir)) { + if (!f) continue; + try { + if (fs.existsSync(f)) return f; + } catch { + // ignore + } + } + + return ""; +} + +/* ---------------- Public API ---------------- */ + +function buildSkuMapFromLinksArray(links) { + const dsu = new DSU(); + const all = new Set(); + + for (const x of Array.isArray(links) ? links : []) { + const a = String(x?.fromSku || "").trim(); + const b = String(x?.toSku || "").trim(); + if (!a || !b) continue; + + all.add(a); + all.add(b); + + // undirected union => hardened vs A->B->C and cycles + dsu.union(a, b); + } + + // root -> Set(members) + const byRoot = new Map(); + for (const s of all) { + const r = dsu.find(s); + if (!r) continue; + let set = byRoot.get(r); + if (!set) byRoot.set(r, (set = new Set())); + set.add(s); + } + + // root -> canonical rep + const repByRoot = new Map(); + for (const [root, members] of byRoot.entries()) { + const arr = Array.from(members); + arr.sort(compareSku); + repByRoot.set(root, arr[0] || root); + } + + // sku -> canonical rep + const canonBySku = new Map(); + for (const [root, members] of byRoot.entries()) { + const rep = repByRoot.get(root) || root; + for (const s of members) canonBySku.set(s, rep); + } + + function canonicalSku(sku) { + const s = String(sku || "").trim(); + if (!s) return s; + return canonBySku.get(s) || s; + } + + return { canonicalSku, _canonBySku: canonBySku }; +} + +function loadSkuMap({ dbDir, mappingFile } = {}) { + const file = findSkuLinksFile({ dbDir, mappingFile }); + if (!file) { + return buildSkuMapFromLinksArray([]); + } + + const obj = tryReadJson(file); + const links = Array.isArray(obj?.links) ? obj.links : []; + return buildSkuMapFromLinksArray(links); +} + +module.exports = { loadSkuMap }; diff --git a/viz/app/mapping.js b/viz/app/mapping.js index 3dd976b..c056ad2 100644 --- a/viz/app/mapping.js +++ b/viz/app/mapping.js @@ -15,6 +15,7 @@ function canonicalPairKey(a, b) { } function buildForwardMap(links) { + // Keep this for reference/debug; grouping no longer depends on direction. const m = new Map(); for (const x of Array.isArray(links) ? links : []) { const fromSku = String(x?.fromSku || "").trim(); @@ -24,56 +25,6 @@ function buildForwardMap(links) { return m; } -function resolveSkuWithMap(sku, forwardMap) { - const s0 = String(sku || "").trim(); - if (!s0) return s0; - - // NOTE: u: keys are allowed to resolve through the map (so unknowns can be grouped) - - const seen = new Set(); - let cur = s0; - while (forwardMap.has(cur)) { - if (seen.has(cur)) break; // cycle guard - seen.add(cur); - cur = String(forwardMap.get(cur) || "").trim() || cur; - } - return cur || s0; -} - -function buildToGroups(links, forwardMap) { - // group: canonical toSku -> Set(all skus mapping to it, transitively) incl toSku itself - const groups = new Map(); - - // seed: include all explicit endpoints - for (const x of Array.isArray(links) ? links : []) { - const fromSku = String(x?.fromSku || "").trim(); - const toSku = String(x?.toSku || "").trim(); - if (!fromSku || !toSku) continue; - - const canonTo = resolveSkuWithMap(toSku, forwardMap); - if (!groups.has(canonTo)) groups.set(canonTo, new Set([canonTo])); - groups.get(canonTo).add(fromSku); - groups.get(canonTo).add(toSku); - } - - // close transitively: any sku that resolves to canonTo belongs in its group - const allSkus = new Set(); - for (const x of Array.isArray(links) ? links : []) { - const a = String(x?.fromSku || "").trim(); - const b = String(x?.toSku || "").trim(); - if (a) allSkus.add(a); - if (b) allSkus.add(b); - } - - for (const s of allSkus) { - const canon = resolveSkuWithMap(s, forwardMap); - if (!groups.has(canon)) groups.set(canon, new Set([canon])); - groups.get(canon).add(s); - } - - return groups; -} - function buildIgnoreSet(ignores) { const s = new Set(); for (const x of Array.isArray(ignores) ? ignores : []) { @@ -85,6 +36,136 @@ function buildIgnoreSet(ignores) { return s; } +/* ---------------- Union-Find grouping (hardened) ---------------- */ + +class DSU { + constructor() { + this.parent = new Map(); + this.rank = new Map(); + } + _add(x) { + if (!this.parent.has(x)) { + this.parent.set(x, x); + this.rank.set(x, 0); + } + } + find(x) { + x = String(x || "").trim(); + if (!x) return ""; + this._add(x); + let p = this.parent.get(x); + if (p !== x) { + p = this.find(p); + this.parent.set(x, p); + } + return p; + } + union(a, b) { + a = String(a || "").trim(); + b = String(b || "").trim(); + if (!a || !b || a === b) return; + const ra = this.find(a); + const rb = this.find(b); + if (!ra || !rb || ra === rb) return; + + const rka = this.rank.get(ra) || 0; + const rkb = this.rank.get(rb) || 0; + + if (rka < rkb) { + this.parent.set(ra, rb); + } else if (rkb < rka) { + this.parent.set(rb, ra); + } else { + this.parent.set(rb, ra); + this.rank.set(ra, rka + 1); + } + } +} + +function isUnknownSkuKey(key) { + return String(key || "").startsWith("u:"); +} + +function isNumericSku(key) { + return /^\d+$/.test(String(key || "").trim()); +} + +function compareSku(a, b) { + // Stable ordering to choose a canonical representative. + // Prefer real (non-u:) > unknown (u:). Among reals: numeric ascending if possible, else lex. + a = String(a || "").trim(); + b = String(b || "").trim(); + if (a === b) return 0; + + const aUnknown = isUnknownSkuKey(a); + const bUnknown = isUnknownSkuKey(b); + if (aUnknown !== bUnknown) return aUnknown ? 1 : -1; // real first + + const aNum = isNumericSku(a); + const bNum = isNumericSku(b); + if (aNum && bNum) { + // compare as integers (safe: these are small SKU strings) + const na = Number(a); + const nb = Number(b); + if (Number.isFinite(na) && Number.isFinite(nb) && na !== nb) return na < nb ? -1 : 1; + } + + // fallback lex + return a < b ? -1 : 1; +} + +function buildGroupsAndCanonicalMap(links) { + const dsu = new DSU(); + const all = new Set(); + + for (const x of Array.isArray(links) ? links : []) { + const a = String(x?.fromSku || "").trim(); + const b = String(x?.toSku || "").trim(); + if (!a || !b) continue; + all.add(a); + all.add(b); + + // IMPORTANT: union is undirected for grouping (hardened vs cycles) + dsu.union(a, b); + } + + // root -> Set(members) + const groupsByRoot = new Map(); + for (const s of all) { + const r = dsu.find(s); + if (!r) continue; + let set = groupsByRoot.get(r); + if (!set) groupsByRoot.set(r, (set = new Set())); + set.add(s); + } + + // Choose a canonical representative per group + const repByRoot = new Map(); + for (const [root, members] of groupsByRoot.entries()) { + const arr = Array.from(members); + arr.sort(compareSku); + const rep = arr[0] || root; + repByRoot.set(root, rep); + } + + // sku -> canonical rep + const canonBySku = new Map(); + // canonical rep -> Set(members) (what the rest of the app uses) + const groupsByCanon = new Map(); + + for (const [root, members] of groupsByRoot.entries()) { + const rep = repByRoot.get(root) || root; + let g = groupsByCanon.get(rep); + if (!g) groupsByCanon.set(rep, (g = new Set([rep]))); + for (const s of members) { + canonBySku.set(s, rep); + g.add(s); + } + } + + return { canonBySku, groupsByCanon }; +} + export async function loadSkuRules() { if (CACHED) return CACHED; @@ -92,17 +173,21 @@ export async function loadSkuRules() { const links = Array.isArray(meta?.links) ? meta.links : []; const ignores = Array.isArray(meta?.ignores) ? meta.ignores : []; + // keep forwardMap for visibility/debug; grouping uses union-find const forwardMap = buildForwardMap(links); - const toGroups = buildToGroups(links, forwardMap); + + const { canonBySku, groupsByCanon } = buildGroupsAndCanonicalMap(links); const ignoreSet = buildIgnoreSet(ignores); function canonicalSku(sku) { - return resolveSkuWithMap(sku, forwardMap); + const s = String(sku || "").trim(); + if (!s) return s; + return canonBySku.get(s) || s; } function groupForCanonical(toSku) { const canon = canonicalSku(toSku); - const g = toGroups.get(canon); + const g = groupsByCanon.get(canon); return g ? new Set(g) : new Set([canon]); } @@ -115,8 +200,11 @@ export async function loadSkuRules() { links, ignores, forwardMap, - toGroups, + + // "toGroups" retained name for compatibility with existing code + toGroups: groupsByCanon, ignoreSet, + canonicalSku, groupForCanonical, isIgnoredPair,