diff --git a/src/stores/willowpark.js b/src/stores/willowpark.js
index 15aa658..0cccd6c 100644
--- a/src/stores/willowpark.js
+++ b/src/stores/willowpark.js
@@ -3,6 +3,7 @@
 
 const { decodeHtml, stripTags, extractFirstImgUrl, cleanText } = require("../utils/html");
 const { makePageUrlShopifyQueryPage } = require("../utils/url");
+const { needsSkuDetail, pickBetterSku, normalizeCspc } = require("../utils/sku");
 
 function extractSkuFromUrlOrHref(hrefOrUrl) {
   const s = String(hrefOrUrl || "");
@@ -66,8 +67,8 @@ function parseProductsWillowPark(html, ctx, finalUrl) {
   const base = `https://${(ctx && ctx.store && ctx.store.host) || "www.willowpark.net"}/`;
 
   const starts = [...s.matchAll(/<div[^>]*class=["'][^"']*\bgrid-item\b[^"']*\bgrid-product\b[^"']*["'][^>]*>/gi)]
-    .map(m => m.index)
-    .filter(i => typeof i === "number");
+    .map((m) => m.index)
+    .filter((i) => typeof i === "number");
 
   const blocks = [];
   for (let i = 0; i < starts.length; i++) {
@@ -91,7 +92,8 @@ function parseProductsWillowPark(html, ctx, finalUrl) {
     url = canonicalizeWillowUrl(url);
 
     const titleHtml =
-      block.match(/<div[^>]*class=["'][^"']*\bgrid-product__title\b[^"']*["'][^>]*>([\s\S]*?)<\/div>/i)?.[1] || "";
+      block.match(/<div[^>]*class=["'][^"']*\bgrid-product__title\b[^"']*["'][^>]*>([\s\S]*?)<\/div>/i)?.[1] ||
+      "";
     const name = cleanText(decodeHtml(stripTags(titleHtml)));
     if (!name) continue;
 
@@ -120,6 +122,179 @@ function willowIsEmptyListingPage(html) {
   return false;
 }
 
+/* ---------------- Storefront GraphQL (token extracted from HTML) ---------------- */
+
+const WILLOW_STOREFRONT_GQL_URL = "https://willow-park-wines.myshopify.com/api/2025-07/graphql.json";
+
+const PRODUCT_BY_ID_QUERY = `
+query ($id: ID!) @inContext(country: CA) {
+  product(id: $id) {
+    variants(first: 50) {
+      nodes { sku availableForSale quantityAvailable }
+    }
+  }
+}
+`;
+
+/**
+ * Pick one SKU from a Storefront product's variants.
+ * Preference order: a variant with stock, then one marked available for sale,
+ * then any variant that has a non-blank SKU. Returns "" when none qualifies.
+ */
+function pickBestVariantSku(product) {
+  const vs = Array.isArray(product?.variants?.nodes) ? product.variants.nodes : [];
+  if (!vs.length) return "";
+
+  const inStock = vs.find((v) => Number(v?.quantityAvailable) > 0 && String(v?.sku || "").trim());
+  if (inStock) return String(inStock.sku).trim();
+
+  const forSale = vs.find((v) => Boolean(v?.availableForSale) && String(v?.sku || "").trim());
+  if (forSale) return String(forSale.sku).trim();
+
+  const any = vs.find((v) => String(v?.sku || "").trim());
+  return any ? String(any.sku).trim() : "";
+}
+
+/**
+ * Extract the Storefront access token from page HTML.
+ * Returns "" when neither known location yields a token.
+ */
+function extractStorefrontTokenFromHtml(html) {
+  const s = String(html || "");
+
+  // 1) script#shopify-features JSON: {"accessToken":"..."}
+  const j = s.match(/<script[^>]+id=["']shopify-features["'][^>]*>([\s\S]*?)<\/script>/i)?.[1];
+  if (j) {
+    try {
+      const obj = JSON.parse(j);
+      const t = String(obj?.accessToken || "").trim();
+      if (t) return t;
+    } catch {
+      // Malformed JSON: fall through to the meta-tag lookup below.
+    }
+  }
+
+  // 2) meta name="shopify-checkout-api-token"
+  const m = s.match(
+    /<meta[^>]+name=["']shopify-checkout-api-token["'][^>]+content=["']([^"']+)["']/i
+  )?.[1];
+  return String(m || "").trim();
+}
+
+/**
+ * Return the storefront token, cached on ctx after the first successful
+ * homepage fetch. Throws when no token can be found in the homepage HTML.
+ */
+async function willowGetStorefrontToken(ctx) {
+  if (ctx._willowStorefrontToken) return ctx._willowStorefrontToken;
+
+  const r = await ctx.http.fetchTextWithRetry("https://www.willowpark.net/", "willow:token", ctx.store.ua);
+  const t = extractStorefrontTokenFromHtml(r?.text || "");
+  if (!t) throw new Error("Willow Park: could not find storefront token in homepage HTML");
+
+  ctx._willowStorefrontToken = t;
+  return t;
+}
+
+/**
+ * POST a Storefront GraphQL query and return the response object from
+ * ctx.http.fetchJsonWithRetry unchanged.
+ */
+async function willowGql(ctx, label, query, variables) {
+  const token = await willowGetStorefrontToken(ctx);
+
+  const r = await ctx.http.fetchJsonWithRetry(WILLOW_STOREFRONT_GQL_URL, label, ctx.store.ua, {
+    method: "POST",
+    headers: {
+      Accept: "application/json",
+      "content-type": "application/json",
+      Origin: "https://www.willowpark.net",
+      Referer: "https://www.willowpark.net/",
+      "x-shopify-storefront-access-token": token,
+    },
+    body: JSON.stringify({ query, variables }),
+  });
+
+  // If token is rejected, clear so a future attempt re-fetches it once.
+  if (r?.status === 401 || r?.status === 403) ctx._willowStorefrontToken = "";
+  return r;
+}
+
+// If GQL returns a numeric SKU that isn't 6 digits, namespace it as id:.
+// Keep 6-digit CSPC as-is. For non-numeric / already-namespaced formats, return as-is.
+function normalizeWillowGqlSku(rawSku) {
+  const s = String(rawSku || "").trim();
+  if (!s) return "";
+  const cspc = normalizeCspc(s);
+  if (cspc) return cspc; // 6-digit wins
+  if (/^id:/i.test(s) || /^upc:/i.test(s) || /^u:/i.test(s)) return s;
+  if (/^\d+$/.test(s)) return `id:${s}`;
+  return s;
+}
+
+/**
+ * Look up a SKU for a Shopify product id via Storefront GQL.
+ * Results, including failed lookups (""), are cached on ctx so a pid that has
+ * already been resolved is not requested again. Returns "" on lookup failure.
+ */
+async function willowFetchSkuByPid(ctx, pid) {
+  const id = String(pid || "").trim();
+  if (!id) return "";
+
+  if (!ctx._willowSkuCacheByPid) ctx._willowSkuCacheByPid = new Map();
+  if (ctx._willowSkuCacheByPid.has(id)) return ctx._willowSkuCacheByPid.get(id);
+
+  const gid = `gid://shopify/Product/${id}`;
+  let sku = "";
+
+  try {
+    const r = await willowGql(ctx, `willow:gql:pid:${id}`, PRODUCT_BY_ID_QUERY, { id: gid });
+    if (r?.status === 200) sku = normalizeWillowGqlSku(pickBestVariantSku(r?.json?.data?.product));
+  } catch (err) {
+    // Best-effort: keep the scan going, but leave a trace instead of failing silently.
+    ctx.logger?.warn?.(`Willow SKU lookup failed for pid=${id}: ${err?.message || err}`);
+    sku = "";
+  }
+
+  ctx._willowSkuCacheByPid.set(id, sku);
+  return sku;
+}
+
+/**
+ * Second-pass repair: if SKU is missing/synthetic, use Storefront GQL by product id.
+ * Budgeted to avoid exploding requests. Mutates the items held in `discovered`.
+ *
+ * Every item is still seeded from prevDb after the budget runs out; only the
+ * GQL lookups stop. Items without a pid are skipped without spending budget.
+ */
+async function willowRepairDiscoveredItems(ctx, discovered, prevDb) {
+  const budget = Number.isFinite(ctx?.config?.willowparkGqlBudget) ? ctx.config.willowparkGqlBudget : 200;
+  let used = 0;
+
+  for (const [url, it] of discovered.entries()) {
+    if (!it) continue;
+
+    // Seed from prev DB so we don't repair repeatedly if we already learned a good SKU.
+    const prev = prevDb?.byUrl?.get(url);
+    if (prev) it.sku = pickBetterSku(it.sku, prev.sku);
+
+    if (!needsSkuDetail(it.sku)) continue;
+
+    // Out of budget: no more lookups, but keep looping so later items still get seeded.
+    if (used >= budget) continue;
+
+    // No product id means no request can be made; don't charge the budget for it.
+    const pid = String(it.pid || "").trim();
+    if (!pid) continue;
+
+    used++;
+    const repaired = await willowFetchSkuByPid(ctx, pid);
+    if (repaired) it.sku = pickBetterSku(repaired, it.sku);
+  }
+
+  ctx.logger.ok(`${ctx.catPrefixOut} | Willow SKU repair (GQL): used=${used}/${budget}`);
+}
+
 function createStore(defaultUa) {
   return {
     key: "willowpark",
@@ -131,6 +306,9 @@ function createStore(defaultUa) {
     makePageUrl: makePageUrlShopifyQueryPage,
     isEmptyListingPage: willowIsEmptyListingPage,
 
+    // Hook called by the category scanner before the merge step
+    repairDiscoveredItems: willowRepairDiscoveredItems,
+
     categories: [
       {
         key: "scotch",
diff --git a/src/tracker/category_scan.js b/src/tracker/category_scan.js
index 6f814f9..309cb8e 100644
--- a/src/tracker/category_scan.js
+++ b/src/tracker/category_scan.js
@@ -343,6 +343,10 @@ async function discoverAndScanCategory(ctx, prevDb, report) {
     }
   }
 
+  if (typeof ctx.store.repairDiscoveredItems === "function") {
+    await ctx.store.repairDiscoveredItems(ctx, discovered, prevDb);
+  }
+
   logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}${dups ? ` (${dups} dups)` : ""}`);
 
   const { merged, newItems, updatedItems, removedItems, restoredItems, metaChangedItems } =