From 23da22f74a19d72b860a7d58d0af540022b6db85 Mon Sep 17 00:00:00 2001
From: "Brennan Wilkes (Text Groove)"
Date: Thu, 29 Jan 2026 23:26:40 -0800
Subject: [PATCH] feat: Support for willow park

---
 src/stores/index.js      |   2 +
 src/stores/willowpark.js | 195 +++++++++++++++++++++++++++++++++++++++
 viz/app/linker_page.js   |   2 +-
 3 files changed, 198 insertions(+), 1 deletion(-)
 create mode 100644 src/stores/willowpark.js

diff --git a/src/stores/index.js b/src/stores/index.js
index eee31cc..3ee5aa5 100644
--- a/src/stores/index.js
+++ b/src/stores/index.js
@@ -14,6 +14,7 @@ const { createStore: createCoop } = require("./coop");
 const { createStore: createTudor } = require("./tudor");
 const { createStore: createVintage } = require("./vintagespirits");
 const { createStore: createVessel } = require("./vessel");
+const { createStore: createWillowPark } = require("./willowpark");
 
 function createStores({ defaultUa } = {}) {
   return [
@@ -28,6 +29,7 @@ function createStores({ defaultUa } = {}) {
     createTudor(defaultUa),
     createMaltsAndGrains(defaultUa),
     createBCL(defaultUa),
+    createWillowPark(defaultUa),
     createVessel(defaultUa),
     createLegacy(defaultUa),
     createVintage(defaultUa),
diff --git a/src/stores/willowpark.js b/src/stores/willowpark.js
new file mode 100644
index 0000000..f018a77
--- /dev/null
+++ b/src/stores/willowpark.js
@@ -0,0 +1,195 @@
+// src/stores/willowpark.js
+"use strict";
+
+const { decodeHtml, stripTags, extractFirstImgUrl, cleanText } = require("../utils/html");
+const { makePageUrlShopifyQueryPage } = require("../utils/url");
+
+/**
+ * Pull the six-digit SKU that terminates a Willow Park product path.
+ *
+ * Common Willow patterns (a trailing slash, query string or fragment is tolerated):
+ *   /products/<handle>-123456
+ *   /collections/rum/products/<handle>-123456
+ *
+ * @param {string} hrefOrUrl - Relative href or absolute URL of a product page.
+ * @returns {string} The six digits, or "" when no trailing "-NNNNNN" is present.
+ */
+function extractSkuFromUrlOrHref(hrefOrUrl) {
+  const s = String(hrefOrUrl || "");
+  const m = s.match(/-(\d{6})(?:\/)?(?:[?#].*)?$/);
+  return m ? m[1] : "";
+}
+
+/**
+ * Normalize a product URL so the same product always maps to one string.
+ *
+ * Drops the query string and fragment, and rewrites
+ * /collections/<collection>/products/<handle> to /products/<handle>.
+ *
+ * @param {string} raw - Absolute product URL.
+ * @returns {string} Canonical URL, or String(raw || "") when raw is not a parseable URL.
+ */
+function canonicalizeWillowUrl(raw) {
+  try {
+    const u = new URL(String(raw));
+    u.search = "";
+    u.hash = "";
+    const m = u.pathname.match(/^\/collections\/[^/]+\/products\/([^/]+)\/?$/i);
+    if (m) u.pathname = `/products/${m[1]}`;
+    return u.toString();
+  } catch {
+    // Not a parseable absolute URL: hand the input back instead of throwing.
+    return String(raw || "");
+  }
+}
+
+/**
+ * Extract the displayed price from one product tile.
+ *
+ * Order of preference:
+ *   1. The exact decimal in a visually-hidden <span>, searched for after the
+ *      "grid-product__price--current" marker first, then anywhere in the tile.
+ *   2. Dollars followed by cents in a <sup>, e.g. $39<sup>99</sup> -> "$39.99".
+ *   3. The first "$" amount found anywhere in the tile.
+ *
+ * @param {string} block - HTML of a single product tile.
+ * @returns {string} Price such as "$39.99" with whitespace removed, or "" if none is found.
+ */
+function extractWillowCardPrice(block) {
+  const b = String(block || "");
+
+  const current =
+    b.match(
+      /grid-product__price--current[\s\S]*?<span\b[^>]*class=["']visually-hidden["'][^>]*>\s*(\$\s*[\d,]+\.\d{2})\s*<\/span>/i
+    )?.[1] ||
+    b.match(/<span\b[^>]*class=["']visually-hidden["'][^>]*>\s*(\$\s*[\d,]+\.\d{2})\s*<\/span>/i)?.[1];
+
+  if (current) return current.replace(/\s+/g, "");
+
+  const sup = b.match(/\$\s*([\d,]+)\s*<sup>\s*(\d{2})\s*<\/sup>/i);
+  if (sup) return `$${sup[1].replace(/,/g, "")}.${sup[2]}`;
+
+  const any = b.match(/\$\s*[\d,]+(?:\.\d{2})?/);
+  return any ? any[0].replace(/\s+/g, "") : "";
+}
+
+/**
+ * Parse a Willow Park collection (listing) page into product records.
+ *
+ * @param {string} html - Raw HTML of the listing page.
+ * @param {object} [ctx] - Only ctx.store.host is read, to resolve relative links.
+ * @param {string} [finalUrl] - Not used by this parser.
+ * @returns {Array<object>} Records of shape { name, price, url, sku, img, pid }, one per
+ *   canonical product URL (when a URL repeats, the last tile seen wins).
+ */
+function parseProductsWillowPark(html, ctx, finalUrl) {
+  const s = String(html || "");
+  const items = [];
+
+  const base = `https://${(ctx && ctx.store && ctx.store.host) || "www.willowpark.net"}/`;
+
+  // Find start offsets of each product tile: an opening <div> whose class
+  // attribute contains "grid-item" followed by "grid-product".
+  const starts = [...s.matchAll(/<div\b[^>]*class=["'][^"']*\bgrid-item\b[^"']*\bgrid-product\b[^"']*["'][^>]*>/gi)]
+    .map((m) => m.index)
+    .filter((i) => typeof i === "number");
+
+  // Slice into blocks from each start to the next start.
+  // Robust to varying nesting/closing div counts.
+  const blocks = [];
+  for (let i = 0; i < starts.length; i++) {
+    const a = starts[i];
+    const b = i + 1 < starts.length ? starts[i + 1] : s.length;
+    blocks.push(s.slice(a, b));
+  }
+
+  for (const block of blocks) {
+    // Do NOT skip sold-out by badge; badge can exist but be display:none.
+    // Availability filtering should be done via URL query (?filter.v.availability=1).
+
+    const href =
+      block.match(/<a\b[^>]*href=["']([^"']*\/collections\/[^"']*\/products\/[^"']+)["']/i)?.[1] ||
+      block.match(/<a\b[^>]*href=["']([^"']*\/products\/[^"']+)["']/i)?.[1];
+    if (!href) continue;
+
+    let url;
+    try {
+      url = new URL(decodeHtml(href), base).toString();
+    } catch {
+      // Unresolvable href: skip this tile only.
+      continue;
+    }
+    url = canonicalizeWillowUrl(url);
+
+    const titleHtml =
+      block.match(/<div\b[^>]*class=["'][^"']*\bgrid-product__title\b[^"']*["'][^>]*>([\s\S]*?)<\/div>/i)?.[1] || "";
+    const name = cleanText(decodeHtml(stripTags(titleHtml)));
+    if (!name) continue;
+
+    const price = extractWillowCardPrice(block);
+    const img = extractFirstImgUrl(block, base);
+
+    // Some pages include data-product-id on the tile; useful but optional.
+    const pid = block.match(/\bdata-product-id=["'](\d+)["']/i)?.[1] || "";
+
+    const sku = extractSkuFromUrlOrHref(href) || extractSkuFromUrlOrHref(url);
+
+    items.push({ name, price, url, sku, img, pid });
+  }
+
+  // De-dupe by canonical URL (same product can appear multiple times).
+  const uniq = new Map();
+  for (const it of items) uniq.set(it.url, it);
+  return [...uniq.values()];
+}
+
+/**
+ * Report whether a listing page has no products. Helps discovery and
+ * scanning stop when paging past inventory.
+ *
+ * @param {string} html - Raw HTML of the listing page.
+ * @returns {boolean} True when one of the known "empty collection" markers is present.
+ */
+function willowIsEmptyListingPage(html) {
+  const s = String(html || "");
+  if (/Sorry,\s+there are no products in this collection\./i.test(s)) return true;
+  if (/No products found/i.test(s)) return true;
+  if (/collection--empty\b/i.test(s)) return true;
+  return false;
+}
+
+/**
+ * Build the Willow Park store definition.
+ *
+ * @param {string} defaultUa - Value stored on the definition as `ua`.
+ * @returns {object} Store descriptor: identity, parser hooks and category start URLs.
+ */
+function createStore(defaultUa) {
+  return {
+    key: "willowpark",
+    name: "Willow Park",
+    host: "www.willowpark.net",
+    ua: defaultUa,
+
+    parseProducts: parseProductsWillowPark,
+    makePageUrl: makePageUrlShopifyQueryPage,
+    isEmptyListingPage: willowIsEmptyListingPage,
+
+    categories: [
+      {
+        key: "whisky-whiskey",
+        label: "Whisky / Whiskey",
+        startUrl: "https://www.willowpark.net/collections/whisky-whiskey?filter.v.availability=1",
+        discoveryStartPage: 5,
+      },
+      {
+        key: "rum",
+        label: "Rum",
+        startUrl: "https://www.willowpark.net/collections/rum?filter.v.availability=1",
+        discoveryStartPage: 3,
+      },
+    ],
+  };
+}
+
+module.exports = { createStore, parseProductsWillowPark };
diff --git a/viz/app/linker_page.js b/viz/app/linker_page.js
index 8ecb9f2..a15a913 100644
--- a/viz/app/linker_page.js
+++ b/viz/app/linker_page.js
@@ -261,7 +261,7 @@ function buildMappedSkuSet(links, rules) {
 
 function isBCStoreLabel(label) {
   const s = String(label || "").toLowerCase();
-  return s.includes("bcl") || s.includes("strath") || s.includes("gull") || s.includes("legacy") || s.includes("tudor");
+  return s.includes("bcl") || s.includes("strath") || s.includes("gull") || s.includes("legacy") || s.includes("tudor") || s.includes("vessel") || s.includes("vintagespirits");
 }
 
 function skuIsBC(allRows, skuKey) {