mirror of
https://github.com/samsonjs/spirit-tracker.git
synced 2026-04-27 15:07:43 +00:00
feat: Support for willow park
This commit is contained in:
parent
50b8d143fb
commit
23da22f74a
3 changed files with 149 additions and 1 deletions
|
|
@ -14,6 +14,7 @@ const { createStore: createCoop } = require("./coop");
|
||||||
const { createStore: createTudor } = require("./tudor");
|
const { createStore: createTudor } = require("./tudor");
|
||||||
const { createStore: createVintage } = require("./vintagespirits");
|
const { createStore: createVintage } = require("./vintagespirits");
|
||||||
const { createStore: createVessel } = require("./vessel");
|
const { createStore: createVessel } = require("./vessel");
|
||||||
|
const { createStore: createWillowPark } = require("./willowpark");
|
||||||
|
|
||||||
function createStores({ defaultUa } = {}) {
|
function createStores({ defaultUa } = {}) {
|
||||||
return [
|
return [
|
||||||
|
|
@ -28,6 +29,7 @@ function createStores({ defaultUa } = {}) {
|
||||||
createTudor(defaultUa),
|
createTudor(defaultUa),
|
||||||
createMaltsAndGrains(defaultUa),
|
createMaltsAndGrains(defaultUa),
|
||||||
createBCL(defaultUa),
|
createBCL(defaultUa),
|
||||||
|
createWillowPark(defaultUa),
|
||||||
createVessel(defaultUa),
|
createVessel(defaultUa),
|
||||||
createLegacy(defaultUa),
|
createLegacy(defaultUa),
|
||||||
createVintage(defaultUa),
|
createVintage(defaultUa),
|
||||||
|
|
|
||||||
146
src/stores/willowpark.js
Normal file
146
src/stores/willowpark.js
Normal file
|
|
@ -0,0 +1,146 @@
|
||||||
|
// src/stores/willowpark.js
|
||||||
|
"use strict";
|
||||||
|
|
||||||
|
const { decodeHtml, stripTags, extractFirstImgUrl, cleanText } = require("../utils/html");
|
||||||
|
const { makePageUrlShopifyQueryPage } = require("../utils/url");
|
||||||
|
/**
 * Pull the six-digit SKU that trails a Willow Park product handle.
 *
 * Recognised shapes:
 *   /products/<handle>-123456
 *   /collections/rum/products/<handle>-123456
 * An optional trailing slash and any `?query` / `#fragment` suffix are tolerated.
 *
 * @param {*} hrefOrUrl - Href or absolute URL; coerced to a string, falsy values become "".
 * @returns {string} The six digits, or "" when the input does not end in `-<6 digits>`.
 */
function extractSkuFromUrlOrHref(hrefOrUrl) {
  const s = String(hrefOrUrl || "");
  // Anchored at the end so only the digits that close the handle are taken;
  // a run of seven or more digits deliberately does not match.
  const m = s.match(/-(\d{6})(?:\/)?(?:[?#].*)?$/);
  return m ? m[1] : "";
}
|
||||||
|
|
||||||
|
/**
 * Reduce a Willow Park URL to one canonical form so the same product always
 * yields the same string.
 *
 * - The query string and fragment are removed.
 * - `/collections/<name>/products/<handle>` is rewritten to `/products/<handle>`.
 * - A trailing slash after the product handle is dropped for both the
 *   collection-scoped and the plain `/products/<handle>/` form, so the two
 *   spellings cannot produce different canonical URLs.
 *
 * @param {*} raw - Absolute URL (coerced to a string).
 * @returns {string} The canonical URL, or `String(raw || "")` when `raw` cannot be parsed as a URL.
 */
function canonicalizeWillowUrl(raw) {
  try {
    const u = new URL(String(raw));
    u.search = "";
    u.hash = "";
    // The collection prefix is optional here so that plain product paths also
    // get their trailing slash normalised away.
    const m = u.pathname.match(/^(?:\/collections\/[^/]+)?\/products\/([^/]+)\/?$/i);
    if (m) u.pathname = `/products/${m[1]}`;
    return u.toString();
  } catch {
    // Best effort: an unparseable input is handed back as a plain string.
    return String(raw || "");
  }
}
|
||||||
|
|
||||||
|
/**
 * Read the displayed price out of one product-card HTML fragment.
 *
 * Strategies, tried in order:
 *  1. The exact decimal inside a `visually-hidden` span, preferring the one
 *     that follows `grid-product__price--current`.
 *  2. Reconstruction from dollars plus a `<sup>` cents element, e.g.
 *     `$39<sup>99</sup>` gives `$39.99`. Attributes on the `<sup>` tag are
 *     accepted so a styled tag does not fall through to step 3 and lose its cents.
 *  3. The first bare `$<digits>` (optionally with `.dd`) anywhere in the block.
 *
 * NOTE(review): steps 1 and 3 keep thousands separators ("$1,299.00") while
 * step 2 strips them ("$1299.99"); confirm which form downstream code expects.
 *
 * @param {*} block - HTML of a single product tile (coerced to a string).
 * @returns {string} Price with a leading "$" and no whitespace, or "" when none is found.
 */
function extractWillowCardPrice(block) {
  const b = String(block || "");

  const current =
    b.match(
      /grid-product__price--current[\s\S]*?<span\b[^>]*class=["']visually-hidden["'][^>]*>\s*(\$\s*[\d,]+\.\d{2})\s*<\/span>/i
    )?.[1] ||
    b.match(/<span\b[^>]*class=["']visually-hidden["'][^>]*>\s*(\$\s*[\d,]+\.\d{2})\s*<\/span>/i)?.[1];

  if (current) return current.replace(/\s+/g, "");

  const sup = b.match(/\$\s*([\d,]+)\s*<sup\b[^>]*>\s*(\d{2})\s*<\/sup>/i);
  if (sup) return `$${sup[1].replace(/,/g, "")}.${sup[2]}`;

  const any = b.match(/\$\s*[\d,]+(?:\.\d{2})?/);
  return any ? any[0].replace(/\s+/g, "") : "";
}
|
||||||
|
|
||||||
|
/**
 * Parse a Willow Park collection/listing page into product records.
 *
 * The page is cut into one chunk per product tile (each chunk runs from one
 * `grid-item grid-product` opening tag to the next), and each chunk is then
 * mined for link, title, price, image, product id and SKU.
 *
 * @param {*} html - Listing page HTML (coerced to a string).
 * @param {object} [ctx] - Optional context; only `ctx.store.host` is read, to
 *   build the base for resolving relative links (defaults to "www.willowpark.net").
 * @param {*} [finalUrl] - Accepted for signature compatibility; not used here.
 * @returns {Array<{name: string, price: string, url: string, sku: string, img: *, pid: string}>}
 *   One record per distinct canonical URL. `img` is whatever `extractFirstImgUrl` returns.
 */
function parseProductsWillowPark(html, ctx, finalUrl) {
  const s = String(html || "");
  const base = `https://${ctx?.store?.host || "www.willowpark.net"}/`;

  // Find the start offset of each product tile.
  // <div class="grid-anchor" ...> nodes do not match and are ignored.
  const starts = [...s.matchAll(/<div\b[^>]*class=["'][^"']*\bgrid-item\b[^"']*\bgrid-product\b[^"']*["'][^>]*>/gi)]
    .map((m) => m.index)
    .filter((i) => typeof i === "number");

  // Slice from each start to the next start (the last tile runs to the end of
  // the document). This avoids having to balance nested <div> closings.
  const blocks = starts.map((start, i) => s.slice(start, i + 1 < starts.length ? starts[i + 1] : s.length));

  const items = [];
  for (const block of blocks) {
    // Do NOT skip sold-out by badge; the badge can exist but be display:none.
    // Availability filtering should be done via the URL query (?filter.v.availability=1).

    // Prefer the collection-scoped product link, then any product link.
    const href =
      block.match(/<a\b[^>]*href=["']([^"']*\/collections\/[^"']*\/products\/[^"']+)["']/i)?.[1] ||
      block.match(/<a\b[^>]*href=["']([^"']*\/products\/[^"']+)["']/i)?.[1];
    if (!href) continue;

    let url;
    try {
      url = new URL(decodeHtml(href), base).toString();
    } catch {
      // A link that cannot be resolved against the base is skipped.
      continue;
    }
    url = canonicalizeWillowUrl(url);

    const titleHtml =
      block.match(/<div\b[^>]*class=["'][^"']*\bgrid-product__title\b[^"']*["'][^>]*>([\s\S]*?)<\/div>/i)?.[1] || "";
    const name = cleanText(decodeHtml(stripTags(titleHtml)));
    if (!name) continue;

    const price = extractWillowCardPrice(block);
    const img = extractFirstImgUrl(block, base);

    // Some pages include data-product-id on the tile; useful but optional.
    const pid = block.match(/\bdata-product-id=["'](\d+)["']/i)?.[1] || "";

    const sku = extractSkuFromUrlOrHref(href) || extractSkuFromUrlOrHref(url);

    items.push({ name, price, url, sku, img, pid });
  }

  // De-dupe by canonical URL (the same product can appear multiple times).
  // A later duplicate replaces the earlier record but keeps its position.
  const uniq = new Map();
  for (const it of items) uniq.set(it.url, it);
  return [...uniq.values()];
}
|
||||||
|
|
||||||
|
/**
 * Report whether a listing page carries one of the known "no products" markers.
 * Helps discovery and scanning stop when paging past the end of the inventory.
 *
 * @param {*} html - Listing page HTML (coerced to a string).
 * @returns {boolean} True when any empty-collection marker is present.
 */
function willowIsEmptyListingPage(html) {
  const s = String(html || "");
  const emptyMarkers = [
    /Sorry,\s+there are no products in this collection\./i,
    /No products found/i,
    /collection--empty\b/i,
  ];
  return emptyMarkers.some((re) => re.test(s));
}
|
||||||
|
|
||||||
|
/**
 * Build the Willow Park store descriptor.
 *
 * @param {*} defaultUa - Value stored as the descriptor's `ua` field
 *   (presumably the User-Agent for requests; confirm against the caller).
 * @returns {object} Descriptor holding the store identity, the parsing/paging
 *   hooks and the list of categories to scan.
 */
function createStore(defaultUa) {
  return {
    key: "willowpark",
    name: "Willow Park",
    host: "www.willowpark.net",
    ua: defaultUa,

    parseProducts: parseProductsWillowPark,
    makePageUrl: makePageUrlShopifyQueryPage,
    isEmptyListingPage: willowIsEmptyListingPage,

    // Start URLs carry ?filter.v.availability=1 because the parser does not
    // filter sold-out tiles itself.
    // NOTE(review): discoveryStartPage is consumed elsewhere; its exact meaning
    // is not visible from this file.
    categories: [
      {
        key: "whisky-whiskey",
        label: "Whisky / Whiskey",
        startUrl: "https://www.willowpark.net/collections/whisky-whiskey?filter.v.availability=1",
        discoveryStartPage: 5,
      },
      {
        key: "rum",
        label: "Rum",
        startUrl: "https://www.willowpark.net/collections/rum?filter.v.availability=1",
        discoveryStartPage: 3,
      },
    ],
  };
}
|
||||||
|
|
||||||
|
module.exports = { createStore, parseProductsWillowPark };
|
||||||
|
|
@ -261,7 +261,7 @@ function buildMappedSkuSet(links, rules) {
|
||||||
|
|
||||||
/**
 * Decide whether a store label belongs to the "BC" group of stores.
 *
 * The label is lower-cased and checked for any of the known store keywords as
 * a substring.
 *
 * NOTE(review): "vintagespirits" has no space, so a label written as
 * "Vintage Spirits" would not match; confirm the label format used by callers.
 *
 * @param {*} label - Store label (coerced to a string; falsy values become "").
 * @returns {boolean} True when the label contains one of the keywords.
 */
function isBCStoreLabel(label) {
  const s = String(label || "").toLowerCase();
  const keywords = ["bcl", "strath", "gull", "legacy", "tudor", "vessel", "vintagespirits"];
  return keywords.some((keyword) => s.includes(keyword));
}
|
||||||
|
|
||||||
function skuIsBC(allRows, skuKey) {
|
function skuIsBC(allRows, skuKey) {
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue