diff --git a/src/stores/index.js b/src/stores/index.js index a7547f0..eee31cc 100644 --- a/src/stores/index.js +++ b/src/stores/index.js @@ -13,6 +13,7 @@ const { createStore: createGull } = require("./gull"); const { createStore: createCoop } = require("./coop"); const { createStore: createTudor } = require("./tudor"); const { createStore: createVintage } = require("./vintagespirits"); +const { createStore: createVessel } = require("./vessel"); function createStores({ defaultUa } = {}) { return [ @@ -27,6 +28,7 @@ function createStores({ defaultUa } = {}) { createTudor(defaultUa), createMaltsAndGrains(defaultUa), createBCL(defaultUa), + createVessel(defaultUa), createLegacy(defaultUa), createVintage(defaultUa), ]; diff --git a/src/stores/vessel.js b/src/stores/vessel.js new file mode 100644 index 0000000..4c725fc --- /dev/null +++ b/src/stores/vessel.js @@ -0,0 +1,134 @@ +"use strict"; + +const { decodeHtml, cleanText, extractFirstImgUrl } = require("../utils/html"); +const { normalizeCspc } = require("../utils/sku"); +const { normalizeBaseUrl } = require("../utils/url"); + +function normalizeAbsUrl(raw) { + const s = String(raw || "").trim(); + if (!s) return ""; + if (s.startsWith("//")) return `https:${s}`; + if (/^https?:\/\//i.test(s)) return s; + try { + return new URL(s, "https://vesselliquor.com/").toString(); + } catch { + return s; + } +} + +function makeVesselPageUrl(baseUrl, pageNum) { + const u = new URL(normalizeBaseUrl(baseUrl)); + u.hash = ""; + if (pageNum <= 1) u.searchParams.delete("page"); + else u.searchParams.set("page", String(pageNum)); + u.search = u.searchParams.toString() ? `?${u.searchParams.toString()}` : ""; + return u.toString(); +} + +function vesselLooksInStock(block) { + const s = String(block || "").toLowerCase(); + if (s.includes("sold out") || s.includes("sold-out") || s.includes("out of stock")) return false; + if (/\bdata-available=["']false["']/.test(s)) return false; + return true; +} + +function vesselExtractPrice(block) { + const s = String(block || ""); + + const saleTags = [...s.matchAll(/]*>([\s\S]*?)<\/sale-price>/gi)]; + for (let i = saleTags.length - 1; i >= 0; i--) { + const txt = cleanText(decodeHtml(saleTags[i][1] || "")); + const m = txt.match(/\$\s*\d+(?:\.\d{2})?/); + if (m) return m[0].replace(/\s+/g, ""); + } + + // Fallback: read price-list but ignore compare-at (crossed-out) + const withoutCompare = s.replace(/]*>[\s\S]*?<\/compare-at-price>/gi, ""); + const pl = withoutCompare.match(/]*>([\s\S]*?)<\/price-list>/i); + if (pl) { + const txt = cleanText(decodeHtml(pl[1] || "")); + const all = [...txt.matchAll(/\$\s*\d+(?:\.\d{2})?/g)]; + if (all.length) return all[all.length - 1][0].replace(/\s+/g, ""); + } + + return ""; +} + +function vesselCardToItem(block, base) { + if (!vesselLooksInStock(block)) return null; + + const hrefM = block.match(/]*href=["']([^"']+)["'][^>]*>/i); + if (!hrefM || !hrefM[1]) return null; + + let url = ""; + try { + url = new URL(decodeHtml(hrefM[1]), base).toString(); + } catch { + return null; + } + + const titleM = + block.match(/product-card__title[\s\S]*?]*>([\s\S]*?)<\/a>/i) || + block.match(/]*\balt=["']([^"']+)["']/i); + + const name = cleanText(decodeHtml(titleM ? titleM[1] : "")); + if (!name) return null; + + const img = normalizeAbsUrl(extractFirstImgUrl(block, base)); + const price = vesselExtractPrice(block); + + // Try to pull a 6-digit SKU (often their image filenames are CSPC-like). + const sku = normalizeCspc(img) || ""; + + return { name, price, url, sku, img }; +} + +function parseProductsVessel(html, ctx) { + const s = String(html || ""); + const base = `https://${(ctx && ctx.store && ctx.store.host) || "vesselliquor.com"}/`; + + const parts = s.split(/ max) max = n; } + // Shopify: ?page=23 + for (const m of s.matchAll(/href=["'][^"']*[?&]page=(\d+)[^"']*["']/gi)) { + const n = Number(m[1]); + if (Number.isFinite(n) && n > max) max = n; + } + // Sometimes themes render plain numbers without /page/ in href; keep it conservative: // Only trust these if we already found at least one pagination-ish token. if (max > 1) return max; @@ -236,11 +242,23 @@ async function discoverTotalPagesFast(ctx, baseUrl, guess, step) { } const extracted = extractTotalPagesFromPaginationHtml(html1); - if (extracted && extracted >= 1) { - ctx.logger.ok(`${ctx.catPrefixOut} | Total pages (from pagination): ${extracted}`); - return extracted; - } + // Shopify collections with filters often lie about pagination. + // If page 1 looks full, don't trust a tiny extracted count. + if (extracted && extracted >= 1) { + const looksTruncated = + extracted <= 2 && items1 >= 40; // Shopify default page size ≈ 48 + + if (!looksTruncated) { + ctx.logger.ok(`${ctx.catPrefixOut} | Total pages (from pagination): ${extracted}`); + return extracted; + } + + ctx.logger.warn( + `${ctx.catPrefixOut} | Pagination says ${extracted} but page looks full; falling back to probe` + ); + } + // Fallback to probing if pagination parse fails const g = Math.max(2, guess); const pg = await probePage(ctx, baseUrl, g, state);