mirror of
https://github.com/samsonjs/spirit-tracker.git
synced 2026-04-27 15:07:43 +00:00
fix: Prices for gull
This commit is contained in:
parent
6e21757956
commit
1a1267eba6
1 changed file with 65 additions and 29 deletions
|
|
@ -2,25 +2,57 @@
|
||||||
|
|
||||||
const { decodeHtml, cleanText, extractFirstImgUrl } = require("../utils/html");
|
const { decodeHtml, cleanText, extractFirstImgUrl } = require("../utils/html");
|
||||||
const { normalizeCspc } = require("../utils/sku");
|
const { normalizeCspc } = require("../utils/sku");
|
||||||
const { extractPriceFromTmbBlock } = require("../utils/woocommerce");
|
|
||||||
const { makePageUrl } = require("../utils/url");
|
const { makePageUrl } = require("../utils/url");
|
||||||
|
|
||||||
/**
 * Decide whether a product tile's HTML indicates the item is in stock.
 *
 * Negative markers win over positive ones: if the tile carries an
 * out-of-stock class token (either the `outofstock` or the hyphenated
 * `out-of-stock` spelling) the result is always false.
 * Otherwise the tile counts as in stock when it has an `in-stock` /
 * `instock` token, or a visible "<N> in stock" text node with N > 0.
 *
 * @param {*} block - HTML fragment for one product tile; nullish/falsy
 *   values are treated as an empty string.
 * @returns {boolean} true when an in-stock marker is found and no
 *   out-of-stock marker is present.
 */
function looksInStock(block) {
  const html = String(block || "");

  // Both spellings are checked: `outofstock` and `out-of-stock`.
  if (/\bout-?of-?stock\b/i.test(html)) return false;

  // Class-token style markers, e.g. <p class="stock in-stock">.
  if (/\bin-?stock\b/i.test(html)) return true;

  // Text-node style marker, e.g. ">1 in stock<". A zero quantity is not stock.
  for (const m of html.matchAll(/>\s*(\d+)\s+in\s+stock\s*</gi)) {
    if (Number(m[1]) > 0) return true;
  }

  return false;
}
|
||||||
|
|
||||||
|
/**
 * Extract the current price from one Gull product tile.
 *
 * Gull product tiles commonly contain two amounts:
 *   - the actual price (e.g. 24.05)
 *   - a deposit (e.g. 0.10) inside the "price suffix"
 *
 * Strategy:
 *   1. Ignore amounts inside <small class="woocommerce-price-suffix"> so a
 *      deposit is never mistaken for the price, even when it is >= 1.00.
 *   2. Of the remaining amounts >= 1.00, take the last one (when a sale is
 *      shown, Woo often renders old price then new price).
 *   3. If step 1 leaves nothing, fall back to scanning the whole tile.
 *
 * @param {*} block - HTML fragment for one product tile; nullish/falsy
 *   values are treated as an empty string.
 * @returns {string} price formatted as "$<amount with 2 decimals>", or ""
 *   when no amount >= 1.00 is found.
 */
function extractGullPriceFromBlock(block) {
  const html = String(block || "");

  // WooCommerce "Price amount" span; capture group 1 is the <bdi> contents.
  const amountRe =
    /<span\b[^>]*class=["'][^"']*\bwoocommerce-Price-amount\b[^"']*["'][^>]*>\s*<bdi\b[^>]*>([\s\S]*?)<\/bdi>/gi;

  // Price suffix wrapper (where the deposit lives).
  const suffixRe =
    /<small\b[^>]*class=["'][^"']*\bwoocommerce-price-suffix\b[^"']*["'][^>]*>[\s\S]*?<\/small>/gi;

  // Collect every amount >= 1.00 in document order.
  const pricesIn = (fragment) => {
    const found = [];
    for (const m of fragment.matchAll(amountRe)) {
      // presumably yields plain text such as "$24.05" — helpers live in ../utils/html
      const raw = cleanText(decodeHtml(m[1] || ""));
      const n = parseFloat(String(raw).replace(/[^0-9.]/g, ""));
      // Drop unparsable values and tiny fees (usually 0.10, 0.20, etc.).
      if (Number.isFinite(n) && n >= 1.0) found.push(n);
    }
    return found;
  };

  let candidates = pricesIn(html.replace(suffixRe, " "));
  // Fallback keeps the previous behaviour when nothing sits outside a suffix.
  if (!candidates.length) candidates = pricesIn(html);
  if (!candidates.length) return "";

  // Last candidate: the current price when old and new prices are both shown.
  const chosen = candidates[candidates.length - 1];

  // Normalize formatting.
  return `$${chosen.toFixed(2)}`;
}
|
||||||
|
|
||||||
function parseProductsGull(html, ctx) {
|
function parseProductsGull(html, ctx) {
|
||||||
const s = String(html || "");
|
const s = String(html || "");
|
||||||
const items = [];
|
const items = [];
|
||||||
|
|
||||||
// split on <li class="product ...">
|
// split on <li class="product ...">
|
||||||
const parts = s.split(/<li\b[^>]*class=["'][^"']*\bproduct\b[^"']*["'][^>]*>/i);
|
const parts = s.split(
|
||||||
|
/<li\b[^>]*class=["'][^"']*\bproduct\b[^"']*["'][^>]*>/i
|
||||||
|
);
|
||||||
if (parts.length <= 1) return items;
|
if (parts.length <= 1) return items;
|
||||||
|
|
||||||
const base = `https://${(ctx && ctx.store && ctx.store.host) || "gullliquorstore.com"}/`;
|
const base = `https://${(ctx && ctx.store && ctx.store.host) || "gullliquorstore.com"}/`;
|
||||||
|
|
@ -30,7 +62,9 @@ function parseProductsGull(html, ctx) {
|
||||||
|
|
||||||
if (!looksInStock(block)) continue;
|
if (!looksInStock(block)) continue;
|
||||||
|
|
||||||
const hrefM = block.match(/<a\b[^>]*href=["']([^"']+)["'][^>]*class=["'][^"']*\bwoocommerce-LoopProduct-link\b/i);
|
const hrefM = block.match(
|
||||||
|
/<a\b[^>]*href=["']([^"']+)["'][^>]*class=["'][^"']*\bwoocommerce-LoopProduct-link\b/i
|
||||||
|
);
|
||||||
if (!hrefM || !hrefM[1]) continue;
|
if (!hrefM || !hrefM[1]) continue;
|
||||||
|
|
||||||
let url;
|
let url;
|
||||||
|
|
@ -40,17 +74,18 @@ function parseProductsGull(html, ctx) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
const titleM = block.match(/<h2\b[^>]*class=["'][^"']*\bwoocommerce-loop-product__title\b[^"']*["'][^>]*>([\s\S]*?)<\/h2>/i);
|
const titleM = block.match(
|
||||||
|
/<h2\b[^>]*class=["'][^"']*\bwoocommerce-loop-product__title\b[^"']*["'][^>]*>([\s\S]*?)<\/h2>/i
|
||||||
|
);
|
||||||
const name = cleanText(decodeHtml(titleM ? titleM[1] : ""));
|
const name = cleanText(decodeHtml(titleM ? titleM[1] : ""));
|
||||||
if (!name) continue;
|
if (!name) continue;
|
||||||
|
|
||||||
// Price is in standard Woo <span class="price"> ... </span>
|
const price = extractGullPriceFromBlock(block);
|
||||||
const price = extractPriceFromTmbBlock(block) || "";
|
|
||||||
|
|
||||||
const sku = normalizeCspc(
|
const sku = normalizeCspc(
|
||||||
block.match(/\bdata-product_sku=["']([^"']+)["']/i)?.[1] ||
|
block.match(/\bdata-product_sku=["']([^"']+)["']/i)?.[1] ||
|
||||||
block.match(/\bSKU\b[^0-9]{0,20}(\d{6})\b/i)?.[1] ||
|
block.match(/\bSKU\b[^0-9]{0,20}(\d{6})\b/i)?.[1] ||
|
||||||
url
|
url
|
||||||
);
|
);
|
||||||
|
|
||||||
const img = extractFirstImgUrl(block, base);
|
const img = extractFirstImgUrl(block, base);
|
||||||
|
|
@ -63,6 +98,7 @@ function parseProductsGull(html, ctx) {
|
||||||
return [...uniq.values()];
|
return [...uniq.values()];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
function createStore(defaultUa) {
|
function createStore(defaultUa) {
|
||||||
return {
|
return {
|
||||||
key: "gull",
|
key: "gull",
|
||||||
|
|
@ -72,26 +108,26 @@ function createStore(defaultUa) {
|
||||||
parseProducts: parseProductsGull,
|
parseProducts: parseProductsGull,
|
||||||
makePageUrl, // enables /page/N/ paging
|
makePageUrl, // enables /page/N/ paging
|
||||||
categories: [
|
categories: [
|
||||||
{
|
{
|
||||||
key: "whisky",
|
key: "whisky",
|
||||||
label: "Whisky",
|
label: "Whisky",
|
||||||
startUrl: "https://gullliquorstore.com/product-category/spirits/?spirit_type=whisky",
|
startUrl: "https://gullliquorstore.com/product-category/spirits/?spirit_type=whisky",
|
||||||
discoveryStartPage: 3,
|
discoveryStartPage: 3,
|
||||||
discoveryStep: 2,
|
discoveryStep: 2,
|
||||||
pageConcurrency: 1,
|
pageConcurrency: 1,
|
||||||
pageStaggerMs: 10000,
|
pageStaggerMs: 10000,
|
||||||
discoveryDelayMs: 10000
|
discoveryDelayMs: 10000,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
key: "rum",
|
key: "rum",
|
||||||
label: "Rum",
|
label: "Rum",
|
||||||
startUrl: "https://gullliquorstore.com/product-category/spirits/?spirit_type=rum",
|
startUrl: "https://gullliquorstore.com/product-category/spirits/?spirit_type=rum",
|
||||||
discoveryStartPage: 3,
|
discoveryStartPage: 3,
|
||||||
discoveryStep: 2,
|
discoveryStep: 2,
|
||||||
pageConcurrency: 1,
|
pageConcurrency: 1,
|
||||||
pageStaggerMs: 10000,
|
pageStaggerMs: 10000,
|
||||||
discoveryDelayMs: 10000
|
discoveryDelayMs: 10000,
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue