mirror of
https://github.com/samsonjs/spirit-tracker.git
synced 2026-03-25 09:25:51 +00:00
feat: Slow down
This commit is contained in:
parent
7ff1bde8bd
commit
d53b68c3a3
2 changed files with 133 additions and 47 deletions
|
|
@ -1,3 +1,4 @@
|
|||
// src/core/http.js
|
||||
"use strict";
|
||||
|
||||
const { setTimeout: sleep } = require("timers/promises");
|
||||
|
|
@ -134,29 +135,70 @@ function createHttpClient({ maxRetries, timeoutMs, defaultUa, logger }) {
|
|||
|
||||
// Shared pacing state for the HTTP client.

// host -> epoch ms when next request is allowed
const hostNextOkAt = new Map();

// Conservative pacing defaults (slow > blocked):
// minimum gap, in ms, between two requests to the same host.
const minHostIntervalMs = 2500;

// Per-host inflight clamp (prevents bursts when global concurrency is high)
// host -> number of requests currently holding a slot for that host
const hostInflight = new Map();
// Upper bound on simultaneous requests per host.
const maxHostInflight = 1;
|
||||
|
||||
/**
 * Format the current in-flight request counter for debug log lines.
 *
 * Reads the enclosing scope's `inflight` counter at call time.
 *
 * @returns {string} Text of the form `inflight=<n>`.
 */
function inflightStr() {
  return `inflight=${inflight}`;
}
|
||||
|
||||
/**
 * Wait until the URL's host has a free in-flight slot, then claim it.
 *
 * Slots are counted in `hostInflight` and capped at `maxHostInflight`.
 * While the host is at capacity this polls every 50 ms.
 *
 * @param {string} url - Request URL; its host is derived via `hostFromUrl`.
 * @returns {Promise<() => void>} Release callback that gives the slot back.
 *   It is a no-op when no host could be derived, and calling it more than
 *   once has no further effect.
 */
async function acquireHost(url) {
  const host = hostFromUrl(url);
  if (!host) return () => {};

  // Poll until the host has spare capacity.
  while ((hostInflight.get(host) || 0) >= maxHostInflight) {
    await sleep(50);
  }

  // No await between the capacity check above and this increment, so two
  // waiters cannot both claim the same slot.
  hostInflight.set(host, (hostInflight.get(host) || 0) + 1);

  let released = false;
  return () => {
    // Guard against double release: a second call must not free a slot
    // that another request has since acquired.
    if (released) return;
    released = true;

    const remaining = (hostInflight.get(host) || 1) - 1;
    if (remaining <= 0) hostInflight.delete(host);
    else hostInflight.set(host, remaining);
  };
}
|
||||
|
||||
// ✅ Pre-pacing reservation: reserve the next slot BEFORE the fetch is sent
|
||||
/**
 * Block until the URL's host may be contacted again, then reserve the
 * following slot before returning.
 *
 * The check is repeated after every sleep rather than performed once:
 * another caller (or `noteHost`) may have pushed the host's next-allowed
 * time further out while this caller was waiting.
 *
 * @param {string} url - Request URL; its host is derived via `hostFromUrl`.
 * @returns {Promise<void>} Resolves once the slot is reserved, or
 *   immediately when no host could be derived.
 */
async function throttleHost(url) {
  const host = hostFromUrl(url);
  if (!host) return;

  while (true) {
    const now = Date.now();
    const next = hostNextOkAt.get(host) || 0;
    const wait = next - now;

    if (wait > 0) {
      logger?.dbg?.(`THROTTLE host=${host} wait=${wait}ms`);
      await sleep(wait);
      continue;
    }

    // Reserve immediately to prevent concurrent pass-through
    hostNextOkAt.set(host, now + minHostIntervalMs);
    return;
  }
}
|
||||
|
||||
/**
 * Push out the earliest time the URL's host may be contacted again.
 *
 * The new target is `now + minHostIntervalMs + extraDelayMs`. An existing
 * later cooldown is kept, so a call can extend the wait but never shorten it.
 *
 * @param {string} url - URL whose host should be paced (via `hostFromUrl`).
 * @param {number} [extraDelayMs=0] - Additional cooldown in ms. Negative or
 *   non-finite values are treated as 0.
 * @returns {void}
 */
function noteHost(url, extraDelayMs = 0) {
  const host = hostFromUrl(url);
  if (!host) return;

  const now = Date.now();
  const current = hostNextOkAt.get(host) || 0;

  // A NaN delay would propagate into the stored timestamp and make every
  // later "wait > 0" comparison false, silently disabling pacing.
  const extra = Number.isFinite(extraDelayMs) ? Math.max(0, extraDelayMs) : 0;

  // Extend (never shorten) any existing cooldown
  const target = now + minHostIntervalMs + extra;
  hostNextOkAt.set(host, Math.max(current, target));

  logger?.dbg?.(`HOST-PACE host=${host} nextOkIn=${Math.max(0, (hostNextOkAt.get(host) || 0) - Date.now())}ms`);
}
|
||||
|
||||
async function fetchWithRetry(
|
||||
|
|
@ -170,9 +212,9 @@ function createHttpClient({ maxRetries, timeoutMs, defaultUa, logger }) {
|
|||
const start = Date.now();
|
||||
|
||||
inflight++;
|
||||
logger?.dbg?.(
|
||||
`REQ#${reqId} START ${tag} attempt=${attempt + 1}/${maxRetries + 1} ${url} (${inflightStr()})`
|
||||
);
|
||||
logger?.dbg?.(`REQ#${reqId} START ${tag} attempt=${attempt + 1}/${maxRetries + 1} ${url} (${inflightStr()})`);
|
||||
|
||||
const releaseHost = await acquireHost(url);
|
||||
|
||||
try {
|
||||
await throttleHost(url);
|
||||
|
|
@ -181,11 +223,7 @@ function createHttpClient({ maxRetries, timeoutMs, defaultUa, logger }) {
|
|||
const t = setTimeoutCb(() => ctrl.abort(), timeoutMs);
|
||||
|
||||
const cookieHdr =
|
||||
cookies &&
|
||||
!("Cookie" in headers) &&
|
||||
!("cookie" in headers)
|
||||
? cookieJar.cookieHeaderFor(url)
|
||||
: "";
|
||||
cookies && !("Cookie" in headers) && !("cookie" in headers) ? cookieJar.cookieHeaderFor(url) : "";
|
||||
|
||||
const res = await fetch(url, {
|
||||
method,
|
||||
|
|
@ -207,20 +245,20 @@ function createHttpClient({ maxRetries, timeoutMs, defaultUa, logger }) {
|
|||
const finalUrl = res.url || url;
|
||||
const elapsed = Date.now() - start;
|
||||
|
||||
// Always pace the host a bit after any response
|
||||
noteHost(finalUrl);
|
||||
if (cookies) cookieJar.storeFromResponse(url, res);
|
||||
|
||||
logger?.dbg?.(
|
||||
`REQ#${reqId} HTTP ${status} ${tag} ms=${elapsed} finalUrl=${finalUrl}`
|
||||
);
|
||||
logger?.dbg?.(`REQ#${reqId} HTTP ${status} ${tag} ms=${elapsed} finalUrl=${finalUrl}`);
|
||||
|
||||
if (status === 429) {
|
||||
const raMs = retryAfterMs(res);
|
||||
if (raMs > 0) noteHost(finalUrl, raMs);
|
||||
let raMs = retryAfterMs(res);
|
||||
|
||||
logger?.dbg?.(
|
||||
`REQ#${reqId} 429 retryAfterMs=${raMs} host=${hostFromUrl(finalUrl)}`
|
||||
);
|
||||
// ✅ If no Retry-After header, enforce a real cooldown (Shopify often omits it)
|
||||
if (raMs <= 0) raMs = 15000 + Math.floor(Math.random() * 5000);
|
||||
|
||||
noteHost(finalUrl, raMs);
|
||||
logger?.dbg?.(`REQ#${reqId} 429 retryAfterMs=${raMs} host=${hostFromUrl(finalUrl)}`);
|
||||
throw new RetryableError("HTTP 429");
|
||||
}
|
||||
|
||||
|
|
@ -231,9 +269,7 @@ function createHttpClient({ maxRetries, timeoutMs, defaultUa, logger }) {
|
|||
if (status >= 400) {
|
||||
const bodyTxt = await safeText(res);
|
||||
throw new Error(
|
||||
`HTTP ${status} bodyHead=${String(bodyTxt)
|
||||
.slice(0, 160)
|
||||
.replace(/\s+/g, " ")}`
|
||||
`HTTP ${status} bodyHead=${String(bodyTxt).slice(0, 160).replace(/\s+/g, " ")}`
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -274,6 +310,7 @@ function createHttpClient({ maxRetries, timeoutMs, defaultUa, logger }) {
|
|||
logger?.warn?.(`Request failed, retrying in ${delay}ms (${attempt + 1}/${maxRetries})`);
|
||||
await sleep(delay);
|
||||
} finally {
|
||||
releaseHost();
|
||||
inflight--;
|
||||
logger?.dbg?.(`REQ#${reqId} END ${tag} (${inflightStr()})`);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,8 @@
|
|||
// src/stores/craftcellars.js
|
||||
"use strict";
|
||||
|
||||
const { setTimeout: sleep } = require("timers/promises");
|
||||
|
||||
const { decodeHtml, stripTags, extractFirstImgUrl } = require("../utils/html");
|
||||
const { sanitizeName } = require("../utils/text");
|
||||
const { normalizeCspc } = require("../utils/sku");
|
||||
|
|
@ -29,7 +32,8 @@ function canonicalizeCraftProductUrl(raw) {
|
|||
|
||||
function extractShopifyCardPrice(block) {
|
||||
const b = String(block || "");
|
||||
const dollars = (txt) => [...String(txt).matchAll(/\$\s*[\d,]+(?:\.\d{2})?/g)].map((m) => m[0].replace(/\s+/g, ""));
|
||||
const dollars = (txt) =>
|
||||
[...String(txt).matchAll(/\$\s*[\d,]+(?:\.\d{2})?/g)].map((m) => m[0].replace(/\s+/g, ""));
|
||||
|
||||
const saleRegion = b.split(/sale price/i)[1] || "";
|
||||
const saleD = dollars(saleRegion);
|
||||
|
|
@ -83,12 +87,8 @@ function parseProductsCraftCellarsInner(html, ctx) {
|
|||
url = canonicalizeCraftProductUrl(url);
|
||||
|
||||
const nameHtml =
|
||||
block.match(
|
||||
/<a\b[^>]*href=["'][^"']*\/products\/[^"']+["'][^>]*>\s*<[^>]*>\s*([^<]{2,200}?)\s*</i
|
||||
)?.[1] ||
|
||||
block.match(
|
||||
/<h[23]\b[^>]*>[\s\S]*?<a\b[^>]*\/products\/[^"']+[^>]*>([\s\S]*?)<\/a>[\s\S]*?<\/h[23]>/i
|
||||
)?.[1] ||
|
||||
block.match(/<a\b[^>]*href=["'][^"']*\/products\/[^"']+["'][^>]*>\s*<[^>]*>\s*([^<]{2,200}?)\s*</i)?.[1] ||
|
||||
block.match(/<h[23]\b[^>]*>[\s\S]*?<a\b[^>]*\/products\/[^"']+[^>]*>([\s\S]*?)<\/a>[\s\S]*?<\/h[23]>/i)?.[1] ||
|
||||
block.match(/<a\b[^>]*href=["'][^"']*\/products\/[^"']+["'][^>]*>([\s\S]*?)<\/a>/i)?.[1];
|
||||
|
||||
const name = sanitizeName(stripTags(decodeHtml(nameHtml || "")));
|
||||
|
|
@ -105,13 +105,16 @@ function parseProductsCraftCellarsInner(html, ctx) {
|
|||
return [...uniq.values()];
|
||||
}
|
||||
|
||||
|
||||
/**
 * Format a price value as a `$`-prefixed string with thousands separators
 * and exactly two decimals (en-US), e.g. "1299.5" -> "$1,299.50".
 *
 * Every character other than digits and "." is stripped before parsing.
 *
 * @param {*} s - Raw price value (string or number).
 * @returns {string} Formatted price, or "" when the input holds no parsable
 *   number (null/undefined, empty, no digits, or malformed like "1.2.3").
 */
function usdFromShopifyPriceStr(s) {
  const digits = String(s ?? "").replace(/[^0-9.]/g, "");

  // Number("") is 0, so digit-free input must be rejected explicitly;
  // otherwise a missing price would be reported as "$0.00".
  if (digits === "") return "";

  const n = Number(digits);
  if (!Number.isFinite(n)) return "";

  return `$${n.toLocaleString("en-US", { minimumFractionDigits: 2, maximumFractionDigits: 2 })}`;
}
|
||||
|
||||
/**
 * Pick a numeric config value, falling back when it is not a finite number.
 *
 * Uses `Number.isFinite`, which does not coerce: numeric strings, `null`,
 * `undefined`, `NaN` and `±Infinity` all select the fallback.
 *
 * @param {*} v - Candidate value.
 * @param {*} fallback - Returned unchanged when `v` is not a finite number.
 * @returns {*} `v` when it is a finite number, otherwise `fallback`.
 */
function cfgNum(v, fallback) {
  return Number.isFinite(v) ? v : fallback;
}
|
||||
|
||||
/**
|
||||
* Craft Cellars:
|
||||
* - HTML listing with ?filter.v.availability=1 is the allowlist (prevents OOS leaking in)
|
||||
|
|
@ -120,6 +123,18 @@ function usdFromShopifyPriceStr(s) {
|
|||
async function scanCategoryCraftCellars(ctx, prevDb, report) {
|
||||
const t0 = Date.now();
|
||||
|
||||
// Strongly prefer "slow and steady" to avoid 429s.
|
||||
// Use per-category knobs if present; otherwise default conservative.
|
||||
const perPageDelayMs = Math.max(
|
||||
0,
|
||||
cfgNum(ctx?.cat?.pageStaggerMs, cfgNum(ctx?.cat?.discoveryDelayMs, 0)) || 0
|
||||
) || 0;
|
||||
|
||||
const perJsonPageDelayMs = Math.max(
|
||||
0,
|
||||
cfgNum(ctx?.cat?.jsonPageDelayMs, perPageDelayMs)
|
||||
);
|
||||
|
||||
// 1) HTML scan: allowlist of in-stock listing URLs
|
||||
const htmlMap = new Map(); // url -> {name, price, url, img}
|
||||
|
||||
|
|
@ -128,6 +143,8 @@ async function scanCategoryCraftCellars(ctx, prevDb, report) {
|
|||
let emptyStreak = 0;
|
||||
|
||||
for (let p = 1; p <= maxPages; p++) {
|
||||
if (p > 1 && perPageDelayMs > 0) await sleep(perPageDelayMs);
|
||||
|
||||
const pageUrl = makePageUrlShopifyQueryPage(ctx.cat.startUrl, p);
|
||||
const { text: html } = await ctx.http.fetchTextWithRetry(pageUrl, `craft:html:${ctx.cat.key}:p${p}`, ctx.store.ua);
|
||||
htmlPagesFetched++;
|
||||
|
|
@ -151,9 +168,7 @@ async function scanCategoryCraftCellars(ctx, prevDb, report) {
|
|||
|
||||
// If HTML returns nothing, don't let JSON invent a category
|
||||
if (!htmlMap.size) {
|
||||
ctx.logger.warn(
|
||||
`${ctx.catPrefixOut} | HTML listing returned 0 items; refusing to use products.json as source of truth.`
|
||||
);
|
||||
ctx.logger.warn(`${ctx.catPrefixOut} | HTML listing returned 0 items; refusing to use products.json as source of truth.`);
|
||||
}
|
||||
|
||||
// 2) JSON scan: build SKU index (but do NOT add new URLs from JSON)
|
||||
|
|
@ -170,6 +185,8 @@ async function scanCategoryCraftCellars(ctx, prevDb, report) {
|
|||
let jsonPagesFetched = 0;
|
||||
|
||||
while (true) {
|
||||
if (jsonPage > 1 && perJsonPageDelayMs > 0) await sleep(perJsonPageDelayMs);
|
||||
|
||||
const url = `https://${ctx.store.host}/collections/${collectionHandle}/products.json?limit=${limit}&page=${jsonPage}`;
|
||||
const r = await ctx.http.fetchJsonWithRetry(url, `craft:coljson:${ctx.cat.key}:p${jsonPage}`, ctx.store.ua);
|
||||
|
||||
|
|
@ -240,7 +257,9 @@ async function scanCategoryCraftCellars(ctx, prevDb, report) {
|
|||
|
||||
ctx.logger.ok(`${ctx.catPrefixOut} | Unique products (this run): ${discovered.size}`);
|
||||
|
||||
const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, { storeLabel: ctx.store.name });
|
||||
const { merged, newItems, updatedItems, removedItems, restoredItems } = mergeDiscoveredIntoDb(prevDb, discovered, {
|
||||
storeLabel: ctx.store.name,
|
||||
});
|
||||
|
||||
const dbObj = buildDbObject(ctx, merged);
|
||||
writeJsonAtomic(ctx.dbFile, dbObj);
|
||||
|
|
@ -271,7 +290,6 @@ async function scanCategoryCraftCellars(ctx, prevDb, report) {
|
|||
addCategoryResultToReport(report, ctx.store.name, ctx.cat.label, newItems, updatedItems, removedItems, restoredItems);
|
||||
}
|
||||
|
||||
|
||||
function createStore(defaultUa) {
|
||||
return {
|
||||
key: "craftcellars",
|
||||
|
|
@ -292,37 +310,68 @@ function createStore(defaultUa) {
|
|||
key: "whisky",
|
||||
label: "Whisky",
|
||||
startUrl: "https://craftcellars.ca/collections/whisky?filter.v.availability=1",
|
||||
discoveryStartPage: 10,
|
||||
|
||||
// slow-and-safe defaults (override globally if you want)
|
||||
discoveryStartPage: 3,
|
||||
discoveryStep: 2,
|
||||
pageConcurrency: 1,
|
||||
pageStaggerMs: 10000,
|
||||
discoveryDelayMs: 10000,
|
||||
},
|
||||
{
|
||||
key: "rum",
|
||||
label: "Rum",
|
||||
startUrl: "https://craftcellars.ca/collections/rum?filter.v.availability=1",
|
||||
discoveryStartPage: 5,
|
||||
|
||||
discoveryStartPage: 3,
|
||||
discoveryStep: 2,
|
||||
pageConcurrency: 1,
|
||||
pageStaggerMs: 10000,
|
||||
discoveryDelayMs: 10000,
|
||||
},
|
||||
{
|
||||
key: "single-malt-scotch",
|
||||
label: "Single Malt Scotch",
|
||||
startUrl: "https://craftcellars.ca/collections/single-malt-scotch?filter.v.availability=1",
|
||||
discoveryStartPage: 1,
|
||||
|
||||
discoveryStartPage: 3,
|
||||
discoveryStep: 2,
|
||||
pageConcurrency: 1,
|
||||
pageStaggerMs: 10000,
|
||||
discoveryDelayMs: 10000,
|
||||
},
|
||||
{
|
||||
key: "other-scotch-styles",
|
||||
label: "Other Scotch Styles",
|
||||
startUrl: "https://craftcellars.ca/collections/other-scotch-styles?filter.v.availability=1",
|
||||
discoveryStartPage: 1,
|
||||
|
||||
discoveryStartPage: 3,
|
||||
discoveryStep: 2,
|
||||
pageConcurrency: 1,
|
||||
pageStaggerMs: 10000,
|
||||
discoveryDelayMs: 10000,
|
||||
},
|
||||
{
|
||||
key: "single-grain-scotch",
|
||||
label: "Single Grain Scotch",
|
||||
startUrl: "https://craftcellars.ca/collections/single-grain-scotch?filter.v.availability=1",
|
||||
discoveryStartPage: 1,
|
||||
|
||||
discoveryStartPage: 3,
|
||||
discoveryStep: 2,
|
||||
pageConcurrency: 1,
|
||||
pageStaggerMs: 10000,
|
||||
discoveryDelayMs: 10000,
|
||||
},
|
||||
{
|
||||
key: "blended-malt-scotch",
|
||||
label: "Blended Malt Scotch",
|
||||
startUrl: "https://craftcellars.ca/collections/blended-malt-scotch?filter.v.availability=1",
|
||||
discoveryStartPage: 10,
|
||||
|
||||
discoveryStartPage: 3,
|
||||
discoveryStep: 2,
|
||||
pageConcurrency: 1,
|
||||
pageStaggerMs: 10000,
|
||||
discoveryDelayMs: 10000,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
|
|
|||
Loading…
Reference in a new issue