From eca7a967333fb2c321ddeae494f0459d0ae61ff7 Mon Sep 17 00:00:00 2001 From: "Brennan Wilkes (Text Groove)" Date: Thu, 29 Jan 2026 10:06:59 -0800 Subject: [PATCH] feat: Better retry logic --- src/core/http.js | 130 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 97 insertions(+), 33 deletions(-) diff --git a/src/core/http.js b/src/core/http.js index b55dc83..01b2764 100644 --- a/src/core/http.js +++ b/src/core/http.js @@ -1,6 +1,9 @@ "use strict"; const { setTimeout: sleep } = require("timers/promises"); +const { setTimeout: setTimeoutCb, clearTimeout } = require("timers"); + +/* ---------------- Errors ---------------- */ class RetryableError extends Error { constructor(msg) { @@ -17,12 +20,29 @@ function isRetryable(e) { return /ECONNRESET|ENOTFOUND|EAI_AGAIN|ETIMEDOUT|socket hang up|fetch failed/i.test(msg); } +/* ---------------- Backoff ---------------- */ + function backoffMs(attempt) { const base = Math.min(12000, 500 * Math.pow(2, attempt)); const jitter = Math.floor(Math.random() * 400); return base + jitter; } +function retryAfterMs(res) { + const ra = res?.headers?.get ? res.headers.get("retry-after") : null; + if (!ra) return 0; + + const secs = Number(String(ra).trim()); + if (Number.isFinite(secs)) return Math.max(0, secs * 1000); + + const dt = Date.parse(String(ra)); + if (Number.isFinite(dt)) return Math.max(0, dt - Date.now()); + + return 0; +} + +/* ---------------- Utils ---------------- */ + async function safeText(res) { try { return await res.text(); @@ -31,22 +51,21 @@ async function safeText(res) { } } +function hostFromUrl(u) { + try { + return new URL(u).host || ""; + } catch { + return ""; + } +} + /* ---------------- Cookies (simple jar) ---------------- */ // host -> Map(cookieName -> "name=value") function createCookieJar() { const jar = new Map(); - function getHost(u) { - try { - return new URL(u).hostname || ""; - } catch { - return ""; - } - } - function parseSetCookieLine(line) { - // "name=value; Path=/; Secure; HttpOnly; ..." const s = String(line || "").trim(); if (!s) return null; const first = s.split(";")[0] || ""; @@ -59,22 +78,16 @@ function createCookieJar() { } function getSetCookieArray(headers) { - // Node/undici may support headers.getSetCookie() if (headers && typeof headers.getSetCookie === "function") { try { const arr = headers.getSetCookie(); return Array.isArray(arr) ? arr : []; - } catch { - // fall through - } + } catch {} } - // Fallback: single combined header (may lose multiples, but better than nothing) const one = headers?.get ? headers.get("set-cookie") : null; if (!one) return []; - // Best-effort split. This is imperfect with Expires=... commas, but OK for most WP cookies. - // If this causes issues later, we can replace with a more robust splitter. return String(one) .split(/,(?=[^;,]*=)/g) .map((x) => x.trim()) @@ -82,7 +95,7 @@ function createCookieJar() { } function storeFromResponse(url, res) { - const host = getHost(res?.url || url); + const host = hostFromUrl(res?.url || url); if (!host) return; const lines = getSetCookieArray(res?.headers); @@ -96,13 +109,12 @@ function createCookieJar() { for (const line of lines) { const c = parseSetCookieLine(line); - if (!c) continue; - m.set(c.name, c.pair); + if (c) m.set(c.name, c.pair); } } function cookieHeaderFor(url) { - const host = getHost(url); + const host = hostFromUrl(url); if (!host) return ""; const m = jar.get(host); if (!m || m.size === 0) return ""; @@ -120,10 +132,33 @@ function createHttpClient({ maxRetries, timeoutMs, defaultUa, logger }) { const cookieJar = createCookieJar(); + // host -> epoch ms when next request is allowed + const hostNextOkAt = new Map(); + const minHostIntervalMs = 900; + function inflightStr() { return `inflight=${inflight}`; } + async function throttleHost(url) { + const host = hostFromUrl(url); + if (!host) return; + const now = Date.now(); + const next = hostNextOkAt.get(host) || 0; + if (next > now) { + logger?.dbg?.(`THROTTLE host=${host} wait=${next - now}ms`); + await sleep(next - now); + } + } + + function noteHost(url, extraDelayMs = 0) { + const host = hostFromUrl(url); + if (!host) return; + const until = Date.now() + minHostIntervalMs + extraDelayMs; + hostNextOkAt.set(host, until); + logger?.dbg?.(`HOST-PACE host=${host} nextOkIn=${until - Date.now()}ms`); + } + async function fetchWithRetry( url, tag, @@ -140,11 +175,15 @@ function createHttpClient({ maxRetries, timeoutMs, defaultUa, logger }) { ); try { + await throttleHost(url); + const ctrl = new AbortController(); - const t = setTimeout(() => ctrl.abort(), timeoutMs); + const t = setTimeoutCb(() => ctrl.abort(), timeoutMs); const cookieHdr = - cookies && !Object.prototype.hasOwnProperty.call(headers, "Cookie") && !Object.prototype.hasOwnProperty.call(headers, "cookie") + cookies && + !("Cookie" in headers) && + !("cookie" in headers) ? cookieJar.cookieHeaderFor(url) : ""; @@ -166,48 +205,72 @@ function createHttpClient({ maxRetries, timeoutMs, defaultUa, logger }) { const status = res.status; const finalUrl = res.url || url; + const elapsed = Date.now() - start; - // capture cookies for subsequent requests to same host + noteHost(finalUrl); if (cookies) cookieJar.storeFromResponse(url, res); - logger?.dbg?.(`REQ#${reqId} HTTP ${status} ${tag} finalUrl=${finalUrl}`); + logger?.dbg?.( + `REQ#${reqId} HTTP ${status} ${tag} ms=${elapsed} finalUrl=${finalUrl}` + ); - if (status === 429 || status === 408 || (status >= 500 && status <= 599)) { + if (status === 429) { + const raMs = retryAfterMs(res); + if (raMs > 0) noteHost(finalUrl, raMs); + + logger?.dbg?.( + `REQ#${reqId} 429 retryAfterMs=${raMs} host=${hostFromUrl(finalUrl)}` + ); + throw new RetryableError("HTTP 429"); + } + + if (status === 408 || (status >= 500 && status <= 599)) { throw new RetryableError(`HTTP ${status}`); } + if (status >= 400) { const bodyTxt = await safeText(res); throw new Error( - `HTTP ${status} bodyHead=${String(bodyTxt).slice(0, 160).replace(/\s+/g, " ")}` + `HTTP ${status} bodyHead=${String(bodyTxt) + .slice(0, 160) + .replace(/\s+/g, " ")}` ); } if (mode === "json") { const txt = await res.text(); - const ms = Date.now() - start; let json; try { json = JSON.parse(txt); } catch (e) { throw new RetryableError(`Bad JSON: ${e?.message || e}`); } - return { json, ms, bytes: txt.length, status, finalUrl }; + return { json, ms: elapsed, bytes: txt.length, status, finalUrl }; } const text = await res.text(); - if (!text || text.length < 200) throw new RetryableError(`Short HTML bytes=${text.length}`); + if (!text || text.length < 200) { + throw new RetryableError(`Short HTML bytes=${text.length}`); + } - const ms = Date.now() - start; - return { text, ms, bytes: text.length, status, finalUrl }; + return { text, ms: elapsed, bytes: text.length, status, finalUrl }; } catch (e) { const retryable = isRetryable(e); + const host = hostFromUrl(url); + const nextOk = hostNextOkAt.get(host) || 0; + logger?.dbg?.( - `REQ#${reqId} ERROR ${tag} retryable=${retryable} err=${e?.message || e} (${inflightStr()})` + `REQ#${reqId} FAIL ${tag} retryable=${retryable} err=${e?.message || e} host=${host} nextOkIn=${Math.max( + 0, + nextOk - Date.now() + )}ms` ); if (!retryable || attempt === maxRetries) throw e; - const delay = backoffMs(attempt); + let delay = backoffMs(attempt); + if (nextOk > Date.now()) delay = Math.max(delay, nextOk - Date.now()); + logger?.warn?.(`Request failed, retrying in ${delay}ms (${attempt + 1}/${maxRetries})`); await sleep(delay); } finally { @@ -215,6 +278,7 @@ function createHttpClient({ maxRetries, timeoutMs, defaultUa, logger }) { logger?.dbg?.(`REQ#${reqId} END ${tag} (${inflightStr()})`); } } + throw new Error("unreachable"); }