From 6ccd93255608932d4cf9f3468cf7ff0b7b651fa4 Mon Sep 17 00:00:00 2001 From: "Brennan Wilkes (Text Groove)" Date: Wed, 21 Jan 2026 23:02:35 -0800 Subject: [PATCH] feat: Detailed cron --- scripts/cron_setup.sh | 21 ++++--- scripts/run_daily.sh | 6 +- src/main.js | 136 +++++++++++++++++++++++++++++++++++++----- 3 files changed, 141 insertions(+), 22 deletions(-) diff --git a/scripts/cron_setup.sh b/scripts/cron_setup.sh index 72a029c..f7006ba 100755 --- a/scripts/cron_setup.sh +++ b/scripts/cron_setup.sh @@ -16,16 +16,23 @@ fi mkdir -p "$REPO_ROOT/reports" -# Default: run 4 times/day (every 6 hours). Override via: -# CRON_SCHEDULE="15 */4 * * *" (example) -CRON_SCHEDULE="${CRON_SCHEDULE:-0 */6 * * *}" +# 8 runs/day total: +# - Big runs (all stores) at 00:00 and 12:00 +# - Small runs (2 stores) at 03:00, 06:00, 09:00, 15:00, 18:00, 21:00 +CRON_BIG="${CRON_SCHEDULE_BIG:-0 0,12 * * *}" +CRON_SMALL="${CRON_SCHEDULE_SMALL:-0 3,6,9,15,18,21 * * *}" + +# Comma-separated. Can be store keys OR names (main.js normalizes). +STORES_SMALL="${STORES_SMALL:-sierra_springs,craft_cellars}" # Use a stable marker so we can replace old lines (including the previous "daily" one). MARKER="# spirit-tracker" -CRON_LINE="$CRON_SCHEDULE NODE_BIN=$NODE_BIN MAIN_BRANCH=$MAIN_BRANCH DATA_BRANCH=$DATA_BRANCH bash \"$REPO_ROOT/scripts/run_daily.sh\" >> \"$REPO_ROOT/reports/cron.log\" 2>&1 $MARKER" +CRON_LINE_BIG="$CRON_BIG NODE_BIN=$NODE_BIN MAIN_BRANCH=$MAIN_BRANCH DATA_BRANCH=$DATA_BRANCH bash \"$REPO_ROOT/scripts/run_daily.sh\" >> \"$REPO_ROOT/reports/cron.log\" 2>&1 $MARKER big" +CRON_LINE_SMALL="$CRON_SMALL STORES=$STORES_SMALL NODE_BIN=$NODE_BIN MAIN_BRANCH=$MAIN_BRANCH DATA_BRANCH=$DATA_BRANCH bash \"$REPO_ROOT/scripts/run_daily.sh\" >> \"$REPO_ROOT/reports/cron.log\" 2>&1 $MARKER small" -# Install (idempotent): remove any previous line with the marker, then append. -{ crontab -l 2>/dev/null | grep -vF "$MARKER" || true; echo "$CRON_LINE"; } | crontab - +# Install (idempotent): remove any previous line with the marker, then append both. +{ crontab -l 2>/dev/null | grep -vF "$MARKER" || true; echo "$CRON_LINE_BIG"; echo "$CRON_LINE_SMALL"; } | crontab - echo "Installed cron job:" -echo "$CRON_LINE" +echo "$CRON_LINE_BIG" +echo "$CRON_LINE_SMALL" diff --git a/scripts/run_daily.sh b/scripts/run_daily.sh index fdcbdd5..a661bbe 100755 --- a/scripts/run_daily.sh +++ b/scripts/run_daily.sh @@ -57,7 +57,11 @@ if git show-ref --verify --quiet "refs/remotes/$REMOTE/$MAIN_BRANCH"; then fi # Run tracker (writes data/db + a plain report file in reports/) -"$NODE_BIN" bin/tracker.js +TRACKER_ARGS=() +if [[ -n "${STORES:-}" ]]; then + TRACKER_ARGS+=(--stores "${STORES}") +fi +"$NODE_BIN" bin/tracker.js "${TRACKER_ARGS[@]}" # Build viz artifacts on the data branch "$NODE_BIN" tools/build_viz_index.js diff --git a/src/main.js b/src/main.js index 029a3fc..02ce290 100644 --- a/src/main.js +++ b/src/main.js @@ -1,6 +1,7 @@ #!/usr/bin/env node "use strict"; +// NOTE: store filtering is implemented here without touching utils/args.js const fs = require("fs"); const path = require("path"); @@ -24,12 +25,88 @@ function resolveDir(p, fallback) { return path.isAbsolute(v) ? v : path.join(process.cwd(), v); } -async function main() { - if (typeof fetch !== "function") { - throw new Error("Global fetch() not found. Please use Node.js 18+ (or newer). "); +function getFlagValue(argv, flag) { + // Supports: + // --stores=a,b + // --stores a,b + const idx = argv.indexOf(flag); + if (idx >= 0) return argv[idx + 1] || ""; + const pref = `${flag}=`; + for (const a of argv) { + if (a.startsWith(pref)) return a.slice(pref.length); + } + return ""; +} + +function normToken(s) { + return String(s || "") + .toLowerCase() + .trim() + .replace(/[^a-z0-9]+/g, ""); +} + +function parseStoresFilter(raw) { + const v = String(raw || "").trim(); + if (!v) return []; + return v + .split(",") + .map((x) => x.trim()) + .filter(Boolean); +} + +function filterStoresOrThrow(stores, wantedListRaw) { + const wanted = parseStoresFilter(wantedListRaw); + if (!wanted.length) return stores; + + const wantedNorm = wanted.map(normToken).filter(Boolean); + + const matched = []; + const missing = []; + + for (let i = 0; i < wanted.length; i++) { + const w = wanted[i]; + const wn = wantedNorm[i]; + if (!wn) continue; + + // match against key/name/host (normalized) + const hit = stores.find((s) => { + const candidates = [s.key, s.name, s.host].map(normToken).filter(Boolean); + return candidates.includes(wn); + }); + + if (hit) matched.push(hit); + else missing.push(w); } - const args = parseArgs(process.argv.slice(2)); + if (missing.length) { + const avail = stores + .map((s) => `${s.key}${s.name ? ` (${s.name})` : ""}`) + .join(", "); + throw new Error( + `Unknown store(s) in --stores: ${missing.join(", ")}\nAvailable: ${avail}` + ); + } + + // de-dupe by key (in case name+key both matched) + const uniq = []; + const seen = new Set(); + for (const s of matched) { + if (seen.has(s.key)) continue; + seen.add(s.key); + uniq.push(s); + } + return uniq; +} + +async function main() { + if (typeof fetch !== "function") { + throw new Error( + "Global fetch() not found. Please use Node.js 18+ (or newer). " + ); + } + + const argv = process.argv.slice(2); + const args = parseArgs(argv); const logger = createLogger({ debug: args.debug, colorize: true }); @@ -37,31 +114,62 @@ async function main() { debug: args.debug, maxPages: args.maxPages, concurrency: args.concurrency ?? clampInt(process.env.CONCURRENCY, 6, 1, 64), - staggerMs: args.staggerMs ?? clampInt(process.env.STAGGER_MS, 150, 0, 5000), + staggerMs: + args.staggerMs ?? clampInt(process.env.STAGGER_MS, 150, 0, 5000), maxRetries: clampInt(process.env.MAX_RETRIES, 6, 0, 20), timeoutMs: clampInt(process.env.TIMEOUT_MS, 25000, 1000, 120000), - discoveryGuess: args.guess ?? clampInt(process.env.DISCOVERY_GUESS, 20, 1, 5000), - discoveryStep: args.step ?? clampInt(process.env.DISCOVERY_STEP, 5, 1, 500), + discoveryGuess: + args.guess ?? clampInt(process.env.DISCOVERY_GUESS, 20, 1, 5000), + discoveryStep: + args.step ?? clampInt(process.env.DISCOVERY_STEP, 5, 1, 500), categoryConcurrency: clampInt(process.env.CATEGORY_CONCURRENCY, 5, 1, 64), defaultUa: DEFAULT_UA, defaultParseProducts: parseProductsSierra, - dbDir: resolveDir(args.dataDir ?? process.env.DATA_DIR, path.join(process.cwd(), "data", "db")), - reportDir: resolveDir(args.reportDir ?? process.env.REPORT_DIR, path.join(process.cwd(), "reports")), + dbDir: resolveDir( + args.dataDir ?? process.env.DATA_DIR, + path.join(process.cwd(), "data", "db") + ), + reportDir: resolveDir( + args.reportDir ?? process.env.REPORT_DIR, + path.join(process.cwd(), "reports") + ), }; ensureDir(config.dbDir); ensureDir(config.reportDir); - const http = createHttpClient({ maxRetries: config.maxRetries, timeoutMs: config.timeoutMs, defaultUa: config.defaultUa, logger }); + const http = createHttpClient({ + maxRetries: config.maxRetries, + timeoutMs: config.timeoutMs, + defaultUa: config.defaultUa, + logger, + }); const stores = createStores({ defaultUa: config.defaultUa }); - const report = await runAllStores(stores, { config, logger, http }); + const storesFilterRaw = + getFlagValue(argv, "--stores") || String(process.env.STORES || "").trim(); - const reportTextColor = renderFinalReport(report, { dbDir: config.dbDir, colorize: logger.colorize }); + const storesToRun = filterStoresOrThrow(stores, storesFilterRaw); + if (storesFilterRaw) { + logger.info(`Stores filter: ${storesToRun.map((s) => s.key).join(", ")}`); + } + + const report = await runAllStores(storesToRun, { config, logger, http }); + + const reportTextColor = renderFinalReport(report, { + dbDir: config.dbDir, + colorize: logger.colorize, + }); process.stdout.write(reportTextColor); - const reportTextPlain = renderFinalReport(report, { dbDir: config.dbDir, colorize: false }); - const file = path.join(config.reportDir, `${isoTimestampFileSafe(new Date())}.txt`); + const reportTextPlain = renderFinalReport(report, { + dbDir: config.dbDir, + colorize: false, + }); + const file = path.join( + config.reportDir, + `${isoTimestampFileSafe(new Date())}.txt` + ); try { fs.writeFileSync(file, reportTextPlain, "utf8"); logger.ok(`Report saved: ${logger.dim(file)}`);