feat: Detailed cron

Brennan Wilkes (Text Groove) 2026-01-21 23:02:35 -08:00
parent fcfabcb615
commit 6ccd932556
3 changed files with 141 additions and 22 deletions

View file

@@ -16,16 +16,23 @@ fi
 mkdir -p "$REPO_ROOT/reports"
 
-# Default: run 4 times/day (every 6 hours). Override via:
-#   CRON_SCHEDULE="15 */4 * * *" (example)
-CRON_SCHEDULE="${CRON_SCHEDULE:-0 */6 * * *}"
+# 8 runs/day total:
+# - Big runs (all stores) at 00:00 and 12:00
+# - Small runs (2 stores) at 03:00, 06:00, 09:00, 15:00, 18:00, 21:00
+CRON_BIG="${CRON_SCHEDULE_BIG:-0 0,12 * * *}"
+CRON_SMALL="${CRON_SCHEDULE_SMALL:-0 3,6,9,15,18,21 * * *}"
+
+# Comma-separated. Can be store keys OR names (main.js normalizes).
+STORES_SMALL="${STORES_SMALL:-sierra_springs,craft_cellars}"
 
 # Use a stable marker so we can replace old lines (including the previous "daily" one).
 MARKER="# spirit-tracker"
-CRON_LINE="$CRON_SCHEDULE NODE_BIN=$NODE_BIN MAIN_BRANCH=$MAIN_BRANCH DATA_BRANCH=$DATA_BRANCH bash \"$REPO_ROOT/scripts/run_daily.sh\" >> \"$REPO_ROOT/reports/cron.log\" 2>&1 $MARKER"
+CRON_LINE_BIG="$CRON_BIG NODE_BIN=$NODE_BIN MAIN_BRANCH=$MAIN_BRANCH DATA_BRANCH=$DATA_BRANCH bash \"$REPO_ROOT/scripts/run_daily.sh\" >> \"$REPO_ROOT/reports/cron.log\" 2>&1 $MARKER big"
+CRON_LINE_SMALL="$CRON_SMALL STORES=$STORES_SMALL NODE_BIN=$NODE_BIN MAIN_BRANCH=$MAIN_BRANCH DATA_BRANCH=$DATA_BRANCH bash \"$REPO_ROOT/scripts/run_daily.sh\" >> \"$REPO_ROOT/reports/cron.log\" 2>&1 $MARKER small"
 
-# Install (idempotent): remove any previous line with the marker, then append.
-{ crontab -l 2>/dev/null | grep -vF "$MARKER" || true; echo "$CRON_LINE"; } | crontab -
+# Install (idempotent): remove any previous line with the marker, then append both.
+{ crontab -l 2>/dev/null | grep -vF "$MARKER" || true; echo "$CRON_LINE_BIG"; echo "$CRON_LINE_SMALL"; } | crontab -
 
 echo "Installed cron job:"
-echo "$CRON_LINE"
+echo "$CRON_LINE_BIG"
+echo "$CRON_LINE_SMALL"

View file

@@ -57,7 +57,11 @@ if git show-ref --verify --quiet "refs/remotes/$REMOTE/$MAIN_BRANCH"; then
 fi
 
 # Run tracker (writes data/db + a plain report file in reports/)
-"$NODE_BIN" bin/tracker.js
+TRACKER_ARGS=()
+if [[ -n "${STORES:-}" ]]; then
+  TRACKER_ARGS+=(--stores "${STORES}")
+fi
+"$NODE_BIN" bin/tracker.js "${TRACKER_ARGS[@]}"
 
 # Build viz artifacts on the data branch
 "$NODE_BIN" tools/build_viz_index.js

View file

@@ -1,6 +1,7 @@
 #!/usr/bin/env node
 "use strict";
+// NOTE: store filtering is implemented here without touching utils/args.js
 
 const fs = require("fs");
 const path = require("path");
@@ -24,12 +25,88 @@ function resolveDir(p, fallback) {
   return path.isAbsolute(v) ? v : path.join(process.cwd(), v);
 }
 
-async function main() {
-  if (typeof fetch !== "function") {
-    throw new Error("Global fetch() not found. Please use Node.js 18+ (or newer). ");
+function getFlagValue(argv, flag) {
+  // Supports:
+  //   --stores=a,b
+  //   --stores a,b
+  const idx = argv.indexOf(flag);
+  if (idx >= 0) return argv[idx + 1] || "";
+  const pref = `${flag}=`;
+  for (const a of argv) {
+    if (a.startsWith(pref)) return a.slice(pref.length);
+  }
+  return "";
+}
+
+function normToken(s) {
+  return String(s || "")
+    .toLowerCase()
+    .trim()
+    .replace(/[^a-z0-9]+/g, "");
+}
+
+function parseStoresFilter(raw) {
+  const v = String(raw || "").trim();
+  if (!v) return [];
+  return v
+    .split(",")
+    .map((x) => x.trim())
+    .filter(Boolean);
+}
+
+function filterStoresOrThrow(stores, wantedListRaw) {
+  const wanted = parseStoresFilter(wantedListRaw);
+  if (!wanted.length) return stores;
+
+  const wantedNorm = wanted.map(normToken).filter(Boolean);
+  const matched = [];
+  const missing = [];
+
+  for (let i = 0; i < wanted.length; i++) {
+    const w = wanted[i];
+    const wn = wantedNorm[i];
+    if (!wn) continue;
+    // match against key/name/host (normalized)
+    const hit = stores.find((s) => {
+      const candidates = [s.key, s.name, s.host].map(normToken).filter(Boolean);
+      return candidates.includes(wn);
+    });
+    if (hit) matched.push(hit);
+    else missing.push(w);
   }
 
-  const args = parseArgs(process.argv.slice(2));
+  if (missing.length) {
+    const avail = stores
+      .map((s) => `${s.key}${s.name ? ` (${s.name})` : ""}`)
+      .join(", ");
+    throw new Error(
+      `Unknown store(s) in --stores: ${missing.join(", ")}\nAvailable: ${avail}`
+    );
+  }
+
+  // de-dupe by key (in case name+key both matched)
+  const uniq = [];
+  const seen = new Set();
+  for (const s of matched) {
+    if (seen.has(s.key)) continue;
+    seen.add(s.key);
+    uniq.push(s);
+  }
+  return uniq;
+}
+
+async function main() {
+  if (typeof fetch !== "function") {
+    throw new Error(
+      "Global fetch() not found. Please use Node.js 18+ (or newer). "
+    );
+  }
+
+  const argv = process.argv.slice(2);
+  const args = parseArgs(argv);
 
   const logger = createLogger({ debug: args.debug, colorize: true });
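
Taken together, these helpers accept the flag in either spelling and match loosely: each requested token and each store's key, name, and host are lowercased and stripped of non-alphanumerics before comparison, so a display name can stand in for a key, an unknown token fails fast with the list of available stores, and duplicate matches are collapsed by key. A few illustrative invocations, assuming the sierra_springs and craft_cellars keys from the cron script actually exist in createStores():

node bin/tracker.js --stores sierra_springs,craft_cellars
node bin/tracker.js --stores=craft_cellars
node bin/tracker.js --stores "Sierra Springs"   # "Sierra Springs" and "sierra_springs" both normalize to "sierrasprings"
STORES=craft_cellars node bin/tracker.js        # env fallback used by run_daily.sh (see the next hunk)
node bin/tracker.js --stores no_such_store      # throws: Unknown store(s) in --stores: no_such_store, plus the available list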
@@ -37,31 +114,62 @@ async function main() {
     debug: args.debug,
     maxPages: args.maxPages,
     concurrency: args.concurrency ?? clampInt(process.env.CONCURRENCY, 6, 1, 64),
-    staggerMs: args.staggerMs ?? clampInt(process.env.STAGGER_MS, 150, 0, 5000),
+    staggerMs:
+      args.staggerMs ?? clampInt(process.env.STAGGER_MS, 150, 0, 5000),
     maxRetries: clampInt(process.env.MAX_RETRIES, 6, 0, 20),
     timeoutMs: clampInt(process.env.TIMEOUT_MS, 25000, 1000, 120000),
-    discoveryGuess: args.guess ?? clampInt(process.env.DISCOVERY_GUESS, 20, 1, 5000),
-    discoveryStep: args.step ?? clampInt(process.env.DISCOVERY_STEP, 5, 1, 500),
+    discoveryGuess:
+      args.guess ?? clampInt(process.env.DISCOVERY_GUESS, 20, 1, 5000),
+    discoveryStep:
+      args.step ?? clampInt(process.env.DISCOVERY_STEP, 5, 1, 500),
     categoryConcurrency: clampInt(process.env.CATEGORY_CONCURRENCY, 5, 1, 64),
     defaultUa: DEFAULT_UA,
     defaultParseProducts: parseProductsSierra,
-    dbDir: resolveDir(args.dataDir ?? process.env.DATA_DIR, path.join(process.cwd(), "data", "db")),
-    reportDir: resolveDir(args.reportDir ?? process.env.REPORT_DIR, path.join(process.cwd(), "reports")),
+    dbDir: resolveDir(
+      args.dataDir ?? process.env.DATA_DIR,
+      path.join(process.cwd(), "data", "db")
+    ),
+    reportDir: resolveDir(
+      args.reportDir ?? process.env.REPORT_DIR,
+      path.join(process.cwd(), "reports")
+    ),
   };
 
   ensureDir(config.dbDir);
   ensureDir(config.reportDir);
 
-  const http = createHttpClient({ maxRetries: config.maxRetries, timeoutMs: config.timeoutMs, defaultUa: config.defaultUa, logger });
+  const http = createHttpClient({
+    maxRetries: config.maxRetries,
+    timeoutMs: config.timeoutMs,
+    defaultUa: config.defaultUa,
+    logger,
+  });
   const stores = createStores({ defaultUa: config.defaultUa });
 
-  const report = await runAllStores(stores, { config, logger, http });
+  const storesFilterRaw =
+    getFlagValue(argv, "--stores") || String(process.env.STORES || "").trim();
+  const storesToRun = filterStoresOrThrow(stores, storesFilterRaw);
+  if (storesFilterRaw) {
+    logger.info(`Stores filter: ${storesToRun.map((s) => s.key).join(", ")}`);
+  }
 
-  const reportTextColor = renderFinalReport(report, { dbDir: config.dbDir, colorize: logger.colorize });
+  const report = await runAllStores(storesToRun, { config, logger, http });
+
+  const reportTextColor = renderFinalReport(report, {
+    dbDir: config.dbDir,
+    colorize: logger.colorize,
+  });
   process.stdout.write(reportTextColor);
 
-  const reportTextPlain = renderFinalReport(report, { dbDir: config.dbDir, colorize: false });
-  const file = path.join(config.reportDir, `${isoTimestampFileSafe(new Date())}.txt`);
+  const reportTextPlain = renderFinalReport(report, {
+    dbDir: config.dbDir,
+    colorize: false,
+  });
+  const file = path.join(
+    config.reportDir,
+    `${isoTimestampFileSafe(new Date())}.txt`
+  );
   try {
     fs.writeFileSync(file, reportTextPlain, "utf8");
     logger.ok(`Report saved: ${logger.dim(file)}`);