Fix EMFILE error in git-watcher by using shallow directory watching

- Use depth: 0 to watch only immediate children instead of recursive watching
- Prevents 'too many open files' errors in large repositories
- Still detects both tracked and untracked file changes
- Combines shallow watching with periodic polling for reliability
This commit is contained in:
Peter Steinberger 2025-07-28 14:25:11 +02:00
parent 240b491050
commit a5741a56bb

View file

@ -2,11 +2,19 @@
* Git File Watcher Service * Git File Watcher Service
* *
* Monitors git repositories for file changes and broadcasts git status updates via SSE. * Monitors git repositories for file changes and broadcasts git status updates via SSE.
* Simply watches for any file system changes and lets Git determine what's important. *
* Uses a shallow watch strategy to prevent EMFILE errors:
* - Watches repository root at depth 0 (immediate children only)
* - Watches specific .git files that affect status
* - Combined with periodic polling to catch any missed changes
*
* This approach prevents watching thousands of files in large repos while still
* detecting both tracked and untracked file changes.
*/ */
import * as chokidar from 'chokidar'; import * as chokidar from 'chokidar';
import type { Response } from 'express'; import type { Response } from 'express';
import { accessSync } from 'fs';
import { type GitStatusCounts, getDetailedGitStatus } from '../utils/git-status.js'; import { type GitStatusCounts, getDetailedGitStatus } from '../utils/git-status.js';
import { createLogger } from '../utils/logger.js'; import { createLogger } from '../utils/logger.js';
@ -19,6 +27,7 @@ interface WatcherInfo {
gitRepoPath: string; gitRepoPath: string;
lastStatus?: GitStatusCounts; lastStatus?: GitStatusCounts;
debounceTimer?: NodeJS.Timeout; debounceTimer?: NodeJS.Timeout;
periodicCheckTimer?: NodeJS.Timeout;
clients: Set<Response>; clients: Set<Response>;
} }
@ -37,32 +46,57 @@ export class GitWatcher {
logger.debug(`Starting git watcher for session ${sessionId} at ${gitRepoPath}`); logger.debug(`Starting git watcher for session ${sessionId} at ${gitRepoPath}`);
// Watch the repository root, but ignore performance-killing directories // Watch strategy:
// 1. Watch the repository root at depth 0 (only immediate files/folders)
// 2. Watch .git directory separately for git operations
// This gives us file change detection without watching thousands of files
const watcher = chokidar.watch(gitRepoPath, { const watcher = chokidar.watch(gitRepoPath, {
ignoreInitial: true, ignoreInitial: true,
ignored: [ ignored: [
// Ignore directories that would kill performance // Always ignore these to prevent performance issues
'**/node_modules/**', '**/node_modules/**',
'**/.git/objects/**', // Git's object database - huge and changes don't matter '**/.git/objects/**', // Git's object database - huge
'**/.git/logs/**', // Git's log files - not relevant for status '**/.git/logs/**', // Git's log files
'**/dist/**', '**/dist/**',
'**/build/**', '**/build/**',
'**/.next/**', '**/.next/**',
'**/coverage/**', '**/coverage/**',
'**/.turbo/**', '**/.turbo/**',
'**/*.log', '**/*.log',
'**/.DS_Store',
], ],
// CRITICAL: Only watch immediate children, not recursive
depth: 0,
// Don't follow symlinks to avoid infinite loops // Don't follow symlinks to avoid infinite loops
followSymlinks: false, followSymlinks: false,
// Use native events for better performance // Use native events for better performance
usePolling: false, usePolling: false,
// Optimize for performance // Don't wait for write to finish - we'll debounce anyway
awaitWriteFinish: { awaitWriteFinish: false,
stabilityThreshold: 200,
pollInterval: 100,
},
}); });
// Also watch specific git files that affect status
const gitPaths = [
`${gitRepoPath}/.git/index`,
`${gitRepoPath}/.git/HEAD`,
`${gitRepoPath}/.git/refs/heads`,
].filter((path) => {
try {
accessSync(path);
return true;
} catch {
return false;
}
});
if (gitPaths.length > 0) {
// Add git paths to the watcher
watcher.add(gitPaths);
}
logger.debug(`Git watcher started for session ${sessionId} with shallow directory watching`);
const watcherInfo: WatcherInfo = { const watcherInfo: WatcherInfo = {
watcher, watcher,
sessionId, sessionId,
@ -101,6 +135,12 @@ export class GitWatcher {
// Get initial status // Get initial status
this.checkAndBroadcastStatus(watcherInfo); this.checkAndBroadcastStatus(watcherInfo);
// Start periodic check every 2 seconds to catch working directory changes
// This complements the git file watching and ensures we don't miss changes
watcherInfo.periodicCheckTimer = setInterval(() => {
this.checkAndBroadcastStatus(watcherInfo);
}, 2000);
} }
/** /**
@ -160,6 +200,11 @@ export class GitWatcher {
clearTimeout(watcherInfo.debounceTimer); clearTimeout(watcherInfo.debounceTimer);
} }
// Clear periodic check timer
if (watcherInfo.periodicCheckTimer) {
clearInterval(watcherInfo.periodicCheckTimer);
}
// Close watcher // Close watcher
watcherInfo.watcher.close(); watcherInfo.watcher.close();