Fix zombie process detection and add PTY-specific cleanup

- Update is_pid_alive to detect zombie processes (status 'Z') as dead
- Add spawn_type field to distinguish PTY vs socket sessions
- Add reap_zombies function to clean up zombie children
- Only attempt zombie reaping for PTY sessions to avoid interfering with osascript processes
- Fix session cleanup to work properly with zombie processes

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Mario Zechner 2025-06-18 14:19:55 +02:00
parent ef19211451
commit 3fdad988ff
6 changed files with 89 additions and 29 deletions

View file

@ -1047,8 +1047,14 @@ fn handle_session_kill(control_path: &Path, path: &str) -> Response<String> {
if let Ok(session_info) = serde_json::from_str::<serde_json::Value>(&content) {
if let Some(pid) = session_info.get("pid").and_then(serde_json::Value::as_u64) {
// Wait for the process to actually die
for _ in 0..30 {
// 30 * 100ms = 3 seconds max
for _ in 0..30 { // 30 * 100ms = 3 seconds max
// Only reap zombies for PTY sessions
if let Some(spawn_type) = session_info.get("spawn_type").and_then(|s| s.as_str()) {
if spawn_type == "pty" {
sessions::reap_zombies();
}
}
if !sessions::is_pid_alive(pid as u32) {
process_died = true;
break;

View file

@ -25,12 +25,18 @@ pub struct SessionInfo {
pub started_at: Option<Timestamp>,
#[serde(default = "get_default_term")]
pub term: String,
#[serde(default = "get_default_spawn_type")]
pub spawn_type: String,
}
/// Serde default for `SessionInfo::term`: an owned copy of `DEFAULT_TERM`.
fn get_default_term() -> String {
    String::from(DEFAULT_TERM)
}
/// Serde default for `SessionInfo::spawn_type`, applied when the field is
/// absent from the serialized session: always `"socket"`.
fn get_default_spawn_type() -> String {
    String::from("socket")
}
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct SessionListEntry {
#[serde(flatten)]

View file

@ -250,20 +250,42 @@ fn write_to_pipe_with_timeout(
}
/// Reports whether `pid` refers to a live (non-zombie) process.
///
/// Runs `ps -p <pid> -o stat=` and inspects the result:
///
/// * `ps` could not be spawned, or exited unsuccessfully (no such process):
///   returns `false`.
/// * `ps` succeeded and the reported state starts with `Z` (zombie): returns
///   `false`, because a zombie has already exited and is only waiting to be
///   reaped by its parent.
/// * otherwise: returns `true`.
pub fn is_pid_alive(pid: u32) -> bool {
    let ps_result = Command::new("ps")
        .args(["-p", &pid.to_string(), "-o", "stat="])
        .output();

    match ps_result {
        Ok(output) if output.status.success() => {
            // The state column is the only output; a leading 'Z' marks a zombie.
            let stat = String::from_utf8_lossy(&output.stdout);
            !stat.trim().starts_with('Z')
        }
        // Either `ps` reported that the process does not exist, or `ps`
        // itself could not be run; both are treated as "not alive".
        _ => false,
    }
}
/// Reaps every already-terminated child of the current process without
/// blocking.
///
/// Calls `waitpid(-1, NULL, WNOHANG)` in a loop until it returns `0` (no
/// further exited child is available) or `-1` (error, e.g. there are no
/// children at all). The PID of each reaped child is logged to stderr; the
/// exit status is discarded.
///
/// Because `-1` matches *any* child, this also consumes the exit status of
/// children that other code may intend to wait on.
pub fn reap_zombies() {
    use libc::{waitpid, WNOHANG};
    use std::ptr;

    loop {
        // WUNTRACED is deliberately not passed: it would make waitpid also
        // report children that are merely stopped, which are not zombies and
        // are not reaped by this call.
        //
        // SAFETY: waitpid(2) explicitly permits a null status pointer (the
        // status is simply not stored), so no memory is written through it.
        let reaped_pid = unsafe { waitpid(-1, ptr::null_mut(), WNOHANG) };
        if reaped_pid <= 0 {
            // 0: children exist but none has exited yet; -1: error / no children.
            break;
        }
        eprintln!("Reaped zombie child with PID: {}", reaped_pid);
    }
}

View file

@ -187,7 +187,8 @@ fn spawn_via_pty(command: &[String], working_dir: Option<&str>) -> Result<String
"status": "running",
"started_at": jiff::Timestamp::now(),
"term": "xterm-256color",
"pid": child.as_raw() as u32
"pid": child.as_raw() as u32,
"spawn_type": "pty"
});
std::fs::write(
format!("{session_dir}/session.json"),
@ -363,8 +364,9 @@ fn handle_pty_session(
let master_fd_dup2 = unsafe { libc::dup(master_fd) };
if master_fd_dup2 != -1 {
let stdin_path_clone = stdin_path;
let session_id_clone = session_id.to_string();
std::thread::spawn(move || {
if let Err(e) = handle_stdin_to_pty(master_fd_dup2, &stdin_path_clone) {
if let Err(e) = handle_stdin_to_pty(master_fd_dup2, &stdin_path_clone, &session_id_clone) {
eprintln!("Stdin handler error: {e}");
}
// Clean up the duplicated fd when done
@ -441,7 +443,7 @@ fn handle_pty_session(
}
/// Handle stdin FIFO -> PTY master forwarding
fn handle_stdin_to_pty(master_fd: RawFd, stdin_path: &str) -> Result<()> {
fn handle_stdin_to_pty(master_fd: RawFd, stdin_path: &str, session_id: &str) -> Result<()> {
use nix::fcntl::OFlag;
use std::fs::OpenOptions;
use std::os::unix::fs::OpenOptionsExt;
@ -464,7 +466,30 @@ fn handle_stdin_to_pty(master_fd: RawFd, stdin_path: &str) -> Result<()> {
std::thread::sleep(std::time::Duration::from_millis(10));
}
Ok(n) => {
// Write to PTY master using libc::write
// Check for Ctrl+C and send SIGINT directly for responsiveness
if n == 1 && buffer[0] == 0x03 {
// Ctrl+C detected - send SIGINT to process group for immediate response
let session_json_path = format!("{}/{}/session.json",
env::var("TTY_FWD_CONTROL_DIR").unwrap_or_else(|_| {
format!("{}/.vibetunnel/control", env::var("HOME").unwrap_or_default())
}),
session_id);
if let Ok(content) = std::fs::read_to_string(&session_json_path) {
if let Ok(session_info) = serde_json::from_str::<serde_json::Value>(&content) {
if let Some(pid) = session_info.get("pid").and_then(|p| p.as_u64()) {
// Send SIGINT to the process group for immediate response
unsafe {
libc::kill(-(pid as i32), libc::SIGINT);
}
eprintln!("Sent SIGINT to process group {}", pid);
}
}
}
// Still write Ctrl+C through PTY for terminal consistency
}
// Write to PTY master using libc::write (blocking)
let bytes_written =
unsafe { libc::write(master_fd, buffer.as_ptr().cast::<libc::c_void>(), n) };
if bytes_written == -1 {

View file

@ -275,6 +275,7 @@ pub fn create_session_info(
exit_code: None,
started_at: Some(Timestamp::now()),
term,
spawn_type: "socket".to_string(),
};
let session_info_str = serde_json::to_string(&session_info)?;

View file

@ -237,38 +237,38 @@ export class SessionView extends LitElement {
// Use CastConverter to connect terminal to stream with reconnection tracking
const connection = CastConverter.connectToStream(this.terminal, streamUrl);
// Wrap the connection to track reconnections
const originalEventSource = connection.eventSource;
let lastErrorTime = 0;
const reconnectThreshold = 3; // Max reconnects before giving up
const reconnectWindow = 5000; // 5 second window
const handleError = () => {
const now = Date.now();
// Reset counter if enough time has passed since last error
if (now - lastErrorTime > reconnectWindow) {
this.reconnectCount = 0;
}
this.reconnectCount++;
lastErrorTime = now;
console.log(`Stream error #${this.reconnectCount} for session ${this.session?.id}`);
// If we've had too many reconnects, mark session as exited
if (this.reconnectCount >= reconnectThreshold) {
console.log(`Session ${this.session?.id} marked as exited due to excessive reconnections`);
if (this.session && this.session.status !== 'exited') {
this.session = { ...this.session, status: 'exited' };
this.requestUpdate();
// Disconnect the stream and load final snapshot
connection.disconnect();
this.streamConnection = null;
// Load final snapshot
requestAnimationFrame(() => {
this.loadSessionSnapshot();
@ -276,10 +276,10 @@ export class SessionView extends LitElement {
}
}
};
// Override the error handler
originalEventSource.addEventListener('error', handleError);
this.streamConnection = connection;
}