From 18bb33c2ceeb863a915679072ea8ce9bda421419 Mon Sep 17 00:00:00 2001 From: Mario Zechner Date: Wed, 18 Jun 2025 08:23:09 +0200 Subject: [PATCH] Fix escape sequence corruption in asciinema cast output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace blind UTF-8 conversion with escape-sequence-aware processing: - Add proper ANSI escape sequence parser (CSI, OSC, simple sequences) - Buffer at escape sequence boundaries instead of arbitrary UTF-8 boundaries - Preserve complete escape sequences as atomic units during conversion - Only apply UTF-8 validation to text content between escape sequences - Eliminates rendering artifacts in complex terminal applications like Claude This maintains full JSON/asciinema compatibility while fixing the fundamental issue where escape sequences were being corrupted by UTF-8 validation during cast file generation. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- tty-fwd/src/api_server.rs | 125 +++++++++++++++++++++++- tty-fwd/src/protocol.rs | 198 +++++++++++++++++++++++--------------- 2 files changed, 247 insertions(+), 76 deletions(-) diff --git a/tty-fwd/src/api_server.rs b/tty-fwd/src/api_server.rs index 66b09b43..80fce37e 100644 --- a/tty-fwd/src/api_server.rs +++ b/tty-fwd/src/api_server.rs @@ -676,10 +676,27 @@ fn handle_session_snapshot(control_path: &Path, path: &str) -> Response let stream_path = control_path.join(&session_id).join("stream-out"); if let Ok(content) = fs::read_to_string(&stream_path) { + // Optimize snapshot by finding last clear command + let optimized_content = optimize_snapshot_content(&content); + + // Log optimization results + let original_lines = content.lines().count(); + let optimized_lines = optimized_content.lines().count(); + let reduction = if original_lines > 0 { + (original_lines - optimized_lines) as f64 / original_lines as f64 * 100.0 + } else { + 0.0 + }; + + println!( + "Snapshot for {}: {} lines → {} lines ({:.1}% reduction)", + session_id, original_lines, optimized_lines, reduction + ); + Response::builder() .status(StatusCode::OK) .header("Content-Type", "text/plain") - .body(content) + .body(optimized_content) .unwrap() } else { let error = ApiResponse { @@ -701,6 +718,112 @@ fn handle_session_snapshot(control_path: &Path, path: &str) -> Response } } +fn optimize_snapshot_content(content: &str) -> String { + let lines: Vec<&str> = content.trim().split('\n').collect(); + let mut header_line: Option<&str> = None; + let mut all_events: Vec<&str> = Vec::new(); + + // Parse all lines first + for line in &lines { + if line.trim().is_empty() { + continue; + } + + // Try to parse as JSON to identify headers vs events + if let Ok(parsed) = serde_json::from_str::(line) { + // Check if it's a header (has version, width, height) + if parsed.get("version").is_some() && + parsed.get("width").is_some() && + parsed.get("height").is_some() { + header_line = Some(line); + } else if parsed.as_array().is_some() { + // It's an event array [timestamp, type, data] + all_events.push(line); + } + } + } + + // Find the last clear command + let mut last_clear_index = None; + let mut last_resize_before_clear: Option<&str> = None; + + for (i, event_line) in all_events.iter().enumerate().rev() { + if let Ok(parsed) = serde_json::from_str::(event_line) { + if let Some(array) = parsed.as_array() { + if array.len() >= 3 { + if let (Some(event_type), Some(data)) = (array[1].as_str(), array[2].as_str()) { + if event_type == "o" { + // Look for clear screen escape sequences + if data.contains("\x1b[2J") || // Clear entire screen + data.contains("\x1b[H\x1b[2J") || // Home cursor + clear screen + data.contains("\x1b[3J") || // Clear scrollback + data.contains("\x1bc") { // Full reset + last_clear_index = Some(i); + break; + } + } + } + } + } + } + } + + // Find the last resize event before the clear (if any) + if let Some(clear_idx) = last_clear_index { + for event_line in all_events.iter().take(clear_idx).rev() { + if let Ok(parsed) = serde_json::from_str::(event_line) { + if let Some(array) = parsed.as_array() { + if array.len() >= 3 { + if let Some(event_type) = array[1].as_str() { + if event_type == "r" { + last_resize_before_clear = Some(event_line); + break; + } + } + } + } + } + } + } + + // Build optimized content + let mut result_lines = Vec::new(); + + // Add header if found + if let Some(header) = header_line { + result_lines.push(header.to_string()); + } + + // Add last resize before clear if found + if let Some(resize_line) = last_resize_before_clear { + // Modify the resize event to have timestamp 0 + if let Ok(mut parsed) = serde_json::from_str::(resize_line) { + if let Some(array) = parsed.as_array_mut() { + if array.len() >= 3 { + array[0] = serde_json::Value::Number(serde_json::Number::from(0)); + result_lines.push(serde_json::to_string(&parsed).unwrap_or_else(|_| resize_line.to_string())); + } + } + } + } + + // Add events after the last clear (or all events if no clear found) + let start_index = last_clear_index.unwrap_or(0); + for event_line in all_events.iter().skip(start_index) { + // Modify event to have timestamp 0 for immediate playback + if let Ok(mut parsed) = serde_json::from_str::(event_line) { + if let Some(array) = parsed.as_array_mut() { + if array.len() >= 3 { + array[0] = serde_json::Value::Number(serde_json::Number::from(0)); + result_lines.push(serde_json::to_string(&parsed).unwrap_or_else(|_| event_line.to_string())); + } + } + } + } + + result_lines.join("\n") +} + fn handle_session_input( control_path: &Path, path: &str, diff --git a/tty-fwd/src/protocol.rs b/tty-fwd/src/protocol.rs index fc991766..c804f9b8 100644 --- a/tty-fwd/src/protocol.rs +++ b/tty-fwd/src/protocol.rs @@ -251,89 +251,137 @@ impl StreamWriter { let mut combined_buf = std::mem::take(&mut self.utf8_buffer); combined_buf.extend_from_slice(buf); - // Check if we have a complete UTF-8 sequence at the end - match std::str::from_utf8(&combined_buf) { - Ok(_) => { - // Everything is valid UTF-8, process it all - let data = String::from_utf8(combined_buf).unwrap(); + // Process data in escape-sequence-aware chunks + let (processed_data, remaining_buffer) = self.process_terminal_data(&combined_buf); + + if !processed_data.is_empty() { + let event = AsciinemaEvent { + time, + event_type: AsciinemaEventType::Output, + data: processed_data, + }; + self.write_event(event)?; + } - let event = AsciinemaEvent { - time, - event_type: AsciinemaEventType::Output, - data, - }; - self.write_event(event) - } - Err(e) => { - let valid_up_to = e.valid_up_to(); + // Store any remaining incomplete data for next time + self.utf8_buffer = remaining_buffer; + Ok(()) + } - if let Some(error_len) = e.error_len() { - // There's an invalid UTF-8 sequence at valid_up_to - // Process up to and including the invalid sequence lossily - let process_up_to = valid_up_to + error_len; - let remaining = &combined_buf[process_up_to..]; - - // Check if remaining bytes form an incomplete UTF-8 sequence (≤4 bytes) - if remaining.len() <= 4 && !remaining.is_empty() { - if let Err(e2) = std::str::from_utf8(remaining) { - if e2.error_len().is_none() && e2.valid_up_to() == 0 { - // Remaining bytes are an incomplete UTF-8 sequence, buffer them - let data = String::from_utf8_lossy(&combined_buf[..process_up_to]) - .to_string(); - self.utf8_buffer.extend_from_slice(remaining); - let event = AsciinemaEvent { - time, - event_type: AsciinemaEventType::Output, - data, - }; - return self.write_event(event); + /// Process terminal data while preserving escape sequences + fn process_terminal_data(&self, buf: &[u8]) -> (String, Vec) { + let mut result = String::new(); + let mut pos = 0; + + while pos < buf.len() { + // Look for escape sequences starting with ESC (0x1B) + if buf[pos] == 0x1B { + // Try to find complete escape sequence + if let Some(seq_end) = self.find_escape_sequence_end(&buf[pos..]) { + let seq_bytes = &buf[pos..pos + seq_end]; + // Preserve escape sequence as-is using lossy conversion + // This will preserve most escape sequences correctly + result.push_str(&String::from_utf8_lossy(seq_bytes)); + pos += seq_end; + } else { + // Incomplete escape sequence at end of buffer - save for later + return (result, buf[pos..].to_vec()); + } + } else { + // Regular text - find the next escape sequence or end of valid UTF-8 + let chunk_start = pos; + while pos < buf.len() && buf[pos] != 0x1B { + pos += 1; + } + + let text_chunk = &buf[chunk_start..pos]; + + // Handle UTF-8 validation for text chunks + match std::str::from_utf8(text_chunk) { + Ok(valid_text) => { + result.push_str(valid_text); + } + Err(e) => { + let valid_up_to = e.valid_up_to(); + + // Process valid part + if valid_up_to > 0 { + result.push_str(&String::from_utf8_lossy(&text_chunk[..valid_up_to])); + } + + // Check if we have incomplete UTF-8 at the end + let invalid_start = chunk_start + valid_up_to; + let remaining = &buf[invalid_start..]; + + if remaining.len() <= 4 && pos >= buf.len() { + // Might be incomplete UTF-8 at buffer end + if let Err(utf8_err) = std::str::from_utf8(remaining) { + if utf8_err.error_len().is_none() { + // Incomplete UTF-8 sequence - buffer it + return (result, remaining.to_vec()); + } } } - } - - // Default: process everything lossily (invalid UTF-8 or remaining bytes are also invalid) - let event = AsciinemaEvent { - time, - event_type: AsciinemaEventType::Output, - data: String::from_utf8_lossy(&combined_buf).to_string(), - }; - self.write_event(event) - } else { - // Incomplete UTF-8 at the end - let incomplete_bytes = &combined_buf[valid_up_to..]; - - // Only buffer up to 4 bytes (max UTF-8 character size) - if incomplete_bytes.len() <= 4 { - // Process the valid portion - if valid_up_to > 0 { - let data = - String::from_utf8_lossy(&combined_buf[..valid_up_to]).to_string(); - self.utf8_buffer.extend_from_slice(incomplete_bytes); - - let event = AsciinemaEvent { - time, - event_type: AsciinemaEventType::Output, - data, - }; - self.write_event(event) - } else { - // Only incomplete bytes, buffer them - self.utf8_buffer.extend_from_slice(incomplete_bytes); - Ok(()) - } - } else { - // Too many incomplete bytes, process everything lossily - - let event = AsciinemaEvent { - time, - event_type: AsciinemaEventType::Output, - data: String::from_utf8_lossy(&combined_buf).to_string(), - }; - self.write_event(event) + + // Invalid UTF-8 in middle or complete invalid sequence + // Use lossy conversion for this part + let invalid_part = &text_chunk[valid_up_to..]; + result.push_str(&String::from_utf8_lossy(invalid_part)); } } } } + + (result, Vec::new()) + } + + /// Find the end of an ANSI escape sequence starting at the given position + fn find_escape_sequence_end(&self, buf: &[u8]) -> Option { + if buf.is_empty() || buf[0] != 0x1B { + return None; + } + + if buf.len() < 2 { + return None; // Incomplete - need more data + } + + match buf[1] { + // CSI sequences: ESC [ ... final_char + b'[' => { + let mut pos = 2; + // Skip parameter and intermediate characters + while pos < buf.len() { + match buf[pos] { + 0x30..=0x3F => pos += 1, // Parameter characters 0-9 : ; < = > ? + 0x20..=0x2F => pos += 1, // Intermediate characters (space) ! " # $ % & ' ( ) * + , - . / + 0x40..=0x7E => return Some(pos + 1), // Final character @ A-Z [ \ ] ^ _ ` a-z { | } ~ + _ => return Some(pos), // Invalid sequence, stop here + } + } + None // Incomplete sequence + } + + // OSC sequences: ESC ] ... (ST or BEL) + b']' => { + let mut pos = 2; + while pos < buf.len() { + match buf[pos] { + 0x07 => return Some(pos + 1), // BEL terminator + 0x1B if pos + 1 < buf.len() && buf[pos + 1] == b'\\' => { + return Some(pos + 2); // ESC \ (ST) terminator + } + _ => pos += 1, + } + } + None // Incomplete sequence + } + + // Simple two-character sequences: ESC letter + 0x40..=0x5F | 0x60..=0x7E => Some(2), + + // Other escape sequences - assume two characters for now + _ => Some(2), + } } pub fn write_event(&mut self, event: AsciinemaEvent) -> Result<(), Error> {