From 18bb33c2ceeb863a915679072ea8ce9bda421419 Mon Sep 17 00:00:00 2001
From: Mario Zechner <badlogicgames@gmail.com>
Date: Wed, 18 Jun 2025 08:23:09 +0200
Subject: [PATCH] Fix escape sequence corruption in asciinema cast output
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace blind UTF-8 conversion with escape-sequence-aware processing:
- Add proper ANSI escape sequence parser (CSI, OSC, simple sequences)
- Buffer at escape sequence boundaries instead of arbitrary UTF-8 boundaries
- Preserve complete escape sequences as atomic units during conversion
- Only apply UTF-8 validation to text content between escape sequences
- Eliminates rendering artifacts in complex terminal applications like Claude

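This maintains full JSON/asciinema compatibility while fixing the
fundamental issue where escape sequences were being corrupted by
UTF-8 validation during cast file generation.

The session snapshot endpoint is also optimized: it now returns only
the header, the last resize event recorded before the last screen
clear, and the events from that clear onwards, with all event
timestamps reset to zero.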

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 tty-fwd/src/api_server.rs | 125 +++++++++++++++++++++++-
 tty-fwd/src/protocol.rs   | 198 +++++++++++++++++++++++---------------
 2 files changed, 247 insertions(+), 76 deletions(-)
diff --git a/tty-fwd/src/api_server.rs b/tty-fwd/src/api_server.rs
index 66b09b43..80fce37e 100644
--- a/tty-fwd/src/api_server.rs
+++ b/tty-fwd/src/api_server.rs
@@ -676,10 +676,27 @@ fn handle_session_snapshot(control_path: &Path, path: &str) -> Response<String>
         let stream_path = control_path.join(&session_id).join("stream-out");
 
         if let Ok(content) = fs::read_to_string(&stream_path) {
+            // Optimize snapshot by finding last clear command
+            let optimized_content = optimize_snapshot_content(&content);
+            
+            // Log optimization results
+            let original_lines = content.lines().count();
+            let optimized_lines = optimized_content.lines().count();
+            let reduction = if original_lines > 0 {
+                (original_lines - optimized_lines) as f64 / original_lines as f64 * 100.0
+            } else {
+                0.0
+            };
+            
+            println!(
+                "Snapshot for {}: {} lines → {} lines ({:.1}% reduction)",
+                session_id, original_lines, optimized_lines, reduction
+            );
+            
             Response::builder()
                 .status(StatusCode::OK)
                 .header("Content-Type", "text/plain")
-                .body(content)
+                .body(optimized_content)
                 .unwrap()
         } else {
             let error = ApiResponse {
@@ -701,6 +718,112 @@ fn handle_session_snapshot(control_path: &Path, path: &str) -> Response<String>
     }
 }
 
+/// Shrinks an asciinema cast so that a snapshot only replays what is still on screen.
+///
+/// `content` is expected to hold one JSON value per line: a header object (identified
+/// by its `version`, `width` and `height` keys) and event arrays `[time, type, data]`.
+/// Blank lines, lines that are not valid JSON, and JSON values that are neither a
+/// header nor an array are dropped.
+///
+/// The result contains, in this order:
+/// 1. the header line, unchanged (the last one if several are present);
+/// 2. the last resize (`"r"`) event recorded before the last screen clear, if any;
+/// 3. every event from the last screen clear onwards, or every event when the
+///    screen was never cleared.
+///
+/// Emitted events are re-serialized with their timestamp set to `0` so that playback
+/// is immediate; events with fewer than three elements are never emitted. Lines are
+/// joined with `\n` and no trailing newline is added.
+///
+/// Each line is run through the JSON parser only once; the clear and resize lookups
+/// operate on the already-parsed events.
+fn optimize_snapshot_content(content: &str) -> String {
+    // Type tag (element 1) of a `[time, type, data]` event; `None` when the array is
+    // too short or the tag is not a string.
+    fn event_kind(fields: &[serde_json::Value]) -> Option<&str> {
+        if fields.len() >= 3 {
+            fields[1].as_str()
+        } else {
+            None
+        }
+    }
+
+    // True for an output (`"o"`) event whose data wipes the terminal.
+    fn is_clear_event(fields: &[serde_json::Value]) -> bool {
+        // ESC[2J clears the screen (this also covers "ESC[H ESC[2J"), ESC[3J clears
+        // the scrollback and ESC c performs a full reset.
+        const CLEAR_SEQUENCES: [&str; 3] = ["\x1b[2J", "\x1b[3J", "\x1bc"];
+
+        if event_kind(fields) != Some("o") {
+            return false;
+        }
+        match fields[2].as_str() {
+            Some(data) => CLEAR_SEQUENCES.iter().any(|seq| data.contains(*seq)),
+            None => false,
+        }
+    }
+
+    let mut header_line: Option<&str> = None;
+    // Each event keeps its source line (serialization fallback) next to its fields.
+    let mut events: Vec<(&str, Vec<serde_json::Value>)> = Vec::new();
+
+    // Classify every line in a single pass: arrays are events, objects carrying the
+    // header keys are the header, anything else is ignored.
+    for line in content.trim().split('\n') {
+        match serde_json::from_str::<serde_json::Value>(line) {
+            Ok(serde_json::Value::Array(fields)) => events.push((line, fields)),
+            Ok(value) => {
+                let is_header = ["version", "width", "height"]
+                    .iter()
+                    .all(|key| value.get(*key).is_some());
+                if is_header {
+                    header_line = Some(line);
+                }
+            }
+            // Blank or malformed lines are skipped.
+            Err(_) => {}
+        }
+    }
+
+    // Index of the last event that clears the screen, if any.
+    let last_clear = events
+        .iter()
+        .rposition(|(_, fields)| is_clear_event(fields));
+
+    // Index of the last resize before that clear, so that its dimensions can be
+    // replayed ahead of the retained events.
+    let resize_before_clear = last_clear.and_then(|clear_idx| {
+        events[..clear_idx]
+            .iter()
+            .rposition(|(_, fields)| event_kind(fields) == Some("r"))
+    });
+
+    // Without a clear, every event is kept.
+    let first_kept = last_clear.unwrap_or(0);
+    let kept_indices = resize_before_clear
+        .into_iter()
+        .chain(first_kept..events.len());
+
+    let mut result_lines: Vec<String> = Vec::new();
+    if let Some(header) = header_line {
+        result_lines.push(header.to_string());
+    }
+
+    for idx in kept_indices {
+        let (line, fields) = &mut events[idx];
+        if fields.len() < 3 {
+            continue; // Not a `[time, type, data]` event
+        }
+        // Timestamp 0 makes the player render the event immediately.
+        fields[0] = serde_json::Value::Number(serde_json::Number::from(0));
+        let serialized =
+            serde_json::to_string(&*fields).unwrap_or_else(|_| line.to_string());
+        result_lines.push(serialized);
+    }
+
+    result_lines.join("\n")
+}
+
 fn handle_session_input(
     control_path: &Path,
     path: &str,
diff --git a/tty-fwd/src/protocol.rs b/tty-fwd/src/protocol.rs
index fc991766..c804f9b8 100644
--- a/tty-fwd/src/protocol.rs
+++ b/tty-fwd/src/protocol.rs
@@ -251,89 +251,137 @@ impl StreamWriter {
         let mut combined_buf = std::mem::take(&mut self.utf8_buffer);
         combined_buf.extend_from_slice(buf);
 
-        // Check if we have a complete UTF-8 sequence at the end
-        match std::str::from_utf8(&combined_buf) {
-            Ok(_) => {
-                // Everything is valid UTF-8, process it all
-                let data = String::from_utf8(combined_buf).unwrap();
+        // Process data in escape-sequence-aware chunks
+        let (processed_data, remaining_buffer) = self.process_terminal_data(&combined_buf);
+        
+        if !processed_data.is_empty() {
+            let event = AsciinemaEvent {
+                time,
+                event_type: AsciinemaEventType::Output,
+                data: processed_data,
+            };
+            self.write_event(event)?;
+        }
 
-                let event = AsciinemaEvent {
-                    time,
-                    event_type: AsciinemaEventType::Output,
-                    data,
-                };
-                self.write_event(event)
-            }
-            Err(e) => {
-                let valid_up_to = e.valid_up_to();
+        // Store any remaining incomplete data for next time
+        self.utf8_buffer = remaining_buffer;
+        Ok(())
+    }
 
-                if let Some(error_len) = e.error_len() {
-                    // There's an invalid UTF-8 sequence at valid_up_to
-                    // Process up to and including the invalid sequence lossily
-                    let process_up_to = valid_up_to + error_len;
-                    let remaining = &combined_buf[process_up_to..];
-
-                    // Check if remaining bytes form an incomplete UTF-8 sequence (≤4 bytes)
-                    if remaining.len() <= 4 && !remaining.is_empty() {
-                        if let Err(e2) = std::str::from_utf8(remaining) {
-                            if e2.error_len().is_none() && e2.valid_up_to() == 0 {
-                                // Remaining bytes are an incomplete UTF-8 sequence, buffer them
-                                let data = String::from_utf8_lossy(&combined_buf[..process_up_to])
-                                    .to_string();
-                                self.utf8_buffer.extend_from_slice(remaining);
-                                let event = AsciinemaEvent {
-                                    time,
-                                    event_type: AsciinemaEventType::Output,
-                                    data,
-                                };
-                                return self.write_event(event);
+    /// Decodes raw terminal bytes into text for an output event.
+    ///
+    /// Returns the decoded text together with any trailing bytes (an unfinished escape
+    /// sequence or a split multi-byte UTF-8 character) that the caller should carry
+    /// over to the next call. Complete escape sequences are converted as whole units;
+    /// text between them is validated as UTF-8, with invalid bytes replaced by U+FFFD.
+    fn process_terminal_data(&self, buf: &[u8]) -> (String, Vec<u8>) {
+        let mut result = String::new();
+        let mut pos = 0;
+
+        while pos < buf.len() {
+            if buf[pos] == 0x1B {
+                // ESC starts an escape sequence; take it only once it is complete.
+                if let Some(seq_end) = self.find_escape_sequence_end(&buf[pos..]) {
+                    let seq_bytes = &buf[pos..pos + seq_end];
+                    result.push_str(&String::from_utf8_lossy(seq_bytes));
+                    pos += seq_end;
+                } else {
+                    // Incomplete escape sequence at end of buffer - save for later
+                    return (result, buf[pos..].to_vec());
+                }
+            } else {
+                // Regular text: runs up to the next ESC or the end of the buffer
+                let chunk_start = pos;
+                while pos < buf.len() && buf[pos] != 0x1B {
+                    pos += 1;
+                }
+                let text_chunk = &buf[chunk_start..pos];
+
+                match std::str::from_utf8(text_chunk) {
+                    Ok(valid_text) => {
+                        result.push_str(valid_text);
+                    }
+                    Err(_) => {
+                        // Walk the chunk one decoding error at a time, so that an invalid
+                        // byte early on cannot hide a multi-byte character that is merely
+                        // split across two reads at the end of the buffer.
+                        let mut tail = text_chunk;
+                        while let Err(err) = std::str::from_utf8(tail) {
+                            let (valid, rest) = tail.split_at(err.valid_up_to());
+                            // `valid` is well-formed, so the lossy conversion is a plain copy.
+                            result.push_str(&String::from_utf8_lossy(valid));
+                            match err.error_len() {
+                                // Character cut off by the end of the input: hand the
+                                // bytes back so the next call can complete them.
+                                None if pos >= buf.len() => return (result, rest.to_vec()),
+                                // Invalid bytes, or a character cut off by a following
+                                // ESC: substitute U+FFFD as lossy conversion would.
+                                bad_len => {
+                                    result.push('\u{FFFD}');
+                                    tail = &rest[bad_len.unwrap_or(rest.len())..];
+                                }
                             }
                         }
-                    }
-
-                    // Default: process everything lossily (invalid UTF-8 or remaining bytes are also invalid)
-                    let event = AsciinemaEvent {
-                        time,
-                        event_type: AsciinemaEventType::Output,
-                        data: String::from_utf8_lossy(&combined_buf).to_string(),
-                    };
-                    self.write_event(event)
-                } else {
-                    // Incomplete UTF-8 at the end
-                    let incomplete_bytes = &combined_buf[valid_up_to..];
-
-                    // Only buffer up to 4 bytes (max UTF-8 character size)
-                    if incomplete_bytes.len() <= 4 {
-                        // Process the valid portion
-                        if valid_up_to > 0 {
-                            let data =
-                                String::from_utf8_lossy(&combined_buf[..valid_up_to]).to_string();
-                            self.utf8_buffer.extend_from_slice(incomplete_bytes);
-
-                            let event = AsciinemaEvent {
-                                time,
-                                event_type: AsciinemaEventType::Output,
-                                data,
-                            };
-                            self.write_event(event)
-                        } else {
-                            // Only incomplete bytes, buffer them
-                            self.utf8_buffer.extend_from_slice(incomplete_bytes);
-                            Ok(())
-                        }
-                    } else {
-                        // Too many incomplete bytes, process everything lossily
-
-                        let event = AsciinemaEvent {
-                            time,
-                            event_type: AsciinemaEventType::Output,
-                            data: String::from_utf8_lossy(&combined_buf).to_string(),
-                        };
-                        self.write_event(event)
+
+                        // Whatever follows the last decoding error is valid UTF-8
+                        // (possibly empty), so this conversion copies it verbatim.
+                        let clean_tail = String::from_utf8_lossy(tail);
+                        result.push_str(&clean_tail);
                     }
                 }
             }
         }
+
+        (result, Vec::new())
+    }
+
+    /// Returns the byte length of the escape sequence at the start of `buf`.
+    ///
+    /// `buf` must begin with ESC (0x1B). `None` means that it does not, or that the
+    /// sequence is incomplete and more input is needed. Malformed sequences are cut
+    /// short instead of awaited, so `Some(n)` always lets the caller advance (n >= 1).
+    fn find_escape_sequence_end(&self, buf: &[u8]) -> Option<usize> {
+        if buf.first() != Some(&0x1B) || buf.len() < 2 {
+            return None; // Not an escape sequence, or a lone ESC that needs more data
+        }
+
+        match buf[1] {
+            // CSI sequences: ESC [ parameters/intermediates final
+            b'[' => {
+                for (offset, &byte) in buf.iter().enumerate().skip(2) {
+                    match byte {
+                        0x20..=0x3F => {} // Parameter or intermediate byte: keep scanning
+                        0x40..=0x7E => return Some(offset + 1), // Final byte ends the sequence
+                        _ => return Some(offset), // Malformed: stop before the stray byte
+                    }
+                }
+                None // Incomplete sequence
+            }
+
+            // OSC sequences: ESC ] payload, terminated by BEL or ST (ESC \)
+            b']' => {
+                for (offset, &byte) in buf.iter().enumerate().skip(2) {
+                    match (byte, buf.get(offset + 1)) {
+                        (0x07, _) => return Some(offset + 1), // BEL terminator
+                        (0x1B, Some(&b'\\')) => return Some(offset + 2), // ESC \ (ST) terminator
+                        // Any other ESC ends the string and starts a new sequence, so
+                        // stop in front of it instead of buffering without bound.
+                        (0x1B, Some(_)) => return Some(offset),
+                        // CAN and SUB cancel a control string in terminal emulators.
+                        (0x18, _) | (0x1A, _) => return Some(offset + 1),
+                        _ => {} // Payload byte, or a trailing ESC awaiting its next byte
+                    }
+                }
+                None // Incomplete sequence
+            }
+
+            // ESC before a non-ASCII byte is not a sequence: consume only the ESC so the
+            // byte is decoded as UTF-8 text instead of being replaced with U+FFFD.
+            0x80..=0xFF => Some(1),
+
+            // Everything else is treated as a two-byte sequence (ESC plus one byte).
+            _ => Some(2),
+        }
     }
 
     pub fn write_event(&mut self, event: AsciinemaEvent) -> Result<(), Error> {