Fix escape sequence corruption in asciinema cast output

Replace blind UTF-8 conversion with escape-sequence-aware processing:
- Add proper ANSI escape sequence parser (CSI, OSC, simple sequences)
- Buffer at escape sequence boundaries instead of arbitrary UTF-8 boundaries
- Preserve complete escape sequences as atomic units during conversion
- Only apply UTF-8 validation to text content between escape sequences
- Eliminates rendering artifacts in complex terminal applications like Claude

This maintains full JSON/asciinema compatibility while fixing the
fundamental issue where escape sequences were being corrupted by
UTF-8 validation during cast file generation.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Mario Zechner 2025-06-18 08:23:09 +02:00
parent e47adc9808
commit 18bb33c2ce
2 changed files with 247 additions and 76 deletions

View file

@@ -676,10 +676,27 @@ fn handle_session_snapshot(control_path: &Path, path: &str) -> Response<String>
let stream_path = control_path.join(&session_id).join("stream-out");
if let Ok(content) = fs::read_to_string(&stream_path) {
// Optimize snapshot by finding last clear command
let optimized_content = optimize_snapshot_content(&content);
// Log optimization results
let original_lines = content.lines().count();
let optimized_lines = optimized_content.lines().count();
let reduction = if original_lines > 0 {
(original_lines - optimized_lines) as f64 / original_lines as f64 * 100.0
} else {
0.0
};
println!(
"Snapshot for {}: {} lines → {} lines ({:.1}% reduction)",
session_id, original_lines, optimized_lines, reduction
);
Response::builder()
.status(StatusCode::OK)
.header("Content-Type", "text/plain")
.body(content)
.body(optimized_content)
.unwrap()
} else {
let error = ApiResponse {
@@ -701,6 +718,112 @@ fn handle_session_snapshot(control_path: &Path, path: &str) -> Response<String>
}
}
/// Reduces an asciinema cast stream to the part needed to redraw the current screen.
///
/// `content` is processed line by line. Every non-empty line is parsed as JSON exactly
/// once: an object carrying `version`, `width` and `height` is treated as the header
/// (the last such line wins), a JSON array is treated as an event, and anything else
/// (including unparseable lines) is dropped.
///
/// The returned string contains, joined by `\n` and in this order:
/// 1. the header line, unchanged, if one was found;
/// 2. the last resize (`"r"`) event preceding the last screen clear, if both exist;
/// 3. every event from the last screen-clearing output event onwards (that event
///    included), or every event when no clear was found.
///
/// Emitted events have their timestamp (element 0) replaced with `0`; events with fewer
/// than three elements are omitted.
fn optimize_snapshot_content(content: &str) -> String {
    /// Returns the type field of a `[time, type, data]` event with at least three elements.
    fn event_type(event: &serde_json::Value) -> Option<&str> {
        match event.as_array() {
            Some(fields) if fields.len() >= 3 => fields[1].as_str(),
            _ => None,
        }
    }

    /// Reports whether `event` is an output (`"o"`) event whose data clears the screen.
    fn is_screen_clear(event: &serde_json::Value) -> bool {
        // Clear entire screen, clear scrollback, full reset. The combined
        // "home cursor + clear screen" form (`ESC [ H ESC [ 2 J`) needs no entry of its
        // own because it always contains `ESC [ 2 J`.
        const CLEAR_SEQUENCES: [&str; 3] = ["\x1b[2J", "\x1b[3J", "\x1bc"];

        let fields = match event.as_array() {
            Some(fields) if fields.len() >= 3 => fields,
            _ => return false,
        };
        match (fields[1].as_str(), fields[2].as_str()) {
            (Some("o"), Some(data)) => CLEAR_SEQUENCES.iter().any(|seq| data.contains(*seq)),
            _ => false,
        }
    }

    /// Sets the event's timestamp to 0 and serializes it; `None` for events with fewer
    /// than three elements. Falls back to the original line if serialization fails.
    fn zero_timestamp(line: &str, event: &mut serde_json::Value) -> Option<String> {
        let fields = event.as_array_mut()?;
        if fields.len() < 3 {
            return None;
        }
        fields[0] = serde_json::Value::Number(serde_json::Number::from(0));
        Some(serde_json::to_string(&*event).unwrap_or_else(|_| line.to_string()))
    }

    let mut header_line: Option<&str> = None;
    // Each event keeps its source line next to the parsed value so that no line has to
    // be parsed a second time later on.
    let mut events: Vec<(&str, serde_json::Value)> = Vec::new();

    for line in content.trim().split('\n') {
        if line.trim().is_empty() {
            continue;
        }
        let parsed = match serde_json::from_str::<serde_json::Value>(line) {
            Ok(value) => value,
            Err(_) => continue,
        };
        let is_header = ["version", "width", "height"]
            .iter()
            .all(|key| parsed.get(*key).is_some());
        if is_header {
            header_line = Some(line);
        } else if parsed.is_array() {
            events.push((line, parsed));
        }
    }

    // Index of the last output event that clears the screen.
    let last_clear_index = events.iter().rposition(|(_, event)| is_screen_clear(event));

    // Index of the last resize event strictly before that clear.
    let last_resize_index = last_clear_index.and_then(|clear_idx| {
        events[..clear_idx]
            .iter()
            .rposition(|(_, event)| event_type(event) == Some("r"))
    });

    let mut result_lines: Vec<String> = Vec::new();
    if let Some(header) = header_line {
        result_lines.push(header.to_string());
    }

    // Replay the resize first (if any), then everything from the clear onwards
    // (or all events when no clear was found).
    let start_index = last_clear_index.unwrap_or(0);
    for index in last_resize_index.into_iter().chain(start_index..events.len()) {
        let entry = &mut events[index];
        if let Some(rebased) = zero_timestamp(entry.0, &mut entry.1) {
            result_lines.push(rebased);
        }
    }

    result_lines.join("\n")
}
fn handle_session_input(
control_path: &Path,
path: &str,

View file

@@ -251,89 +251,137 @@ impl StreamWriter {
let mut combined_buf = std::mem::take(&mut self.utf8_buffer);
combined_buf.extend_from_slice(buf);
// Check if we have a complete UTF-8 sequence at the end
match std::str::from_utf8(&combined_buf) {
Ok(_) => {
// Everything is valid UTF-8, process it all
let data = String::from_utf8(combined_buf).unwrap();
// Process data in escape-sequence-aware chunks
let (processed_data, remaining_buffer) = self.process_terminal_data(&combined_buf);
if !processed_data.is_empty() {
let event = AsciinemaEvent {
time,
event_type: AsciinemaEventType::Output,
data: processed_data,
};
self.write_event(event)?;
}
let event = AsciinemaEvent {
time,
event_type: AsciinemaEventType::Output,
data,
};
self.write_event(event)
}
Err(e) => {
let valid_up_to = e.valid_up_to();
// Store any remaining incomplete data for next time
self.utf8_buffer = remaining_buffer;
Ok(())
}
if let Some(error_len) = e.error_len() {
// There's an invalid UTF-8 sequence at valid_up_to
// Process up to and including the invalid sequence lossily
let process_up_to = valid_up_to + error_len;
let remaining = &combined_buf[process_up_to..];
// Check if remaining bytes form an incomplete UTF-8 sequence (≤4 bytes)
if remaining.len() <= 4 && !remaining.is_empty() {
if let Err(e2) = std::str::from_utf8(remaining) {
if e2.error_len().is_none() && e2.valid_up_to() == 0 {
// Remaining bytes are an incomplete UTF-8 sequence, buffer them
let data = String::from_utf8_lossy(&combined_buf[..process_up_to])
.to_string();
self.utf8_buffer.extend_from_slice(remaining);
let event = AsciinemaEvent {
time,
event_type: AsciinemaEventType::Output,
data,
};
return self.write_event(event);
/// Process terminal data while preserving escape sequences
fn process_terminal_data(&self, buf: &[u8]) -> (String, Vec<u8>) {
let mut result = String::new();
let mut pos = 0;
while pos < buf.len() {
// Look for escape sequences starting with ESC (0x1B)
if buf[pos] == 0x1B {
// Try to find complete escape sequence
if let Some(seq_end) = self.find_escape_sequence_end(&buf[pos..]) {
let seq_bytes = &buf[pos..pos + seq_end];
// Preserve escape sequence as-is using lossy conversion
// This will preserve most escape sequences correctly
result.push_str(&String::from_utf8_lossy(seq_bytes));
pos += seq_end;
} else {
// Incomplete escape sequence at end of buffer - save for later
return (result, buf[pos..].to_vec());
}
} else {
// Regular text - find the next escape sequence or end of valid UTF-8
let chunk_start = pos;
while pos < buf.len() && buf[pos] != 0x1B {
pos += 1;
}
let text_chunk = &buf[chunk_start..pos];
// Handle UTF-8 validation for text chunks
match std::str::from_utf8(text_chunk) {
Ok(valid_text) => {
result.push_str(valid_text);
}
Err(e) => {
let valid_up_to = e.valid_up_to();
// Process valid part
if valid_up_to > 0 {
result.push_str(&String::from_utf8_lossy(&text_chunk[..valid_up_to]));
}
// Check if we have incomplete UTF-8 at the end
let invalid_start = chunk_start + valid_up_to;
let remaining = &buf[invalid_start..];
if remaining.len() <= 4 && pos >= buf.len() {
// Might be incomplete UTF-8 at buffer end
if let Err(utf8_err) = std::str::from_utf8(remaining) {
if utf8_err.error_len().is_none() {
// Incomplete UTF-8 sequence - buffer it
return (result, remaining.to_vec());
}
}
}
}
// Default: process everything lossily (invalid UTF-8 or remaining bytes are also invalid)
let event = AsciinemaEvent {
time,
event_type: AsciinemaEventType::Output,
data: String::from_utf8_lossy(&combined_buf).to_string(),
};
self.write_event(event)
} else {
// Incomplete UTF-8 at the end
let incomplete_bytes = &combined_buf[valid_up_to..];
// Only buffer up to 4 bytes (max UTF-8 character size)
if incomplete_bytes.len() <= 4 {
// Process the valid portion
if valid_up_to > 0 {
let data =
String::from_utf8_lossy(&combined_buf[..valid_up_to]).to_string();
self.utf8_buffer.extend_from_slice(incomplete_bytes);
let event = AsciinemaEvent {
time,
event_type: AsciinemaEventType::Output,
data,
};
self.write_event(event)
} else {
// Only incomplete bytes, buffer them
self.utf8_buffer.extend_from_slice(incomplete_bytes);
Ok(())
}
} else {
// Too many incomplete bytes, process everything lossily
let event = AsciinemaEvent {
time,
event_type: AsciinemaEventType::Output,
data: String::from_utf8_lossy(&combined_buf).to_string(),
};
self.write_event(event)
// Invalid UTF-8 in middle or complete invalid sequence
// Use lossy conversion for this part
let invalid_part = &text_chunk[valid_up_to..];
result.push_str(&String::from_utf8_lossy(invalid_part));
}
}
}
}
(result, Vec::new())
}
/// Find the length of the ANSI escape sequence at the start of `buf`.
///
/// Returns `Some(len)` when the first `len` bytes of `buf` can be emitted as one unit,
/// and `None` when `buf` does not start with ESC (0x1B) or when the sequence is still
/// incomplete and more data is needed before it can be emitted.
///
/// Recognized forms:
/// - CSI: `ESC [` parameter/intermediate bytes, then a final byte in `0x40..=0x7E`
/// - OSC: `ESC ]` ... terminated by BEL or by `ESC \` (ST)
/// - `ESC` + intermediate bytes (`0x20..=0x2F`) + final byte (`0x30..=0x7E`), e.g. `ESC ( B`
/// - two-byte sequences: `ESC` + one byte in `0x30..=0x7E`
///
/// An ESC followed by any other byte (a control byte, another ESC, or a non-ASCII byte)
/// yields `Some(1)`: only the ESC is consumed, so the following byte is neither split
/// out of a multi-byte UTF-8 character nor hidden when it starts a new escape sequence.
fn find_escape_sequence_end(&self, buf: &[u8]) -> Option<usize> {
    if buf.first() != Some(&0x1B) {
        return None;
    }

    // A lone ESC at the end of the buffer cannot be classified yet.
    let introducer = *buf.get(1)?;

    match introducer {
        // CSI sequences: ESC [ ... final_char
        b'[' => {
            for (offset, &byte) in buf.iter().enumerate().skip(2) {
                match byte {
                    // Parameter bytes (0-9 : ; < = > ?) and intermediate bytes (space ! " # ... /)
                    0x20..=0x3F => continue,
                    // Final byte (@ A-Z [ \ ] ^ _ ` a-z { | } ~) ends the sequence
                    0x40..=0x7E => return Some(offset + 1),
                    // Invalid byte: end the sequence just before it
                    _ => return Some(offset),
                }
            }
            None // Incomplete sequence
        }

        // OSC sequences: ESC ] ... (ST or BEL)
        b']' => {
            let mut pos = 2;
            while pos < buf.len() {
                match buf[pos] {
                    0x07 => return Some(pos + 1), // BEL terminator
                    0x1B if buf.get(pos + 1) == Some(&b'\\') => {
                        return Some(pos + 2); // ESC \ (ST) terminator
                    }
                    _ => pos += 1,
                }
            }
            None // Incomplete sequence
        }

        // ESC + intermediate bytes + final byte (e.g. charset designation ESC ( B)
        0x20..=0x2F => {
            for (offset, &byte) in buf.iter().enumerate().skip(2) {
                match byte {
                    0x20..=0x2F => continue,
                    0x30..=0x7E => return Some(offset + 1),
                    // Invalid byte: end the sequence just before it
                    _ => return Some(offset),
                }
            }
            None // Incomplete sequence
        }

        // Simple two-byte sequences: ESC + one final byte
        0x30..=0x7E => Some(2),

        // Not a valid continuation: consume only the ESC itself
        _ => Some(1),
    }
}
pub fn write_event(&mut self, event: AsciinemaEvent) -> Result<(), Error> {