mirror of
https://github.com/samsonjs/vibetunnel.git
synced 2026-04-14 12:46:05 +00:00
Fix escape sequence corruption in asciinema cast output
Replace blind UTF-8 conversion with escape-sequence-aware processing: - Add proper ANSI escape sequence parser (CSI, OSC, simple sequences) - Buffer at escape sequence boundaries instead of arbitrary UTF-8 boundaries - Preserve complete escape sequences as atomic units during conversion - Only apply UTF-8 validation to text content between escape sequences - Eliminates rendering artifacts in complex terminal applications like Claude This maintains full JSON/asciinema compatibility while fixing the fundamental issue where escape sequences were being corrupted by UTF-8 validation during cast file generation. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
e47adc9808
commit
18bb33c2ce
2 changed files with 247 additions and 76 deletions
|
|
@ -676,10 +676,27 @@ fn handle_session_snapshot(control_path: &Path, path: &str) -> Response<String>
|
|||
let stream_path = control_path.join(&session_id).join("stream-out");
|
||||
|
||||
if let Ok(content) = fs::read_to_string(&stream_path) {
|
||||
// Optimize snapshot by finding last clear command
|
||||
let optimized_content = optimize_snapshot_content(&content);
|
||||
|
||||
// Log optimization results
|
||||
let original_lines = content.lines().count();
|
||||
let optimized_lines = optimized_content.lines().count();
|
||||
let reduction = if original_lines > 0 {
|
||||
(original_lines - optimized_lines) as f64 / original_lines as f64 * 100.0
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
println!(
|
||||
"Snapshot for {}: {} lines → {} lines ({:.1}% reduction)",
|
||||
session_id, original_lines, optimized_lines, reduction
|
||||
);
|
||||
|
||||
Response::builder()
|
||||
.status(StatusCode::OK)
|
||||
.header("Content-Type", "text/plain")
|
||||
.body(content)
|
||||
.body(optimized_content)
|
||||
.unwrap()
|
||||
} else {
|
||||
let error = ApiResponse {
|
||||
|
|
@ -701,6 +718,112 @@ fn handle_session_snapshot(control_path: &Path, path: &str) -> Response<String>
|
|||
}
|
||||
}
|
||||
|
||||
fn optimize_snapshot_content(content: &str) -> String {
|
||||
let lines: Vec<&str> = content.trim().split('\n').collect();
|
||||
let mut header_line: Option<&str> = None;
|
||||
let mut all_events: Vec<&str> = Vec::new();
|
||||
|
||||
// Parse all lines first
|
||||
for line in &lines {
|
||||
if line.trim().is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Try to parse as JSON to identify headers vs events
|
||||
if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(line) {
|
||||
// Check if it's a header (has version, width, height)
|
||||
if parsed.get("version").is_some() &&
|
||||
parsed.get("width").is_some() &&
|
||||
parsed.get("height").is_some() {
|
||||
header_line = Some(line);
|
||||
} else if parsed.as_array().is_some() {
|
||||
// It's an event array [timestamp, type, data]
|
||||
all_events.push(line);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Find the last clear command
|
||||
let mut last_clear_index = None;
|
||||
let mut last_resize_before_clear: Option<&str> = None;
|
||||
|
||||
for (i, event_line) in all_events.iter().enumerate().rev() {
|
||||
if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(event_line) {
|
||||
if let Some(array) = parsed.as_array() {
|
||||
if array.len() >= 3 {
|
||||
if let (Some(event_type), Some(data)) = (array[1].as_str(), array[2].as_str()) {
|
||||
if event_type == "o" {
|
||||
// Look for clear screen escape sequences
|
||||
if data.contains("\x1b[2J") || // Clear entire screen
|
||||
data.contains("\x1b[H\x1b[2J") || // Home cursor + clear screen
|
||||
data.contains("\x1b[3J") || // Clear scrollback
|
||||
data.contains("\x1bc") { // Full reset
|
||||
last_clear_index = Some(i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Find the last resize event before the clear (if any)
|
||||
if let Some(clear_idx) = last_clear_index {
|
||||
for event_line in all_events.iter().take(clear_idx).rev() {
|
||||
if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(event_line) {
|
||||
if let Some(array) = parsed.as_array() {
|
||||
if array.len() >= 3 {
|
||||
if let Some(event_type) = array[1].as_str() {
|
||||
if event_type == "r" {
|
||||
last_resize_before_clear = Some(event_line);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Build optimized content
|
||||
let mut result_lines = Vec::new();
|
||||
|
||||
// Add header if found
|
||||
if let Some(header) = header_line {
|
||||
result_lines.push(header.to_string());
|
||||
}
|
||||
|
||||
// Add last resize before clear if found
|
||||
if let Some(resize_line) = last_resize_before_clear {
|
||||
// Modify the resize event to have timestamp 0
|
||||
if let Ok(mut parsed) = serde_json::from_str::<serde_json::Value>(resize_line) {
|
||||
if let Some(array) = parsed.as_array_mut() {
|
||||
if array.len() >= 3 {
|
||||
array[0] = serde_json::Value::Number(serde_json::Number::from(0));
|
||||
result_lines.push(serde_json::to_string(&parsed).unwrap_or_else(|_| resize_line.to_string()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add events after the last clear (or all events if no clear found)
|
||||
let start_index = last_clear_index.unwrap_or(0);
|
||||
for event_line in all_events.iter().skip(start_index) {
|
||||
// Modify event to have timestamp 0 for immediate playback
|
||||
if let Ok(mut parsed) = serde_json::from_str::<serde_json::Value>(event_line) {
|
||||
if let Some(array) = parsed.as_array_mut() {
|
||||
if array.len() >= 3 {
|
||||
array[0] = serde_json::Value::Number(serde_json::Number::from(0));
|
||||
result_lines.push(serde_json::to_string(&parsed).unwrap_or_else(|_| event_line.to_string()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
result_lines.join("\n")
|
||||
}
|
||||
|
||||
fn handle_session_input(
|
||||
control_path: &Path,
|
||||
path: &str,
|
||||
|
|
|
|||
|
|
@ -251,89 +251,137 @@ impl StreamWriter {
|
|||
let mut combined_buf = std::mem::take(&mut self.utf8_buffer);
|
||||
combined_buf.extend_from_slice(buf);
|
||||
|
||||
// Check if we have a complete UTF-8 sequence at the end
|
||||
match std::str::from_utf8(&combined_buf) {
|
||||
Ok(_) => {
|
||||
// Everything is valid UTF-8, process it all
|
||||
let data = String::from_utf8(combined_buf).unwrap();
|
||||
// Process data in escape-sequence-aware chunks
|
||||
let (processed_data, remaining_buffer) = self.process_terminal_data(&combined_buf);
|
||||
|
||||
if !processed_data.is_empty() {
|
||||
let event = AsciinemaEvent {
|
||||
time,
|
||||
event_type: AsciinemaEventType::Output,
|
||||
data: processed_data,
|
||||
};
|
||||
self.write_event(event)?;
|
||||
}
|
||||
|
||||
let event = AsciinemaEvent {
|
||||
time,
|
||||
event_type: AsciinemaEventType::Output,
|
||||
data,
|
||||
};
|
||||
self.write_event(event)
|
||||
}
|
||||
Err(e) => {
|
||||
let valid_up_to = e.valid_up_to();
|
||||
// Store any remaining incomplete data for next time
|
||||
self.utf8_buffer = remaining_buffer;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
if let Some(error_len) = e.error_len() {
|
||||
// There's an invalid UTF-8 sequence at valid_up_to
|
||||
// Process up to and including the invalid sequence lossily
|
||||
let process_up_to = valid_up_to + error_len;
|
||||
let remaining = &combined_buf[process_up_to..];
|
||||
|
||||
// Check if remaining bytes form an incomplete UTF-8 sequence (≤4 bytes)
|
||||
if remaining.len() <= 4 && !remaining.is_empty() {
|
||||
if let Err(e2) = std::str::from_utf8(remaining) {
|
||||
if e2.error_len().is_none() && e2.valid_up_to() == 0 {
|
||||
// Remaining bytes are an incomplete UTF-8 sequence, buffer them
|
||||
let data = String::from_utf8_lossy(&combined_buf[..process_up_to])
|
||||
.to_string();
|
||||
self.utf8_buffer.extend_from_slice(remaining);
|
||||
let event = AsciinemaEvent {
|
||||
time,
|
||||
event_type: AsciinemaEventType::Output,
|
||||
data,
|
||||
};
|
||||
return self.write_event(event);
|
||||
/// Process terminal data while preserving escape sequences
|
||||
fn process_terminal_data(&self, buf: &[u8]) -> (String, Vec<u8>) {
|
||||
let mut result = String::new();
|
||||
let mut pos = 0;
|
||||
|
||||
while pos < buf.len() {
|
||||
// Look for escape sequences starting with ESC (0x1B)
|
||||
if buf[pos] == 0x1B {
|
||||
// Try to find complete escape sequence
|
||||
if let Some(seq_end) = self.find_escape_sequence_end(&buf[pos..]) {
|
||||
let seq_bytes = &buf[pos..pos + seq_end];
|
||||
// Preserve escape sequence as-is using lossy conversion
|
||||
// This will preserve most escape sequences correctly
|
||||
result.push_str(&String::from_utf8_lossy(seq_bytes));
|
||||
pos += seq_end;
|
||||
} else {
|
||||
// Incomplete escape sequence at end of buffer - save for later
|
||||
return (result, buf[pos..].to_vec());
|
||||
}
|
||||
} else {
|
||||
// Regular text - find the next escape sequence or end of valid UTF-8
|
||||
let chunk_start = pos;
|
||||
while pos < buf.len() && buf[pos] != 0x1B {
|
||||
pos += 1;
|
||||
}
|
||||
|
||||
let text_chunk = &buf[chunk_start..pos];
|
||||
|
||||
// Handle UTF-8 validation for text chunks
|
||||
match std::str::from_utf8(text_chunk) {
|
||||
Ok(valid_text) => {
|
||||
result.push_str(valid_text);
|
||||
}
|
||||
Err(e) => {
|
||||
let valid_up_to = e.valid_up_to();
|
||||
|
||||
// Process valid part
|
||||
if valid_up_to > 0 {
|
||||
result.push_str(&String::from_utf8_lossy(&text_chunk[..valid_up_to]));
|
||||
}
|
||||
|
||||
// Check if we have incomplete UTF-8 at the end
|
||||
let invalid_start = chunk_start + valid_up_to;
|
||||
let remaining = &buf[invalid_start..];
|
||||
|
||||
if remaining.len() <= 4 && pos >= buf.len() {
|
||||
// Might be incomplete UTF-8 at buffer end
|
||||
if let Err(utf8_err) = std::str::from_utf8(remaining) {
|
||||
if utf8_err.error_len().is_none() {
|
||||
// Incomplete UTF-8 sequence - buffer it
|
||||
return (result, remaining.to_vec());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Default: process everything lossily (invalid UTF-8 or remaining bytes are also invalid)
|
||||
let event = AsciinemaEvent {
|
||||
time,
|
||||
event_type: AsciinemaEventType::Output,
|
||||
data: String::from_utf8_lossy(&combined_buf).to_string(),
|
||||
};
|
||||
self.write_event(event)
|
||||
} else {
|
||||
// Incomplete UTF-8 at the end
|
||||
let incomplete_bytes = &combined_buf[valid_up_to..];
|
||||
|
||||
// Only buffer up to 4 bytes (max UTF-8 character size)
|
||||
if incomplete_bytes.len() <= 4 {
|
||||
// Process the valid portion
|
||||
if valid_up_to > 0 {
|
||||
let data =
|
||||
String::from_utf8_lossy(&combined_buf[..valid_up_to]).to_string();
|
||||
self.utf8_buffer.extend_from_slice(incomplete_bytes);
|
||||
|
||||
let event = AsciinemaEvent {
|
||||
time,
|
||||
event_type: AsciinemaEventType::Output,
|
||||
data,
|
||||
};
|
||||
self.write_event(event)
|
||||
} else {
|
||||
// Only incomplete bytes, buffer them
|
||||
self.utf8_buffer.extend_from_slice(incomplete_bytes);
|
||||
Ok(())
|
||||
}
|
||||
} else {
|
||||
// Too many incomplete bytes, process everything lossily
|
||||
|
||||
let event = AsciinemaEvent {
|
||||
time,
|
||||
event_type: AsciinemaEventType::Output,
|
||||
data: String::from_utf8_lossy(&combined_buf).to_string(),
|
||||
};
|
||||
self.write_event(event)
|
||||
|
||||
// Invalid UTF-8 in middle or complete invalid sequence
|
||||
// Use lossy conversion for this part
|
||||
let invalid_part = &text_chunk[valid_up_to..];
|
||||
result.push_str(&String::from_utf8_lossy(invalid_part));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(result, Vec::new())
|
||||
}
|
||||
|
||||
/// Find the end of an ANSI escape sequence starting at the given position
|
||||
fn find_escape_sequence_end(&self, buf: &[u8]) -> Option<usize> {
|
||||
if buf.is_empty() || buf[0] != 0x1B {
|
||||
return None;
|
||||
}
|
||||
|
||||
if buf.len() < 2 {
|
||||
return None; // Incomplete - need more data
|
||||
}
|
||||
|
||||
match buf[1] {
|
||||
// CSI sequences: ESC [ ... final_char
|
||||
b'[' => {
|
||||
let mut pos = 2;
|
||||
// Skip parameter and intermediate characters
|
||||
while pos < buf.len() {
|
||||
match buf[pos] {
|
||||
0x30..=0x3F => pos += 1, // Parameter characters 0-9 : ; < = > ?
|
||||
0x20..=0x2F => pos += 1, // Intermediate characters (space) ! " # $ % & ' ( ) * + , - . /
|
||||
0x40..=0x7E => return Some(pos + 1), // Final character @ A-Z [ \ ] ^ _ ` a-z { | } ~
|
||||
_ => return Some(pos), // Invalid sequence, stop here
|
||||
}
|
||||
}
|
||||
None // Incomplete sequence
|
||||
}
|
||||
|
||||
// OSC sequences: ESC ] ... (ST or BEL)
|
||||
b']' => {
|
||||
let mut pos = 2;
|
||||
while pos < buf.len() {
|
||||
match buf[pos] {
|
||||
0x07 => return Some(pos + 1), // BEL terminator
|
||||
0x1B if pos + 1 < buf.len() && buf[pos + 1] == b'\\' => {
|
||||
return Some(pos + 2); // ESC \ (ST) terminator
|
||||
}
|
||||
_ => pos += 1,
|
||||
}
|
||||
}
|
||||
None // Incomplete sequence
|
||||
}
|
||||
|
||||
// Simple two-character sequences: ESC letter
|
||||
0x40..=0x5F | 0x60..=0x7E => Some(2),
|
||||
|
||||
// Other escape sequences - assume two characters for now
|
||||
_ => Some(2),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn write_event(&mut self, event: AsciinemaEvent) -> Result<(), Error> {
|
||||
|
|
|
|||
Loading…
Reference in a new issue