mirror of
https://github.com/samsonjs/vibetunnel.git
synced 2026-04-27 15:17:38 +00:00
Fix escape sequence corruption in asciinema cast output
Replace blind UTF-8 conversion with escape-sequence-aware processing: - Add proper ANSI escape sequence parser (CSI, OSC, simple sequences) - Buffer at escape sequence boundaries instead of arbitrary UTF-8 boundaries - Preserve complete escape sequences as atomic units during conversion - Only apply UTF-8 validation to text content between escape sequences - Eliminates rendering artifacts in complex terminal applications like Claude This maintains full JSON/asciinema compatibility while fixing the fundamental issue where escape sequences were being corrupted by UTF-8 validation during cast file generation. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
e47adc9808
commit
18bb33c2ce
2 changed files with 247 additions and 76 deletions
|
|
@ -676,10 +676,27 @@ fn handle_session_snapshot(control_path: &Path, path: &str) -> Response<String>
|
||||||
let stream_path = control_path.join(&session_id).join("stream-out");
|
let stream_path = control_path.join(&session_id).join("stream-out");
|
||||||
|
|
||||||
if let Ok(content) = fs::read_to_string(&stream_path) {
|
if let Ok(content) = fs::read_to_string(&stream_path) {
|
||||||
|
// Optimize snapshot by finding last clear command
|
||||||
|
let optimized_content = optimize_snapshot_content(&content);
|
||||||
|
|
||||||
|
// Log optimization results
|
||||||
|
let original_lines = content.lines().count();
|
||||||
|
let optimized_lines = optimized_content.lines().count();
|
||||||
|
let reduction = if original_lines > 0 {
|
||||||
|
(original_lines - optimized_lines) as f64 / original_lines as f64 * 100.0
|
||||||
|
} else {
|
||||||
|
0.0
|
||||||
|
};
|
||||||
|
|
||||||
|
println!(
|
||||||
|
"Snapshot for {}: {} lines → {} lines ({:.1}% reduction)",
|
||||||
|
session_id, original_lines, optimized_lines, reduction
|
||||||
|
);
|
||||||
|
|
||||||
Response::builder()
|
Response::builder()
|
||||||
.status(StatusCode::OK)
|
.status(StatusCode::OK)
|
||||||
.header("Content-Type", "text/plain")
|
.header("Content-Type", "text/plain")
|
||||||
.body(content)
|
.body(optimized_content)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
} else {
|
} else {
|
||||||
let error = ApiResponse {
|
let error = ApiResponse {
|
||||||
|
|
@ -701,6 +718,112 @@ fn handle_session_snapshot(control_path: &Path, path: &str) -> Response<String>
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn optimize_snapshot_content(content: &str) -> String {
|
||||||
|
let lines: Vec<&str> = content.trim().split('\n').collect();
|
||||||
|
let mut header_line: Option<&str> = None;
|
||||||
|
let mut all_events: Vec<&str> = Vec::new();
|
||||||
|
|
||||||
|
// Parse all lines first
|
||||||
|
for line in &lines {
|
||||||
|
if line.trim().is_empty() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to parse as JSON to identify headers vs events
|
||||||
|
if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(line) {
|
||||||
|
// Check if it's a header (has version, width, height)
|
||||||
|
if parsed.get("version").is_some() &&
|
||||||
|
parsed.get("width").is_some() &&
|
||||||
|
parsed.get("height").is_some() {
|
||||||
|
header_line = Some(line);
|
||||||
|
} else if parsed.as_array().is_some() {
|
||||||
|
// It's an event array [timestamp, type, data]
|
||||||
|
all_events.push(line);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find the last clear command
|
||||||
|
let mut last_clear_index = None;
|
||||||
|
let mut last_resize_before_clear: Option<&str> = None;
|
||||||
|
|
||||||
|
for (i, event_line) in all_events.iter().enumerate().rev() {
|
||||||
|
if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(event_line) {
|
||||||
|
if let Some(array) = parsed.as_array() {
|
||||||
|
if array.len() >= 3 {
|
||||||
|
if let (Some(event_type), Some(data)) = (array[1].as_str(), array[2].as_str()) {
|
||||||
|
if event_type == "o" {
|
||||||
|
// Look for clear screen escape sequences
|
||||||
|
if data.contains("\x1b[2J") || // Clear entire screen
|
||||||
|
data.contains("\x1b[H\x1b[2J") || // Home cursor + clear screen
|
||||||
|
data.contains("\x1b[3J") || // Clear scrollback
|
||||||
|
data.contains("\x1bc") { // Full reset
|
||||||
|
last_clear_index = Some(i);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find the last resize event before the clear (if any)
|
||||||
|
if let Some(clear_idx) = last_clear_index {
|
||||||
|
for event_line in all_events.iter().take(clear_idx).rev() {
|
||||||
|
if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(event_line) {
|
||||||
|
if let Some(array) = parsed.as_array() {
|
||||||
|
if array.len() >= 3 {
|
||||||
|
if let Some(event_type) = array[1].as_str() {
|
||||||
|
if event_type == "r" {
|
||||||
|
last_resize_before_clear = Some(event_line);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build optimized content
|
||||||
|
let mut result_lines = Vec::new();
|
||||||
|
|
||||||
|
// Add header if found
|
||||||
|
if let Some(header) = header_line {
|
||||||
|
result_lines.push(header.to_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add last resize before clear if found
|
||||||
|
if let Some(resize_line) = last_resize_before_clear {
|
||||||
|
// Modify the resize event to have timestamp 0
|
||||||
|
if let Ok(mut parsed) = serde_json::from_str::<serde_json::Value>(resize_line) {
|
||||||
|
if let Some(array) = parsed.as_array_mut() {
|
||||||
|
if array.len() >= 3 {
|
||||||
|
array[0] = serde_json::Value::Number(serde_json::Number::from(0));
|
||||||
|
result_lines.push(serde_json::to_string(&parsed).unwrap_or_else(|_| resize_line.to_string()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add events after the last clear (or all events if no clear found)
|
||||||
|
let start_index = last_clear_index.unwrap_or(0);
|
||||||
|
for event_line in all_events.iter().skip(start_index) {
|
||||||
|
// Modify event to have timestamp 0 for immediate playback
|
||||||
|
if let Ok(mut parsed) = serde_json::from_str::<serde_json::Value>(event_line) {
|
||||||
|
if let Some(array) = parsed.as_array_mut() {
|
||||||
|
if array.len() >= 3 {
|
||||||
|
array[0] = serde_json::Value::Number(serde_json::Number::from(0));
|
||||||
|
result_lines.push(serde_json::to_string(&parsed).unwrap_or_else(|_| event_line.to_string()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
result_lines.join("\n")
|
||||||
|
}
|
||||||
|
|
||||||
fn handle_session_input(
|
fn handle_session_input(
|
||||||
control_path: &Path,
|
control_path: &Path,
|
||||||
path: &str,
|
path: &str,
|
||||||
|
|
|
||||||
|
|
@ -251,89 +251,137 @@ impl StreamWriter {
|
||||||
let mut combined_buf = std::mem::take(&mut self.utf8_buffer);
|
let mut combined_buf = std::mem::take(&mut self.utf8_buffer);
|
||||||
combined_buf.extend_from_slice(buf);
|
combined_buf.extend_from_slice(buf);
|
||||||
|
|
||||||
// Check if we have a complete UTF-8 sequence at the end
|
// Process data in escape-sequence-aware chunks
|
||||||
match std::str::from_utf8(&combined_buf) {
|
let (processed_data, remaining_buffer) = self.process_terminal_data(&combined_buf);
|
||||||
Ok(_) => {
|
|
||||||
// Everything is valid UTF-8, process it all
|
if !processed_data.is_empty() {
|
||||||
let data = String::from_utf8(combined_buf).unwrap();
|
let event = AsciinemaEvent {
|
||||||
|
time,
|
||||||
|
event_type: AsciinemaEventType::Output,
|
||||||
|
data: processed_data,
|
||||||
|
};
|
||||||
|
self.write_event(event)?;
|
||||||
|
}
|
||||||
|
|
||||||
let event = AsciinemaEvent {
|
// Store any remaining incomplete data for next time
|
||||||
time,
|
self.utf8_buffer = remaining_buffer;
|
||||||
event_type: AsciinemaEventType::Output,
|
Ok(())
|
||||||
data,
|
}
|
||||||
};
|
|
||||||
self.write_event(event)
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
let valid_up_to = e.valid_up_to();
|
|
||||||
|
|
||||||
if let Some(error_len) = e.error_len() {
|
/// Process terminal data while preserving escape sequences
|
||||||
// There's an invalid UTF-8 sequence at valid_up_to
|
fn process_terminal_data(&self, buf: &[u8]) -> (String, Vec<u8>) {
|
||||||
// Process up to and including the invalid sequence lossily
|
let mut result = String::new();
|
||||||
let process_up_to = valid_up_to + error_len;
|
let mut pos = 0;
|
||||||
let remaining = &combined_buf[process_up_to..];
|
|
||||||
|
while pos < buf.len() {
|
||||||
// Check if remaining bytes form an incomplete UTF-8 sequence (≤4 bytes)
|
// Look for escape sequences starting with ESC (0x1B)
|
||||||
if remaining.len() <= 4 && !remaining.is_empty() {
|
if buf[pos] == 0x1B {
|
||||||
if let Err(e2) = std::str::from_utf8(remaining) {
|
// Try to find complete escape sequence
|
||||||
if e2.error_len().is_none() && e2.valid_up_to() == 0 {
|
if let Some(seq_end) = self.find_escape_sequence_end(&buf[pos..]) {
|
||||||
// Remaining bytes are an incomplete UTF-8 sequence, buffer them
|
let seq_bytes = &buf[pos..pos + seq_end];
|
||||||
let data = String::from_utf8_lossy(&combined_buf[..process_up_to])
|
// Preserve escape sequence as-is using lossy conversion
|
||||||
.to_string();
|
// This will preserve most escape sequences correctly
|
||||||
self.utf8_buffer.extend_from_slice(remaining);
|
result.push_str(&String::from_utf8_lossy(seq_bytes));
|
||||||
let event = AsciinemaEvent {
|
pos += seq_end;
|
||||||
time,
|
} else {
|
||||||
event_type: AsciinemaEventType::Output,
|
// Incomplete escape sequence at end of buffer - save for later
|
||||||
data,
|
return (result, buf[pos..].to_vec());
|
||||||
};
|
}
|
||||||
return self.write_event(event);
|
} else {
|
||||||
|
// Regular text - find the next escape sequence or end of valid UTF-8
|
||||||
|
let chunk_start = pos;
|
||||||
|
while pos < buf.len() && buf[pos] != 0x1B {
|
||||||
|
pos += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
let text_chunk = &buf[chunk_start..pos];
|
||||||
|
|
||||||
|
// Handle UTF-8 validation for text chunks
|
||||||
|
match std::str::from_utf8(text_chunk) {
|
||||||
|
Ok(valid_text) => {
|
||||||
|
result.push_str(valid_text);
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
let valid_up_to = e.valid_up_to();
|
||||||
|
|
||||||
|
// Process valid part
|
||||||
|
if valid_up_to > 0 {
|
||||||
|
result.push_str(&String::from_utf8_lossy(&text_chunk[..valid_up_to]));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if we have incomplete UTF-8 at the end
|
||||||
|
let invalid_start = chunk_start + valid_up_to;
|
||||||
|
let remaining = &buf[invalid_start..];
|
||||||
|
|
||||||
|
if remaining.len() <= 4 && pos >= buf.len() {
|
||||||
|
// Might be incomplete UTF-8 at buffer end
|
||||||
|
if let Err(utf8_err) = std::str::from_utf8(remaining) {
|
||||||
|
if utf8_err.error_len().is_none() {
|
||||||
|
// Incomplete UTF-8 sequence - buffer it
|
||||||
|
return (result, remaining.to_vec());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
// Invalid UTF-8 in middle or complete invalid sequence
|
||||||
// Default: process everything lossily (invalid UTF-8 or remaining bytes are also invalid)
|
// Use lossy conversion for this part
|
||||||
let event = AsciinemaEvent {
|
let invalid_part = &text_chunk[valid_up_to..];
|
||||||
time,
|
result.push_str(&String::from_utf8_lossy(invalid_part));
|
||||||
event_type: AsciinemaEventType::Output,
|
|
||||||
data: String::from_utf8_lossy(&combined_buf).to_string(),
|
|
||||||
};
|
|
||||||
self.write_event(event)
|
|
||||||
} else {
|
|
||||||
// Incomplete UTF-8 at the end
|
|
||||||
let incomplete_bytes = &combined_buf[valid_up_to..];
|
|
||||||
|
|
||||||
// Only buffer up to 4 bytes (max UTF-8 character size)
|
|
||||||
if incomplete_bytes.len() <= 4 {
|
|
||||||
// Process the valid portion
|
|
||||||
if valid_up_to > 0 {
|
|
||||||
let data =
|
|
||||||
String::from_utf8_lossy(&combined_buf[..valid_up_to]).to_string();
|
|
||||||
self.utf8_buffer.extend_from_slice(incomplete_bytes);
|
|
||||||
|
|
||||||
let event = AsciinemaEvent {
|
|
||||||
time,
|
|
||||||
event_type: AsciinemaEventType::Output,
|
|
||||||
data,
|
|
||||||
};
|
|
||||||
self.write_event(event)
|
|
||||||
} else {
|
|
||||||
// Only incomplete bytes, buffer them
|
|
||||||
self.utf8_buffer.extend_from_slice(incomplete_bytes);
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Too many incomplete bytes, process everything lossily
|
|
||||||
|
|
||||||
let event = AsciinemaEvent {
|
|
||||||
time,
|
|
||||||
event_type: AsciinemaEventType::Output,
|
|
||||||
data: String::from_utf8_lossy(&combined_buf).to_string(),
|
|
||||||
};
|
|
||||||
self.write_event(event)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
(result, Vec::new())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Find the end of an ANSI escape sequence starting at the given position
|
||||||
|
fn find_escape_sequence_end(&self, buf: &[u8]) -> Option<usize> {
|
||||||
|
if buf.is_empty() || buf[0] != 0x1B {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
if buf.len() < 2 {
|
||||||
|
return None; // Incomplete - need more data
|
||||||
|
}
|
||||||
|
|
||||||
|
match buf[1] {
|
||||||
|
// CSI sequences: ESC [ ... final_char
|
||||||
|
b'[' => {
|
||||||
|
let mut pos = 2;
|
||||||
|
// Skip parameter and intermediate characters
|
||||||
|
while pos < buf.len() {
|
||||||
|
match buf[pos] {
|
||||||
|
0x30..=0x3F => pos += 1, // Parameter characters 0-9 : ; < = > ?
|
||||||
|
0x20..=0x2F => pos += 1, // Intermediate characters (space) ! " # $ % & ' ( ) * + , - . /
|
||||||
|
0x40..=0x7E => return Some(pos + 1), // Final character @ A-Z [ \ ] ^ _ ` a-z { | } ~
|
||||||
|
_ => return Some(pos), // Invalid sequence, stop here
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None // Incomplete sequence
|
||||||
|
}
|
||||||
|
|
||||||
|
// OSC sequences: ESC ] ... (ST or BEL)
|
||||||
|
b']' => {
|
||||||
|
let mut pos = 2;
|
||||||
|
while pos < buf.len() {
|
||||||
|
match buf[pos] {
|
||||||
|
0x07 => return Some(pos + 1), // BEL terminator
|
||||||
|
0x1B if pos + 1 < buf.len() && buf[pos + 1] == b'\\' => {
|
||||||
|
return Some(pos + 2); // ESC \ (ST) terminator
|
||||||
|
}
|
||||||
|
_ => pos += 1,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None // Incomplete sequence
|
||||||
|
}
|
||||||
|
|
||||||
|
// Simple two-character sequences: ESC letter
|
||||||
|
0x40..=0x5F | 0x60..=0x7E => Some(2),
|
||||||
|
|
||||||
|
// Other escape sequences - assume two characters for now
|
||||||
|
_ => Some(2),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn write_event(&mut self, event: AsciinemaEvent) -> Result<(), Error> {
|
pub fn write_event(&mut self, event: AsciinemaEvent) -> Result<(), Error> {
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue