Fix screencap authentication (fixes #264) (#374)

This commit is contained in:
Peter Steinberger 2025-07-16 23:15:45 +02:00 committed by GitHub
parent 253d0ae3e7
commit 19f80eaf5c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 263 additions and 90 deletions

View file

@ -292,14 +292,24 @@ final class SystemPermissionManager {
// If CGPreflightScreenCaptureAccess returns false, we need to verify
// because it might be a false negative on some macOS versions
// Try SCShareableContent with a very short timeout to avoid hanging
logger.info("CGPreflightScreenCaptureAccess returned false, checking with SCShareableContent...")
do {
_ = try await SCShareableContent.current
logger.debug("Screen recording permission confirmed via SCShareableContent")
logger.info("Screen recording permission confirmed via SCShareableContent")
screenRecordingPermissionCache = (granted: true, timestamp: Date())
return true
} catch {
logger.debug("Screen recording permission not granted or check timed out: \(error)")
logger.error("❌ Screen recording permission check failed: \(error.localizedDescription)")
logger.error("Error type: \(type(of: error)), error: \(error)")
// Don't cache false result if it's a timeout or other non-permission error
let errorString = String(describing: error).lowercased()
if errorString.contains("timeout") || errorString.contains("cancelled") {
logger.warning("⚠️ Permission check timed out, not caching result")
return false
}
screenRecordingPermissionCache = (granted: false, timestamp: Date())
return false
}

View file

@ -605,10 +605,17 @@ final class WebRTCManager: NSObject {
let action = json["action"] as? String
else {
logger.error("Failed to decode control message")
logger.error(" 📋 Raw data length: \(data.count) bytes")
if let str = String(data: data, encoding: .utf8) {
logger.error(" 📋 Raw data preview: \(str.prefix(200))...")
}
return nil
}
logger.info("📥 Received control message with action: \(action)")
logger.info(" 📋 Message ID: \(json["id"] as? String ?? "NO ID")")
logger.info(" 📋 Message type: \(json["type"] as? String ?? "NO TYPE")")
logger.info(" 📋 Message category: \(json["category"] as? String ?? "NO CATEGORY")")
// Log detailed info for api-request messages
if action == "api-request" {
@ -893,25 +900,37 @@ final class WebRTCManager: NSObject {
private func handleApiRequest(_ json: [String: Any]) async {
logger.info("🔍 Starting handleApiRequest...")
logger.info(" 📋 JSON data: \(json)")
logger.info(" 🔍 Message ID from json: \(json["id"] as? String ?? "NO ID")")
guard let requestId = json["requestId"] as? String,
let method = json["method"] as? String,
let endpoint = json["endpoint"] as? String
// Extract payload which contains the actual API request details
guard let payload = json["payload"] as? [String: Any] else {
logger.error("❌ Missing payload in API request")
logger.error(" 📋 Full json keys: \(json.keys.joined(separator: ", "))")
return
}
logger.info(" 📋 Payload data: \(payload)")
guard let requestId = payload["requestId"] as? String,
let method = payload["method"] as? String,
let endpoint = payload["endpoint"] as? String
else {
logger.error("Invalid API request format")
logger
.error(
" 📋 Missing fields - requestId: \(json["requestId"] != nil), method: \(json["method"] != nil), endpoint: \(json["endpoint"] != nil)"
" 📋 Missing fields - requestId: \(payload["requestId"] != nil), method: \(payload["method"] != nil), endpoint: \(payload["endpoint"] != nil)"
)
logger.error(" 📋 Full payload keys: \(payload.keys.joined(separator: ", "))")
return
}
logger.info("📨 Received API request: \(method) \(endpoint)")
logger.info(" 📋 Request ID: \(requestId)")
logger.info(" 📋 Message ID: \(json["id"] as? String ?? "NO ID")")
logger.info(" 📋 Full request data: \(json)")
// Extract session ID from request
let sessionId = json["sessionId"] as? String
// Extract session ID from payload (where the browser puts it)
let sessionId = payload["sessionId"] as? String
logger.info(" 📋 Request session ID: \(sessionId ?? "nil")")
logger.info(" 📋 Current active session: \(self.activeSessionId ?? "nil")")
@ -974,17 +993,17 @@ final class WebRTCManager: NSObject {
// Process API request on background queue to avoid blocking main thread
Task {
logger.info("🔄 Starting Task for API request: \(requestId)")
logger.info("📋 About to extract params from json")
logger.info("📋 json keys: \(json.keys.sorted())")
logger.info("📋 json[\"params\"] exists: \(json["params"] != nil)")
logger.info("📋 json[\"params\"] type: \(type(of: json["params"]))")
logger.info("📋 About to extract params from payload")
logger.info("📋 payload keys: \(payload.keys.sorted())")
logger.info("📋 payload[\"params\"] exists: \(payload["params"] != nil)")
logger.info("📋 payload[\"params\"] type: \(type(of: payload["params"]))")
do {
logger.info("🔄 About to call processApiRequest")
let result = try await processApiRequest(
method: method,
endpoint: endpoint,
params: json["params"],
params: payload["params"],
sessionId: sessionId
)
logger.info("📤 Sending API response for request \(requestId)")
@ -1043,13 +1062,27 @@ final class WebRTCManager: NSObject {
// Check screen recording permission first for endpoints that need it
if endpoint == "/processes" || endpoint == "/displays" {
logger.info("🔍 Checking screen recording permission for endpoint: \(endpoint)")
let hasPermission = await MainActor.run {
SystemPermissionManager.shared.hasPermission(.screenRecording)
}
logger.info("📋 Screen recording permission check result: \(hasPermission)")
if !hasPermission {
logger.warning("⚠️ Screen recording permission not granted for \(endpoint)")
throw ScreencapError.permissionDenied
logger.error("❌ Screen recording permission not granted for \(endpoint)")
// Force a re-check in case of cached false negative
logger.info("🔄 Forcing permission re-check...")
await SystemPermissionManager.shared.checkAllPermissions()
let hasPermissionRetry = await MainActor.run {
SystemPermissionManager.shared.hasPermission(.screenRecording)
}
logger.info("📋 Re-check result: \(hasPermissionRetry)")
if !hasPermissionRetry {
throw ScreencapError.permissionDenied
}
}
}

View file

@ -9,7 +9,8 @@ const _logger = createLogger('screencap-sidebar');
export class ScreencapSidebar extends LitElement {
static styles = css`
:host {
display: block;
display: flex;
flex-direction: column;
height: 100%;
background: rgb(var(--color-bg));
border-right: 1px solid rgb(var(--color-border));

View file

@ -30,12 +30,19 @@ const logger = createLogger('screencap-view');
export class ScreencapView extends LitElement {
static styles = css`
:host {
display: flex;
flex-direction: column;
height: 100vh;
display: block;
position: fixed;
top: 0;
left: 0;
right: 0;
bottom: 0;
width: 100%;
height: 100%;
box-sizing: border-box;
background: rgb(var(--color-bg));
color: rgb(var(--color-text));
font-family: 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Menlo, Consolas, 'DejaVu Sans Mono', monospace;
overflow: hidden;
/* Honor safe areas on mobile devices */
padding-top: env(safe-area-inset-top);
@ -118,14 +125,18 @@ export class ScreencapView extends LitElement {
display: flex;
flex: 1;
overflow: hidden;
min-height: 0; /* Critical for flexbox children to shrink */
}
.sidebar {
width: 320px;
transition: transform 0.3s cubic-bezier(0.25, 0.46, 0.45, 0.94), margin-left 0.3s cubic-bezier(0.25, 0.46, 0.45, 0.94);
overflow: hidden;
overflow-y: auto;
overflow-x: hidden;
flex-shrink: 0;
position: relative;
display: flex;
flex-direction: column;
}
.sidebar.collapsed {
@ -139,6 +150,7 @@ export class ScreencapView extends LitElement {
flex-direction: column;
overflow: hidden;
min-width: 0; /* Allow content to shrink below its minimum content size */
min-height: 0; /* Allow content to shrink below its minimum content size */
}
.capture-area {
@ -1090,7 +1102,8 @@ export class ScreencapView extends LitElement {
render() {
return html`
<div class="header">
<div style="display: flex; flex-direction: column; height: 100%; width: 100%;">
<div class="header">
<button
class="back-btn"
@click=${() => {
@ -1223,6 +1236,7 @@ export class ScreencapView extends LitElement {
</div>
</div>
</div>
</div>
`;
}

View file

@ -10,6 +10,7 @@ interface ControlMessage {
action: string;
payload?: unknown;
sessionId?: string;
userId?: string;
error?: string;
}
@ -21,7 +22,7 @@ export class ScreencapWebSocketClient {
>();
private isConnected = false;
private connectionPromise: Promise<void> | null = null;
public sessionId: string | null = null;
public sessionId: string;
// Event handlers for WebRTC signaling
public onOffer?: (data: RTCSessionDescriptionInit) => void;
@ -31,7 +32,11 @@ export class ScreencapWebSocketClient {
public onReady?: () => void;
constructor(private wsUrl: string) {
logger.log(`📡 ScreencapWebSocketClient created with URL: ${wsUrl}`);
// Generate session ID immediately for all requests
this.sessionId = crypto.randomUUID();
logger.log(
`📡 ScreencapWebSocketClient created with URL: ${wsUrl}, sessionId: ${this.sessionId}`
);
}
private async connect(): Promise<void> {
@ -258,8 +263,9 @@ export class ScreencapWebSocketClient {
endpoint,
params,
requestId, // Include original requestId in payload for mac-side compatibility
sessionId: this.sessionId, // Include sessionId in payload as expected by ScreenCaptureApiRequest
},
sessionId: this.sessionId || undefined,
sessionId: this.sessionId,
};
return new Promise((resolve, reject) => {
@ -294,7 +300,7 @@ export class ScreencapWebSocketClient {
category: 'screencap',
action,
payload: data,
sessionId: this.sessionId || undefined,
sessionId: this.sessionId,
};
logger.log(`📤 Sending signal:`, message);
@ -311,28 +317,23 @@ export class ScreencapWebSocketClient {
}
async startCapture(params: { type: string; index: number; webrtc?: boolean; use8k?: boolean }) {
// Generate a session ID for this capture session if not present
if (!this.sessionId) {
this.sessionId = crypto.randomUUID();
logger.log(`Generated session ID: ${this.sessionId}`);
}
// Session ID is already generated in constructor
logger.log(`Starting capture with session ID: ${this.sessionId}`);
return this.request('POST', '/capture', params);
}
async captureWindow(params: { cgWindowID: number; webrtc?: boolean; use8k?: boolean }) {
// Generate a session ID for this capture session if not present
if (!this.sessionId) {
this.sessionId = crypto.randomUUID();
logger.log(`Generated session ID: ${this.sessionId}`);
}
// Session ID is already generated in constructor
logger.log(`Capturing window with session ID: ${this.sessionId}`);
return this.request('POST', '/capture-window', params);
}
async stopCapture() {
try {
const result = await this.request('POST', '/stop');
// Clear session ID only after successful stop
this.sessionId = null;
// Generate new session ID after successful stop
this.sessionId = crypto.randomUUID();
logger.log(`Generated new session ID after stop: ${this.sessionId}`);
return result;
} catch (error) {
// If stop fails, don't clear the session ID

View file

@ -166,6 +166,7 @@ describe('screencap routes', () => {
const mockReq = {} as Request;
const mockRes = {
send: vi.fn(),
sendFile: vi.fn(),
} as unknown as Response;
const mockNext = vi.fn();
@ -185,8 +186,9 @@ describe('screencap routes', () => {
// Call the handler
pageHandler(mockReq, mockRes, mockNext);
expect(mockRes.send).toHaveBeenCalledWith(expect.stringContaining('<!DOCTYPE html>'));
expect(mockRes.send).toHaveBeenCalledWith(expect.stringContaining('<screencap-view>'));
expect(mockRes.sendFile).toHaveBeenCalledWith(
expect.stringContaining('public/screencap.html')
);
});
});

View file

@ -1,3 +1,4 @@
import * as path from 'node:path';
import { type NextFunction, type Request, type Response, Router } from 'express';
import { createLogger } from '../utils/logger.js';
@ -28,48 +29,9 @@ export function createScreencapRoutes(): Router {
next();
};
// Serve screencap frontend page
// Serve screencap frontend page (serve the static file instead of inline HTML)
router.get('/screencap', requireMacOS, (_req, res) => {
res.send(`
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Screen Capture - VibeTunnel</title>
<link rel="stylesheet" href="/bundle/styles.css">
<style>
:root {
--dark-bg: #0a0a0a;
--dark-bg-elevated: #171717;
--dark-surface-hover: #262626;
--dark-border: #404040;
--dark-text: #fafafa;
--dark-text-muted: #a3a3a3;
--accent-primary: #3b82f6;
--accent-secondary: #60a5fa;
--status-success: #22c55e;
--status-warning: #f59e0b;
--status-error: #ef4444;
--font-mono: 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas, 'Courier New', monospace;
}
body {
margin: 0;
padding: 0;
font-family: var(--font-mono);
background: var(--dark-bg);
color: var(--dark-text);
overflow: hidden;
}
</style>
</head>
<body>
<screencap-view></screencap-view>
<script type="module" src="/bundle/screencap.js"></script>
</body>
</html>
`);
res.sendFile(path.join(process.cwd(), 'public', 'screencap.html'));
});
return router;

View file

@ -985,8 +985,8 @@ export async function createApp(): Promise<AppInstance> {
return;
}
logger.log('✅ Passing connection to controlUnixHandler');
controlUnixHandler.handleBrowserConnection(ws);
logger.log('✅ Passing connection to controlUnixHandler with userId:', userId);
controlUnixHandler.handleBrowserConnection(ws, userId);
} else if (pathname === '/ws/config') {
logger.log('⚙️ Handling config WebSocket connection');
// Add client to the set

View file

@ -12,6 +12,7 @@ export interface ControlMessage {
action: string;
payload?: unknown;
sessionId?: string;
userId?: string;
error?: string;
}

View file

@ -133,20 +133,33 @@ class SystemHandler implements MessageHandler {
class ScreenCaptureHandler implements MessageHandler {
private browserSocket: WebSocket | null = null;
private userId: string | null = null;
constructor(private controlUnixHandler: ControlUnixHandler) {}
setBrowserSocket(ws: WebSocket | null) {
setBrowserSocket(ws: WebSocket | null, userId?: string) {
this.browserSocket = ws;
this.userId = userId || null;
logger.log(`🔐 ScreenCaptureHandler userId set to: ${this.userId || 'unknown'}`);
}
isBrowserConnected(): boolean {
return this.browserSocket !== null && this.browserSocket.readyState === WS.OPEN;
}
getUserId(): string | null {
return this.userId;
}
async handleMessage(message: ControlMessage): Promise<ControlMessage | null> {
logger.log(`Screen capture handler: ${message.action}`);
// If message has a sessionId and we have a userId, associate them
if (message.sessionId && this.userId) {
logger.log(`🔐 Associating sessionId ${message.sessionId} with userId ${this.userId}`);
// The Mac app should handle this association
}
switch (message.action) {
case 'mac-ready':
// Mac app connected and ready
@ -487,17 +500,18 @@ export class ControlUnixHandler {
logger.log('✅ system:ready event sent');
}
handleBrowserConnection(ws: WebSocket) {
handleBrowserConnection(ws: WebSocket, userId?: string) {
logger.log('🌐 New browser WebSocket connection for control messages');
logger.log(`👤 User ID: ${userId || 'unknown'}`);
logger.log(
`🔌 Mac socket status on browser connect: ${this.macSocket ? 'CONNECTED' : 'NOT CONNECTED'}`
);
logger.log(`🖥️ Screen capture handler exists: ${!!this.screenCaptureHandler}`);
// Set browser socket in screen capture handler
this.screenCaptureHandler.setBrowserSocket(ws);
// Set browser socket in screen capture handler with user ID
this.screenCaptureHandler.setBrowserSocket(ws, userId);
this.handlers.set('screencap', this.screenCaptureHandler);
logger.log('✅ Browser socket set in screen capture handler');
logger.log('✅ Browser socket set in screen capture handler with userId:', userId);
// If the Mac app is already connected, we can trigger the ready sequence
if (this.macSocket) {
@ -526,11 +540,24 @@ export class ControlUnixHandler {
// Handle browser -> Mac messages
if (message.category === 'screencap') {
logger.log(`🖥️ Processing screencap message: ${message.action}`);
logger.log(`📋 Message ID: ${message.id}`);
logger.log(`📋 Message type: ${message.type}`);
logger.log(`📋 Full message:`, JSON.stringify(message));
// Add authentication context to the message
const authenticatedMessage = {
...message,
userId: this.screenCaptureHandler.getUserId() || 'unknown',
};
logger.log(`🔐 Adding userId ${authenticatedMessage.userId} to message`);
// Forward screen capture messages to Mac
if (this.macSocket) {
logger.log(`📤 Forwarding ${message.action} to Mac app via Unix socket`);
this.sendToMac(message);
logger.log(
`📤 Forwarding ${message.action} to Mac app via Unix socket with auth context`
);
logger.log(`🔌 Mac socket state: ${this.macSocket.destroyed ? 'DESTROYED' : 'ACTIVE'}`);
this.sendToMac(authenticatedMessage);
} else {
logger.warn('❌ No Mac connected to handle screen capture request');
logger.warn('💡 The Mac app needs to be running and connected via Unix socket');
@ -599,13 +626,21 @@ export class ControlUnixHandler {
// Skip processing for response messages that aren't pending requests
// This prevents response loops where error responses get processed again
if (message.type === 'response') {
// EXCEPT for screencap messages which need to be forwarded to the browser
if (message.type === 'response' && message.category !== 'screencap') {
logger.debug(
`Ignoring response message that has no pending request: ${message.id}, action: ${message.action}`
);
return;
}
// Log screencap responses that will be forwarded
if (message.type === 'response' && message.category === 'screencap') {
logger.log(
`📡 Forwarding screencap response to handler: ${message.id}, action: ${message.action}`
);
}
const handler = this.handlers.get(message.category);
if (!handler) {
logger.warn(`No handler for category: ${message.category}`);
@ -684,6 +719,7 @@ export class ControlUnixHandler {
logger.log(
`📤 Sending to Mac: ${message.category}:${message.action}, header: 4 bytes, payload: ${jsonData.length} bytes, total: ${fullData.length} bytes`
);
logger.log(`📋 Message ID being sent: ${message.id}`);
logger.debug(`📝 Message content: ${jsonStr.substring(0, 200)}...`);
// Log the actual bytes for the first few messages

View file

@ -182,4 +182,117 @@ describe('Control Unix Handler', () => {
expect(mockCallback).not.toHaveBeenCalled();
});
});
describe('Screencap Response Forwarding', () => {
it('should forward screencap response messages even without pending requests', async () => {
// Mock WebSocket for browser connection
const mockBrowserSocket = {
readyState: 1, // OPEN
send: vi.fn(),
};
// Mock the screen capture handler
const mockScreenCaptureHandler = {
setBrowserSocket: vi.fn(),
handleMessage: vi.fn().mockResolvedValue(null),
getUserId: vi.fn().mockReturnValue('test-user-id'),
};
// Set up the handler
(controlUnixHandler as any).screenCaptureHandler = mockScreenCaptureHandler;
(controlUnixHandler as any).handlers.set('screencap', mockScreenCaptureHandler);
mockScreenCaptureHandler.browserSocket = mockBrowserSocket;
// Create a screencap API response message (simulating response from Mac app)
const screencapResponse = {
id: 'response-123',
type: 'response' as const,
category: 'screencap' as const,
action: 'api-response',
payload: {
method: 'GET',
endpoint: '/processes',
data: [
{ processName: 'Terminal', pid: 1234, windows: [] },
{ processName: 'Safari', pid: 5678, windows: [] },
],
},
};
// Call handleMacMessage directly
await (controlUnixHandler as any).handleMacMessage(screencapResponse);
// Verify the handler was called with the message
expect(mockScreenCaptureHandler.handleMessage).toHaveBeenCalledWith(screencapResponse);
});
it('should ignore non-screencap response messages without pending requests', async () => {
// Mock a handler for system messages
const mockSystemHandler = {
handleMessage: vi.fn().mockResolvedValue(null),
};
(controlUnixHandler as any).handlers.set('system', mockSystemHandler);
// Create a system response message without a pending request
const systemResponse = {
id: 'response-456',
type: 'response' as const,
category: 'system' as const,
action: 'some-action',
payload: { data: 'test' },
};
// Call handleMacMessage directly
await (controlUnixHandler as any).handleMacMessage(systemResponse);
// Verify the handler was NOT called (message should be ignored)
expect(mockSystemHandler.handleMessage).not.toHaveBeenCalled();
});
it('should process screencap request messages normally', async () => {
// Mock the screen capture handler
const mockScreenCaptureHandler = {
handleMessage: vi.fn().mockResolvedValue({
id: 'request-789',
type: 'response',
category: 'screencap',
action: 'api-request',
payload: { success: true },
}),
};
(controlUnixHandler as any).handlers.set('screencap', mockScreenCaptureHandler);
// Create a screencap request message
const screencapRequest = {
id: 'request-789',
type: 'request' as const,
category: 'screencap' as const,
action: 'api-request',
payload: {
method: 'GET',
endpoint: '/displays',
},
};
// Mock sendToMac to capture the response
const sendToMacSpy = vi
.spyOn(controlUnixHandler as any, 'sendToMac')
.mockImplementation(() => {});
// Call handleMacMessage
await (controlUnixHandler as any).handleMacMessage(screencapRequest);
// Verify the handler was called
expect(mockScreenCaptureHandler.handleMessage).toHaveBeenCalledWith(screencapRequest);
// Verify response was sent back to Mac
expect(sendToMacSpy).toHaveBeenCalledWith(
expect.objectContaining({
type: 'response',
category: 'screencap',
})
);
});
});
});